This patch adds a network interface backend driver to qemu. It is a pure
userspace implementation using the gntdev interface. It uses "qnic" as
backend name in xenstore so it doesn't interfere with the netback
backend (aka "vnif").
The network backend is hooked into the corresponding qemu vlan, i.e.
vif 0 is hooked into vlan 0. To make the packets actually arrive
somewhere you additionally have to link the vlan to the outside world
using the usual qemu command line options such as "-net tap,...".
Signed-off-by: Gerd Hoffmann <kraxel@xxxxxxxxxx>
---
Makefile.target | 2 +-
hw/xen-backend.h | 1 +
hw/xen-machine.c | 1 +
hw/xen-nic.c | 448 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 451 insertions(+), 1 deletions(-)
create mode 100644 hw/xen-nic.c
diff --git a/Makefile.target b/Makefile.target
index 2d599d2..281d7fa 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -517,7 +517,7 @@ endif
# xen backend driver support
XEN_OBJS := xen-machine.o xen-backend.o
-XEN_OBJS += xen-console.o xen-framebuffer.o xen-disk.o
+XEN_OBJS += xen-console.o xen-framebuffer.o xen-disk.o xen-nic.o
ifeq ($(CONFIG_XEN), yes)
OBJS += $(XEN_OBJS)
LIBS += $(XEN_LIBS)
diff --git a/hw/xen-backend.h b/hw/xen-backend.h
index 941b0a6..2db2c3c 100644
--- a/hw/xen-backend.h
+++ b/hw/xen-backend.h
@@ -120,6 +120,7 @@ struct devops xen_console_ops; /* xen_console.c */
struct devops xen_kbdmouse_ops; /* xen_framebuffer.c */
struct devops xen_framebuffer_ops; /* xen_framebuffer.c */
struct devops xen_blkdev_ops; /* xen_disk.c */
+struct devops xen_netdev_ops; /* xen_nic.c */
void xen_set_display(int domid, DisplayState *ds);
diff --git a/hw/xen-machine.c b/hw/xen-machine.c
index 1b647a2..3fa4079 100644
--- a/hw/xen-machine.c
+++ b/hw/xen-machine.c
@@ -62,6 +62,7 @@ static void xenpv_init(ram_addr_t ram_size, int vga_ram_size,
xen_be_register("vkbd", &xen_kbdmouse_ops);
xen_be_register("vfb", &xen_framebuffer_ops);
xen_be_register("qdisk", &xen_blkdev_ops);
+ xen_be_register("qnic", &xen_netdev_ops);
/* setup framebuffer */
xen_set_display(xen_domid, ds);
diff --git a/hw/xen-nic.c b/hw/xen-nic.c
new file mode 100644
index 0000000..56d1474
--- /dev/null
+++ b/hw/xen-nic.c
@@ -0,0 +1,448 @@
+/*
+ * xen paravirt network card backend
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; under version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+#include <signal.h>
+#include <inttypes.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <pthread.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
+
+#include <xs.h>
+#include <xenctrl.h>
+#include <xen/io/xenbus.h>
+#include <xen/io/netif.h>
+
+#include "hw.h"
+#include "net.h"
+#include "qemu-char.h"
+#include "xen-backend.h"
+
+/* ------------------------------------------------------------- */
+
+struct netdev { /* per-instance state of one "qnic" network backend device */
+ struct xendev xendev; /* must be first */
+ char *mac; /* string read from the backend "mac" xenstore node in net_init() */
+ int tx_work; /* bumped by net_tx_response() when more tx requests are pending; cleared by net_tx_packets() */
+ int tx_ring_ref; /* frontend "tx-ring-ref" value, read in net_connect() */
+ int rx_ring_ref; /* frontend "rx-ring-ref" value, read in net_connect() */
+ struct netif_tx_sring *txs; /* grant mapping of tx_ring_ref; reset to NULL by net_disconnect() */
+ struct netif_rx_sring *rxs; /* grant mapping of rx_ring_ref; reset to NULL by net_disconnect() */
+ netif_tx_back_ring_t tx_ring; /* back ring over txs, set up with BACK_RING_INIT in net_connect() */
+ netif_rx_back_ring_t rx_ring; /* back ring over rxs, set up with BACK_RING_INIT in net_connect() */
+ VLANClientState *vs; /* qemu vlan client created in net_init(); tx packets are sent through it */
+};
+
+/* ------------------------------------------------------------- */
+
+#define PROTO_TCP 6
+#define PROTO_UDP 17
+
+/*
+ * Sum 'len' bytes of 'buf' as big-endian 16-bit words: bytes at even
+ * offsets contribute the high octet, bytes at odd offsets the low octet.
+ * A trailing odd byte is counted as a high octet.  The result is the
+ * raw, unfolded 32-bit sum; a non-positive 'len' yields 0.
+ */
+static uint32_t checksum_add(int len, uint8_t *buf)
+{
+    uint32_t total = 0;
+    int pos = 0;
+
+    /* complete 16-bit words */
+    while (pos + 1 < len) {
+        total += ((uint32_t)buf[pos] << 8) + (uint32_t)buf[pos + 1];
+        pos += 2;
+    }
+    /* left-over byte of an odd-sized buffer */
+    if (pos < len)
+        total += (uint32_t)buf[pos] << 8;
+    return total;
+}
+
+/*
+ * Fold the carries of a 32-bit running sum back into the low 16 bits
+ * until none are left, then return the one's complement of the result.
+ */
+static uint16_t checksum_finish(uint32_t sum)
+{
+    uint32_t carry;
+
+    for (carry = sum >> 16; carry != 0; carry = sum >> 16)
+        sum = (sum & 0xFFFF) + carry;
+    return (uint16_t)~sum;
+}
+
+/*
+ * Compute the TCP/UDP checksum over a segment plus its pseudo header.
+ *
+ * length: number of bytes at 'buf' (header + payload of the segment)
+ * proto:  protocol number that goes into the pseudo header
+ * addrs:  8 bytes, source address followed by destination address
+ * buf:    start of the TCP/UDP header
+ *
+ * Returns the folded and complemented 16-bit checksum.
+ */
+static uint16_t checksum_tcpudp(uint16_t length, uint16_t proto,
+                                uint8_t *addrs, uint8_t *buf)
+{
+    /* pseudo header part 1: protocol & length */
+    uint32_t acc = (uint32_t)proto + (uint32_t)length;
+
+    /* pseudo header part 2: src + dst address */
+    acc += checksum_add(8, addrs);
+    /* the segment itself */
+    acc += checksum_add(length, buf);
+
+    return checksum_finish(acc);
+}
+
+/*
+ * Fill in the TCP or UDP checksum of an Ethernet frame in place.
+ *
+ * data:   start of the frame; the IP header is expected directly after
+ *         a 14 byte Ethernet header
+ * length: number of valid bytes at 'data'
+ *
+ * The frame is left untouched when it is not IPv4, does not carry TCP
+ * or UDP, or when its header fields are inconsistent with 'length'.
+ * The frame content comes from the guest, so every offset derived from
+ * it is validated against 'length' before it is used.
+ */
+static void checksum_calculate(uint8_t *data, int length)
+{
+    enum { ETH_HDR_LEN = 14, IP_MIN_HDR_LEN = 20 };
+    int hlen, plen, proto, csum_offset;
+    uint16_t csum;
+    uint8_t *ip, *l4;
+
+    if (length < ETH_HDR_LEN + IP_MIN_HDR_LEN)
+        return; /* too short to hold an IPv4 header */
+    ip = data + ETH_HDR_LEN;
+
+    if ((ip[0] & 0xf0) != 0x40)
+        return; /* not IPv4 */
+    hlen = (ip[0] & 0x0f) * 4;
+    if (hlen < IP_MIN_HDR_LEN)
+        return; /* bogus header length */
+    plen = (ip[2] << 8 | ip[3]) - hlen;
+    if (plen < 0 || ETH_HDR_LEN + hlen + plen > length)
+        return; /* IP total length does not fit into the frame */
+    proto = ip[9];
+
+    /* offset of the checksum field within the transport header */
+    switch (proto) {
+    case PROTO_TCP:
+        csum_offset = 16;
+        break;
+    case PROTO_UDP:
+        csum_offset = 6;
+        break;
+    default:
+        return;
+    }
+
+    if (plen < csum_offset + 2)
+        return;
+
+    /* the checksum field must be zero while the sum is computed */
+    l4 = ip + hlen;
+    l4[csum_offset] = 0;
+    l4[csum_offset + 1] = 0;
+    csum = checksum_tcpudp(plen, proto, ip + 12, l4);
+    l4[csum_offset] = csum >> 8;
+    l4[csum_offset + 1] = csum & 0xff;
+}
+
+
+/* ------------------------------------------------------------- */
+
+/*
+ * Put one response for tx request 'txp' with status 'st' on the tx
+ * ring, publish it and notify the frontend if the ring macros say a
+ * notification is needed.
+ *
+ * When the response producer index has caught up with req_cons, a
+ * final check for new requests is made; if there are any, tx_work is
+ * bumped so that net_tx_packets() does another pass over the ring.
+ */
+static void net_tx_response(struct netdev *netdev, netif_tx_request_t *txp,
+                            int8_t st)
+{
+    RING_IDX idx = netdev->tx_ring.rsp_prod_pvt;
+    netif_tx_response_t *rsp = RING_GET_RESPONSE(&netdev->tx_ring, idx);
+    int send_event;
+    int pending;
+
+    rsp->id = txp->id;
+    rsp->status = st;
+
+#if 0
+    if (txp->flags & NETTXF_extra_info)
+        RING_GET_RESPONSE(&netdev->tx_ring, ++idx)->status = NETIF_RSP_NULL;
+#endif
+
+    idx++;
+    netdev->tx_ring.rsp_prod_pvt = idx;
+    RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netdev->tx_ring, send_event);
+    if (send_event)
+        xen_be_send_notify(&netdev->xendev);
+
+    if (idx != netdev->tx_ring.req_cons)
+        return;
+
+    RING_FINAL_CHECK_FOR_REQUESTS(&netdev->tx_ring, pending);
+    if (pending)
+        netdev->tx_work++;
+}
+
+/*
+ * Fail tx request 'txp' by answering it with NETIF_RSP_ERROR.
+ *
+ * 'end' is accepted for the callers' convenience but not used: only
+ * the one request is failed.  Open question carried over from the
+ * original author: netback fails every request in the ring up to
+ * 'end' instead -- is that needed when neither SG nor TSO is
+ * supported?
+ */
+static void net_tx_error(struct netdev *netdev, netif_tx_request_t *txp,
+                         RING_IDX end)
+{
+    (void)end;
+    net_tx_response(netdev, txp, NETIF_RSP_ERROR);
+}
+
+/*
+ * Process all pending requests on the tx ring.
+ *
+ * Each request is copied out of the shared ring, validated (no
+ * extra-info / more-data chaining, at least 14 bytes, no page
+ * crossing), its grant is mapped read-only and the packet is handed
+ * to the qemu vlan.  Every request is answered with either
+ * NETIF_RSP_OKAY or NETIF_RSP_ERROR.  The outer loop runs again for
+ * as long as net_tx_response() flagged more work via tx_work.
+ */
+static void net_tx_packets(struct netdev *netdev)
+{
+    netif_tx_request_t txreq;
+    RING_IDX rc, rp;
+    void *page;
+    const uint8_t *pkt;
+    uint8_t csum_buf[XC_PAGE_SIZE];
+
+    for (;;) {
+        rc = netdev->tx_ring.req_cons;
+        rp = netdev->tx_ring.sring->req_prod;
+        xen_rmb(); /* Ensure we see queued requests up to 'rp'. */
+
+        while ((rc != rp)) {
+            if (RING_REQUEST_CONS_OVERFLOW(&netdev->tx_ring, rc))
+                break;
+            /* work on a private copy, the frontend can modify the ring */
+            memcpy(&txreq, RING_GET_REQUEST(&netdev->tx_ring, rc),
+                   sizeof(txreq));
+            netdev->tx_ring.req_cons = ++rc;
+
+#if 1
+            /* should not happen in theory, we don't announce the *
+             * feature-{sg,gso,whatelse} flags in xenstore (yet?) */
+            if (txreq.flags & NETTXF_extra_info) {
+                xen_be_printf(&netdev->xendev, 0, "FIXME: extra info flag\n");
+                net_tx_error(netdev, &txreq, rc);
+                continue;
+            }
+            if (txreq.flags & NETTXF_more_data) {
+                xen_be_printf(&netdev->xendev, 0, "FIXME: more data flag\n");
+                net_tx_error(netdev, &txreq, rc);
+                continue;
+            }
+#endif
+
+            if (txreq.size < 14) {
+                xen_be_printf(&netdev->xendev, 0, "bad packet size: %d\n",
+                              txreq.size);
+                net_tx_error(netdev, &txreq, rc);
+                continue;
+            }
+
+            /* only a single page is mapped below */
+            if ((txreq.offset + txreq.size) > XC_PAGE_SIZE) {
+                xen_be_printf(&netdev->xendev, 0, "error: page crossing\n");
+                net_tx_error(netdev, &txreq, rc);
+                continue;
+            }
+
+            xen_be_printf(&netdev->xendev, 3,
+                          "tx packet ref %d, off %d, len %d, flags 0x%x%s%s%s%s\n",
+                          txreq.gref, txreq.offset, txreq.size, txreq.flags,
+                          (txreq.flags & NETTXF_csum_blank)     ? " csum_blank"     : "",
+                          (txreq.flags & NETTXF_data_validated) ? " data_validated" : "",
+                          (txreq.flags & NETTXF_more_data)      ? " more_data"      : "",
+                          (txreq.flags & NETTXF_extra_info)     ? " extra_info"     : "");
+
+            page = xc_gnttab_map_grant_ref(netdev->xendev.gnttabdev,
+                                           netdev->xendev.dom,
+                                           txreq.gref, PROT_READ);
+            if (NULL == page) {
+                xen_be_printf(&netdev->xendev, 0,
+                              "error: gref dereference failed\n");
+                net_tx_error(netdev, &txreq, rc);
+                continue;
+            }
+
+            pkt = (const uint8_t *)page + txreq.offset;
+            if (txreq.flags & NETTXF_csum_blank) {
+                /*
+                 * The grant is mapped PROT_READ, so the checksum cannot
+                 * be filled in place: compute it on a private copy.
+                 * The page-crossing check above bounds txreq.size by
+                 * XC_PAGE_SIZE, so the copy always fits.
+                 */
+                memcpy(csum_buf, pkt, txreq.size);
+                checksum_calculate(csum_buf, txreq.size);
+                pkt = csum_buf;
+            }
+            qemu_send_packet(netdev->vs, pkt, txreq.size);
+            xc_gnttab_munmap(netdev->xendev.gnttabdev, page, 1);
+            net_tx_response(netdev, &txreq, NETIF_RSP_OKAY);
+        }
+        if (!netdev->tx_work)
+            break;
+        netdev->tx_work = 0;
+    }
+}
+
+/* ------------------------------------------------------------- */
+
+/*
+ * Put one response for rx request 'req' on the rx ring, publish it
+ * and notify the frontend if the ring macros say a notification is
+ * needed.
+ *
+ * The status field of the response carries 'size' when 'st' is not
+ * negative, and 'st' itself otherwise.
+ */
+static void net_rx_response(struct netdev *netdev,
+                            netif_rx_request_t *req, int8_t st,
+                            uint16_t offset, uint16_t size,
+                            uint16_t flags)
+{
+    RING_IDX idx = netdev->rx_ring.rsp_prod_pvt;
+    netif_rx_response_t *rsp = RING_GET_RESPONSE(&netdev->rx_ring, idx);
+    int send_event;
+
+    rsp->offset = offset;
+    rsp->flags = flags;
+    rsp->id = req->id;
+    rsp->status = (st < 0) ? (int16_t)st : (int16_t)size;
+
+    xen_be_printf(&netdev->xendev, 3,
+                  "rx response: idx %d, status %d, flags 0x%x\n",
+                  idx, rsp->status, rsp->flags);
+
+    idx++;
+    netdev->rx_ring.rsp_prod_pvt = idx;
+    RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netdev->rx_ring, send_event);
+    if (send_event)
+        xen_be_send_notify(&netdev->xendev);
+}
+
+#define NET_IP_ALIGN 2
+
+/*
+ * vlan client callback: report whether a packet can be received now.
+ *
+ * Returns 1 when the backend is connected and the frontend has posted
+ * at least one rx request that can be consumed, 0 otherwise.
+ */
+static int net_rx_ok(void *opaque)
+{
+    struct netdev *netdev = opaque;
+    RING_IDX cons, prod;
+    int have_buffer;
+
+    if (netdev->xendev.be_state != XenbusStateConnected)
+        return 0;
+
+    cons = netdev->rx_ring.req_cons;
+    prod = netdev->rx_ring.sring->req_prod;
+    xen_rmb();
+
+    have_buffer = (cons != prod) &&
+                  !RING_REQUEST_CONS_OVERFLOW(&netdev->rx_ring, cons);
+    if (!have_buffer) {
+        xen_be_printf(&netdev->xendev, 2, "%s: no rx buffers (%d/%d)\n",
+                      __FUNCTION__, cons, prod);
+        return 0;
+    }
+    return 1;
+}
+
+/*
+ * vlan client callback: deliver one packet ('buf', 'size' bytes) to
+ * the frontend.
+ *
+ * The packet is dropped when the backend is not connected, when no rx
+ * request is available, or when it does not fit into a single page
+ * behind the NET_IP_ALIGN offset.  Otherwise the next rx request is
+ * consumed, its grant is mapped, the data is copied in at offset
+ * NET_IP_ALIGN and a response is queued.  A grant that cannot be
+ * mapped is answered with NETIF_RSP_ERROR.
+ */
+static void net_rx_packet(void *opaque, const uint8_t *buf, int size)
+{
+    struct netdev *netdev = opaque;
+    netif_rx_request_t rxreq;
+    RING_IDX rc, rp;
+    void *page;
+
+    if (netdev->xendev.be_state != XenbusStateConnected)
+        return;
+
+    rc = netdev->rx_ring.req_cons;
+    rp = netdev->rx_ring.sring->req_prod;
+    xen_rmb(); /* Ensure we see queued requests up to 'rp'. */
+
+    if (rc == rp || RING_REQUEST_CONS_OVERFLOW(&netdev->rx_ring, rc)) {
+        xen_be_printf(&netdev->xendev, 2, "no buffer, drop packet\n");
+        return;
+    }
+    if (size > XC_PAGE_SIZE - NET_IP_ALIGN) {
+        /* message is newline terminated like every other log line here */
+        xen_be_printf(&netdev->xendev, 0, "packet too big (%d > %ld)\n",
+                      size, (long)(XC_PAGE_SIZE - NET_IP_ALIGN));
+        return;
+    }
+
+    /* work on a private copy, the frontend can modify the ring */
+    memcpy(&rxreq, RING_GET_REQUEST(&netdev->rx_ring, rc), sizeof(rxreq));
+    netdev->rx_ring.req_cons = ++rc;
+
+    page = xc_gnttab_map_grant_ref(netdev->xendev.gnttabdev,
+                                   netdev->xendev.dom,
+                                   rxreq.gref, PROT_WRITE);
+    if (NULL == page) {
+        xen_be_printf(&netdev->xendev, 0, "error: gref dereference failed\n");
+        net_rx_response(netdev, &rxreq, NETIF_RSP_ERROR, 0, 0, 0);
+        return;
+    }
+    memcpy((uint8_t *)page + NET_IP_ALIGN, buf, size);
+    xc_gnttab_munmap(netdev->xendev.gnttabdev, page, 1);
+    net_rx_response(netdev, &rxreq, NETIF_RSP_OKAY, NET_IP_ALIGN, size, 0);
+}
+
+/* ------------------------------------------------------------- */
+
+/*
+ * devops .init callback.
+ *
+ * Reads the "mac" node from the backend xenstore directory (unless it
+ * is already known), attaches a client to the qemu vlan whose id is
+ * the device number, and announces rx-copy (but not rx-flip) support.
+ *
+ * Returns 0 on success, -1 while the mac address is not available.
+ */
+static int net_init(struct xendev *xendev)
+{
+    struct netdev *netdev = container_of(xendev, struct netdev, xendev);
+
+    /* read xenstore entries; without a mac there is nothing to set up */
+    if (!netdev->mac) {
+        netdev->mac = xenstore_read_be_str(&netdev->xendev, "mac");
+        if (!netdev->mac)
+            return -1;
+    }
+
+    /* vif N is hooked into vlan N */
+    netdev->vs = qemu_new_vlan_client(qemu_find_vlan(netdev->xendev.dev),
+                                      net_rx_packet, net_rx_ok, netdev);
+    snprintf(netdev->vs->info_str, sizeof(netdev->vs->info_str),
+             "nic: xenbus vif macaddr=%s", netdev->mac);
+
+    /* fill info */
+    xenstore_write_be_int(&netdev->xendev, "feature-rx-copy", 1);
+    xenstore_write_be_int(&netdev->xendev, "feature-rx-flip", 0);
+
+    return 0;
+}
+
+/*
+ * devops .connect callback.
+ *
+ * Reads the ring references and the event channel from the frontend
+ * xenstore directory, insists on rx-copy mode, maps both shared rings,
+ * initializes the back rings and binds the event channel.
+ *
+ * Returns 0 on success and -1 on every failure.  When only one of the
+ * two rings could be mapped it is unmapped again before returning, so
+ * no mapping is left behind by a failed attempt.
+ */
+static int net_connect(struct xendev *xendev)
+{
+    struct netdev *netdev = container_of(xendev, struct netdev, xendev);
+    int rx_copy;
+
+    if (-1 == xenstore_read_fe_int(&netdev->xendev, "tx-ring-ref",
+                                   &netdev->tx_ring_ref))
+        return -1;
+    if (-1 == xenstore_read_fe_int(&netdev->xendev, "rx-ring-ref",
+                                   &netdev->rx_ring_ref))
+        return -1;
+    if (-1 == xenstore_read_fe_int(&netdev->xendev, "event-channel",
+                                   &netdev->xendev.remote_port))
+        return -1;
+
+    /* a missing node counts as "no rx-copy" */
+    if (-1 == xenstore_read_fe_int(&netdev->xendev, "request-rx-copy",
+                                   &rx_copy))
+        rx_copy = 0;
+    if (0 == rx_copy) {
+        xen_be_printf(&netdev->xendev, 0,
+                      "frontend doesn't support rx-copy.\n");
+        return -1;
+    }
+
+    netdev->txs = xc_gnttab_map_grant_ref(netdev->xendev.gnttabdev,
+                                          netdev->xendev.dom,
+                                          netdev->tx_ring_ref,
+                                          PROT_READ | PROT_WRITE);
+    netdev->rxs = xc_gnttab_map_grant_ref(netdev->xendev.gnttabdev,
+                                          netdev->xendev.dom,
+                                          netdev->rx_ring_ref,
+                                          PROT_READ | PROT_WRITE);
+    if (!netdev->txs || !netdev->rxs) {
+        /* drop whichever ring did get mapped */
+        if (netdev->txs) {
+            xc_gnttab_munmap(netdev->xendev.gnttabdev, netdev->txs, 1);
+            netdev->txs = NULL;
+        }
+        if (netdev->rxs) {
+            xc_gnttab_munmap(netdev->xendev.gnttabdev, netdev->rxs, 1);
+            netdev->rxs = NULL;
+        }
+        return -1;
+    }
+    BACK_RING_INIT(&netdev->tx_ring, netdev->txs, XC_PAGE_SIZE);
+    BACK_RING_INIT(&netdev->rx_ring, netdev->rxs, XC_PAGE_SIZE);
+
+    xen_be_bind_evtchn(&netdev->xendev);
+
+    xen_be_printf(&netdev->xendev, 1, "ok: tx-ring-ref %d, rx-ring-ref %d, "
+                  "remote port %d, local port %d\n",
+                  netdev->tx_ring_ref, netdev->rx_ring_ref,
+                  netdev->xendev.remote_port, netdev->xendev.local_port);
+    return 0;
+}
+
+/*
+ * devops .disconnect callback: unbind the event channel, then unmap
+ * whichever shared rings are currently mapped and clear the pointers
+ * so that a repeated call is harmless.
+ */
+static void net_disconnect(struct xendev *xendev)
+{
+    struct netdev *netdev = container_of(xendev, struct netdev, xendev);
+
+    xen_be_unbind_evtchn(&netdev->xendev);
+
+    if (netdev->txs != NULL) {
+        xc_gnttab_munmap(netdev->xendev.gnttabdev, netdev->txs, 1);
+        netdev->txs = NULL;
+    }
+
+    if (netdev->rxs != NULL) {
+        xc_gnttab_munmap(netdev->xendev.gnttabdev, netdev->rxs, 1);
+        netdev->rxs = NULL;
+    }
+}
+
+/*
+ * devops .event callback: an event-channel notification arrived, so
+ * go and process the tx ring.
+ */
+static void net_event(struct xendev *xendev)
+{
+    net_tx_packets(container_of(xendev, struct netdev, xendev));
+}
+
+/* ------------------------------------------------------------- */
+
+struct devops xen_netdev_ops = { /* callback table registered for the "qnic" backend in xenpv_init() */
+ .size = sizeof(struct netdev), /* struct netdev embeds struct xendev as its first member */
+ .flags = DEVOPS_FLAG_NEED_GNTDEV, /* NOTE(review): presumably requests xendev.gnttabdev, which all grant mappings here use -- confirm in xen-backend */
+ .init = net_init, /* reads "mac", creates the vlan client, writes the feature nodes */
+ .connect = net_connect, /* maps both rings and binds the event channel */
+ .event = net_event, /* runs net_tx_packets() */
+ .disconnect = net_disconnect, /* unbinds the event channel and unmaps the rings */
+};
--
1.5.4.1
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|