Hi Konrad,
First of all, thank you for your review. You noticed quite a few points
I never really looked at because I inherited them from the current
tapdisk code. But probably I should fix these issues as well. ;-)
Konrad Rzeszutek schrieb:
>> + blkif->fds[READ] = open_ctrl_socket(wrctldev);
>> + blkif->fds[WRITE] = open_ctrl_socket(rdctldev);
>
> How about freeing the data here once?
>
>> +
>> + if (blkif->fds[READ] == -1 || blkif->fds[WRITE] == -1) {
>> + free(rdctldev);
>> + free(wrctldev);
>
> And then this is not needed.
>
>> + return -1;
>> + }
>> +
>> + DPRINTF("Attached to qemu blktap pipes\n");
>> + free(rdctldev);
>> + free(wrctldev);
>
> Nor these two lines above.
Hmm, good point. This code looks a bit silly... Will move the free to
the place you suggested.
>> --- a/tools/python/xen/xend/server/BlktapController.py Mon Mar 10
>> 22:51:57 2008 +0000
>> +++ b/tools/python/xen/xend/server/BlktapController.py Thu Mar 13
>> 13:00:18 2008 +0100
>> @@ -13,7 +13,9 @@ blktap_disk_types = [
>> 'vmdk',
>> 'ram',
>> 'qcow',
>> - 'qcow2'
>> + 'qcow2',
>> +
>> + 'ioemu'
>
> Why add the extra \n ?
I wanted to separate the ioemu pseudo driver (which is the only one that
doesn't go through tapdisk) from the "real" tapdisk drivers.
>> +static struct td_state *state_init(void)
>> +{
>> + int i;
>> + struct td_state *s;
>> + blkif_t *blkif;
>> +
>> + s = malloc(sizeof(struct td_state));
>
> Would it make sense to zero out the allocated memory?
This code comes directly from tapdisk and it worked there. On the other
hand, it certainly wouldn't hurt.
>> + switch (req->operation)
>> + {
>> + case BLKIF_OP_WRITE:
>> + aiocb_info = malloc(sizeof(*aiocb_info));
>> +
>> + aiocb_info->s = s;
>> + aiocb_info->sector = sector_nr;
>> + aiocb_info->nr_secs = nsects;
>> + aiocb_info->idx = idx;
>> + aiocb_info->i = i;
>> +
>> + ret = (NULL == bdrv_aio_write(s->bs, sector_nr,
>> + page, nsects,
>> + qemu_send_responses,
>> + aiocb_info));
>
> Who de-allocates aiocb_info?
qemu_send_responses is a callback function which gets aiocb_info as
parameter and frees it when it's done.
I've attached a new version of the patch.
Kevin
diff -r 7530c4dba8a5 tools/blktap/drivers/blktapctrl.c
--- a/tools/blktap/drivers/blktapctrl.c Mon Mar 3 15:19:39 2008
+++ b/tools/blktap/drivers/blktapctrl.c Fri Mar 14 11:14:10 2008
@@ -501,6 +501,80 @@
return 0;
}
+/*
+ * Connect to the control pipes of a qemu-dm instance.
+ *
+ * The pipe names are chosen from qemu-dm's point of view: qemu-dm reads its
+ * control messages from "qemu-read-<domid>" and answers on
+ * "qemu-write-<domid>", so the directions are swapped on this side.
+ *
+ * Returns 0 on success and -1 on failure. On failure no descriptor is left
+ * open and both blkif->fds entries are set to -1.
+ */
+static int connect_qemu(blkif_t *blkif)
+{
+    char *rdctldev, *wrctldev;
+
+    if (asprintf(&rdctldev, BLKTAP_CTRL_DIR "/qemu-read-%d",
+                 blkif->domid) < 0)
+        return -1;
+
+    if (asprintf(&wrctldev, BLKTAP_CTRL_DIR "/qemu-write-%d",
+                 blkif->domid) < 0) {
+        free(rdctldev);
+        return -1;
+    }
+
+    DPRINTF("Using qemu blktap pipe: %s\n", rdctldev);
+
+    blkif->fds[READ] = open_ctrl_socket(wrctldev);
+    blkif->fds[WRITE] = open_ctrl_socket(rdctldev);
+
+    /* The names are only needed for opening the pipes */
+    free(rdctldev);
+    free(wrctldev);
+
+    if (blkif->fds[READ] == -1 || blkif->fds[WRITE] == -1) {
+        /* Do not leak the descriptor that did open */
+        if (blkif->fds[READ] != -1)
+            close(blkif->fds[READ]);
+        if (blkif->fds[WRITE] != -1)
+            close(blkif->fds[WRITE]);
+        blkif->fds[READ] = -1;
+        blkif->fds[WRITE] = -1;
+        return -1;
+    }
+
+    DPRINTF("Attached to qemu blktap pipes\n");
+    return 0;
+}
+
+/*
+ * Launch a tapdisk instance for the given minor number and connect to its
+ * control pipes.
+ *
+ * Returns 0 on success and -1 on failure. On failure no descriptor opened
+ * here is left open and the affected blkif->fds entries are set to -1.
+ */
+static int connect_tapdisk(blkif_t *blkif, int minor)
+{
+    char *rdctldev = NULL, *wrctldev = NULL;
+    int ret = -1;
+
+    DPRINTF("tapdisk process does not exist:\n");
+
+    if (asprintf(&rdctldev,
+                 "%s/tapctrlread%d", BLKTAP_CTRL_DIR, minor) == -1) {
+        /* The pointer is undefined after a failed asprintf */
+        rdctldev = NULL;
+        goto out;
+    }
+
+    if (asprintf(&wrctldev,
+                 "%s/tapctrlwrite%d", BLKTAP_CTRL_DIR, minor) == -1) {
+        wrctldev = NULL;
+        goto out;
+    }
+
+    blkif->fds[READ] = open_ctrl_socket(rdctldev);
+    blkif->fds[WRITE] = open_ctrl_socket(wrctldev);
+
+    if (blkif->fds[READ] == -1 || blkif->fds[WRITE] == -1)
+        goto close_fds;
+
+    /*launch the new process*/
+    DPRINTF("Launching process, CMDLINE [tapdisk %s %s]\n",
+            wrctldev, rdctldev);
+
+    if (launch_tapdisk(wrctldev, rdctldev) == -1) {
+        DPRINTF("Unable to fork, cmdline: [tapdisk %s %s]\n",
+                wrctldev, rdctldev);
+        goto close_fds;
+    }
+
+    ret = 0;
+    goto out;
+
+close_fds:
+    /* Do not leak whichever descriptors were opened above */
+    if (blkif->fds[READ] != -1)
+        close(blkif->fds[READ]);
+    if (blkif->fds[WRITE] != -1)
+        close(blkif->fds[WRITE]);
+    blkif->fds[READ] = -1;
+    blkif->fds[WRITE] = -1;
+
+out:
+    /* free(NULL) is a no-op, so no guards are needed */
+    free(rdctldev);
+    free(wrctldev);
+
+    return ret;
+}
+
int blktapctrl_new_blkif(blkif_t *blkif)
{
blkif_info_t *blk;
@@ -524,30 +598,14 @@
blkif->cookie = next_cookie++;
if (!exist) {
- DPRINTF("Process does not exist:\n");
- if (asprintf(&rdctldev,
- "%s/tapctrlread%d", BLKTAP_CTRL_DIR,
minor) == -1)
- goto fail;
- if (asprintf(&wrctldev,
- "%s/tapctrlwrite%d", BLKTAP_CTRL_DIR,
minor) == -1) {
- free(rdctldev);
- goto fail;
+ if (type == DISK_TYPE_IOEMU) {
+ if (connect_qemu(blkif))
+ goto fail;
+ } else {
+ if (connect_tapdisk(blkif, minor))
+ goto fail;
}
- blkif->fds[READ] = open_ctrl_socket(rdctldev);
- blkif->fds[WRITE] = open_ctrl_socket(wrctldev);
-
- if (blkif->fds[READ] == -1 || blkif->fds[WRITE] == -1)
- goto fail;
-
- /*launch the new process*/
- DPRINTF("Launching process, CMDLINE [tapdisk %s
%s]\n",wrctldev, rdctldev);
- if (launch_tapdisk(wrctldev, rdctldev) == -1) {
- DPRINTF("Unable to fork, cmdline: [tapdisk %s
%s]\n",wrctldev, rdctldev);
- goto fail;
- }
-
- free(rdctldev);
- free(wrctldev);
+
} else {
DPRINTF("Process exists!\n");
blkif->fds[READ] = exist->fds[READ];
diff -r 7530c4dba8a5 tools/blktap/drivers/tapdisk.h
--- a/tools/blktap/drivers/tapdisk.h Mon Mar 3 15:19:39 2008
+++ b/tools/blktap/drivers/tapdisk.h Fri Mar 14 11:14:10 2008
@@ -167,6 +167,7 @@
#define DISK_TYPE_RAM 3
#define DISK_TYPE_QCOW 4
#define DISK_TYPE_QCOW2 5
+#define DISK_TYPE_IOEMU 6
/*Define Individual Disk Parameters here */
@@ -227,6 +228,16 @@
0,
#ifdef TAPDISK
&tapdisk_qcow2,
+#endif
+};
+
+static disk_info_t ioemu_disk = { /* pseudo disk type for images served by qemu-dm */
+ DISK_TYPE_IOEMU, /* type id defined earlier in this header (6) */
+ "ioemu disk",
+ "ioemu", /* same string as the 'ioemu' entry added to blktap_disk_types in BlktapController.py */
+ 0,
+#ifdef TAPDISK
+ NULL /* NOTE(review): presumably the tapdisk driver slot, empty because this type is not handled by tapdisk - confirm */
#endif
};
@@ -238,6 +249,7 @@
&ram_disk,
&qcow_disk,
&qcow2_disk,
+ &ioemu_disk,
};
typedef struct driver_list_entry {
diff -r 7530c4dba8a5 tools/blktap/lib/blktaplib.h
--- a/tools/blktap/lib/blktaplib.h Mon Mar 3 15:19:39 2008
+++ b/tools/blktap/lib/blktaplib.h Fri Mar 14 11:14:10 2008
@@ -221,15 +221,5 @@
((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * getpagesize()) + \
((_seg) * getpagesize()))
-/* Defines that are only used by library clients */
-
-#ifndef __COMPILING_BLKTAP_LIB
-
-static char *blkif_op_name[] = {
- [BLKIF_OP_READ] = "READ",
- [BLKIF_OP_WRITE] = "WRITE",
-};
-
-#endif /* __COMPILING_BLKTAP_LIB */
#endif /* __BLKTAPLIB_H__ */
diff -r 7530c4dba8a5 tools/ioemu/Makefile.target
--- a/tools/ioemu/Makefile.target Mon Mar 3 15:19:39 2008
+++ b/tools/ioemu/Makefile.target Fri Mar 14 11:14:10 2008
@@ -17,6 +17,7 @@
VPATH=$(SRC_PATH):$(TARGET_PATH):$(SRC_PATH)/hw:$(SRC_PATH)/audio
CPPFLAGS+=-I. -I.. -I$(TARGET_PATH) -I$(SRC_PATH)
CPPFLAGS+= -I$(XEN_ROOT)/tools/libxc
+CPPFLAGS+= -I$(XEN_ROOT)/tools/blktap/lib
CPPFLAGS+= -I$(XEN_ROOT)/tools/xenstore
CPPFLAGS+= -I$(XEN_ROOT)/tools/include
ifdef CONFIG_DARWIN_USER
@@ -429,6 +430,7 @@
VL_OBJS+= usb-uhci.o smbus_eeprom.o
VL_OBJS+= piix4acpi.o
VL_OBJS+= xenstore.o
+VL_OBJS+= xen_blktap.o
VL_OBJS+= xen_platform.o
VL_OBJS+= xen_machine_fv.o
VL_OBJS+= xen_machine_pv.o
diff -r 7530c4dba8a5 tools/ioemu/hw/xen_machine_pv.c
--- a/tools/ioemu/hw/xen_machine_pv.c Mon Mar 3 15:19:39 2008
+++ b/tools/ioemu/hw/xen_machine_pv.c Fri Mar 14 11:14:10 2008
@@ -26,6 +26,9 @@
#include "xen_console.h"
#include "xenfb.h"
+/* Defined in hw/xen_blktap.c; returns 0 on success and -1 on failure. */
+extern int init_blktap(void);
+
+
/* The Xen PV machine currently provides
* - a virtual framebuffer
* - ....
@@ -40,6 +43,10 @@
{
struct xenfb *xenfb;
extern int domid;
+
+
+ /* Initialize tapdisk client */
+ init_blktap();
/* Connect to text console */
if (serial_hds[0]) {
diff -r 7530c4dba8a5 tools/ioemu/vl.c
--- a/tools/ioemu/vl.c Mon Mar 3 15:19:39 2008
+++ b/tools/ioemu/vl.c Fri Mar 14 11:14:10 2008
@@ -6266,6 +6266,12 @@
powerdown_requested = 1;
if (cpu_single_env)
cpu_interrupt(cpu_single_env, CPU_INTERRUPT_EXIT);
+}
+
+/*
+ * SIGHUP handler: report the signal on stderr and leave through exit(), so
+ * that functions registered with atexit() are run.
+ *
+ * NOTE(review): neither stdio nor exit() is async-signal-safe; confirm this
+ * is acceptable for every context in which SIGHUP can be delivered.
+ */
+static void qemu_sighup_handler(int signal)
+{
+    fputs("Received SIGHUP, terminating.\n", stderr);
+    exit(0);
}
void main_loop_wait(int timeout)
@@ -7976,7 +7982,7 @@
#ifndef CONFIG_STUBDOM
/* Unblock SIGTERM and SIGHUP, which may have been blocked by the caller */
- signal(SIGHUP, SIG_DFL);
+ signal(SIGHUP, qemu_sighup_handler);
sigemptyset(&set);
sigaddset(&set, SIGTERM);
sigaddset(&set, SIGHUP);
diff -r 7530c4dba8a5 tools/python/xen/xend/server/BlktapController.py
--- a/tools/python/xen/xend/server/BlktapController.py Mon Mar 3 15:19:39 2008
+++ b/tools/python/xen/xend/server/BlktapController.py Fri Mar 14 11:14:10 2008
@@ -13,7 +13,9 @@
'vmdk',
'ram',
'qcow',
- 'qcow2'
+ 'qcow2',
+
+ 'ioemu'
]
class BlktapController(BlkifController):
diff -r 7530c4dba8a5 tools/ioemu/hw/xen_blktap.c
--- /dev/null Mon Mar 3 15:19:39 2008
+++ b/tools/ioemu/hw/xen_blktap.c Fri Mar 14 11:14:10 2008
@@ -0,0 +1,686 @@
+/* xen_blktap.c
+ *
+ * Interface to blktapctrl to allow use of qemu block drivers with blktap.
+ * This file is based on tools/blktap/drivers/tapdisk.c
+ *
+ * Copyright (c) 2005 Julian Chesterfield and Andrew Warfield.
+ * Copyright (c) 2008 Kevin Wolf
+ */
+
+/*
+ * There are several communication channels which are used by this interface:
+ *
+ * - A pair of pipes for receiving and sending general control messages
+ * (qemu-read-N and qemu-write-N in /var/run/tap, where N is the domain ID).
+ * These control messages are handled by handle_blktap_ctrlmsg().
+ *
+ * - One file descriptor per attached disk (/dev/xen/blktapN) for disk
+ * specific control messages. A callback is triggered on this fd if there
+ * is a new IO request. The callback function is handle_blktap_iomsg().
+ *
+ * - A shared ring for each attached disk containing the actual IO requests
+ * and responses. Whenever handle_blktap_iomsg() is triggered it processes
+ * the requests on this ring.
+ */
+
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+
+#include "vl.h"
+#include "blktaplib.h"
+#include "xen_blktap.h"
+#include "block_int.h"
+
+#define MSG_SIZE 4096
+
+#define BLKTAP_CTRL_DIR "/var/run/tap"
+
+/* If enabled, print debug messages to stderr */
+#if 1
+#define DPRINTF(_f, _a...) fprintf(stderr, __FILE__ ":%d: " _f, __LINE__, ##_a)
+#else
+#define DPRINTF(_f, _a...) ((void)0)
+#endif
+
+#if 1
+#define ASSERT(_p) \
+ if ( !(_p) ) { DPRINTF("Assertion '%s' failed, line %d, file %s\n", #_p , \
+ __LINE__, __FILE__); *(int*)0=0; }
+#else
+#define ASSERT(_p) ((void)0)
+#endif
+
+
+extern int domid;
+
+int read_fd;
+int write_fd;
+
+static pid_t process;
+fd_list_entry_t *fd_start = NULL;
+
+static void handle_blktap_iomsg(void* private);
+
+struct aiocb_info { /* per-segment context passed to qemu_send_responses() as the AIO callback argument */
+ struct td_state *s; /* disk the segment was submitted for */
+ uint64_t sector; /* first sector of the segment */
+ int nr_secs; /* number of sectors in the segment */
+ int idx; /* index into blkif->pending_list, taken from req->id */
+ long i; /* segment number within the request */
+};
+
+/*
+ * Tear down one attached disk: close the image, unmap the shared memory
+ * region, unlink the disk from the fd_start list, unregister and close the
+ * tap device fd and release all memory belonging to the state.
+ *
+ * s must have been registered with add_fd_entry(). It is freed here and must
+ * not be used afterwards.
+ */
+static void unmap_disk(struct td_state *s)
+{
+    tapdev_info_t *info = s->ring_info;
+    fd_list_entry_t *entry = s->fd_entry;
+
+    bdrv_close(s->bs);
+
+    /*
+     * ring_info is calloc'ed, so mem is NULL until map_new_dev() ran; a
+     * failed mmap leaves MAP_FAILED behind, which must not be unmapped.
+     */
+    if (info != NULL && info->mem != NULL && info->mem != MAP_FAILED)
+        munmap(info->mem, getpagesize() * BLKTAP_MMAP_REGION_SIZE);
+
+    /* Unlink the entry from the list */
+    *entry->pprev = entry->next;
+    if (entry->next)
+        entry->next->pprev = entry->pprev;
+
+    /*
+     * NOTE(review): info->fd is still 0 when the device was never mapped;
+     * confirm a CLOSE message cannot arrive before NEWDEV.
+     */
+    if (info != NULL) {
+        qemu_set_fd_handler2(info->fd, NULL, NULL, NULL, NULL);
+        close(info->fd);
+    }
+
+    /* The BlockDriverState was allocated with calloc() in open_disk() */
+    free(s->bs);
+    free(s->fd_entry);
+    free(s->blkif);
+    free(s->ring_info);
+    free(s);
+}
+
+/*
+ * Allocate a list entry for disk state s, record it in s->fd_entry and
+ * append it to the tail of the fd_start list. The cookie of the new entry is
+ * left for the caller to fill in.
+ */
+static inline fd_list_entry_t *add_fd_entry(int tap_fd, struct td_state *s)
+{
+    fd_list_entry_t *node;
+    fd_list_entry_t **link;
+
+    DPRINTF("Adding fd_list_entry\n");
+
+    node = malloc(sizeof(fd_list_entry_t));
+    s->fd_entry = node;
+    node->tap_fd = tap_fd;
+    node->s = s;
+    node->next = NULL;
+
+    /* Walk to the pointer that currently terminates the list */
+    for (link = &fd_start; *link != NULL; link = &(*link)->next)
+        ;
+
+    *link = node;
+    node->pprev = link;
+
+    return node;
+}
+
+/*
+ * Look up the disk state registered under the given cookie.
+ *
+ * Returns NULL when no entry in the fd_start list carries that cookie.
+ */
+static inline struct td_state *get_state(int cookie)
+{
+    fd_list_entry_t *cur;
+
+    for (cur = fd_start; cur != NULL; cur = cur->next) {
+        if (cur->cookie == cookie)
+            return cur->s;
+    }
+
+    return NULL;
+}
+
+/*
+ * Allocate the state of a new disk together with its blkif_t and
+ * tapdev_info_t.
+ *
+ * All three allocations are zero-filled, so every pending_list entry starts
+ * with secs_pending == 0 and submitting == 0 and all pointers in the state
+ * start out as NULL.
+ *
+ * Returns NULL if any allocation fails; nothing is leaked in that case.
+ */
+static struct td_state *state_init(void)
+{
+    struct td_state *s;
+
+    s = calloc(1, sizeof(*s));
+    if (s == NULL)
+        return NULL;
+
+    s->blkif = calloc(1, sizeof(blkif_t));
+    s->ring_info = calloc(1, sizeof(tapdev_info_t));
+
+    if (s->blkif == NULL || s->ring_info == NULL) {
+        free(s->blkif);
+        free(s->ring_info);
+        free(s);
+        return NULL;
+    }
+
+    return s;
+}
+
+/*
+ * Open the blktap device with the given minor number, map its shared memory
+ * region, initialise the back ring on it and register handle_blktap_iomsg()
+ * as the read handler for the device fd.
+ *
+ * Returns minor on success and -1 on failure. On failure the device fd is
+ * closed again and the ring info of s is left untouched.
+ */
+static int map_new_dev(struct td_state *s, int minor)
+{
+    tapdev_info_t *info = s->ring_info;
+    fd_list_entry_t *ptr;
+    char *devname;
+    void *mem;
+    int tap_fd;
+    int page_size;
+
+    if (asprintf(&devname, "%s/%s%d", BLKTAP_DEV_DIR, BLKTAP_DEV_NAME,
+                 minor) == -1)
+        return -1;
+
+    tap_fd = open(devname, O_RDWR);
+    if (tap_fd == -1) {
+        DPRINTF("open failed on dev %s!\n", devname);
+        goto fail;
+    }
+
+    /*Map the shared memory*/
+    page_size = getpagesize();
+    mem = mmap(0, page_size * BLKTAP_MMAP_REGION_SIZE,
+               PROT_READ | PROT_WRITE, MAP_SHARED, tap_fd, 0);
+    if (mem == MAP_FAILED) {
+        DPRINTF("mmap failed on dev %s!\n", devname);
+        close(tap_fd);
+        goto fail;
+    }
+
+    /* Commit to the ring info only once both steps have succeeded */
+    info->fd = tap_fd;
+    info->mem = mem;
+
+    /* assign the rings to the mapped memory */
+    info->sring = (blkif_sring_t *)((unsigned long)info->mem);
+    BACK_RING_INIT(&info->fe_ring, info->sring, page_size);
+
+    /* The request data pages follow the ring pages */
+    info->vstart =
+        (unsigned long)info->mem + (BLKTAP_RING_PAGES * page_size);
+
+    ioctl(info->fd, BLKTAP_IOCTL_SENDPID, process );
+    ioctl(info->fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE );
+    free(devname);
+
+    /*Update the fd entry*/
+    for (ptr = fd_start; ptr != NULL; ptr = ptr->next) {
+        if (s != ptr->s)
+            continue;
+
+        ptr->tap_fd = tap_fd;
+
+        /* Setup fd_handler for qemu main loop */
+        DPRINTF("set tap_fd = %d\n", tap_fd);
+        qemu_set_fd_handler2(tap_fd, NULL, &handle_blktap_iomsg, NULL, s);
+        break;
+    }
+
+    DPRINTF("map_new_dev = %d\n", minor);
+    return minor;
+
+ fail:
+    free(devname);
+    return -1;
+}
+
+/*
+ * Open the image file at path with the qemu block layer and fill in the
+ * image related fields of s (bs, flags, size, sector_size, info).
+ *
+ * Returns 0 on success, -ENOMEM if the BlockDriverState cannot be allocated
+ * and -EIO if the image cannot be opened. s is left untouched on failure.
+ *
+ * NOTE(review): the image is opened with flags 0 even when readonly is set;
+ * writes to read-only disks are rejected in handle_blktap_iomsg() instead.
+ */
+static int open_disk(struct td_state *s, char *path, int readonly)
+{
+    BlockDriverState *bs;
+
+    DPRINTF("Opening %s\n", path);
+
+    bs = calloc(1, sizeof(*bs));
+    if (bs == NULL)
+        return -ENOMEM;
+
+    if (bdrv_open(bs, path, 0) != 0) {
+        fprintf(stderr, "Could not open image file %s\n", path);
+        free(bs);
+        return -EIO;
+    }
+
+    s->bs = bs;
+    s->flags = readonly ? TD_RDONLY : 0;
+    s->size = bs->total_sectors;
+    s->sector_size = 512;
+
+    s->info = ((s->flags & TD_RDONLY) ? VDISK_READONLY : 0);
+
+    return 0;
+}
+
+/*
+ * Copy one response into the next private response slot of the disk's ring
+ * and advance the private producer index. The response is not visible to the
+ * other end until kick_responses() pushes it.
+ */
+static inline void write_rsp_to_ring(struct td_state *s, blkif_response_t *rsp)
+{
+    tapdev_info_t *ring = s->ring_info;
+    blkif_response_t *slot =
+        RING_GET_RESPONSE(&ring->fe_ring, ring->fe_ring.rsp_prod_pvt);
+
+    memcpy(slot, rsp, sizeof(*slot));
+    ring->fe_ring.rsp_prod_pvt += 1;
+}
+
+/*
+ * Publish all privately queued responses on the shared ring and notify the
+ * tap device. Does nothing when no response has been queued since the last
+ * push.
+ */
+static inline void kick_responses(struct td_state *s)
+{
+    tapdev_info_t *ring = s->ring_info;
+
+    if (ring->fe_ring.rsp_prod_pvt == ring->fe_ring.sring->rsp_prod)
+        return;
+
+    RING_PUSH_RESPONSES(&ring->fe_ring);
+    ioctl(ring->fd, BLKTAP_IOCTL_KICK_FE);
+}
+
+/*
+ * Account for nr_secs completed sectors of pending request idx and, once the
+ * request is fully submitted and nothing is pending any more, queue its
+ * response on the ring and kick the frontend.
+ *
+ * res is the completion status of the sectors (0 on success); any non-zero
+ * value marks the whole request as failed. sector and private are unused.
+ *
+ * Returns the number of responses queued (0 or 1), or -EBUSY when res is
+ * -EBUSY while the request is still being submitted.
+ */
+static int send_responses(struct td_state *s, int res,
+                          uint64_t sector, int nr_secs, int idx, void *private)
+{
+    blkif_t *blkif = s->blkif;
+    pending_req_t *preq;
+    int responses_queued = 0;
+
+    if (idx < 0 || idx >= MAX_REQUESTS) {
+        DPRINTF("invalid index returned(%d)!\n", idx);
+        return 0;
+    }
+    preq = &blkif->pending_list[idx];
+
+    preq->secs_pending -= nr_secs;
+
+    if (res == -EBUSY && preq->submitting)
+        return -EBUSY; /* propagate -EBUSY back to higher layers */
+    if (res)
+        preq->status = BLKIF_RSP_ERROR;
+
+    if (!preq->submitting && preq->secs_pending == 0) {
+        blkif_response_t rsp;
+
+        /*
+         * Build the response in its own zeroed object instead of aliasing
+         * the stored request, so no stale bytes reach the shared ring.
+         */
+        memset(&rsp, 0, sizeof(rsp));
+        rsp.id = preq->req.id;
+        rsp.operation = preq->req.operation;
+        rsp.status = preq->status;
+
+        write_rsp_to_ring(s, &rsp);
+        responses_queued++;
+
+        kick_responses(s);
+    }
+
+    return responses_queued;
+}
+
+/*
+ * Completion callback for the AIO requests issued by handle_blktap_iomsg().
+ *
+ * opaque is the struct aiocb_info allocated for the segment; it is forwarded
+ * to send_responses() and freed here. ret is the result reported for the
+ * request, 0 meaning success.
+ */
+static void qemu_send_responses(void* opaque, int ret)
+{
+    struct aiocb_info *ctx = opaque;
+
+    if (ret != 0)
+        DPRINTF("ERROR: ret = %d (%s)\n", ret, strerror(-ret));
+
+    send_responses(ctx->s, ret, ctx->sector, ctx->nr_secs,
+                   ctx->idx, (void*) ctx->i);
+    free(ctx);
+}
+
+/**
+ * Callback function for the IO message pipe. Reads requests from the ring
+ * and processes them (call qemu read/write functions).
+ *
+ * The private parameter points to the struct td_state representing the
+ * disk the request is targeted at.
+ *
+ * Every valid segment of a request is submitted as its own AIO operation;
+ * qemu_send_responses() is called on completion and sends the response once
+ * the last segment has finished. Requests that cannot be submitted are
+ * answered with BLKIF_RSP_ERROR.
+ */
+static void handle_blktap_iomsg(void* private)
+{
+    struct td_state* s = private;
+
+    RING_IDX rp, j, i;
+    blkif_request_t *req;
+    int idx, nsects, ret;
+    uint64_t sector_nr;
+    uint8_t *page;
+    blkif_t *blkif = s->blkif;
+    tapdev_info_t *info = s->ring_info;
+    int page_size = getpagesize();
+
+    struct aiocb_info *aiocb_info;
+
+    if (info->fe_ring.sring == NULL) {
+        DPRINTF(" sring == NULL, ignoring IO request\n");
+        return;
+    }
+
+    rp = info->fe_ring.sring->req_prod;
+    xen_rmb();
+
+    for (j = info->fe_ring.req_cons; j != rp; j++)
+    {
+        req = RING_GET_REQUEST(&info->fe_ring, j);
+        ++info->fe_ring.req_cons;
+
+        if (req == NULL)
+            continue;
+
+        /*
+         * req->id selects both the pending_list slot and the data pages, so
+         * it has to be validated before it is used as an index. There is no
+         * slot to answer from, hence the request is dropped.
+         */
+        if (req->id >= MAX_REQUESTS) {
+            DPRINTF("invalid request id %llu, dropping request\n",
+                    (long long unsigned) req->id);
+            continue;
+        }
+        idx = req->id;
+
+        ASSERT(blkif->pending_list[idx].secs_pending == 0);
+        memcpy(&blkif->pending_list[idx].req, req, sizeof(*req));
+        blkif->pending_list[idx].status = BLKIF_RSP_OKAY;
+        /* Holds back the response until all segments are submitted */
+        blkif->pending_list[idx].submitting = 1;
+        sector_nr = req->sector_number;
+
+        /* Don't allow writes on readonly devices */
+        if ((s->flags & TD_RDONLY) &&
+            (req->operation == BLKIF_OP_WRITE)) {
+            blkif->pending_list[idx].status = BLKIF_RSP_ERROR;
+            goto send_response;
+        }
+
+        /* Reject unknown operations before anything is counted as pending */
+        if (req->operation != BLKIF_OP_READ &&
+            req->operation != BLKIF_OP_WRITE) {
+            DPRINTF("Unknown block operation\n");
+            blkif->pending_list[idx].status = BLKIF_RSP_ERROR;
+            goto send_response;
+        }
+
+        /* req->seg[] holds at most BLKIF_MAX_SEGMENTS_PER_REQUEST entries */
+        if (req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+            DPRINTF("invalid segment count %d\n", (int) req->nr_segments);
+            blkif->pending_list[idx].status = BLKIF_RSP_ERROR;
+            goto send_response;
+        }
+
+        for (i = 0; i < req->nr_segments; i++) {
+            nsects = req->seg[i].last_sect -
+                     req->seg[i].first_sect + 1;
+
+            /* Skip segments that do not describe a range inside one page */
+            if ((req->seg[i].last_sect >= page_size >> 9) ||
+                (nsects <= 0))
+                continue;
+
+            page = (uint8_t*) MMAP_VADDR(info->vstart,
+                                         (unsigned long)req->id, i);
+            page += (req->seg[i].first_sect << SECTOR_SHIFT);
+
+            /*
+             * NOTE(review): segments starting beyond the end of the image
+             * are skipped without marking the request as failed; confirm
+             * that this is intended.
+             */
+            if (sector_nr >= s->size) {
+                DPRINTF("Sector request failed:\n");
+                DPRINTF("%s request, idx [%d,%d] size [%llu], "
+                        "sector [%llu,%llu]\n",
+                        (req->operation == BLKIF_OP_WRITE ?
+                         "WRITE" : "READ"),
+                        idx,i,
+                        (long long unsigned)
+                        nsects<<SECTOR_SHIFT,
+                        (long long unsigned)
+                        sector_nr<<SECTOR_SHIFT,
+                        (long long unsigned) sector_nr);
+                continue;
+            }
+
+            aiocb_info = malloc(sizeof(*aiocb_info));
+            if (aiocb_info == NULL) {
+                DPRINTF("ERROR: out of memory\n");
+                blkif->pending_list[idx].status = BLKIF_RSP_ERROR;
+                goto send_response;
+            }
+
+            aiocb_info->s = s;
+            aiocb_info->sector = sector_nr;
+            aiocb_info->nr_secs = nsects;
+            aiocb_info->idx = idx;
+            aiocb_info->i = i;
+
+            /* Count the sectors first: the callback may already run
+             * before the submit call returns. */
+            blkif->pending_list[idx].secs_pending += nsects;
+
+            if (req->operation == BLKIF_OP_WRITE)
+                ret = (NULL == bdrv_aio_write(s->bs, sector_nr,
+                                              page, nsects,
+                                              qemu_send_responses,
+                                              aiocb_info));
+            else
+                ret = (NULL == bdrv_aio_read(s->bs, sector_nr,
+                                             page, nsects,
+                                             qemu_send_responses,
+                                             aiocb_info));
+
+            if (ret) {
+                /*
+                 * Nothing was submitted, so undo the accounting and free
+                 * the context; otherwise the request would never reach
+                 * secs_pending == 0 and never be answered.
+                 * NOTE(review): assumes the callback is not invoked when
+                 * the submit call returns NULL - confirm.
+                 */
+                blkif->pending_list[idx].secs_pending -= nsects;
+                blkif->pending_list[idx].status = BLKIF_RSP_ERROR;
+                free(aiocb_info);
+                DPRINTF("ERROR: %s == NULL\n",
+                        (req->operation == BLKIF_OP_WRITE ?
+                         "bdrv_write()" : "bdrv_read()"));
+                goto send_response;
+            }
+
+            sector_nr += nsects;
+        }
+    send_response:
+        blkif->pending_list[idx].submitting = 0;
+
+        /* force write_rsp_to_ring for synchronous case */
+        if (blkif->pending_list[idx].secs_pending == 0)
+            send_responses(s, 0, 0, 0, idx, (void *)(long)0);
+    }
+}
+
+/*
+ * Write a control reply of msglen bytes to the qemu-write pipe. A failed or
+ * short write is only logged.
+ */
+static void send_ctrl_reply(const char *buf, int msglen)
+{
+    int written = write(write_fd, buf, msglen);
+
+    if (written != msglen)
+        DPRINTF("Short write on control pipe (%d of %d bytes)\n",
+                written, msglen);
+}
+
+/**
+ * Callback function for the qemu-read pipe. Reads and processes control
+ * message from the pipe.
+ *
+ * Handled message types are CTLMSG_PARAMS (open an image), CTLMSG_NEWDEV
+ * (map a tap device for an opened image), CTLMSG_CLOSE (detach a disk) and
+ * CTLMSG_PID (report the pid of this process). Everything else is ignored.
+ *
+ * The parameter private is unused.
+ */
+static void handle_blktap_ctrlmsg(void* private)
+{
+    int length, len, msglen;
+    char *ptr, *path;
+    image_t *img;
+    msg_hdr_t *msg;
+    msg_newdev_t *msg_dev;
+    msg_pid_t *msg_pid;
+    int ret = -1;
+    struct td_state *s = NULL;
+    fd_list_entry_t *entry;
+
+    char buf[MSG_SIZE];
+
+    length = read(read_fd, buf, MSG_SIZE);
+
+    /* Ignore read errors and anything too short to hold a header */
+    if (length < (int)sizeof(msg_hdr_t))
+        return;
+
+    msg = (msg_hdr_t *)buf;
+    DPRINTF("blktap: Received msg, len %d, type %d, UID %d\n",
+            length,msg->type,msg->cookie);
+
+    switch (msg->type) {
+    case CTLMSG_PARAMS:
+        /* The payload is the image path, not NUL terminated */
+        ptr = buf + sizeof(msg_hdr_t);
+        len = (length - sizeof(msg_hdr_t));
+        path = calloc(1, len + 1);
+
+        if (path != NULL) {
+            memcpy(path, ptr, len);
+            DPRINTF("Received CTLMSG_PARAMS: [%s]\n", path);
+
+            /* Allocate the disk structs */
+            s = state_init();
+        }
+
+        /*Open file*/
+        if (path == NULL || s == NULL ||
+            open_disk(s, path, msg->readonly)) {
+            /* Release the half set up state instead of leaking it */
+            if (s != NULL) {
+                free(s->blkif);
+                free(s->ring_info);
+                free(s);
+            }
+            msglen = sizeof(msg_hdr_t);
+            msg->type = CTLMSG_IMG_FAIL;
+            msg->len = msglen;
+        } else {
+            entry = add_fd_entry(0, s);
+            entry->cookie = msg->cookie;
+            DPRINTF("Entered cookie %d\n", entry->cookie);
+
+            memset(buf, 0x00, MSG_SIZE);
+
+            msglen = sizeof(msg_hdr_t) + sizeof(image_t);
+            msg->type = CTLMSG_IMG;
+            img = (image_t *)(buf + sizeof(msg_hdr_t));
+            img->size = s->size;
+            img->secsize = s->sector_size;
+            img->info = s->info;
+            DPRINTF("Writing (size, secsize, info) = "
+                    "(%#" PRIx64 ", %#" PRIx64 ", %d)\n",
+                    s->size, s->sector_size, s->info);
+        }
+        send_ctrl_reply(buf, msglen);
+        free(path);
+        break;
+
+    case CTLMSG_NEWDEV:
+        msg_dev = (msg_newdev_t *)(buf + sizeof(msg_hdr_t));
+
+        s = get_state(msg->cookie);
+        DPRINTF("Retrieving state, cookie %d.....[%s]\n",
+                msg->cookie, (s == NULL ? "FAIL":"OK"));
+
+        /* Only trust devnum if the message is long enough to carry it */
+        if (s != NULL &&
+            length >= (int)(sizeof(msg_hdr_t) + sizeof(msg_newdev_t))) {
+            ret = ((map_new_dev(s, msg_dev->devnum)
+                    == msg_dev->devnum ? 0: -1));
+        }
+
+        memset(buf, 0x00, MSG_SIZE);
+        msglen = sizeof(msg_hdr_t);
+        msg->type = (ret == 0 ? CTLMSG_NEWDEV_RSP
+                     : CTLMSG_NEWDEV_FAIL);
+        msg->len = msglen;
+
+        send_ctrl_reply(buf, msglen);
+        break;
+
+    case CTLMSG_CLOSE:
+        s = get_state(msg->cookie);
+        if (s) unmap_disk(s);
+        break;
+
+    case CTLMSG_PID:
+        memset(buf, 0x00, MSG_SIZE);
+        msglen = sizeof(msg_hdr_t) + sizeof(msg_pid_t);
+        msg->type = CTLMSG_PID_RSP;
+        msg->len = msglen;
+
+        msg_pid = (msg_pid_t *)(buf + sizeof(msg_hdr_t));
+        /* Remembered for BLKTAP_IOCTL_SENDPID in map_new_dev() */
+        process = getpid();
+        msg_pid->pid = process;
+
+        send_ctrl_reply(buf, msglen);
+        break;
+
+    default:
+        break;
+    }
+}
+
+/**
+ * Opens a control socket, i.e. a pipe to communicate with blktapctrl.
+ *
+ * The control directory and the FIFO are created when they do not exist yet;
+ * an already existing FIFO is reused. The FIFO is opened read/write and
+ * non-blocking.
+ *
+ * Returns the file descriptor number for the pipe; -1 in error case
+ */
+static int open_ctrl_socket(char *devname)
+{
+    int fd;
+
+    if (mkdir(BLKTAP_CTRL_DIR, 0755) == 0)
+        DPRINTF("Created %s directory\n", BLKTAP_CTRL_DIR);
+
+    /* A FIFO that is already there is fine, any other error is not */
+    if (mkfifo(devname,S_IRWXU|S_IRWXG|S_IRWXO) != 0 && errno != EEXIST) {
+        DPRINTF("ERROR: pipe failed (%d)\n", errno);
+        return -1;
+    }
+
+    fd = open(devname,O_RDWR|O_NONBLOCK);
+    if (fd >= 0)
+        return fd;
+
+    DPRINTF("FD open failed\n");
+    return -1;
+}
+
+/**
+ * Unmaps all disks and closes their pipes
+ *
+ * Afterwards the two control pipes of this domain are removed from
+ * BLKTAP_CTRL_DIR. Registered as an atexit() handler by init_blktap().
+ */
+void shutdown_blktap(void)
+{
+    fd_list_entry_t *ptr, *next;
+    char *devname;
+
+    DPRINTF("Shutdown blktap\n");
+
+    /*
+     * Unmap all disks. unmap_disk() unlinks and frees the list entry and
+     * closes the tap device fd, so the successor has to be fetched first and
+     * the fd must not be closed a second time here.
+     */
+    for (ptr = fd_start; ptr != NULL; ptr = next) {
+        next = ptr->next;
+        unmap_disk(ptr->s);
+    }
+
+    /* Delete control pipes */
+    if (asprintf(&devname, BLKTAP_CTRL_DIR "/qemu-read-%d", domid) >= 0) {
+        DPRINTF("Delete %s\n", devname);
+        if (unlink(devname))
+            DPRINTF("Could not delete: %s\n", strerror(errno));
+        free(devname);
+    }
+
+    if (asprintf(&devname, BLKTAP_CTRL_DIR "/qemu-write-%d", domid) >= 0) {
+        DPRINTF("Delete %s\n", devname);
+        if (unlink(devname))
+            DPRINTF("Could not delete: %s\n", strerror(errno));
+        free(devname);
+    }
+}
+
+/**
+ * Initialize the blktap interface, i.e. open a pair of pipes in /var/run/tap
+ * and register a fd handler.
+ *
+ * Returns 0 on success and -1 if a pipe name cannot be built or a pipe cannot
+ * be opened. No descriptor is left open and no handler is registered in the
+ * error case.
+ */
+int init_blktap(void)
+{
+    char* devname;
+
+    DPRINTF("Init blktap pipes\n");
+
+    /* Open the read pipe */
+    if (asprintf(&devname, BLKTAP_CTRL_DIR "/qemu-read-%d", domid) < 0)
+        return -1;
+
+    read_fd = open_ctrl_socket(devname);
+    free(devname);
+
+    if (read_fd == -1) {
+        fprintf(stderr, "Could not open %s/qemu-read-%d\n",
+                BLKTAP_CTRL_DIR, domid);
+        return -1;
+    }
+
+    /* Open the write pipe */
+    if (asprintf(&devname, BLKTAP_CTRL_DIR "/qemu-write-%d", domid) < 0) {
+        close(read_fd);
+        return -1;
+    }
+
+    write_fd = open_ctrl_socket(devname);
+    free(devname);
+
+    if (write_fd == -1) {
+        fprintf(stderr, "Could not open %s/qemu-write-%d\n",
+                BLKTAP_CTRL_DIR, domid);
+        close(read_fd);
+        return -1;
+    }
+
+    /* Attach a handler to the read pipe (called from qemu main loop) */
+    qemu_set_fd_handler2(read_fd, NULL, &handle_blktap_ctrlmsg, NULL, NULL);
+
+    /* Register handler to clean up when the domain is destroyed */
+    atexit(&shutdown_blktap);
+
+    return 0;
+}
diff -r 7530c4dba8a5 tools/ioemu/hw/xen_blktap.h
--- /dev/null Mon Mar 3 15:19:39 2008
+++ b/tools/ioemu/hw/xen_blktap.h Fri Mar 14 11:14:10 2008
@@ -0,0 +1,57 @@
+/* xen_blktap.h
+ *
+ * Generic disk interface for blktap-based image adapters.
+ *
+ * (c) 2006 Andrew Warfield and Julian Chesterfield
+ */
+
+#ifndef XEN_BLKTAP_H_
+#define XEN_BLKTAP_H_
+
+#include <stdint.h>
+#include <syslog.h>
+#include <stdio.h>
+
+#include "block_int.h"
+
+/* Things disks need to know about, these should probably be in a higher-level
+ * header. */
+#define MAX_SEGMENTS_PER_REQ 11
+#define SECTOR_SHIFT 9
+#define DEFAULT_SECTOR_SIZE 512
+
+#define MAX_IOFD 2
+
+#define BLK_NOT_ALLOCATED 99
+#define TD_NO_PARENT 1
+
+typedef uint32_t td_flag_t;
+
+#define TD_RDONLY 1
+
+struct disk_id { /* NOTE(review): no field of this struct is read anywhere in xen_blktap.c */
+ char *name;
+ int drivertype;
+};
+
+/* This structure represents the state of an active virtual disk. */
+struct td_state {
+ BlockDriverState* bs; /* qemu block driver state of the opened image, set in open_disk() */
+ td_flag_t flags; /* TD_RDONLY for read-only disks, otherwise 0 */
+ void *blkif; /* blkif_t allocated in state_init(); holds the pending request list */
+ void *image; /* not referenced by xen_blktap.c */
+ void *ring_info; /* tapdev_info_t allocated in state_init(): tap fd, mapped memory and ring */
+ void *fd_entry; /* fd_list_entry_t of this disk, set in add_fd_entry() */
+ uint64_t sector_size; /* set to 512 by open_disk() */
+ uint64_t size; /* image size in sectors (bs->total_sectors) */
+ unsigned int info; /* VDISK_READONLY for read-only disks, otherwise 0 */
+};
+
+typedef struct fd_list_entry { /* node of the list of attached disks headed by fd_start */
+ int cookie; /* cookie of the control message that created the disk; lookup key in get_state() */
+ int tap_fd; /* blktap device fd; 0 until map_new_dev() fills it in */
+ struct td_state *s; /* disk state this entry belongs to */
+ struct fd_list_entry **pprev, *next; /* pprev points at the pointer that references this entry */
+} fd_list_entry_t;
+
+#endif /*XEN_BLKTAP_H_*/
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|