[PATCH 4/4] (Refactored) Add libvdisk, and vdisk_tool
vdisk-support.patch
provides libvdisk, and vdisk_tool, as described in [PATCH 0/4]
Signed-off-by: Boris Ostrovsky <bostrovsky@xxxxxxxxxxxxxxx>
Signed-off-by: Ben Guthro <bguthro@xxxxxxxxxxxxxxx>
diff -r 75c61490cc06 tools/Makefile
--- a/tools/Makefile Thu Jun 21 13:05:29 2007 -0400
+++ b/tools/Makefile Thu Jun 21 13:05:31 2007 -0400
@@ -17,6 +17,7 @@ SUBDIRS-$(VTPM_TOOLS) += vtpm
SUBDIRS-$(VTPM_TOOLS) += vtpm
SUBDIRS-y += xenstat
SUBDIRS-y += libaio
+SUBDIRS-y += vdisk
SUBDIRS-y += blktap
SUBDIRS-y += libfsimage
SUBDIRS-$(XENFB_TOOLS) += xenfb
diff -r 75c61490cc06 tools/vdisk/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/vdisk/Makefile Thu Jun 21 13:05:45 2007 -0400
@@ -0,0 +1,65 @@
+#
+# Copyright (c) 2003-2007, Virtual Iron Software, Inc.
+#
+# Portions have been modified by Virtual Iron Software, Inc.
+# (c) 2007. This file and the modifications can be redistributed and/or
+# modified under the terms and conditions of the GNU General Public
+# License, version 2.1 and not any later version of the GPL, as published
+# by the Free Software Foundation.
+#
+XEN_ROOT = ../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+# Sources of the VHD format plug-in, the core library and the command line tool
+LIBVHD_SRC = vhd.c vhd_utils.c
+LIBVDISK_SRC = vdisk_utils.c vdisk_common.c
+TOOL_SRC = vdisk_tool.c
+
+LIBAIO_DIR = ../libaio/src
+BLKTAP_DIR = ../blktap/drivers
+
+CFLAGS = -O2 -fno-strict-aliasing -fPIC -Wall -Werror -rdynamic \
+	-D_FILE_OFFSET_BITS=64 \
+	-D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -I./ \
+	-I$(LIBAIO_DIR) \
+	-I$(BLKTAP_DIR)
+
+LIB_LDFLAGS = -dy -shared -L$(LIBAIO_DIR) -laio
+
+INSTALL = /usr/bin/install
+
+# These targets do not produce a file of the same name
+.PHONY: all default install clean depend dep
+
+all: default
+default: vdisk_tool libvdisk_vhd.so libvdisk.so
+
+
+%.o: %.c
+	$(CC) $(CFLAGS) -rdynamic -c $< -o $@
+
+# Use $(CC) rather than a hard-coded gcc so that cross and non-default
+# compilers selected by Rules.mk are honoured for the tool as well.
+vdisk_tool: $(TOOL_SRC:%.c=%.o) libvdisk_vhd.so libvdisk.so
+	$(CC) $(LOCAL_CFLAGS) -o vdisk_tool -g $(TOOL_SRC) -L./ \
+		-I$(LIBAIO_DIR) \
+		-I$(BLKTAP_DIR) \
+		-L$(LIBAIO_DIR) -L. -lvdisk -ldl -laio
+
+libvdisk_vhd.so: $(LIBVHD_SRC:%.c=%.o) libvdisk.so
+	$(LD) $(LIB_LDFLAGS) -o $@ $^
+
+libvdisk.so: $(LIBVDISK_SRC:%.c=%.o)
+	$(LD) $(LIB_LDFLAGS) -o $@ $^
+
+install: all
+	$(INSTALL) -d $(DESTDIR)/usr/bin
+	$(INSTALL) -d $(DESTDIR)/usr/lib64
+	$(INSTALL) vdisk_tool $(DESTDIR)/usr/bin
+	$(INSTALL) libvdisk_vhd.so libvdisk.so $(DESTDIR)/usr/lib64
+	$(INSTALL) -d $(DESTDIR)/usr/include
+	for header in *.h; do $(INSTALL) $$header $(DESTDIR)/usr/include; done
+
+clean:
+	/bin/rm -f *.o libvdisk_vhd.so vdisk_tool libvdisk.so
+
+depend .depend dep:
+	$(CC) $(CFLAGS) -M $(LIBVDISK_SRC) $(LIBVHD_SRC) $(TOOL_SRC)> .depend
+
+ifeq (.depend,$(wildcard .depend))
+include .depend
+endif
diff -r 75c61490cc06 tools/vdisk/list.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/vdisk/list.h Thu Jun 21 13:05:31 2007 -0400
@@ -0,0 +1,168 @@
+// Copy of /usr/include/linux/list.h that does not
+// depend on __KERNEL__ and _LVM_H_INCLUDE
+
+#ifndef _LIST_H
+#define _LIST_H
+
+
+/*
+ * Simple doubly linked list implementation.
+ *
+ * Some of the internal functions ("__xxx") are useful when
+ * manipulating whole lists rather than single entries, as
+ * sometimes we already know the next/prev entries and we can
+ * generate better code by using them directly rather than
+ * using the generic single-entry routines.
+ */
+
+/*
+ * Link node of a circular doubly linked list.  It is embedded in the
+ * structure that is to be kept on a list; an empty list is a head whose
+ * next and prev both point back at the head itself.
+ */
+struct list_head {
+ struct list_head *next, *prev;
+};
+
+/* Static initializer that makes 'name' an empty list (points at itself). */
+#define LIST_HEAD_INIT(name) { &(name), &(name) }
+
+/* Define a list head variable 'name' that starts out empty. */
+#define LIST_HEAD(name) \
+ struct list_head name = LIST_HEAD_INIT(name)
+
+/* Run-time counterpart of LIST_HEAD_INIT: make *ptr an empty list. */
+#define INIT_LIST_HEAD(ptr) do { \
+ (ptr)->next = (ptr); (ptr)->prev = (ptr); \
+} while (0)
+
+/*
+ * Link @new in between @prev and @next, which must already be
+ * neighbours on the same list.
+ *
+ * Internal helper: callers are expected to know both neighbours.
+ */
+static __inline__ void __list_add(struct list_head *new,
+				  struct list_head *prev,
+				  struct list_head *next)
+{
+	/* First point the new node at its neighbours ... */
+	new->prev = prev;
+	new->next = next;
+	/* ... then make the neighbours point at the new node. */
+	prev->next = new;
+	next->prev = new;
+}
+
+/**
+ * list_add - insert an entry right after a list head
+ * @new: entry to insert
+ * @head: list head (or any node) that @new will follow
+ *
+ * The new entry becomes the first element, which gives LIFO (stack)
+ * ordering when elements are taken from the front.
+ */
+static __inline__ void list_add(struct list_head *new, struct list_head *head)
+{
+	struct list_head *first = head->next;
+
+	__list_add(new, head, first);
+}
+
+/**
+ * list_add_tail - insert an entry right before a list head
+ * @new: entry to insert
+ * @head: list head (or any node) that @new will precede
+ *
+ * The new entry becomes the last element, which gives FIFO (queue)
+ * ordering when elements are taken from the front.
+ */
+static __inline__ void list_add_tail(struct list_head *new,
+				     struct list_head *head)
+{
+	struct list_head *last = head->prev;
+
+	__list_add(new, last, head);
+}
+
+/*
+ * Unlink whatever sits between @prev and @next by making the two
+ * point at each other.
+ *
+ * Internal helper: callers are expected to know both neighbours.
+ */
+static __inline__ void __list_del(struct list_head *prev,
+				  struct list_head *next)
+{
+	prev->next = next;
+	next->prev = prev;
+}
+
+/**
+ * list_del - remove an entry from its list
+ * @entry: the element to remove
+ *
+ * Note: the links of @entry are cleared afterwards, so list_empty() on
+ * @entry does not return true and the entry must not be walked.
+ */
+static __inline__ void list_del(struct list_head *entry)
+{
+	struct list_head *before = entry->prev;
+	struct list_head *after = entry->next;
+
+	__list_del(before, after);
+	entry->prev = 0;
+	entry->next = 0;
+}
+
+/**
+ * list_del_init - remove an entry from its list and make it an empty list
+ * @entry: the element to remove
+ *
+ * Unlike list_del(), @entry is left pointing at itself and can be
+ * tested with list_empty() or reused as a list head.
+ */
+static __inline__ void list_del_init(struct list_head *entry)
+{
+	__list_del(entry->prev, entry->next);
+	entry->next = entry;
+	entry->prev = entry;
+}
+
+/**
+ * list_empty - check whether a list has no elements
+ * @head: the list head to test
+ *
+ * Returns non-zero when @head points back at itself.
+ */
+static __inline__ int list_empty(struct list_head *head)
+{
+	struct list_head *first = head->next;
+
+	return first == head;
+}
+
+/**
+ * list_splice - insert all elements of one list into another
+ * @list: head of the list whose elements are moved
+ * @head: position in the destination list after which they are inserted
+ *
+ * @list itself is not linked in and is not reinitialized.
+ */
+static __inline__ void list_splice(struct list_head *list,
+				   struct list_head *head)
+{
+	struct list_head *src_first = list->next;
+	struct list_head *src_last;
+	struct list_head *dst_next;
+
+	/* Nothing to move when the source list is empty */
+	if (src_first == list)
+		return;
+
+	src_last = list->prev;
+	dst_next = head->next;
+
+	/* Hook the front of the chain behind @head ... */
+	src_first->prev = head;
+	head->next = src_first;
+
+	/* ... and its tail in front of what used to follow @head. */
+	src_last->next = dst_next;
+	dst_next->prev = src_last;
+}
+
+/**
+ * list_entry - get the struct for this entry
+ * @ptr: the &struct list_head pointer.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Subtracts the offset of @member inside @type from @ptr.  The result is
+ * only meaningful when @ptr really is the @member field of a @type
+ * object; applying it to a bare list head yields a bogus pointer.
+ */
+#define list_entry(ptr, type, member) \
+ ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
+
+/**
+ * list_for_each - iterate over a list
+ * @pos: the &struct list_head to use as a loop counter.
+ * @head: the head for your list.
+ *
+ * @pos->next is read after the loop body has run, so the body must not
+ * unlink @pos; use list_for_each_safe() for that.
+ */
+#define list_for_each(pos, head) \
+ for (pos = (head)->next; pos != (head); \
+ pos = pos->next)
+
+/**
+ * list_for_each_safe - iterate over a list safe against removal of list entry
+ * @pos: the &struct list_head to use as a loop counter.
+ * @n: another &struct list_head to use as temporary storage
+ * @head: the head for your list.
+ *
+ * The successor is saved in @n before the body runs, so the body may
+ * unlink @pos.
+ */
+#define list_for_each_safe(pos, n, head) \
+ for (pos = (head)->next, n = pos->next; pos != (head); \
+ pos = n, n = pos->next)
+
+
+
+#endif
diff -r 75c61490cc06 tools/vdisk/vdisk.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/vdisk/vdisk.h Thu Jun 21 13:05:48 2007 -0400
@@ -0,0 +1,215 @@
+// Copyright (c) 2003-2007, Virtual Iron Software, Inc.
+//
+// Portions have been modified by Virtual Iron Software, Inc.
+// (c) 2007. This file and the modifications can be redistributed and/or
+// modified under the terms and conditions of the GNU General Public
+// License, version 2.1 and not any later version of the GPL, as published
+// by the Free Software Foundation.
+
+#ifndef __VDISK_H
+#define __VDISK_H
+
+#include <sys/types.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <linux/limits.h>
+#include <syslog.h>
+#include <libaio.h>
+#include "list.h"
+#include "tapaio.h"
+
+// vdisk_tool's operations (bit flags, may be OR-ed together)
+#define VDISK_OP_CREATE (1<<0)
+#define VDISK_OP_HEADERS (1<<1)
+#define VDISK_OP_DUMP (1<<2)
+#define VDISK_OP_MODIFY (1<<3)
+
+// Return codes of the format's map_block() operation
+#define VID_BLOCK_MAPPED (0)
+#define VID_BLOCK_NOTMAPPED (-1)
+#define VID_BLOCK_TOOBIG (-2)
+#define VID_BLOCK_MAPERR (-3)
+
+// IO operation codes
+#define VDISK_READ (0)
+#define VDISK_WRITE (1)
+
+// Async IO macros
+// VDISK_HASH_IDX() masks with (VDISK_HASH_SZ-1), so VDISK_HASH_SZ has to
+// stay a power of two.  VDISK_INVALID_HASH marks a free slot; it is stored
+// in the 64-bit unsigned 'key' of vdisk_hash_t.
+#define VDISK_HASH_SZ (2048)
+#define VDISK_HASH_IDX(x) ((x) & (VDISK_HASH_SZ-1))
+#define VDISK_INVALID_HASH (-1)
+#define REQUEST_ASYNC_FD (1) // Should really be defined in kernel
+
+#define SECTOR_SIZE (512)
+
+// vdisk device flags (vdisk_dev_t.flags)
+#define VDISK_SYNCIO_BUF (1<<0)
+#define VDISK_RO (1<<1)
+
+// vdisk file flags (vd_file_t.flags)
+#define VDF_LEAF (1<<0) // last COW child (writeable)
+
+// Statistics gathering (compile-time switches, both off by default)
+#define VDISK_STATS (0)
+#define VDISK_SYNCIO_STATS (0)
+
+// DO_STATS(x) expands to x only when VDISK_STATS is enabled
+#if VDISK_STATS
+#define DO_STATS(x) x
+#else
+#define DO_STATS(x)
+#endif
+
+
+
+
+// Datatype for addressing host memory.
+// uintptr_t is wide enough to hold an object pointer on every
+// architecture, so pointer-to-integer casts such as (addr_t)buf do not
+// truncate on 64-bit targets other than x86_64.
+typedef uintptr_t addr_t;
+
+typedef int file_t;
+
+// Forward declaration
+struct vdisk_dev;
+
+// Stores info about a pending async IO.
+// vdisk_xfer_cb() consumes it as follows:
+//   block, num_blocks - range of blocks whose hash slots are released
+//   arg, res          - handed to the format's xfer_commit() operation
+//   op                - VDISK_WRITE triggers an fsync() on 'fd'
+//   fd, off           - file and offset passed to posix_fadvise()
+// How 'aiocb' is used is not visible in this file.
+typedef struct pending_aio {
+ uint32_t block;
+ uint32_t num_blocks;
+ void *arg;
+ void *aiocb;
+ off_t off;
+ file_t fd;
+ int op;
+ int res;
+} pending_aio_t;
+
+// Hash that stores async IO data.
+// 'key' holds the number of the block that currently owns the slot, or
+// VDISK_INVALID_HASH when the slot is free (see vdisk_rw()).
+// 'io' and 'pio' presumably describe the request in flight for that
+// block -- NOTE(review): they are filled outside this file, confirm.
+typedef struct vdisk_hash {
+ uint64_t key;
+ struct iocb io;
+ pending_aio_t pio;
+} vdisk_hash_t;
+
+// run data to allow coalescing of writes when doing posix_fadvise() sync/flush
+// One instance is allocated per vd_file_t when VDISK_SYNCIO_BUF is set and
+// is passed to vdisk_syncio() for writes.
+// NOTE(review): is_set/io_start/io_len look like "a run is pending" plus
+// its start offset and length -- confirm against vdisk_syncio().
+// The counters below exist only when VDISK_SYNCIO_STATS is enabled.
+typedef struct vdisk_syncio {
+ int is_set;
+ off_t io_start;
+ off_t io_len;
+#if VDISK_SYNCIO_STATS
+ unsigned long total_writes;
+ unsigned long contig_writes;
+ unsigned long flush_size_sub1MB;
+ unsigned long flush_size_sub2MB;
+ unsigned long flush_size_sub4MB;
+ unsigned long flush_size_sub8MB;
+ unsigned long flush_size_ovr8MB;
+ unsigned long flush_size_force;
+ time_t last_dbg_print;
+#endif
+} vdisk_syncio_t;
+
+// Per-file structure; one per file linked on vdisk_dev_t.vdf_head.
+// vdisk_rw() uses (batch_sz - 1) as a bit mask, so batch_sz is assumed to
+// be a power of two.  'syncio' is allocated by vdf_init() when the device
+// has VDISK_SYNCIO_BUF set and is released by vdisk_fini(); it may be NULL.
+typedef struct vd_file {
+ struct list_head vdf_list;
+ char name[PATH_MAX];
+ file_t fd;
+ int flags;
+ int batch_sz; // number of blocks that are mapped sequentially
+ void *vdf; // format-specific data
+ vdisk_syncio_t *syncio; // allows sync io to buffer in pagecache for
+ // better io performance
+} vd_file_t;
+
+// Data describing format's properties (ops etc.)
+// A format registers one of these with vdisk_register().  vdf_find_vdfd()
+// selects it by comparing 'ftype' with the file name extension.
+//   open         - called by vdf_read_state() to open the vdisk file(s)
+//   close        - called by vdisk_fini()
+//   map_block    - translates *blockno for num_blocks blocks; returns one
+//                  of the VID_BLOCK_* codes and may set *arg for the
+//                  later xfer_commit() call
+//   xfer_commit  - called once a transfer has finished, with that 'arg'
+//                  and the transfer result
+//   print_header - called by vdf_print_headers()
+//   parse_args, create_vdisk, modify_vdisk - used by vdisk_tool
+typedef struct vdf_data {
+ char ftype[8]; // File name extension
+
+ int (*open)(struct vdisk_dev *vdisk, char *filename);
+ void (*close)(struct vdisk_dev *vdisk);
+ int (*map_block)(vd_file_t *vf, uint32_t *blockno, int num_blocks,
+ int op, void **arg);
+ int (*xfer_commit)(void *arg, int err);
+ int (*print_header)(vd_file_t *vf);
+ int (*parse_args)(int argc, int operations, char *argv[], void **optp);
+ int (*create_vdisk)(char *filename, void *optp);
+ int (*modify_vdisk)(struct vdisk_dev *vdisk, void *optp);
+ struct list_head vdfd_list; // connects to global format list
+} vdf_data_t;
+
+// Top-level datastructure: one instance per open virtual disk.
+// Note that the structure embeds three VDISK_HASH_SZ-element arrays and
+// is therefore large.  vdf_read_state() reads 'use_aio' before anything
+// in this library sets it, so callers have to initialize the structure
+// (e.g. zero it) before calling vdisk_init().
+typedef struct vdisk_dev {
+
+ struct vdisk_geom {
+ int cyls;
+ int heads;
+ int secs;
+ } geom;
+
+ ssize_t sz; // Device size (bytes)
+
+ int flags; // VDISK_SYNCIO_BUF, VDISK_RO
+
+ // head of vdisk files (vd_file_t) list
+ struct list_head vdf_head;
+
+ // operations of the format selected by vdf_find_vdfd()
+ vdf_data_t *vdfd;
+
+ // AIO data
+ // hash[] is indexed with VDISK_HASH_IDX(block number)
+ vdisk_hash_t hash[VDISK_HASH_SZ];
+ struct iocb *aio_submit[VDISK_HASH_SZ];
+ struct io_event aio_events[VDISK_HASH_SZ];
+ tap_aio_context_t aio_ctx;
+ int use_aio; // non-zero: try async IO; cleared if io_queue_init() fails
+ int aio_fd;
+ int aio_cnt;
+
+ // Stats (updated by vdisk_rw())
+ uint64_t busyio; // requests that found a hash slot occupied
+ uint64_t syncio; // requests that fell back from async to sync IO
+ uint64_t asyncio; // batches handed to async IO
+ uint64_t tot_io; // all requests that passed the size check
+} vdisk_dev_t;
+
+// Per-program settings handed to vdisk_init().
+// alloc_func/free_func are passed on to vdisk_alloc_init(); out_target
+// (VDISK_OUT_STDERR or VDISK_OUT_SYSLOG) is copied to vdisk_out_target.
+struct program_props {
+ void *alloc_func;
+ void *free_func;
+ int out_target;
+};
+
+
+// Where messages go (value of vdisk_out_target)
+#define VDISK_OUT_STDERR (0)
+#define VDISK_OUT_SYSLOG (1)
+extern int vdisk_dbg_level;
+extern int vdisk_out_target;
+// VIDDBG(level, fmt, ...): forwards to vdisk_log_error(), adding the
+// source file name and line number of the call site.
+#define VIDDBG(n, fmt, args...) vdisk_log_error(n, __FILE__, __LINE__, fmt,
##args)
+
+// ASSERT(expr): logs the failed expression and abort()s when expr is false.
+// Relies on GCC statement expressions.
+// NOTE(review): the expansion already ends in ';', so "ASSERT(x);" yields
+// an extra empty statement and cannot be used as the unbraced body of an
+// if that has an else branch.
+#define ASSERT(expr) \
+ ((expr) ? 0 : \
+ ({ \
+ VIDDBG(0, "Assertion failed: %s\n", __STRING(expr)); \
+ abort(); \
+ }));
+
+extern int vdisk_pagesz; //4K
+
+extern void vdisk_log_error(int level, char *file, int line, char *fmt, ...);
+extern int vdf_read_state(vdisk_dev_t *vdisk, char *filename);
+extern int vdf_print_headers(vdisk_dev_t *vdisk, char *filename);
+extern int vdisk_register (vdf_data_t *vdfd);
+extern void vdisk_unregister (vdf_data_t *vdfd);
+extern int vdf_init(vdisk_dev_t *vdisk, char *fname);
+extern int vdisk_common_init(vdisk_dev_t *vdisk);
+extern int vdf_find_vdfd(vdisk_dev_t *vdisk, char *ftype);
+extern int vdisk_xfer_cb(vdisk_dev_t *vdisk, struct pending_aio *pio);
+extern int vdisk_rw(void *hdl, int64_t sector_num,
+ uint8_t *buf, int nb_sectors, int write, void *aiocb);
+extern void vdisk_alloc_init(void *alloc_func, void *free_func);
+extern int vdisk_init(vdisk_dev_t *vdisk, char *filename,
+ struct program_props *props, uint8_t flags);
+extern void vdisk_fini(vdisk_dev_t *vdisk);
+
+#endif /* __VDISK_H */
diff -r 75c61490cc06 tools/vdisk/vdisk_common.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/vdisk/vdisk_common.c Thu Jun 21 13:05:53 2007 -0400
@@ -0,0 +1,616 @@
+// Copyright (c) 2003-2007, Virtual Iron Software, Inc.
+//
+// Portions have been modified by Virtual Iron Software, Inc.
+// (c) 2007. This file and the modifications can be redistributed and/or
+// modified under the terms and conditions of the GNU General Public
+// License, version 2.1 and not any later version of the GPL, as published
+// by the Free Software Foundation.
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <dlfcn.h>
+
+#include "vdisk.h"
+#include "vdisk_utils.h"
+
+
+static int vdisk_initialized = 0;
+int vdisk_pagesz = 0;
+
+// Release per-file sync IO buffers and close the vdisk through the
+// format's close() operation.  Does nothing when the device is NULL or
+// has no format (or no close operation) attached.
+void vdisk_fini(vdisk_dev_t *vdisk)
+{
+	struct list_head *pos;
+	vd_file_t *file;
+
+	// We may have already closed the device
+	if (vdisk == NULL)
+		return;
+	if (vdisk->vdfd == NULL || vdisk->vdfd->close == NULL)
+		return;
+
+	for (pos = vdisk->vdf_head.next; pos != &vdisk->vdf_head;
+	     pos = pos->next) {
+		file = list_entry(pos, vd_file_t, vdf_list);
+		free(file->syncio);
+		file->syncio = NULL;
+	}
+
+	vdisk->vdfd->close(vdisk);
+}
+
+// Initialize 'vdisk' from 'filename'.
+//   filename - path of the vdisk; anything up to and including the first
+//              ':' is skipped before the path is used
+//   props    - optional output target and allocator hooks (may be NULL,
+//              in which case stderr and the default allocator are used)
+//   flags    - VDISK_* device flags stored in vdisk->flags
+// Returns 0 on success or the error reported by vdf_init().
+int vdisk_init(vdisk_dev_t *vdisk, char *filename,
+	       struct program_props *props, uint8_t flags)
+{
+	void *alloc_fn = NULL;
+	void *free_fn = NULL;
+	int target = VDISK_OUT_STDERR;
+	char *colon;
+	char *path;
+	int rc;
+
+	vdisk_common_init(NULL/*XXX: ?? */);
+
+	if (props != NULL) {
+		target = props->out_target;
+		alloc_fn = props->alloc_func;
+		free_fn = props->free_func;
+	}
+
+	// Set where output is directed, then the allocator hooks
+	vdisk_out_target = target;
+	vdisk_alloc_init(alloc_fn, free_fn);
+
+	colon = strchr(filename, ':');
+	path = (colon != NULL) ? colon + 1 : filename;
+
+	vdisk->flags = flags;
+
+	rc = vdf_init(vdisk, path);
+	if (rc != 0)
+		VIDDBG(0, "Can't initialize format's data for %s\n",
+		       filename);
+
+	return (rc);
+}
+
+// Select the format from the extension of 'fname', open the vdisk and,
+// when VDISK_SYNCIO_BUF is set, attach a sync IO run buffer to every file.
+// Returns 0 on success, EINVAL when the name has no extension, the error
+// from vdf_find_vdfd(), or -1 when the vdisk state cannot be read.
+int
+vdf_init(vdisk_dev_t *vdisk, char *fname)
+{
+	struct list_head *pos;
+	vd_file_t *file;
+	char *suffix;
+	int rc;
+
+	suffix = strrchr(fname, '.');
+	if (suffix == NULL) {
+		VIDDBG(0, "Can't determine file type for %s\n", fname);
+		return (EINVAL);
+	}
+
+	// suffix + 1 skips the '.'
+	rc = vdf_find_vdfd(vdisk, suffix + 1);
+	if (rc) {
+		VIDDBG(0, "Can't find format's data\n");
+		return (rc);
+	}
+
+	if (vdf_read_state(vdisk, fname)) {
+		VIDDBG(0, "failed to read headers\n");
+		return (-1);
+	}
+
+	if (!(vdisk->flags & VDISK_SYNCIO_BUF))
+		return (0);
+
+	// A failed allocation is not fatal: that file simply runs
+	// without write coalescing.
+	for (pos = vdisk->vdf_head.next; pos != &vdisk->vdf_head;
+	     pos = pos->next) {
+		file = list_entry(pos, vd_file_t, vdf_list);
+		file->syncio = calloc( 1, sizeof(vdisk_syncio_t));
+		if (file->syncio == NULL) {
+			VIDDBG(0, "vdisk_alloc_syncio_run_data() "
+			       "failed '%s', thus no speed up\n",
+			       strerror(errno));
+		}
+	}
+
+	return (0);
+}
+
+// Map a single block by asking every file on the device's list in turn.
+//   blockno - IN: block to map, OUT: as updated by the format's map_block()
+//   vf      - OUT: the file that was asked last
+//   arg     - passed through to the format's map_block()
+// Returns VID_BLOCK_MAPPED as soon as one file maps the block, otherwise
+// the result of the last attempt (VID_BLOCK_NOTMAPPED for an empty list).
+int
+vdisk_map_block(struct vdisk_dev *dev,
+		uint32_t *blockno, /* IN/OUT */
+		int op,
+		vd_file_t **vf,
+		void **arg)
+{
+	struct list_head *pos = dev->vdf_head.next;
+	vd_file_t *file;
+	int rc = VID_BLOCK_NOTMAPPED;
+
+	while (pos != &dev->vdf_head) {
+		file = list_entry(pos, vd_file_t, vdf_list);
+		*vf = file;
+
+		rc = dev->vdfd->map_block(file, blockno, 1, op, arg);
+		if (rc == VID_BLOCK_MAPPED)
+			return (rc);
+
+		pos = pos->next;
+	}
+
+	if (op == VDISK_WRITE)
+		VIDDBG(0, "Couldn't map block %d\n", *blockno);
+
+	return (rc);
+}
+
+// Reset the device's file list, set up async IO when requested and open
+// the vdisk through the format's open() operation.
+// If the AIO queue cannot be created, async IO is switched off and the
+// open proceeds.  Returns 0 or the error from open().
+int
+vdf_read_state(vdisk_dev_t *vdisk, char *filename)
+{
+	int rc;
+	int slot;
+
+	INIT_LIST_HEAD(&vdisk->vdf_head);
+
+	if (vdisk->use_aio) {
+		// Mark every hash slot as free
+		for (slot = 0; slot < VDISK_HASH_SZ; slot++)
+			vdisk->hash[slot].key = VDISK_INVALID_HASH;
+
+		memset(&vdisk->aio_ctx.aio_ctx, 0, sizeof(io_context_t));
+		rc = io_queue_init(100, &vdisk->aio_ctx.aio_ctx);
+		if (rc) {
+			VIDDBG(0, "io_queue_init() failed: %s. "
+			       " Async IO will not be available\n",
+			       strerror(-1*rc));
+			vdisk->use_aio = 0;
+		}
+	}
+
+	rc = vdisk->vdfd->open(vdisk, filename);
+	if (rc)
+		VIDDBG(0, "Problems opening vdisk %s (error %d)\n",
+		       filename, rc);
+
+	return (rc);
+}
+
+// Read the vdisk state from 'filename' and print the header of the first
+// file on the device's list through the format's print_header() operation.
+// Returns 0 on success, the error from vdf_read_state(), or EINVAL when
+// opening the vdisk left no file on the list.
+int
+vdf_print_headers(vdisk_dev_t *vdisk, char *filename)
+{
+	int err;
+	vd_file_t *vf;
+
+	err = vdf_read_state(vdisk, filename);
+	if (err) {
+		VIDDBG(0, "Failed to read state for %s\n", filename);
+		return (err);
+	}
+
+	// list_entry() on the bare head would produce a bogus vd_file_t
+	// pointer, so refuse to go on when nothing has been linked.
+	if (list_empty(&vdisk->vdf_head)) {
+		VIDDBG(0, "No files found for %s\n", filename);
+		return (EINVAL);
+	}
+
+	vf = list_entry(vdisk->vdf_head.next, vd_file_t, vdf_list);
+	(void)vdisk->vdfd->print_header(vf);
+
+	return (0);
+}
+
+// Completion handler for an async IO described by 'pio'.
+// Commits the transfer through the format's xfer_commit(), syncs writes
+// to disk, drops the affected pages from the page cache and releases the
+// hash slots of all blocks covered by the request.
+// Returns the result of posix_fadvise() (0 or an error number); failures
+// of xfer_commit() and fsync() are logged only.
+int
+vdisk_xfer_cb(vdisk_dev_t *vdisk, struct pending_aio *pio)
+{
+	uint32_t blk;
+	int err = 0;
+
+	ASSERT(pio != NULL);
+
+	err = vdisk->vdfd->xfer_commit(pio->arg, pio->res);
+	if (err)
+		VIDDBG(0, "Failed to commit transfer (error %d)\n", err);
+
+	if (pio->op == VDISK_WRITE) {
+		err = fsync(pio->fd);
+		if (err)
+			VIDDBG(0, "fsync: %s\n", strerror(errno));
+	}
+
+	/*
+	 * posix_fadvise() (or, rather, kernel's sys_fadvise64_64())
+	 * invalidates whole pages only.
+	 */
+	err = posix_fadvise(pio->fd, (pio->off & (~((off_t)vdisk_pagesz-1))),
+			    (ssize_t)(pio->num_blocks<<9) + (off_t)vdisk_pagesz,
+			    POSIX_FADV_DONTNEED);
+	// posix_fadvise() returns the error number itself and leaves errno
+	// untouched, so report 'err' rather than errno.
+	if (err)
+		VIDDBG(0, "posix_fadvise: %s\n", strerror(err));
+
+
+	for (blk=pio->block; blk < (pio->block + pio->num_blocks); blk++)
+		vdisk->hash[VDISK_HASH_IDX(blk)].key = VDISK_INVALID_HASH;
+
+	return (err);
+}
+
+
+// Release 'count' consecutive AIO hash slots starting at block 'first'.
+// Every slot must still be reserved for its block.
+static void
+vdisk_hash_release(struct vdisk_dev *vdisk, uint32_t first, int count)
+{
+	int j;
+	int idx;
+
+	for (j = 0; j < count; j++, first++) {
+		idx = VDISK_HASH_IDX(first);
+		ASSERT(vdisk->hash[idx].key == first);
+		vdisk->hash[idx].key = VDISK_INVALID_HASH;
+	}
+}
+
+// Read or write 'nb_blocks' 512-byte blocks starting at 'block'.
+//   hdl    - the vdisk_dev_t of the device
+//   buf    - caller's buffer; when it is not 512-byte aligned the data is
+//            bounced through an aligned buffer and IO is synchronous
+//   op     - VDISK_READ or VDISK_WRITE
+//   aiocb  - passed through to vdisk_asyncio() for async requests
+// Reads of blocks that no file maps return zeroes.
+//
+// Returns
+//   number of bytes processed  when everything was done synchronously
+//   number of zero-filled bytes when AIO was used, 'aiocb' is set and
+//                              some blocks were unmapped reads
+//   0                          when the caller has to wait for AIO
+//                              completions
+//   -ENOSPC, -EBUSY, -ENOMEM   on the respective errors
+// NOTE(review): two legacy error paths return positive values (EINVAL for
+// an inconsistent mapping, and the negated VID_BLOCK_* code for a mapping
+// failure); they are kept as they were for compatibility.
+int vdisk_rw(void *hdl, int64_t block,
+	     uint8_t *buf, int nb_blocks,
+	     int op, void *aiocb)
+{
+	off_t offset;
+	unsigned long bytes;
+	uint32_t real_block, blk;
+	vd_file_t *vdf = NULL;
+	void *arg = NULL;
+	struct vdisk_dev *vdisk = (struct vdisk_dev *)hdl;
+	int i;
+	struct list_head *ptr;
+	int res = 0;
+	char *b = (char *)buf;
+	char *pool = NULL;
+	int batch;
+	int use_aio = vdisk->use_aio;
+	int busy = 0;
+	int hash_index;
+	int zero_blocks = 0;
+
+	VIDDBG(50, "block=0x%" PRIx64 ", nb_blocks=%d\n",
+	       block, nb_blocks);
+
+	if (((block + (nb_blocks-1)) << 9) >= vdisk->sz) {
+		return (-ENOSPC);
+	}
+
+	vdisk->tot_io++;
+
+	if (use_aio) {
+		// Check whether the hash has available slots and reserve them
+		// We reserve them as we go because we want to make sure that
+		// the request fits in the hash.
+		for (i=0, blk=block; i<nb_blocks; i++, blk++) {
+			hash_index = VDISK_HASH_IDX(blk);
+			VIDDBG(50, "block=0x%" PRIx64 ", nb_blocks=%d i=%d "
+			       "blk=0x%x, vdisk->hash.key[%d]=0x%" PRIx64 "\n",
+			       block, nb_blocks, i,
+			       blk, hash_index,
+			       vdisk->hash[hash_index].key);
+			if (vdisk->hash[hash_index].key != VDISK_INVALID_HASH) {
+				vdisk->busyio++;
+				if (vdisk->hash[hash_index].key != blk)
+					busy = 1;
+				use_aio = 0;
+				break;
+			}
+			vdisk->hash[hash_index].key = blk;
+			VIDDBG(50, "hash_index=%d, blk=%d\n",
+			       hash_index, blk);
+		}
+
+		// We need to free hash entries that we've just reserved.
+		if (!use_aio) {
+			VIDDBG(50, "Freeing hash for block %" PRId64 "\n",
+			       block);
+			// Exactly 'i' slots were reserved before the
+			// collision.  Counting them forward avoids the
+			// unsigned wrap-around a downward loop runs into
+			// when the request starts at block 0.
+			vdisk_hash_release(vdisk, (uint32_t)block, i);
+			VIDDBG(50, "Done\n");
+			if (busy) {
+				VIDDBG(50, "Busy\n");
+				return (-EBUSY);
+			}
+			vdisk->syncio++;
+		}
+	}
+
+	// We can only transfer to/from an aligned buffer
+	if ((addr_t)buf & 511) {
+		// The bounce buffer forces synchronous IO, and the sync
+		// path never releases hash slots, so give back the ones
+		// reserved above.
+		if (use_aio) {
+			vdisk_hash_release(vdisk, (uint32_t)block, nb_blocks);
+			use_aio = 0;
+		}
+
+		b = pool = vdisk_malloc((nb_blocks+1) * 512);
+		if (pool == NULL) {
+			VIDDBG(0, "Can't create buffer\n");
+			return (-ENOMEM);
+		}
+		while ((addr_t)b & 511) b++;
+		VIDDBG(10, "Aligned buffer %p (pool %p, b %p)\n", buf, pool, b);
+	}
+
+	i = 0; // block in the buf[]
+	while (nb_blocks>0) {
+
+		// Find largest contiguous set of blocks that we
+		// we can access in a single IO.
+
+		batch = nb_blocks;
+again:
+		arg = NULL;
+		list_for_each(ptr, &vdisk->vdf_head) {
+
+			vdf = list_entry(ptr, vd_file_t, vdf_list);
+
+			real_block = (uint32_t)block;
+
+			// Make batch fit into a single vdf->batch_sz
+			if ( ((block + batch - 1) & ~(vdf->batch_sz-1))
+			     != (block & ~(vdf->batch_sz-1)))
+				batch = ( (block + vdf->batch_sz) &
+					  ~(vdf->batch_sz-1) )
+					- block;
+
+			// Map the requested block set to address in the file
+			res = vdisk->vdfd->map_block(vdf, &real_block,
+						     batch, op, &arg);
+
+			if (res == VID_BLOCK_TOOBIG) {
+				// Some blocks are mapped and some are not.
+				// Need to try a smaller batch
+
+				batch >>= 1;
+				if (!batch) {
+					// Free hash entries of the blocks
+					// that are still outstanding; they
+					// are reserved only in AIO mode.
+					if (use_aio)
+						vdisk_hash_release(vdisk,
+							(uint32_t)block,
+							nb_blocks);
+					if (pool)
+						vdisk_free(pool);
+
+					VIDDBG(0, "Inconsistent mapping error\n");
+					return EINVAL;
+				}
+				goto again;
+			}
+
+			if ((res != VID_BLOCK_NOTMAPPED) ||
+			    ((vdf->flags & VDF_LEAF) && (op == VDISK_WRITE)))
+				break;
+		}
+
+		if (res != VID_BLOCK_MAPPED) {
+
+			// Unallocated blocks return zeroes for reads
+			if ((op == VDISK_READ) && (res == VID_BLOCK_NOTMAPPED)) {
+
+				// Free up hash entries
+				if (use_aio)
+					vdisk_hash_release(vdisk,
+							   (uint32_t)block,
+							   batch);
+
+				memset(&buf[i*512], 0, batch*512);
+				i += batch;
+				b += batch * 512;
+				block += batch;
+				nb_blocks -= batch;
+				zero_blocks += batch;
+				VIDDBG(10, "Skipping %d blocks\n", batch);
+				continue;
+			}
+
+			VIDDBG(0, "Couldn't map block %" PRId64 " (%d)\n",
+			       block, res);
+			// Nothing will complete the remaining blocks, so
+			// their hash slots have to be released here.
+			if (use_aio)
+				vdisk_hash_release(vdisk, (uint32_t)block,
+						   nb_blocks);
+			if (pool)
+				vdisk_free(pool);
+			return (-1*res);
+		}
+
+		VIDDBG(50, "mapped sector %" PRId64 " to block %d for read\n",
+		       block, real_block);
+
+		// Offset in the file
+		offset = (uint64_t)real_block << 9;
+
+		if (use_aio)
+			vdisk->asyncio++;
+
+		// Perform IO
+		if (op == VDISK_WRITE) {
+			if (pool)
+				memcpy(b, &buf[i*512], batch * 512);
+			if (!use_aio)
+				bytes = vdisk_syncio(vdf->fd, b, batch * 512,
+						     offset, VDISK_WRITE,
+						     vdf->syncio);
+			else
+				bytes = vdisk_asyncio(vdisk, block, vdf->fd,
+						      b, batch * 512, offset,
+						      arg, aiocb, VDISK_WRITE);
+		} else /* VDISK_READ */ {
+			if (!use_aio) {
+				bytes = vdisk_syncio(vdf->fd, b, batch * 512,
+						     offset, VDISK_READ, NULL);
+				if (pool)
+					memcpy(&buf[i*512], b, batch * 512);
+			} else {
+				bytes = vdisk_asyncio(vdisk, block, vdf->fd,
+						      b, batch * 512, offset,
+						      arg, aiocb, VDISK_READ);
+			}
+		}
+
+		if (bytes != batch * 512) {
+			VIDDBG(0, "%s %ld bytes (block %d) instead of "
+			       "%d (%s)\n", (op==VDISK_WRITE)?"Wrote":"Read",
+			       bytes, real_block, batch * 512, vdf->name);
+			if ((signed long)bytes == -1)
+				res = errno;
+		}
+
+		if (!use_aio)
+			if (vdisk->vdfd->xfer_commit(arg, res))
+				VIDDBG(0, "Couldn't commit transfer\n");
+
+		i += batch;
+		b += batch * 512;
+		block += batch;
+		nb_blocks -= batch;
+	}
+
+	if (pool)
+		vdisk_free(pool);
+
+	/*
+	 * Returning number of processed bytes to caller who requested AIO
+	 * (vdisk->use_aio && aiocb) will tell him that there is no
+	 * need to wait for AIO completion
+	 * There are two cases when this happens:
+	 * - We couldn't perform any AIOs (use_aio == 0)
+	 * - Some requests have been reads to unallocated blocks (and
+	 *   thus are read as zeroes). Note that if *some* blocks have been
+	 *   sent as AIOs, the caller will need to wait for completions
+	 *   (and we return zero).
+	 */
+	if (!use_aio)
+		return (i * 512); // 'i' is number of accessed sectors;
+	else if (vdisk->use_aio && aiocb && (zero_blocks != 0))
+		return (zero_blocks * 512);
+	else
+		return (0);
+}
+
+LIST_HEAD(vdfd_head);
+
+// Register new file format
+// Adds 'new_vdfd' at the front of the global format list.
+// Returns 0 on success, -1 when this very descriptor is already listed.
+int
+vdisk_register(vdf_data_t *new_vdfd)
+{
+	struct list_head *pos;
+
+	for (pos = vdfd_head.next; pos != &vdfd_head; pos = pos->next) {
+		if (list_entry(pos, vdf_data_t, vdfd_list) == new_vdfd)
+			return (-1);
+	}
+
+	list_add(&new_vdfd->vdfd_list, &vdfd_head);
+	VIDDBG(10, "Registered \"%s\" format\n", new_vdfd->ftype);
+	return (0);
+}
+
+// Unregister file format
+// Removes 'vdfd' from the global format list; a descriptor that is not
+// on the list is left alone.
+void
+vdisk_unregister(vdf_data_t *vdfd)
+{
+	struct list_head *pos = vdfd_head.next;
+
+	while (pos != &vdfd_head) {
+		if (list_entry(pos, vdf_data_t, vdfd_list) == vdfd) {
+			list_del(&vdfd->vdfd_list);
+			return;
+		}
+		pos = pos->next;
+	}
+}
+
+// Find format-specific library, load it and call its init routine
+// For format "xyz" the library is "libvdisk_xyz.so" and the routine is
+// "xyz_init".  'name' usually comes from a file name extension, so it is
+// treated as untrusted: it must fit the fixed-size name buffers and must
+// not contain a '/', which would make dlopen() treat it as a path.
+// Returns 0 on success and -1 on any failure.
+int
+vdisk_init_format(char *name)
+{
+	void *handle;
+	char libname[64];
+	char initfunc[32];
+	void (*init)();
+	char *err;
+	int len;
+
+	if ((name == NULL) || (strchr(name, '/') != NULL)) {
+		VIDDBG(0, "Invalid format name\n");
+		return (-1);
+	}
+
+	// Construct library name
+	len = snprintf(libname, sizeof(libname), "libvdisk_%s.so", name);
+	if ((len < 0) || ((size_t)len >= sizeof(libname))) {
+		VIDDBG(0, "Format name %s is too long\n", name);
+		return (-1);
+	}
+
+	// Construct init function name
+	len = snprintf(initfunc, sizeof(initfunc), "%s_init", name);
+	if ((len < 0) || ((size_t)len >= sizeof(initfunc))) {
+		VIDDBG(0, "Format name %s is too long\n", name);
+		return (-1);
+	}
+
+	handle = dlopen (libname, RTLD_LAZY);
+	if (!handle) {
+		VIDDBG(0, "%s\n", dlerror());
+		return (-1);
+	}
+
+	dlerror(); // Clear any existing error
+
+	*(void **) (&init) = dlsym(handle, initfunc);
+	err = dlerror();
+	if ((err != NULL) || (init == NULL)) {
+		// Log before dlclose(): the message may live in the library
+		VIDDBG(0, "%s\n", err ? err : "init routine not found");
+		(void)dlclose(handle);
+		return (-1);
+	}
+
+	// Call format-specific init routine
+	(*init)();
+
+	return (0);
+}
+
+// Look up the registered format whose 'ftype' equals 'ftype' and store it
+// in vdisk->vdfd.  When it is not registered yet, the format's library is
+// loaded once through vdisk_init_format() and the lookup is repeated.
+// Returns 0 on success, the error from vdisk_init_format(), or EINVAL
+// when the format is still unknown after loading.
+int
+vdf_find_vdfd(vdisk_dev_t *vdisk, char *ftype)
+{
+	struct list_head *pos;
+	vdf_data_t *cand;
+	int rc;
+	int pass;
+
+	for (pass = 0; pass < 2; pass++) {
+		for (pos = vdfd_head.next; pos != &vdfd_head;
+		     pos = pos->next) {
+			cand = list_entry(pos, vdf_data_t, vdfd_list);
+			if (strcmp(cand->ftype, ftype) == 0) {
+				vdisk->vdfd = cand;
+				return (0);
+			}
+		}
+
+		// Second miss: loading the library did not help
+		if (pass > 0)
+			break;
+
+		// Didn't find vdfd for this extension, maybe we need
+		// to initialize it and try again.
+		rc = vdisk_init_format(ftype);
+		if (rc != 0) {
+			VIDDBG(0, "Can't initialize format %s\n", ftype);
+			return (rc);
+		}
+	}
+
+	VIDDBG(0, "Unknown format %s\n", ftype);
+	return (EINVAL);
+}
+
+// One-time library setup: empties the global format list and caches the
+// system page size.  Later calls do nothing.  The argument is unused.
+// Always returns 0.
+int
+vdisk_common_init(vdisk_dev_t *vdisk)
+{
+	if (!vdisk_initialized) {
+		INIT_LIST_HEAD(&vdfd_head);
+		vdisk_pagesz = getpagesize();
+		vdisk_initialized = 1;
+	}
+
+	return (0);
+}
diff -r 75c61490cc06 tools/vdisk/vdisk_tool.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/vdisk/vdisk_tool.c Thu Jun 21 13:05:31 2007 -0400
@@ -0,0 +1,338 @@
+// Copyright (c) 2003-2007, Virtual Iron Software, Inc.
+//
+// Portions have been modified by Virtual Iron Software, Inc.
+// (c) 2007. This file and the modifications can be redistributed and/or
+// modified under the terms and conditions of the GNU General Public
+// License, version 2.1 and not any later version of the GPL, as published
+// by the Free Software Foundation.
+
+#define _GNU_SOURCE // for strndup()
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdint.h>
+#include <getopt.h>
+
+#include "vdisk.h"
+
+extern int vdisk_init_format(char *);
+static char *supported_formats[] = {"vhd", NULL};
+
+// Initialize the library and load every format in supported_formats[].
+// Returns 0 on success or the first error encountered.
+int
+init_tool()
+{
+	char **fmt;
+	int rc;
+
+	rc = vdisk_common_init(NULL/*XXX: ?? */);
+	if (rc) {
+		VIDDBG(0, "Failed to initialize vdisk\n");
+		return (rc);
+	}
+
+	// The table is terminated by a NULL entry
+	for (fmt = supported_formats; *fmt != NULL; fmt++) {
+		rc = vdisk_init_format(*fmt);
+		if (rc) {
+			VIDDBG(0, "Failed to initialize %s format\n", *fmt);
+			return (rc);
+		}
+	}
+
+	return (0);
+}
+
+// Print the command line synopsis, the generic options and the list of
+// supported formats to stderr.  'prog' is the program name shown in the
+// synopsis.
+static void
+print_usage(char *prog)
+{
+ int i;
+
+ fprintf(stderr, "Usage: %s OPTIONS -# <format-specific options> "
+ "<filename>\n", prog);
+ fprintf(stderr,
+ " OPTIONS:\n"
+ " [-f <format>] [-C] [-H] [-M] "
+ "[-D <block> [-b <num_blocks>] [-o outfile]]\n"
+ " -C Create a vdisk\n"
+ " -H Read vdisk headers from file\n"
+ " -M Modify a vdisk\n"
+ " -D Dump a vhd\n"
+ " block first block to read (required)\n"
+ " num_blocks number of blocks to read. If not\n"
+ " specified, whole file will be
read\n"
+ " outfile output file. If not specified,\n"
+ " stdout is used\n"
+ " Supported formats: ");
+ // supported_formats[] is terminated by a NULL entry
+ for (i=0; ;i++) {
+ if (supported_formats[i] == NULL) {
+ fprintf(stderr, "\n");
+ break;
+ }
+ fprintf(stderr, "%s ", supported_formats[i]);
+ }
+}
+
+// vdisk_tool entry point.
+// The last argument is the vdisk file name.  Generic options are parsed
+// up to '-#'; everything after it is handed to the format's parse_args().
+// Requested operations run in this order: create, (open), print headers,
+// modify, dump.
+// Returns 0 on success or an error code; some failures of the dump
+// operation terminate the program with exit(1).
+int
+main(int argc, char *argv[])
+{
+	char filename[PATH_MAX];
+	char *outfile = NULL;
+	char format[16] = "vhd";
+	int operations = 0;
+	int c = 0; // getopt() returns int; a char could never equal -1
+		   // on platforms where char is unsigned
+	vdisk_dev_t vdisk;
+	int err;
+	void *optp = NULL; // Format-specific options
+	char *file_fmt;
+	int i;
+	int len;
+	size_t used;
+	int first_block = 0, num_blocks = -1;
+	struct program_props props;
+	uint8_t flags;
+
+	//init_tool();
+
+	if (argc < 2) {
+		print_usage(argv[0]);
+		return (EINVAL);
+	}
+
+	/*
+	 * Read the filename argument first -- we may need
+	 * it to determine format
+	 */
+	len = snprintf(filename, sizeof(filename), "%s", argv[argc-1]);
+	if ((len < 0) || ((size_t)len >= sizeof(filename))) {
+		VIDDBG(0, "File name is too long\n");
+		return (ENAMETOOLONG);
+	}
+	file_fmt = strrchr(filename, '.');
+
+	// See whether what we think is file's format is supported
+	if (file_fmt) {
+		file_fmt++; // Skip '.'
+		for (i=0; ;i++) {
+			if (supported_formats[i] == NULL) {
+				// Not a supported format, ignore suffix
+				file_fmt = NULL;
+				break;
+			}
+
+			if (!strcmp(file_fmt, supported_formats[i]))
+				break; // Found it
+		}
+	}
+
+	// The library reads fields such as use_aio before it sets them,
+	// so start from an all-zero device (this also clears vdfd).
+	memset(&vdisk, 0, sizeof(vdisk));
+
+	while (c != '#') {
+
+		c = getopt(argc, argv, "f:CHMD:b:o:#");
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 'f':
+			len = snprintf(format, sizeof(format), "%s", optarg);
+			if ((len < 0) || ((size_t)len >= sizeof(format))) {
+				VIDDBG(0, "Format name is too long\n");
+				return (EINVAL);
+			}
+
+			/*
+			 * If we either couldn't determine format from filename
+			 * argument or we thought we could but '-f' specifies
+			 * different format, we append appropriate suffix
+			 */
+			if (!file_fmt || strcmp(format, file_fmt)) {
+				used = strlen(filename);
+				len = snprintf(filename + used,
+					       sizeof(filename) - used,
+					       ".%s", format);
+				if ((len < 0) ||
+				    ((size_t)len >= sizeof(filename) - used)) {
+					VIDDBG(0, "File name is too long\n");
+					return (ENAMETOOLONG);
+				}
+				file_fmt = format;
+			}
+
+			break;
+		case 'C':
+			operations |= VDISK_OP_CREATE;
+			break;
+		case 'H':
+			/* File to read headers from */
+			operations |= VDISK_OP_HEADERS;
+			break;
+		case 'M':
+			/* File whose headers are to be modified */
+			operations |= VDISK_OP_MODIFY;
+			break;
+		case 'D':
+			first_block = atol(optarg);
+			if (first_block < 0) {
+				VIDDBG(0, "First block must be a "
+				       "non-negative number\n");
+				exit(1);
+			}
+			operations |= VDISK_OP_DUMP;
+			break;
+		case 'b':
+			num_blocks = atol(optarg);
+			if (num_blocks < 0) {
+				VIDDBG(0, "Number of blocks must be a "
+				       "non-negative number\n");
+				exit(1);
+			}
+			break;
+		case 'o':
+			// Don't confuse vdisk with output file
+			if (optarg == argv[argc-1]) {
+				print_usage(argv[0]);
+				exit(1);
+			}
+			free(outfile); // in case -o is given more than once
+			outfile = strndup(optarg, strlen(optarg));
+			if (outfile == NULL) {
+				VIDDBG(0, "Out of memory\n");
+				exit(1);
+			}
+			// Must not fall through into the '-#' handling
+			break;
+
+		case '#':
+
+			if (file_fmt) {
+				err = vdf_find_vdfd(&vdisk, file_fmt);
+				if (err) {
+					VIDDBG(0, "Fail to initialize "
+					       "format data for %s\n",
+					       format);
+					return (err);
+				}
+			} else {
+				VIDDBG(0, "Unspecified or unsupported format\n");
+				print_usage(argv[0]);
+				return (EINVAL);
+			}
+
+			if (vdisk.vdfd->parse_args(argc, operations,
+						   argv, &optp) != 0) {
+				print_usage(argv[0]);
+				return (EINVAL);
+			}
+
+			break;
+		default:
+			print_usage(argv[0]);
+			return (EINVAL);
+		}
+	}
+
+	/*
+	 * At least one operation type is needed and
+	 * filename needs to be specified
+	 */
+	if (!operations || !file_fmt) {
+		print_usage(argv[0]);
+		return (EINVAL);
+	}
+
+	// XXX: We probably should have initialized by now
+	if (vdisk.vdfd == NULL) {
+		err = vdf_find_vdfd(&vdisk, file_fmt);
+		if (err) {
+			VIDDBG(0, "Fail to initialize format data for %s\n",
+			       format);
+			return (err);
+		}
+	}
+
+
+	// First create file, if requested
+	if (operations & VDISK_OP_CREATE) {
+		err = vdisk.vdfd->create_vdisk(filename, optp);
+		if (err) {
+			VIDDBG(0, "Can't create file\n");
+			return (err);
+		}
+	}
+
+	props.alloc_func = NULL;
+	props.free_func = NULL;
+	props.out_target = VDISK_OUT_STDERR;
+
+	if (!(operations & VDISK_OP_CREATE) &&
+	    !(operations & VDISK_OP_MODIFY))
+		flags = VDISK_RO;
+	else
+		flags = 0;
+
+	err = vdisk_init(&vdisk, filename, &props, flags);
+	if (err) {
+		VIDDBG(0, "Fail to initialize from file %s\n",
+		       filename);
+		return (err);
+	}
+
+	if (operations & VDISK_OP_HEADERS) {
+		err = vdf_print_headers(&vdisk, filename);
+		if (err) {
+			VIDDBG(0, "Can't read headers\n");
+			return (err);
+		}
+	}
+
+	if (operations & VDISK_OP_MODIFY) {
+		err = vdisk.vdfd->modify_vdisk(&vdisk, optp);
+		if (err) {
+			VIDDBG(0, "Can't modify headers\n");
+			return (err);
+		}
+	}
+
+	if (operations & VDISK_OP_DUMP) {
+		uint8_t *buf, *p;
+		int bytes;
+		int chunk_log = 21; // 2MB
+		int nblocks;
+		int fd;
+		int done;
+		ssize_t written;
+
+		// Open output file (use stdout if not specified)
+		if (outfile != NULL) {
+			fd = open(outfile, O_RDWR|O_CREAT,
+				  S_IRUSR|S_IWUSR);
+			if (fd == -1) {
+				VIDDBG(0, "Can't open %s: %s\n",
+				       outfile, strerror(errno));
+				exit(1);
+			}
+		} else
+			fd = 1; // stdout
+
+		// Allocate 512b-aligned read buffer
+		p = malloc((1<<chunk_log) + 512);
+		while (p == NULL) { // Try smaller chunks if we fail
+			// A chunk below one 512-byte block would make
+			// nblocks zero and the copy loop would never end
+			if (chunk_log <= 9) {
+				VIDDBG(0, "Can't allocate buffer\n");
+				exit(1);
+			}
+			chunk_log--;
+			p = malloc((1<<chunk_log) + 512);
+		}
+		buf = p;
+		while ((addr_t)buf & 511) buf++;
+
+		// nblocks per transfer
+		nblocks = (1<<chunk_log) >> 9;
+
+		// If number of blocks to read is not specified,
+		// read whole vdisk
+		if (num_blocks < 0)
+			num_blocks = vdisk.sz >> 9;
+
+		for (i=0; i<num_blocks; i+=nblocks) {
+
+			// This could happen on last iteration
+			if ((i+nblocks) > num_blocks)
+				nblocks = num_blocks - i;
+
+			bytes = vdisk_rw(&vdisk, first_block+i, buf, nblocks,
+					 VDISK_READ, NULL);
+			if (bytes != (nblocks << 9)) {
+				VIDDBG(0, "vdisk_rw() returned %d\n", bytes);
+				exit(1);
+			}
+
+			// write() may transfer less than requested (pipes,
+			// signals), so keep going until the chunk is out
+			done = 0;
+			while (done < (nblocks << 9)) {
+				written = write(fd, buf + done,
+						(nblocks << 9) - done);
+				if (written == -1) {
+					if (errno == EINTR)
+						continue;
+					VIDDBG(0, "write: %s\n",
+					       strerror(errno));
+					exit(1);
+				}
+				done += written;
+			}
+		}
+
+		free(p);
+
+		// Errors of buffered writes may only show up on close()
+		if ((outfile != NULL) && (close(fd) == -1)) {
+			VIDDBG(0, "close: %s\n", strerror(errno));
+			exit(1);
+		}
+	}
+
+	free(outfile);
+	return 0;
+}
diff -r 75c61490cc06 tools/vdisk/vdisk_utils.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/vdisk/vdisk_utils.c Thu Jun 21 13:05:31 2007 -0400
@@ -0,0 +1,435 @@
+// Copyright (c) 2003-2007, Virtual Iron Software, Inc.
+//
+// Portions have been modified by Virtual Iron Software, Inc.
+// (c) 2007. This file and the modifications can be redistributed and/or
+// modified under the terms and conditions of the GNU General Public
+// License, version 2.1 and not any later version of the GPL, as published
+// by the Free Software Foundation.
+
+#define _GNU_SOURCE // for O_DIRECT
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+#include <stdarg.h>
+#include <libaio.h>
+#include <time.h>
+#include <limits.h>
+
+#include "list.h"
+#include "vdisk.h"
+#include "vdisk_utils.h"
+
+#define VDISK_MAX_ERRORS (100)
+#define VDISK_ERR_STRING_LEN (512)
+
+static void *(*vdisk_alloc_func)(size_t sz) = malloc;
+static void (*vdisk_free_func)(void *ptr) = free;
+
+int vdisk_dbg_level = 1;
+int vdisk_out_target = VDISK_OUT_STDERR; // where to print messages
+
+// Don't want to keep this on stack
+static char argstring[VDISK_ERR_STRING_LEN];
+
+// Data structure to help with message throttling
+// State used by vdisk_log_error() to rate-limit level-0 messages.
+struct vdisk_log_mgt {
+ // Level-0 messages counted in the current window; once it reaches
+ // VDISK_MAX_ERRORS further level-0 messages are suppressed
+ int vdisk_err_cnt;
+ // Current throttling interval in seconds; 0 while throttling is off
+ int interval;
+ // Set when a level-0 message arrives before next_check
+ int restart;
+ // Time of the most recent level-0 message
+ time_t last_error;
+ // Time at or after which the throttling state is re-evaluated
+ time_t next_check;
+};
+// Initial state: throttling off; next_check of LONG_MAX means the
+// "now >= next_check" test in vdisk_log_error() does not fire until
+// throttling has been started.
+static struct vdisk_log_mgt vdisk_log = {
+ .vdisk_err_cnt = 0,
+ .restart = 0,
+ .interval = 0,
+ .last_error = (time_t)0,
+ .next_check = (time_t)LONG_MAX,
+};
+
+// Emit an already-formatted message on the sink selected by
+// vdisk_out_target: syslog (LOG_DEBUG) when it is VDISK_OUT_SYSLOG,
+// stderr otherwise. When file is non-NULL the message is prefixed
+// with "file:line: ".
+static void
+vdisk_print_msg(char *file, int line, char *msg)
+{
+    int to_syslog = (vdisk_out_target == VDISK_OUT_SYSLOG);
+
+    // No source location supplied: print the bare message
+    if (file == NULL) {
+        if (to_syslog)
+            syslog(LOG_DEBUG, "%s", msg);
+        else
+            fprintf(stderr, "%s", msg);
+        return;
+    }
+
+    if (to_syslog)
+        syslog(LOG_DEBUG, "%s:%d: %s", file, line, msg);
+    else
+        fprintf(stderr, "%s:%d: %s", file, line, msg);
+}
+
+// Format and print a printf-style message if its level passes the
+// vdisk_dbg_level filter.
+//
+// level: messages with level > vdisk_dbg_level are dropped; level 0
+//        messages are additionally rate-limited through vdisk_log
+// file, line: source location handed on to vdisk_print_msg()
+// fmt, ...: printf-style format and arguments
+//
+// The text is formatted into the file-scope argstring buffer
+// (VDISK_ERR_STRING_LEN bytes, longer output is truncated by
+// vsnprintf), so callers running concurrently would share that buffer.
+void
+vdisk_log_error(int level, char *file, int line, char *fmt, ...)
+{
+ int print_msg;
+
+ if (level > vdisk_dbg_level)
+ return;
+
+ print_msg = 0;
+
+ // Decide whether to print the message.
+ // Only manage message reporting for level 0, which is
+ // usually reserved for errors. Other messages will be
+ // printed unconditionally.
+ if (level == 0) {
+ time_t now;
+
+ if (time(&now) == (time_t)-1) {
+ // This should never happen ;-()
+ vdisk_print_msg(NULL, 0, "vdisk: Can't get time, "
+ "error reporting stopped\n");
+ return; // XXX: Or continue?
+ }
+
+ // next_check starts at LONG_MAX, so this branch is only taken
+ // once throttling has been started further below
+ if (now >= vdisk_log.next_check) {
+
+ // Quiet for longer than the current interval: throttling off
+ if (now - vdisk_log.last_error >
+ (time_t)vdisk_log.interval) {
+ // reset message throttling
+ vdisk_log.restart = 0;
+ vdisk_log.interval = 0;
+ vdisk_log.vdisk_err_cnt = 0;
+ vdisk_log.next_check = LONG_MAX;
+ vdisk_print_msg(NULL, 0, "vdisk: Restoring "
+ "error reporting\n");
+ }
+
+ if (vdisk_log.restart) {
+ // Double the interval, max at 128 seconds
+ vdisk_log.interval = (vdisk_log.interval > 64)
?
+ vdisk_log.interval :
+ (vdisk_log.interval * 2);
+ vdisk_log.next_check +=
+ (time_t)vdisk_log.interval;
+ vdisk_log.restart = 0;
+ }
+
+ // A new window starts: allow up to VDISK_MAX_ERRORS again
+ vdisk_log.vdisk_err_cnt = 0;
+
+ } else {
+ // Message received during throttling interval.
+ // We will need to double the interval later
+ vdisk_log.restart = 1;
+ }
+
+ if (vdisk_log.vdisk_err_cnt < VDISK_MAX_ERRORS) {
+ vdisk_log.vdisk_err_cnt++;
+ print_msg = 1;
+ }
+
+ // The counter is bumped past the limit here so this block runs
+ // only once per window
+ if (vdisk_log.vdisk_err_cnt == VDISK_MAX_ERRORS) {
+ vdisk_log.vdisk_err_cnt++;
+ if (vdisk_log.interval == 0) {
+ // Start interval management
+ vdisk_print_msg(NULL, 0, "vdisk: Too many "
+ "errors, slowing down rate "
+ "of reporting\n");
+ vdisk_log.interval = 1;
+ vdisk_log.next_check = now +
+ (time_t)vdisk_log.interval;
+ }
+ }
+
+ vdisk_log.last_error = now;
+
+ } else
+ print_msg = 1;
+
+
+ if (print_msg) {
+ va_list args;
+
+ // Roll arguments into a string
+ va_start(args, fmt);
+ (void)vsnprintf(argstring, VDISK_ERR_STRING_LEN,
+ fmt, args);
+ va_end(args);
+
+ vdisk_print_msg(file, line, argstring);
+ }
+}
+
+// Install replacement allocation hooks used by vdisk_malloc() and
+// vdisk_free(). A NULL argument leaves the corresponding hook as is.
+void
+vdisk_alloc_init(void *alloc_func, void *free_func)
+{
+    if (alloc_func)
+        vdisk_alloc_func = alloc_func;
+    if (free_func)
+        vdisk_free_func = free_func;
+}
+
+// Allocate sz bytes through the current allocation hook and zero them.
+// Returns NULL when the hook fails.
+void *
+vdisk_malloc(size_t sz)
+{
+    void *mem = vdisk_alloc_func(sz);
+
+    if (mem == NULL)
+        return (NULL);
+
+    memset(mem, 0, sz);
+    return (mem);
+}
+
+// Release memory through the current free hook.
+// ptr is passed by value, so the caller's own pointer variable is not
+// cleared by this call.
+void
+vdisk_free(void *ptr)
+{
+    vdisk_free_func(ptr);
+}
+
+// Flush a descriptor to disk, ask the kernel to drop its cached pages
+// and close it. fsync/posix_fadvise failures are logged only.
+// Returns the result of close().
+int
+vdisk_close(int fp)
+{
+    int err;
+
+    err = fsync(fp);
+    if (err)
+        VIDDBG(0, "fsync(): %s\n", strerror(errno));
+
+    // Invalidate all pages from page cache.
+    // posix_fadvise() returns the error number itself and does not
+    // set errno, so report the returned value.
+    err = posix_fadvise(fp, 0, 0, POSIX_FADV_DONTNEED);
+    if (err)
+        VIDDBG(0, "posix_fadvise(): %s\n", strerror(err));
+
+    err = close(fp);
+    return (err);
+}
+
+// Store the size of the file behind descriptor f in *sz, leaving the
+// file position where it was on entry.
+// Returns 0 on success or the errno value of the failing lseek().
+size_t
+vdisk_size(int f, size_t *sz)
+{
+    off_t cur;
+    off_t end;
+    int err;
+
+    /*
+     * XXX: Obviously, we should use fstat(). Unfortunately, I couldn't
+     * figure out how to make a dynamic library that calls fstat.
+     * See glibc FAQ for description of *problem* (why couldn't they
+     * provide a solution as well?)
+     */
+
+    // Remember current position
+    cur = lseek(f, 0, SEEK_CUR);
+    if (cur == (off_t)-1) {
+        err = errno;
+        VIDDBG(0, "lseek: Can't seek to current: %s\n", strerror(err));
+        return (err);
+    }
+
+    end = lseek(f, 0, SEEK_END);
+    *sz = (size_t)end;
+    if (end == (off_t)-1) {
+        err = errno;
+        VIDDBG(0, "lseek: Can't seek to end: %s\n", strerror(err));
+        return (err);
+    }
+
+    // Restore current position (the saved offset, not offset 0)
+    if (lseek(f, cur, SEEK_SET) == (off_t)-1) {
+        err = errno;
+        VIDDBG(0, "lseek: Can't seek to current: %s\n", strerror(err));
+        return (err);
+    }
+
+    return (0);
+}
+
+// Prepare an asynchronous read or write for one request and append it
+// to vdisk->aio_submit. Nothing is submitted to the kernel here.
+//
+// The hash slot selected by VDISK_HASH_IDX(block) must already carry
+// key == block (asserted). Its iocb and pending_aio record are filled
+// in from the arguments.
+// Returns size.
+size_t
+vdisk_asyncio(vdisk_dev_t *vdisk, uint64_t block,
+              int fp, char *buf,
+              size_t size, off_t off,
+              void *arg, void *aiocb,
+              int op)
+{
+    int slot = VDISK_HASH_IDX(block);
+    struct pending_aio *pending;
+    struct iocb *cb;
+
+    ASSERT(vdisk->aio_cnt < VDISK_HASH_SZ);
+    ASSERT(vdisk->hash[slot].key == block);
+
+    cb = &vdisk->hash[slot].io;
+    pending = &vdisk->hash[slot].pio;
+
+    // Describe the request in the slot's pending_aio record
+    pending->fd = fp;
+    pending->op = op;
+    pending->off = off;
+    pending->block = block;
+    pending->num_blocks = size>>9;
+    pending->arg = arg;
+    pending->aiocb = aiocb;
+
+    if (op == VDISK_WRITE) {
+        io_prep_pwrite(cb, fp, buf, size, off);
+    } else {
+        io_prep_pread(cb, fp, buf, size, off);
+    }
+
+    // Must follow io_prep_*() so the data pointer is not overwritten
+    cb->data = pending;
+
+    VIDDBG(50, "Using hash entry %d (block %d)\n",
+           VDISK_HASH_IDX(pending->block), pending->block);
+
+    vdisk->aio_submit[vdisk->aio_cnt++] = cb;
+
+    return (size);
+}
+
+// Write dirty data of fp to disk, then ask the kernel to drop the
+// cached pages of the byte range [start, start+len).
+// Failures are logged and otherwise ignored.
+static void
+vdisk_flush_range(int fp, off_t start, off_t len)
+{
+    int res;
+
+    res = fsync(fp);
+    if (res)
+        VIDDBG(0, "fsync: %s\n", strerror(errno));
+
+    // posix_fadvise() reports failure through its return value and
+    // does not set errno
+    res = posix_fadvise(fp, start, len, POSIX_FADV_DONTNEED);
+    if (res)
+        VIDDBG(0, "posix_fadvise: %s\n", strerror(res));
+}
+
+// Track the range touched by synchronous writes on fp and flush it
+// from the page cache in batches.
+//
+// syncio remembers one pending run (io_start/io_len, valid while
+// is_set). A write that starts inside or right at the end of the run
+// extends it; the run is flushed once it grows past WRITE_RUN. A write
+// elsewhere flushes the old run and starts a new one. A single write
+// larger than WRITE_RUN is flushed immediately.
+static void
+vdisk_manage_pcache(int fp, vdisk_syncio_t *syncio, off_t start, off_t len)
+{
+#define WRITE_RUN (1<<22) //4MB
+    DO_STATS(time_t now);
+
+    DO_STATS(++(syncio->total_writes));
+
+    if (syncio->is_set) {
+        if (start >= syncio->io_start &&
+            start <= syncio->io_start + syncio->io_len) {
+            // Contiguous or overlapping: cut the run back to
+            // start, then append this write
+            syncio->io_len -= (syncio->io_start +
+                               syncio->io_len) - start;
+            syncio->io_len += len;
+            DO_STATS(++(syncio->contig_writes));
+            if (syncio->io_len > WRITE_RUN) {
+                DO_STATS(++(syncio->flush_size_force));
+
+                syncio->is_set = 0;
+                vdisk_flush_range(fp, syncio->io_start,
+                                  syncio->io_len);
+            }
+            len = 0; // NOTE:len is consumed into previous
+        } else {
+#if VDISK_SYNCIO_STATS
+            if (syncio->io_len < (1<<20))
+                ++(syncio->flush_size_sub1MB);
+            else if (syncio->io_len < (1<<21))
+                ++(syncio->flush_size_sub2MB);
+            else if (syncio->io_len < (1<<22))
+                ++(syncio->flush_size_sub4MB);
+            else if (syncio->io_len < (1<<23))
+                ++(syncio->flush_size_sub8MB);
+            else
+                ++(syncio->flush_size_ovr8MB);
+#endif /* VDISK_SYNCIO_STATS */
+            // Unrelated write: retire the old run first
+            syncio->is_set = 0;
+            vdisk_flush_range(fp, syncio->io_start, syncio->io_len);
+        }
+    }
+    if (len > 0) {
+        if (len <= WRITE_RUN) {
+            // Start a new pending run with this write
+            syncio->is_set = 1;
+            syncio->io_start = start;
+            syncio->io_len = len;
+        } else {
+            DO_STATS(++(syncio->flush_size_force));
+            vdisk_flush_range(fp, start, len);
+        }
+    }
+#if VDISK_SYNCIO_STATS
+    now = time(NULL);
+    if (now >= syncio->last_dbg_print + 60) {
+        VIDDBG(0, ":WRITE_PERF: [%lu] tWrts %lu | conWrts %lu | s1M %lu"
+               " | s2M %lu | s4M %lu | s8M %lu | o8M %lu | f %lu\n",
+               (unsigned long)(now - syncio->last_dbg_print),
+               syncio->total_writes, syncio->contig_writes,
+               syncio->flush_size_sub1MB, syncio->flush_size_sub2MB,
+               syncio->flush_size_sub4MB, syncio->flush_size_sub8MB,
+               syncio->flush_size_ovr8MB, syncio->flush_size_force);
+        syncio->last_dbg_print = now;
+    }
+#endif /* VDISK_SYNCIO_STATS */
+}
+
+// Synchronously read or write size bytes at byte offset off, then keep
+// the transferred range out of the page cache.
+//
+// fp: file descriptor
+// buf, size, off: must all be 512-byte aligned (asserted)
+// op: VDISK_WRITE writes; any other value reads
+// syncio: optional write-run tracker; when NULL a write is fsync'ed
+//         and dropped from the cache immediately
+//
+// Returns the value of read()/write() converted to size_t (so -1
+// becomes (size_t)-1), or (size_t)-1 when the seek fails.
+size_t
+vdisk_syncio(int fp, char *buf, size_t size, off_t off, int op,
+             vdisk_syncio_t *syncio)
+{
+    size_t bytes;
+    off_t res;
+    int err;
+    off_t io_start;
+    off_t io_len;
+
+    ASSERT(!(size & 511));
+    ASSERT(!(off & 511));
+    ASSERT(!((addr_t)buf & 511));
+
+    res = vdisk_seek(fp, off, SEEK_SET);
+    if (res != off) {
+        VIDDBG(0, "lseek couldn't set offset to 0x%" PRIx64 ": %s\n",
+               (uint64_t)off, strerror(errno));
+        return (-1);
+    }
+
+    if (op == VDISK_WRITE) {
+        bytes = write(fp, buf, size);
+    } else
+        bytes = read(fp, buf, size);
+
+    if (bytes != size) {
+        VIDDBG(0, "%s %zd bytes instead of %zd: %s\n",
+               (op == VDISK_WRITE)?"Wrote":"Read",
+               bytes, size, strerror(errno));
+    }
+
+    // Page-align the start and pad the length by one page so the
+    // advice covers every page the transfer touched
+    io_start = (off & (~((off_t)vdisk_pagesz-1)));
+    io_len = (size + vdisk_pagesz);
+
+    // posix_fadvise() returns the error number and leaves errno alone,
+    // so its result is reported directly
+    if (op == VDISK_READ) {
+        err = posix_fadvise(fp, io_start, io_len, POSIX_FADV_DONTNEED);
+        if (err)
+            VIDDBG(0, "posix_fadvise: %s\n", strerror(err));
+    } else if (syncio) {
+        vdisk_manage_pcache(fp, syncio, io_start, io_len);
+    } else {
+        err = fsync(fp);
+        if (err)
+            VIDDBG(0, "fsync: %s\n", strerror(errno));
+        err = posix_fadvise(fp, io_start, io_len, POSIX_FADV_DONTNEED);
+        if (err)
+            VIDDBG(0, "posix_fadvise: %s\n", strerror(err));
+    }
+
+    return (bytes);
+}
diff -r 75c61490cc06 tools/vdisk/vdisk_utils.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/vdisk/vdisk_utils.h Thu Jun 21 13:05:31 2007 -0400
@@ -0,0 +1,36 @@
+// Copyright (c) 2003-2007, Virtual Iron Software, Inc.
+//
+// Portions have been modified by Virtual Iron Software, Inc.
+// (c) 2007. This file and the modifications can be redistributed and/or
+// modified under the terms and conditions of the GNU General Public
+// License, version 2.1 and not any later version of the GPL, as published
+// by the Free Software Foundation.
+
+#ifndef __VDISK_UTILS
+#define __VDISK_UTILS
+
+
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include "vdisk.h"
+
+
+
+// Thin wrappers so the underlying open/seek calls can be swapped in one place
+#define vdisk_open(cp, fl, mode) open((cp), (fl), (mode))
+#define vdisk_seek(fp, off, whence) lseek64((fp), (off), (whence))
+
+// Allocate zero-filled memory through the configured allocator hook
+extern void *vdisk_malloc(size_t sz);
+// Release memory through the configured free hook
+extern void vdisk_free(void *ptr);
+// fsync, drop cached pages, then close; returns the close() result
+extern int vdisk_close(int fp);
+// Store the file size in *sz; returns 0 or an errno value
+extern size_t vdisk_size(int f, size_t *sz);
+// Synchronous 512-byte-aligned read/write at offset off; returns the
+// number of bytes transferred
+extern size_t vdisk_syncio(int fp, char *buf, size_t sz, loff_t off,
+ int op, vdisk_syncio_t *syncio);
+// Prepare (but do not submit) an asynchronous read/write; returns the
+// requested size
+extern size_t vdisk_asyncio(vdisk_dev_t *, uint64_t, int, char *, size_t,
+ loff_t, void *, void *, int);
+
+
+#endif /* __VDISK_UTILS */
diff -r 75c61490cc06 tools/vdisk/vhd.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/vdisk/vhd.c Thu Jun 21 13:05:31 2007 -0400
@@ -0,0 +1,925 @@
+// Copyright (c) 2003-2007, Virtual Iron Software, Inc.
+//
+// Portions have been modified by Virtual Iron Software, Inc.
+// (c) 2007. This file and the modifications can be redistributed and/or
+// modified under the terms and conditions of the GNU General Public
+// License, version 2.1 and not any later version of the GPL, as published
+// by the Free Software Foundation.
+
+#define _GNU_SOURCE // for O_DIRECT
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "list.h"
+#include "vdisk.h"
+#include "vdisk_utils.h"
+#include "vhd.h"
+#include "vhd_footer.h"
+
+char __vhd_zeroes[VHD_FTR_SZ+512];
+char *vhd_zeroes;
+
+#define BLOCK_MASK (~(((addr_t)1<<9)-1))
+
+
+// Placeholder consistency check for a VHD file's metadata.
+// Currently accepts every file and returns 0.
+int
+vhd_verify_metadata(vd_file_t *vf)
+{
+    // XXX: Something more robust, maybe?
+    (void)vf;
+    return (0);
+}
+
+// Read the 512-byte VHD footer from the end of the file into a
+// 512-byte-aligned buffer (vhd->ftr, backed by vhd->ftr_mem), which is
+// allocated on first use.
+//
+// Returns 0 on success, ENOMEM if the buffer cannot be allocated and
+// -1 on any I/O failure. On failure the footer buffer is released and
+// both vhd->ftr_mem and vhd->ftr are reset to NULL, so callers can run
+// their own cleanup without freeing it a second time.
+int
+vhd_read_footer(vd_file_t *vf)
+{
+    off_t ftr_off, res;
+    vhd_file_t *vhd = vf->vdf;
+    size_t bytes;
+
+    if (vhd->ftr_mem == NULL) {
+        vhd->ftr_mem = vdisk_malloc(VHD_FTR_SZ+512);
+        if (vhd->ftr_mem == NULL) {
+            VIDDBG(0, "Couldn't allocate footer\n");
+            return (ENOMEM);
+        }
+        // Advance to the first 512-byte boundary inside the buffer
+        vhd->ftr = vhd->ftr_mem;
+        while ((addr_t)vhd->ftr & 511) vhd->ftr++;
+    }
+
+    /* Find file size (seek to the end) */
+    res = vdisk_seek(vf->fd, 0, SEEK_END);
+    if (res == -1) {
+        VIDDBG(0, "lseek couldn't set offset to end of file\n");
+        goto fail;
+    }
+
+    // A file shorter than the footer would give a negative offset
+    if (res < VHD_FTR_SZ) {
+        VIDDBG(0, "File is too small to contain a VHD footer\n");
+        goto fail;
+    }
+
+    ftr_off = res - VHD_FTR_SZ;
+
+    res = vdisk_seek(vf->fd, ftr_off, SEEK_SET);
+    if (res != ftr_off) {
+        VIDDBG(0, "lseek couldn't set offset to 0x%" PRIx64 "\n",
+               ftr_off);
+        goto fail;
+    }
+
+    if ((bytes = vdisk_syncio(vf->fd, vhd->ftr, 512, ftr_off,
+                              VDISK_READ, NULL)) != 512) {
+        VIDDBG(0, "vdisk read from offset 0x%" PRIx64 " failed "
+               "(read %zd insted of 512) %d\n",
+               ftr_off, bytes, errno);
+        goto fail;
+    }
+
+    return 0;
+
+fail:
+    // Release the buffer and forget it so nobody frees it again
+    vdisk_free(vhd->ftr_mem);
+    vhd->ftr_mem = NULL;
+    vhd->ftr = NULL;
+    return (-1);
+}
+
+// Read the dynamic disk header and the Block Allocation Table (BAT)
+// of a dynamic/differencing VHD into 512-byte-aligned buffers
+// (vhd->dhdr / vhd->bat, backed by vhd->dhdr_mem / vhd->bat_mem).
+//
+// Returns 0 on success, ENOMEM on allocation failure, -1 on I/O
+// failure. Nothing is freed here on failure; the buffers stay recorded
+// in vhd for the caller to release.
+int
+vhd_read_dynhdr(vd_file_t *vf)
+{
+    off_t res;
+    vhd_file_t *vhd = vf->vdf;
+    size_t bat_sz;
+    int err = 0;
+
+
+    vhd->dhdr_mem = vdisk_malloc(VHD_DHDR_SZ+512);
+    if (vhd->dhdr_mem == NULL) {
+        VIDDBG(0, "Couldn't allocate dynamic header\n");
+        err = ENOMEM;
+        goto fail;
+    }
+
+    vhd->dhdr = vhd->dhdr_mem;
+    while ((addr_t)vhd->dhdr & 511) vhd->dhdr++;
+
+    // Skip copy of the footer
+    res = vdisk_seek(vf->fd, VHD_FTR_SZ, SEEK_SET);
+    if (res != VHD_FTR_SZ) {
+        VIDDBG(0, "Couldn't skip copy of the footer\n");
+        err = -1;
+        goto fail;
+    }
+
+    if (vdisk_syncio(vf->fd, vhd->dhdr, VHD_DHDR_SZ, VHD_FTR_SZ,
+                     VDISK_READ, NULL) != VHD_DHDR_SZ) {
+        VIDDBG(0, "Failed to read dynamic header\n");
+        err = -1;
+        goto fail;
+    }
+
+    // Read BAT (in 512B units)
+    // XXX: May need to only keep a part of BAT due to memory size concerns
+    // Each BAT entry is 4 bytes; widen before shifting so the byte
+    // count is computed in size_t
+    bat_sz = (size_t)vhd_get_dhdr_tbl_entries(vhd->dhdr) << 2;
+    if (bat_sz & 511)
+        bat_sz += (512-(bat_sz & 511));
+
+    vhd->bat_mem = vdisk_malloc(bat_sz+512);
+    if (vhd->bat_mem == NULL) {
+        VIDDBG(0, "Couldn't allocate BAT\n");
+        err = ENOMEM;
+        goto fail;
+    }
+    vhd->bat = vhd->bat_mem;
+    while ((addr_t)vhd->bat & 511) vhd->bat++;
+
+    if (vdisk_syncio(vf->fd, (char *)vhd->bat, bat_sz,
+                     VHD_DHDR_SZ+VHD_FTR_SZ, VDISK_READ, NULL) != bat_sz) {
+        VIDDBG(0, "Failed to read BAT\n");
+        err = -1;
+        goto fail;
+    }
+
+    return (0);
+
+fail:
+    return (err);
+}
+
+// Allocate the VHD-specific state for vf (vf->vdf) and fill it in from
+// the file: footer, and for non-fixed disks the dynamic header, BAT,
+// sector-map scratch buffer and sector-map cache. Also records the
+// disk size and geometry in vdisk.
+//
+// Returns 0 on success or a non-zero error code. On failure every
+// buffer allocated here is released and vf->vdf is reset to NULL.
+int
+vhd_read_metadata(vdisk_dev_t *vdisk, vd_file_t *vf)
+{
+    int err;
+    int type;
+    int i;
+    vhd_file_t *vhd = NULL;
+    int secs_per_block;
+    uint32_t geom;
+
+    vf->vdf = (vhd_file_t *)vdisk_malloc(sizeof(vhd_file_t));
+    if (vf->vdf == NULL) {
+        VIDDBG(0, "Couldn't allocate format-specific data\n");
+        err = ENOMEM;
+        goto fail;
+    }
+
+    vhd = vf->vdf;
+    memset(vhd, 0, sizeof(vhd_file_t));
+
+    err = vhd_read_footer(vf);
+    if (err) {
+        VIDDBG(0, "Couldn't read footer\n");
+        // vhd_read_footer() has already released the footer buffer
+        // (or never allocated it); forget it so the cleanup below
+        // does not free it again
+        vhd->ftr_mem = NULL;
+        vhd->ftr = NULL;
+        goto fail;
+    }
+
+    vdisk->sz = vhd_get_ftr_orig_sz(vhd->ftr);
+
+    type = vhd_get_ftr_type(vhd->ftr);
+    if ( (type != VHD_TYPE_FIXED) &&
+         (type != VHD_TYPE_DYNAMIC) &&
+         (type != VHD_TYPE_DIFF)){
+        // Return error for VHD_TYPE_NONE as well.
+        VIDDBG(0, "Unsupported VHD file type (%d)\n", type);
+        err = EIO; // XXX: Something else?
+        goto fail;
+    }
+
+    if (type != VHD_TYPE_FIXED) {
+        size_t sz;
+
+        // We should have a dynamic header
+        err = vhd_read_dynhdr(vf);
+        if (err) {
+            VIDDBG(0, "Couldn't read dynamic header\n");
+            goto fail;
+        }
+
+        // No fls() in userland, so we do log2 ourselves
+        vhd->sec_per_block_log = 0;
+        secs_per_block = vhd_get_dhdr_blksz(vhd->dhdr) >> 9;
+        while (secs_per_block >>= 1)
+            vhd->sec_per_block_log++;
+
+        if (type == VHD_TYPE_DYNAMIC) {
+            // How many sectors are mapped sequentially
+            vf->batch_sz = (1<<vhd->sec_per_block_log);
+        } else {
+            // XXX: Need to think about this.
+            vf->batch_sz = 1;
+        }
+
+        // bytes for sectormap is ((sectors per block) / 8)
+        vhd->sectormap_sz = (vhd_get_dhdr_blksz(vhd->dhdr) >> 9) >> 3;
+
+        // Align on 512-byte boundary
+        if ((vhd->sectormap_sz == 0) || (vhd->sectormap_sz & 511))
+            vhd->sectormap_sz += 512 - (vhd->sectormap_sz & 511);
+
+        // First new block will be allocated where the footer
+        // currently is, which is at the end of the file
+        err = vdisk_size(vf->fd, &sz);
+        if (err) {
+            VIDDBG(0, "Couldn't get file size\n");
+            goto fail;
+        }
+        vhd->next_block_off = (sz-VHD_FTR_SZ) >> 9;
+
+        // Allocate sectormap buffer (one 512-byte chunk plus slack
+        // for alignment)
+        vhd->sec_mem = vdisk_malloc(512*2);
+        if (vhd->sec_mem == NULL) {
+            VIDDBG(0, "Can't allocate sectormap\n");
+            err = ENOMEM;
+            goto fail;
+        }
+        vhd->secmap_chunk = vhd->sec_mem;
+        while ((addr_t)vhd->secmap_chunk & 511) vhd->secmap_chunk++;
+
+        // Allocate sectormap cache
+        for (i=0;i<VHD_CACHE_SZ;i++) {
+            vhd->cache[i].sec_mem = vdisk_malloc(512*2);
+            if (vhd->cache[i].sec_mem == NULL) {
+                VIDDBG(0, "Can't allocate sectormap\n");
+                err = ENOMEM;
+                goto fail;
+            }
+            vhd->cache[i].secmap_chunk = vhd->cache[i].sec_mem;
+            while ((addr_t)vhd->cache[i].secmap_chunk & 511)
+                vhd->cache[i].secmap_chunk++;
+
+            // Point to sector 0 (or any other sector),
+            // but make the map empty
+            vhd->cache[i].first_sector = 0; //VHD_INVALID_SECTOR;
+            memset(vhd->cache[i].secmap_chunk, 0, 512);
+        }
+
+        // Chain the cache entries into a doubly linked list,
+        // head = cache[0], tail = last entry
+        if (VHD_CACHE_SZ > 0) {
+            vhd->cache_head = &vhd->cache[0];
+            vhd->cache[0].prev = NULL;
+            for (i=1;i<VHD_CACHE_SZ;i++) {
+                vhd->cache[i-1].next = &vhd->cache[i];
+                vhd->cache[i].prev = &vhd->cache[i-1];
+            }
+            vhd->cache_tail = &vhd->cache[VHD_CACHE_SZ-1];
+            vhd->cache[VHD_CACHE_SZ-1].next = NULL;
+        } //else
+          //vhd->cache_head == NULL;
+    } else
+        vf->batch_sz = (1<<30); // (signed) infinity
+
+    vf->flags = 0;
+
+    err = vhd_verify_metadata(vf);
+    if (err) {
+        VIDDBG(0, "File appears to be corrupted\n");
+
+        // XXX: It may be salvageable
+        // All buffers are released once, by the common cleanup
+        err = EIO;
+        goto fail;
+    }
+
+    // We are assuming here that all files of the
+    // vdisk have the same geometry.
+    geom = vhd_get_ftr_geom(vhd->ftr);
+    vdisk->geom.cyls = (geom >> 16) & 0xffff;
+    vdisk->geom.heads = (geom >> 8) & 0xff;
+    vdisk->geom.secs = geom & 0xff;
+
+    return (0);
+
+fail:
+    if (vhd) {
+        if (vhd->ftr_mem)
+            vdisk_free(vhd->ftr_mem);
+        if (vhd->dhdr_mem)
+            vdisk_free(vhd->dhdr_mem);
+        if (vhd->bat_mem)
+            vdisk_free(vhd->bat_mem);
+        if (vhd->sec_mem)
+            vdisk_free(vhd->sec_mem);
+        // Entries never reached by the allocation loop are still NULL
+        // because vhd was zeroed above
+        for (i=0;i<VHD_CACHE_SZ;i++) {
+            if (vhd->cache[i].sec_mem)
+                vdisk_free(vhd->cache[i].sec_mem);
+        }
+        vdisk_free(vhd);
+        // Do not leave a dangling pointer behind in vf
+        vf->vdf = NULL;
+    }
+    return (err);
+}
+
+// Append a new data block (sector bitmap + data) to a dynamic VHD and
+// map it to BAT entry blockno.
+//
+// The footer is first rewritten after the new block, the old footer
+// location is zeroed, and the 512-byte BAT chunk containing the entry
+// is written back. Returns 0 on success, EIO if any write fails; on
+// failure neither the in-memory BAT entry nor next_block_off is left
+// pointing at the new block.
+int
+vhd_alloc_block(vd_file_t *vf, uint32_t blockno)
+{
+    size_t bytes;
+    off_t bat_off;
+    off_t old_ftr_off;
+    char *ptr;
+    vhd_file_t *vhd = vf->vdf;
+    size_t blocksz;
+
+
+    ASSERT(__arch__swab32(vhd->bat[blockno]) == VHD_BAT_INVALID_ENTRY);
+    ASSERT((vhd_get_dhdr_blksz(vhd->dhdr) & 511) == 0);
+    ASSERT((vhd->sectormap_sz & 511) == 0);
+
+    blocksz = vhd_get_dhdr_blksz(vhd->dhdr) + vhd->sectormap_sz;
+
+    // Byte offset of the current footer. Widen before shifting so the
+    // result is not truncated should next_block_off be a 32-bit field.
+    old_ftr_off = (off_t)vhd->next_block_off << 9;
+
+    /*
+     * First try to write footer at new position.
+     * The hole should be filled with zeroes
+     * XXX: Are we sure?
+     */
+    bytes = vdisk_syncio(vf->fd, vhd->ftr, VHD_FTR_SZ,
+                         old_ftr_off + (off_t)blocksz,
+                         VDISK_WRITE, NULL);
+    if (bytes != VHD_FTR_SZ) {
+        VIDDBG(0, "Can't append footer\n");
+        return (EIO);
+    }
+
+
+    // Overwrite footer with zeroes
+    bytes = vdisk_syncio(vf->fd, vhd_zeroes, VHD_FTR_SZ,
+                         old_ftr_off, VDISK_WRITE, NULL);
+    if (bytes != VHD_FTR_SZ) {
+        VIDDBG(0, "Can't overwrite footer\n");
+        return (EIO);
+    }
+
+    // Now update BAT in a 512-b chunk
+    vhd->bat[blockno] = __arch__swab32(vhd->next_block_off);
+    bat_off = (VHD_FTR_SZ + VHD_DHDR_SZ + (blockno<<2)) & BLOCK_MASK;
+    // vhd->bat is 512-byte aligned, so masking the entry's address
+    // yields the start of the in-memory chunk that holds it
+    ptr = (char *)(((addr_t)&vhd->bat[blockno]) & BLOCK_MASK);
+    bytes = vdisk_syncio(vf->fd, ptr, 512, bat_off, VDISK_WRITE, NULL);
+    if (bytes != 512) {
+        VIDDBG(0, "Can't update BAT\n");
+        // next_block_off was not advanced; undo the in-memory entry
+        // so a later allocation cannot hand out the same offset to a
+        // second block
+        vhd->bat[blockno] = __arch__swab32(VHD_BAT_INVALID_ENTRY);
+        return (EIO);
+    }
+
+    vhd->next_block_off += (blocksz >> 9);
+
+    return(0);
+}
+
+/*
+ * It would be easier to use test_bit()/set_bit() routines,
+ * but x86 bit test/set instructions count bits (in the last byte)
+ * from LSb, which is not what we want. We could recompute pos
+ * (pos=(pos&(~7))+7-(pos&7)) but doing this operation more
+ * explicitly seems to be safer.
+ */
+// Test bit pos of the bitmap at buf, counting bits from the most
+// significant bit of each byte. Returns non-zero (the masked bit
+// value, not necessarily 1) when the bit is set, 0 otherwise.
+inline int
+vhd_test_bit(int pos, char *buf)
+{
+    uint8_t octet = *((uint8_t *)buf + (pos >> 3));
+
+    // 0x80 >> k selects the same bit as 1 << (7 - k)
+    return (octet & (0x80 >> (pos & 7)));
+}
+
+// Return 1 when all bits in [start, start+bits) of the bitmap at buf
+// are set, 0 as soon as a clear bit is found. An empty range yields 1.
+inline int
+vhd_test_bitset(int start, int bits, char *buf)
+{
+    int pos = start;
+    int remaining = bits;
+
+    while (remaining-- > 0) {
+        if (!vhd_test_bit(pos, buf))
+            return (0);
+        pos++;
+    }
+
+    return (1);
+}
+
+// Set bit pos of the bitmap at buf, counting bits from the most
+// significant bit of each byte.
+inline void
+vhd_set_bit(int pos, char *buf)
+{
+    uint8_t *cell = (uint8_t *)buf + (pos >> 3);
+
+    // 0x80 >> k selects the same bit as 1 << (7 - k)
+    *cell = *cell | (0x80 >> (pos & 7));
+}
+
+// Set every bit in [start, start+bits) of the bitmap at buf.
+inline void
+vhd_set_bitset(int start, int bits, char *buf)
+{
+    int pos = start;
+    int remaining = bits;
+
+    while (remaining-- > 0)
+        vhd_set_bit(pos++, buf);
+}
+
+
+// Complete a write that touched previously unallocated sectors.
+//
+// arg: vhd_xfer_t prepared by vhd_map_block(), or NULL (nothing to do)
+// err: result of the data transfer; when non-zero the sector bitmap is
+//      left untouched
+//
+// On err == 0 the 512-byte bitmap chunk is re-read, the sector bits
+// are set, the chunk is written back and, if a cache entry was
+// reserved, the entry is refreshed. The transfer record (vhdx->mem)
+// is freed on every path. Returns 0, or EIO if bitmap I/O fails.
+int
+vhd_xfer_commit(void *arg, int err)
+{
+    vhd_xfer_t *xfer = arg;
+    int rc = 0;
+
+    if (xfer == NULL)
+        return (0);
+
+    if (err != 0)
+        goto done;
+
+    // Read the 512b chunk of sector map
+    if (vdisk_syncio(xfer->fd, xfer->secmap_chunk, 512,
+                     xfer->secmap_addr, VDISK_READ, NULL) != 512) {
+        VIDDBG(0, "Failed to read sector bitmap\n");
+        rc = EIO;
+        goto done;
+    }
+
+    // Mark the newly written sectors as present
+    vhd_set_bitset(xfer->sector_bit, xfer->num_secs,
+                   xfer->secmap_chunk);
+
+    // and write the chunk back
+    if (vdisk_syncio(xfer->fd, xfer->secmap_chunk, 512,
+                     xfer->secmap_addr, VDISK_WRITE, NULL) != 512) {
+        VIDDBG(0, "Can't commit access\n");
+        rc = EIO;
+        goto done;
+    }
+
+    // Refill the cache entry reserved for this bitmap chunk
+    if (xfer->cache && xfer->first_sector != VHD_INVALID_SECTOR) {
+        ASSERT(xfer->cache->first_sector == VHD_INVALID_SECTOR);
+        memcpy(xfer->cache->secmap_chunk, xfer->secmap_chunk, 512);
+        xfer->cache->first_sector = xfer->first_sector;
+    }
+
+done:
+    vdisk_free(xfer->mem);
+    return (rc);
+}
+
+// Translate a virtual sector number into a sector number inside the
+// backing VHD file.
+//
+// Microsoft uses "sector" for 512-byte unit that we
+// refer to as "block" elsewhere.
+// This routine is *NOT* SMP-safe!
+//
+// vf:       file to map in
+// sectorno: IN virtual sector; OUT sector in the file when the result
+//           is VID_BLOCK_MAPPED (left unchanged for fixed disks)
+// num_secs: number of consecutive sectors; all must lie in the same
+//           VHD block (asserted)
+// op:       VDISK_READ or a write operation
+// arg:      OUT, set to a freshly allocated vhd_xfer_t only when a
+//           write touches sectors whose bitmap bits are not all set;
+//           it is consumed by vhd_xfer_commit()
+//
+// Returns VID_BLOCK_MAPPED, VID_BLOCK_NOTMAPPED (read of sectors that
+// are not present), VID_BLOCK_TOOBIG (read where only some sectors are
+// present), the error from vhd_alloc_block(), or EIO.
+int
+vhd_map_block(vd_file_t *vf,
+ uint32_t *sectorno, /* IN/OUT */
+ int num_secs,
+ int op,
+ void **arg)
+{
+ vhd_file_t *vhd = vf->vdf;
+ int type = vhd_get_ftr_type(vhd->ftr);
+ uint32_t blockno; // block of sectors in the file
+ int err;
+ size_t bytes;
+ int sector_bit; // bit offset into 512b chunk of sectormap
+ int sector_in_block;
+ off_t sectormap_addr;
+ uint32_t first_sector;
+ vhd_cache_t *cache = vhd->cache_head;
+
+
+ // Fixed disks are mapped 1:1, nothing to translate
+ if (type == VHD_TYPE_FIXED)
+ return (VID_BLOCK_MAPPED);
+
+ vhd->stats.access++;
+
+ blockno = *sectorno >> vhd->sec_per_block_log;
+
+ // We can only map a sequence of sectors in the same block
+ ASSERT(((*sectorno+num_secs-1) >> vhd->sec_per_block_log)
+ == blockno);
+
+ // First sector in the block (really, blockno<<vhd->sec_per_block_log)
+ first_sector = *sectorno & (~(((uint32_t)1<<vhd->sec_per_block_log)-1));
+
+ // This sector's offset in the block
+ sector_in_block = *sectorno & (((uint32_t)1<<vhd->sec_per_block_log)-1);
+
+ sector_bit = sector_in_block & ((512*8)-1); // 8 bits in a byte
+ // Look for a cached sectormap chunk of this block. On exit from the
+ // loop, cache is either the matching entry (sectors not all set) or
+ // NULL (no entry for this block).
+ while (cache != NULL) {
+ if (cache->first_sector == first_sector) {
+ // Sectormap is cached
+ if (vhd_test_bitset(sector_bit, num_secs,
+ cache->secmap_chunk)) {
+
+ // sector is mapped
+ *sectorno = cache->phys_first_sector +
+ + sector_in_block;
+
+ vhd->stats.cache_hit++;
+
+ // Move the entry to the head of the list
+ // (it is already there when prev is NULL)
+ if (cache->prev) {
+ cache->prev->next = cache->next;
+ if (cache->next)
+ cache->next->prev =
+ cache->prev;
+ else
+ vhd->cache_tail = cache->prev;
+
+ cache->next = vhd->cache_head;
+ cache->next->prev = cache;
+ cache->prev = NULL;
+ vhd->cache_head = cache;
+ }
+
+ return (VID_BLOCK_MAPPED);
+ } else {
+ break;
+ }
+ }
+ cache = cache->next;
+ }
+
+ if (__arch__swab32(vhd->bat[blockno]) == VHD_BAT_INVALID_ENTRY) {
+
+ // For reads, the caller will assume that
+ // read returned zeroes
+ if (op == VDISK_READ)
+ return (VID_BLOCK_NOTMAPPED);
+
+ err = vhd_alloc_block(vf, blockno);
+ vhd->stats.block_alloc++;
+ VIDDBG(100, "Allocated block %d\n", blockno);
+ if (err) {
+ VIDDBG(0, "Failed to allocate block\n");
+ return (err);
+ }
+ }
+
+ if (VHD_CACHE_SZ > 0) {
+
+ if (vhd->cache_tail->first_sector != VHD_INVALID_SECTOR) {
+ // No entry for this block: recycle the tail entry by
+ // rotating it to the head of the list
+ if ((cache == NULL) && (vhd->cache_head != NULL)) {
+ vhd_cache_t *oldh = vhd->cache_head;
+ vhd_cache_t *oldt = vhd->cache_tail;
+
+ vhd->cache_head = oldt;
+ vhd->cache_tail = oldt->prev;
+
+ vhd->cache_head->next = oldh;
+ oldh->prev = oldt;
+
+ vhd->cache_head->prev = NULL;
+
+ vhd->cache_tail->next = NULL;
+
+ cache = vhd->cache_head;
+ }
+
+ } else {
+ // tail cache fill is in-flight. We assume that
+ // all others are in-flight as well.
+ // We will not be caching
+ // XXX: we should probably walk the list
+ //first_sector = VHD_INVALID_SECTOR;
+ }
+ }
+
+ // Read the 512-byte chunk of the block's sector bitmap that
+ // covers this sector (BAT entry is the block's first file sector)
+ sectormap_addr =
+ ((uint64_t)__arch__swab32(vhd->bat[blockno])<<9) +
+ ((sector_in_block>>3) & BLOCK_MASK);
+ bytes = vdisk_syncio(vf->fd, vhd->secmap_chunk, 512,
+ sectormap_addr, VDISK_READ, NULL);
+ if (bytes != 512) {
+ VIDDBG(0, "Failed to read sector bitmap\n");
+ return (EIO);
+ }
+
+ // See whether the sector is present
+ if (!vhd_test_bitset(sector_bit, num_secs, vhd->secmap_chunk)) {
+ vhd_xfer_t *vhdx;
+ int byteaddr, bitno;
+ char *ptr;
+
+ // For reads, the caller will assume that
+ // read returned zeroes
+ if (op == VDISK_READ) {
+ int i;
+ int mapped = 0;
+
+ for (i=0; i<num_secs; i++) {
+ if (vhd_test_bit(sector_bit+i,
+ vhd->secmap_chunk)) {
+ mapped = 1;
+ break;
+ }
+ }
+
+ if (!mapped) {
+ // None of blocks is mapped
+ return (VID_BLOCK_NOTMAPPED);
+ } else {
+ // Some blocks are mapped and some are not
+ return (VID_BLOCK_TOOBIG);
+ }
+ }
+
+ byteaddr = sector_bit >> 3; // Find word in the map
+ bitno = sector_bit & 7; // Bit in the word
+ ASSERT(byteaddr<512);
+
+ // sectormap is the first member and will be aligned
+ // NOTE(review): allocation failure is reported as EIO here
+ vhdx = vdisk_malloc(sizeof(vhd_xfer_t)+512);
+ if (vhdx == NULL) {
+ VIDDBG(0, "Failed to allocate commit data\n");
+ return (EIO);
+ }
+
+ // Place the vhd_xfer_t at the first 512-byte boundary inside
+ // the allocation; the raw pointer is kept in ->mem for freeing
+ ptr = (char *)vhdx;
+ while ((addr_t)ptr & 511) ptr++;
+
+ if (((addr_t)ptr - (addr_t)vhdx) >= 512)
+ VIDDBG(0, "vhdx=%p, ptr=%p\n", vhdx, ptr);
+
+ ASSERT(((addr_t)ptr - (addr_t)vhdx) < 512);
+
+ ((vhd_xfer_t *)ptr)->mem = (void *)vhdx;
+ vhdx = (vhd_xfer_t *)ptr;
+ vhdx->fd = vf->fd;
+ vhdx->secmap_addr = sectormap_addr;
+ vhdx->sector_bit = sector_bit;
+ vhdx->num_secs = num_secs;
+
+ if (VHD_CACHE_SZ > 0) {
+ //vhdx->cache = &vhd->cache[cache_index];
+ vhdx->cache = cache;
+ vhdx->first_sector = first_sector;
+ // The entry stays invalid until vhd_xfer_commit() refills it
+ if (cache) // Flush old cache entry
+ cache->first_sector = VHD_INVALID_SECTOR;
+ } else
+ vhdx->first_sector = VHD_INVALID_SECTOR;
+
+ *arg = vhdx;
+
+ vhd->stats.sec_alloc++;
+
+ } else {
+ // cache the map
+ if (VHD_CACHE_SZ > 0) {
+ if (cache &&
+ (cache->first_sector != VHD_INVALID_SECTOR)) {
+ memcpy(cache->secmap_chunk,
+ vhd->secmap_chunk, 512);
+ cache->first_sector = first_sector;
+ }
+ }
+ }
+
+ // Data sectors follow the block's sector bitmap in the file
+ if (cache)
+ cache->phys_first_sector = __arch__swab32(vhd->bat[blockno]) +
+ (vhd->sectormap_sz >> 9);
+
+ // Sector in the backing file
+ *sectorno = (__arch__swab32(vhd->bat[blockno])) + sector_in_block
+ + (vhd->sectormap_sz >> 9);
+
+
+ return (VID_BLOCK_MAPPED);
+}
+
+// Close every file of a vdisk: log per-file statistics, release all
+// VHD buffers (including the sector-map cache buffers), unlink the
+// file from vdisk->vdf_head, close its descriptor and free its
+// vd_file_t. Safe to call on a vdisk whose file list is empty.
+void
+vhd_close(struct vdisk_dev *vdisk)
+{
+    struct list_head *ptr, *tmp;
+    vd_file_t *vf;
+    vhd_file_t *vhd;
+    int err;
+    int i;
+
+    if (vdisk == NULL) {
+        VIDDBG(0, "Invalid vdisk pointer\n");
+        return;
+    }
+
+    list_for_each_safe(ptr, tmp, &vdisk->vdf_head) {
+
+        vf = list_entry(ptr, vd_file_t, vdf_list);
+        if (vf == NULL) {
+            VIDDBG(0, "Invalid vdisk file pointer\n");
+            return;
+        }
+
+        vhd = vf->vdf;
+        if (vhd) {
+            VIDDBG(10, "VHD Stats for %s: \n"
+                   "\t accesses:\t%" PRId64 "\n"
+                   "\t cache_hit:\t%" PRId64 "\n"
+                   "\t block_alloc:\t%" PRId64 "\n"
+                   "\t sec_alloc:\t%" PRId64 "\n"
+                   "\t total IOs:\t%" PRId64 "\n"
+                   "\t busy:\t%" PRId64 "\n"
+                   "\t sync:\t%" PRId64 "\n"
+                   "\t async:\t%" PRId64 "\n",
+                   vf->name,
+                   vhd->stats.access,
+                   vhd->stats.cache_hit,
+                   vhd->stats.block_alloc,
+                   vhd->stats.sec_alloc,
+                   vdisk->tot_io,
+                   vdisk->busyio,
+                   vdisk->syncio,
+                   vdisk->asyncio);
+
+            if (vhd->ftr_mem)
+                vdisk_free(vhd->ftr_mem);
+            if (vhd->dhdr_mem)
+                vdisk_free(vhd->dhdr_mem);
+            if (vhd->bat_mem)
+                vdisk_free(vhd->bat_mem);
+            if (vhd->sec_mem)
+                vdisk_free(vhd->sec_mem);
+            // The sector-map cache buffers are allocated per entry
+            // in vhd_read_metadata() and must be released too;
+            // they are NULL for fixed disks
+            for (i=0;i<VHD_CACHE_SZ;i++) {
+                if (vhd->cache[i].sec_mem)
+                    vdisk_free(vhd->cache[i].sec_mem);
+            }
+            vdisk_free(vhd);
+        }
+
+        list_del(&vf->vdf_list);
+
+        err = vdisk_close(vf->fd);
+        if (err)
+            VIDDBG(0, "close(%s): %s\n", vf->name, strerror(errno));
+
+        vdisk_free(vf);
+
+        if (list_empty(&vdisk->vdf_head))
+            break;
+    }
+}
+
+
+// Open a VHD file and, for differencing disks, the chain of parent
+// files it refers to, appending one vd_file_t per file to
+// vdisk->vdf_head (leaf first).
+//
+// The leaf is opened read-write unless vdisk->flags has VDISK_RO;
+// parents are always opened read-only.
+//
+// Returns 0 on success or a non-zero error code. On failure every file
+// opened so far is closed again via vhd_close(); the vdisk structure
+// itself belongs to the caller and is never freed here.
+int vhd_open(struct vdisk_dev *vdisk, char *filename)
+{
+    int ret = 0;
+    int err;
+    vd_file_t *vf, *child_vf = NULL;
+    char *f, *child = NULL;
+    vhd_file_t *vhd;
+    int rw;
+
+    if (vdisk->flags & VDISK_RO)
+        rw = O_RDONLY;
+    else
+        rw = O_RDWR;
+
+    f = (char *)filename;
+
+    while (f != NULL) { // Read all file associated with this VD file
+        int found;
+        int i;
+
+        vf = (vd_file_t *)vdisk_malloc(sizeof(vd_file_t));
+        if (vf == NULL) {
+            VIDDBG(0, "Couldn't allocate vd_file structure\n");
+            ret = ENOMEM;
+            goto out;
+        }
+        memset(vf, 0, sizeof(vd_file_t));
+
+        // Bounded, always NUL-terminated copy; an over-long path is
+        // truncated instead of overrunning vf->name
+        snprintf(vf->name, sizeof(vf->name), "%s", f);
+
+        vf->fd = vdisk_open(f, rw, 0);
+        if (vf->fd < 0) {
+            // Save errno before logging/cleanup can overwrite it
+            ret = errno;
+            VIDDBG(0, "Failed to open %s\n", f);
+            vdisk_free(vf);
+            goto out;
+        }
+        err = vhd_read_metadata(vdisk, vf);
+        if (err) {
+            VIDDBG(0, "Couldn't read metadata for %s\n", f);
+            // vf is not on the list yet, so vhd_close() would not
+            // close this descriptor
+            (void)close(vf->fd);
+            vdisk_free(vf);
+            ret = err;
+            goto out;
+        }
+
+
+        if (child_vf == NULL) {
+            vf->flags |= VDF_LEAF;
+            rw = O_RDONLY; // for next iteration
+        }
+
+#if 0
+        // If this is a parent, verify paternity
+        if (!vhd_isfamily(vf, child_vf)) {
+            VIDDBG(0, "%s is not parent of %s\n",
+                   f, child_vf);
+        }
+#endif
+
+        list_add_tail(&vf->vdf_list, &vdisk->vdf_head);
+
+        vhd = (vhd_file_t *)(vf->vdf);
+        if (vhd_get_ftr_type(vhd->ftr) != VHD_TYPE_DIFF)
+            break;
+
+        // Differencing disk: find a parent locator entry that names
+        // a file we can open
+        child = f;
+        child_vf = vf;
+        found = 0;
+
+        for (i=0;i<8 && !found;i++) {
+            ple_t ple;
+            int fd;
+
+            vhd_get_dhdr_ple(vhd->dhdr, &ple, i);
+            if (ple.code != VHD_DYN_PLE_ABS &&
+                ple.code != VHD_DYN_PLE_REL)
+                continue;
+
+            f = vhd_get_parent_name(vf, &ple);
+            if (f == NULL) {
+                VIDDBG(0, "Can't locate parent "
+                       "info for %s\n", child);
+                ret = EINVAL;
+                goto out;
+            }
+
+            // stat would be better
+            fd = open(f, O_RDONLY);
+            if (fd >= 0) {
+                (void)close(fd);
+                found = 1;
+                continue;
+            }
+
+            // Missing candidate: try the next locator entry
+            if (errno == ENOENT ||
+                errno == ELOOP ||
+                errno == ENOTDIR ||
+                errno == ENODEV ||
+                errno == EFAULT)
+                continue;
+
+            ret = errno;
+            VIDDBG(0, "stat(%s): %s\n", f, strerror(ret));
+            goto out;
+        }
+
+        if (!found) {
+            // Without this check the loop would reopen the same
+            // file forever when no locator entry is usable
+            VIDDBG(0, "Can't find parent of %s\n", child);
+            ret = ENOENT;
+            goto out;
+        }
+    }
+out:
+    if (ret)
+        vhd_close(vdisk);
+    return ret;
+}
+
+// Return the size recorded in the vdisk (its sz member).
+// hdl is a struct vdisk_dev pointer passed as an opaque handle.
+uint64_t
+vhd_size(void *hdl)
+{
+    return (((struct vdisk_dev *)hdl)->sz);
+}
+
+// Report the CHS geometry stored in the footer of the last file on
+// the vdisk's file list.
+//
+// hdl: struct vdisk_dev pointer passed as an opaque handle
+// cyls, heads, secs: OUT, decoded from the packed footer value
+//                    (cylinders in bits 31..16, heads 15..8, sectors 7..0)
+//
+// Returns 0 on success, -1 if the list is empty or the file carries
+// no VHD data.
+int
+vhd_get_geometry(void *hdl, int *cyls, int *heads, int *secs)
+{
+    struct vdisk_dev *vdisk = (struct vdisk_dev *)hdl;
+    struct list_head *pos;
+    vd_file_t *base = NULL;
+    vhd_file_t *vhd;
+    uint32_t chs;
+
+    // Assume that the last file (base) has all the info:
+    // walk the whole list and keep the final entry
+    list_for_each(pos, &vdisk->vdf_head) {
+        base = list_entry(pos, vd_file_t, vdf_list);
+    }
+
+    if (base == NULL) {
+        VIDDBG(0, "Can't find base file\n");
+        return (-1);
+    }
+
+    vhd = (vhd_file_t *)base->vdf;
+    if (!vhd) {
+        VIDDBG(0, "Can't find VHD data\n");
+        return (-1);
+    }
+
+    chs = vhd_get_ftr_geom(vhd->ftr);
+
+    *secs = chs & 0xff;
+    *heads = (chs >> 8) & 0xff;
+    *cyls = (chs >> 16) & 0xffff;
+
+    VIDDBG(10, "geom = 0x%x (0x%x 0x%x 0x%x)\n", chs, *cyls, *heads,
+           *secs);
+
+    return (0);
+}
+
+// Format descriptor for VHD files, handed to vdisk_register() /
+// vdisk_unregister() by vhd_init() / vhd_exit().
+// The initializer is positional, so the order below must match the
+// member order of vdf_data_t (declared elsewhere): the file extension
+// followed by the VHD implementations of the format callbacks.
+vdf_data_t vdfd_vhd = {
+ VHD_EXTENSION,
+ vhd_open,
+ vhd_close,
+ vhd_map_block,
+ vhd_xfer_commit,
+ vhd_print_header,
+ vhd_parse_args,
+ vhd_create_vdisk,
+ vhd_modify_vdisk,
+ // presumably an embedded, initially unlinked list node -- TODO confirm
+ {NULL,NULL},
+};
+
+// Set up the VHD format: point vhd_zeroes at the first 512-byte
+// aligned address inside __vhd_zeroes, register the format descriptor
+// and clear the first VHD_FTR_SZ bytes of the aligned buffer.
+void
+vhd_init()
+{
+    // __vhd_zeroes has 512 spare bytes so an aligned VHD_FTR_SZ
+    // window always fits
+    for (vhd_zeroes = __vhd_zeroes; (addr_t)vhd_zeroes & 511; )
+        vhd_zeroes++;
+
+    vdisk_register(&vdfd_vhd);
+    memset(vhd_zeroes, 0, VHD_FTR_SZ);
+}
+
+// Counterpart of vhd_init(): unregister the VHD format descriptor.
+void
+vhd_exit()
+{
+    vdisk_unregister(&vdfd_vhd);
+}
diff -r 75c61490cc06 tools/vdisk/vhd.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/vdisk/vhd.h Thu Jun 21 13:05:31 2007 -0400
@@ -0,0 +1,107 @@
+// Copyright (c) 2003-2007, Virtual Iron Software, Inc.
+//
+// Portions have been modified by Virtual Iron Software, Inc.
+// (c) 2007. This file and the modifications can be redistributed and/or
+// modified under the terms and conditions of the GNU General Public
+// License, version 2.1 and not any later version of the GPL, as published
+// by the Free Software Foundation.
+
+#ifndef __VHD_H
+#define __VHD_H
+
+#define VHD_EXTENSION "vhd"
+
+#define VHD_FTR_SZ (512)
+#define VHD_DHDR_SZ (1024)
+
+#define VHD_BAT_INVALID_ENTRY (0xffffffff)
+
+#define VHD_CACHE_SZ (16)
+#define VHD_INVALID_SECTOR (0xffffffff)
+
+//Parent locator entry: in-memory copy of one 24-byte locator slot of the dynamic header
+typedef struct ple {
+ uint32_t code; // platform code, e.g. VHD_DYN_PLE_ABS or VHD_DYN_PLE_REL
+ uint32_t data_space; // space set aside in the file for the locator data; vhd_store_parent() stores a byte count rounded to 512
+ uint32_t data_len; // length of the locator data in bytes; vhd_store_parent() counts the trailing NUL
+ uint32_t reserved; // XXX: do we care? (vhd_set_dhdr_ple() never writes it)
+ uint64_t data_off; // absolute file offset of the locator data (used as an lseek() target)
+} ple_t;
+
+typedef struct vhd_cache {
+ char *secmap_chunk; // 512b chunk of block's sectormap
+ char *sec_mem;
+ uint32_t first_sector; // First sector of the sectormap
+ uint32_t phys_first_sector; // Sector in the file that first_sector
+ // maps to
+ struct vhd_cache *next, *prev;
+} vhd_cache_t;
+
+typedef struct vhd_stat {
+ uint64_t access;
+ uint64_t cache_hit;
+ uint64_t block_alloc;
+ uint64_t sec_alloc;
+} vhd_stat_t;
+
+typedef struct vhd_xfer {
+ // sectormap *must* be first member!
+ char secmap_chunk[512]; // 512b chunk of sectormap.
+ off_t secmap_addr; // Address of the chunk
+ int sector_bit; // bit to be set in sectormap chunk
+ int num_secs;
+ vhd_cache_t *cache;
+ int first_sector;
+ file_t fd;
+ void *mem; // memory for vhd_xfer
+} vhd_xfer_t;
+
+typedef struct vhd_file {
+ char *secmap_chunk; // 512B-aligned block of sectormap.
+ char *sec_mem; // memory for sectormap section
+ char *ftr; // 512B-aligned footer
+ char *ftr_mem; // memory for footer
+ char *dhdr; // 512B-aligned dynamic header
+ char *dhdr_mem; // memory for dynamic header
+ uint32_t *bat; // 512B-aligned Block Allocation Table
+ uint32_t *bat_mem; // memory for BAT
+ vhd_cache_t cache[VHD_CACHE_SZ];
+ vhd_cache_t *cache_head;
+ vhd_cache_t *cache_tail;
+ vhd_stat_t stats;
+ int sec_per_block_log;
+ int sectormap_sz;
+ off_t next_block_off;
+} vhd_file_t;
+
+
+#define VHD_ARG_SZ (1<<0)
+#define VHD_ARG_TYPE (1<<1)
+#define VHD_ARG_BLOCKSZ (1<<2)
+#define VHD_ARG_UUID (1<<3)
+#define VHD_ARG_TIME (1<<4)
+#define VHD_ARG_PARENT (1<<5)
+
+
+typedef struct vhd_args { // options collected by vhd_parse_args()
+ size_t vhd_sz; // -S: disk size in bytes (the option value is in MB)
+ uint8_t type; // VHD_TYPE_*: -f fixed, -d (or obsolete -s) dynamic, DIFF once a parent is given
+ size_t blocksz; // -B: block size in bytes, defaults to 0x200000 (2MB)
+ uint8_t uuid[16]; // -u: 32 hex digits packed two per byte
+ char *parent; // -p: malloc'ed copy of the parent file name, NULL if not given
+ uint64_t args_mask; // OR of the VHD_ARG_* bits for the options actually seen
+} vhd_args_t;
+
+
+extern vdf_data_t vdfd_vhd;
+extern char *vhd_zeroes; // Just a bunch of zeroes
+
+extern int vhd_print_header(vd_file_t *vf);
+extern int vhd_parse_args(int argc, int operations, char *argv[], void **args);
+extern int vhd_create_vdisk(char *filename, void *args);
+extern int vhd_modify_vdisk(struct vdisk_dev *vdisk, void *args);
+extern char *vhd_get_parent_name(vd_file_t *vf, ple_t *ple);
+extern void vhd_init(void);
+extern void vhd_exit(void);
+
+#endif /* __VHD_H */
diff -r 75c61490cc06 tools/vdisk/vhd_footer.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/vdisk/vhd_footer.h Thu Jun 21 13:05:31 2007 -0400
@@ -0,0 +1,316 @@
+// Copyright (c) 2003-2007, Virtual Iron Software, Inc.
+//
+// Portions have been modified by Virtual Iron Software, Inc.
+// (c) 2007. This file and the modifications can be redistributed and/or
+// modified under the terms and conditions of the GNU General Public
+// License, version 2.1 and not any later version of the GPL, as published
+// by the Free Software Foundation.
+
+#ifndef __VHD_FOOTER_H
+#define __VHD_FOOTER_H
+
+#include <string.h>
+#include <linux/types.h>
+#include <linux/byteorder/swab.h>
+
+#define VHD_COOKIE (uint64_t) ( (uint64_t)'c' \
+ | ((uint64_t)'o'<<(8*1)) \
+ | ((uint64_t)'n'<<(8*2)) \
+ | ((uint64_t)'e'<<(8*3)) \
+ | ((uint64_t)'c'<<(8*4)) \
+ | ((uint64_t)'t'<<(8*5)) \
+ | ((uint64_t)'i'<<(8*6)) \
+ | ((uint64_t)'x'<<(8*7)))
+
+#define VHD_FEATURES_NONE (0)
+#define VHD_FEATURES_TEMP (1)
+#define VHD_FEATURES_RSVD (2)
+
+#define VHD_FORMAT_VER_1 (0x00010000)
+
+/* data offset for fixed disks */
+#define VHD_FIXED_OFFSET ((uint64_t)-1)
+
+#define VHD_CREATOR_APP ((uint32_t)'v' \
+ | ((uint32_t)'i'<<8) \
+ | ((uint32_t)'t'<<16) \
+ | ((uint32_t)'l'<<24))
+#define VHD_CREATOR_VER_1 (0x00010000)
+
+#define VHD_CREATOR_HOST_OS ((uint32_t)'L' \
+ | ((uint32_t)'i'<<8) \
+ | ((uint32_t)'n'<<16) \
+ | ((uint32_t)'x'<<24))
+
+#define VHD_TYPE_NONE (0)
+#define VHD_TYPE_FIXED (2)
+#define VHD_TYPE_DYNAMIC (3)
+#define VHD_TYPE_DIFF (4)
+
+
+
+// Pack cylinders/heads/sectors-per-track into the 32-bit footer geometry
+// word: cylinders in bits 31..16, heads in bits 15..8, sectors in bits 7..0.
+// This is a complete GCC statement expression, so it yields a value no
+// matter whether the caller wraps it in parentheses.  Every argument is
+// evaluated exactly once and widened to uint32_t before shifting, so a
+// cylinder count of 0x8000 or more no longer overflows a signed int.
+#define VHD_GEOM(c,h,s) ({ \
+    uint32_t _vhd_c = (uint32_t)(c); \
+    uint32_t _vhd_h = (uint32_t)(h); \
+    uint32_t _vhd_s = (uint32_t)(s); \
+    ASSERT((_vhd_c <= 0xffff) && (_vhd_h <= 0xff) && (_vhd_s <= 0xff)); \
+    (uint32_t)(_vhd_s | (_vhd_h << 8) | (_vhd_c << 16)); })
+
+
+static inline uint64_t vhd_get_ftr_cookie(char *ftr) { // footer bytes 0-7, copied as-is (no byte swap)
+ uint64_t tmp = *(uint64_t *)(&ftr[0]);
+ return (tmp);
+}
+static inline void vhd_set_ftr_cookie(char *ftr, uint64_t val) {
+ uint64_t tmp = val;
+ *(uint64_t *)(&ftr[0]) = tmp;
+}
+
+static inline uint32_t vhd_get_ftr_features(char *ftr) { // footer bytes 8-11, byte-swapped; NOTE(review): the unconditional swab presumably assumes a little-endian host -- confirm
+ uint32_t tmp = *(uint32_t *)(&ftr[8]);
+ return __arch__swab32(tmp);
+}
+static inline void vhd_set_ftr_features(char *ftr, uint32_t val) {
+ uint32_t tmp = val;
+ *(uint32_t *)(&ftr[8]) = __arch__swab32(tmp);
+}
+
+static inline uint32_t vhd_get_ftr_fformat(char *ftr) { // footer bytes 12-15, byte-swapped
+ uint32_t tmp = *(uint32_t *)(&ftr[12]);
+ return __arch__swab32(tmp);
+}
+static inline void vhd_set_ftr_fformat(char *ftr, uint32_t val) {
+ uint32_t tmp = val;
+ *(uint32_t *)(&ftr[12]) = __arch__swab32(tmp);
+}
+
+static inline uint64_t vhd_get_ftr_dataoff(char *ftr) { // footer bytes 16-23, byte-swapped
+ uint64_t tmp = *(uint64_t *)(&ftr[16]);
+ return __arch__swab64(tmp);
+}
+static inline void vhd_set_ftr_dataoff(char *ftr, uint64_t val) {
+ uint64_t tmp = val;
+ *(uint64_t *)(&ftr[16]) = __arch__swab64(tmp);
+}
+
+static inline uint32_t vhd_get_ftr_timestamp(char *ftr) { // footer bytes 24-27, byte-swapped
+ uint32_t tmp = *(uint32_t *)(&ftr[24]);
+ return __arch__swab32(tmp);
+}
+static inline void vhd_set_ftr_timestamp(char *ftr, uint32_t val) {
+ uint32_t tmp = val;
+ *(uint32_t *)(&ftr[24]) = __arch__swab32(tmp);
+}
+
+static inline uint32_t vhd_get_ftr_cr_app(char *ftr) { // footer bytes 28-31, copied as-is (no byte swap)
+ uint32_t tmp = *(uint32_t *)(&ftr[28]);
+ return (tmp);
+}
+static inline void vhd_set_ftr_cr_app(char *ftr, uint32_t val) {
+ uint32_t tmp = val;
+ *(uint32_t *)(&ftr[28]) = tmp;
+}
+
+static inline uint32_t vhd_get_ftr_cr_ver(char *ftr) { // footer bytes 32-35, byte-swapped
+ uint32_t tmp = *(uint32_t *)(&ftr[32]);
+ return __arch__swab32(tmp);
+}
+static inline void vhd_set_ftr_cr_ver(char *ftr, uint32_t val) {
+ uint32_t tmp = val;
+ *(uint32_t *)(&ftr[32]) = __arch__swab32(tmp);
+}
+
+static inline uint32_t vhd_get_ftr_cr_hostos(char *ftr) { // footer bytes 36-39, copied as-is (no byte swap)
+ uint32_t tmp = *(uint32_t *)(&ftr[36]);
+ return (tmp);
+}
+static inline void vhd_set_ftr_cr_hostos(char *ftr, uint32_t val) {
+ uint32_t tmp = val;
+ *(uint32_t *)(&ftr[36]) = tmp;
+}
+
+static inline uint64_t vhd_get_ftr_orig_sz(char *ftr) { // footer bytes 40-47, byte-swapped
+ uint64_t tmp = *(uint64_t *)(&ftr[40]);
+ return __arch__swab64(tmp);
+}
+static inline void vhd_set_ftr_orig_sz(char *ftr, uint64_t val) {
+ uint64_t tmp = val;
+ *(uint64_t *)(&ftr[40]) = __arch__swab64(tmp);
+}
+
+static inline uint64_t vhd_get_ftr_cur_sz(char *ftr) { // footer bytes 48-55, byte-swapped
+ uint64_t tmp = *(uint64_t *)(&ftr[48]);
+ return __arch__swab64(tmp);
+}
+static inline void vhd_set_ftr_cur_sz(char *ftr, uint64_t val) {
+ uint64_t tmp = val;
+ *(uint64_t *)(&ftr[48]) = __arch__swab64(tmp);
+}
+
+static inline uint32_t vhd_get_ftr_geom(char *ftr) { // footer bytes 56-59, byte-swapped; packed cylinders/heads/sectors word
+ uint32_t tmp = *(uint32_t *)(&ftr[56]);
+ return __arch__swab32(tmp);
+}
+static inline void vhd_set_ftr_geom(char *ftr, uint32_t val) {
+ uint32_t tmp = val;
+ *(uint32_t *)(&ftr[56]) = __arch__swab32(tmp);
+}
+
+static inline uint32_t vhd_get_ftr_type(char *ftr) { // footer bytes 60-63, byte-swapped; compared against VHD_TYPE_*
+ uint32_t tmp = *(uint32_t *)(&ftr[60]);
+ return __arch__swab32(tmp);
+}
+static inline void vhd_set_ftr_type(char *ftr, uint32_t val) {
+ uint32_t tmp = val;
+ *(uint32_t *)(&ftr[60]) = __arch__swab32(tmp);
+}
+
+#define VHD_FTR_CHKSUM_OFF (64)
+static inline uint32_t vhd_get_ftr_chksum(char *ftr) { // footer bytes 64-67 (VHD_FTR_CHKSUM_OFF), byte-swapped
+ uint32_t tmp = *(uint32_t *)(&ftr[64]);
+ return __arch__swab32(tmp);
+}
+static inline void vhd_set_ftr_chksum(char *ftr, uint32_t val) {
+ uint32_t tmp = val;
+ *(uint32_t *)(&ftr[64]) = __arch__swab32(tmp);
+}
+
+static inline uint8_t *vhd_get_ftr_uid(char *ftr) { // returns a pointer into the footer itself (byte 68 onward), not a copy
+ return (uint8_t *)&ftr[68];
+}
+static inline void vhd_set_ftr_uid(char *ftr, uint8_t *val) { // copies 16 bytes from val into footer bytes 68-83
+ memcpy(&ftr[68], val, 16);
+}
+
+static inline uint8_t vhd_get_ftr_saved_state(char *ftr) { // footer byte 84
+ uint8_t tmp = *(uint8_t *)(&ftr[84]);
+ return (tmp);
+}
+static inline void vhd_set_ftr_saved_state(char *ftr, uint8_t val) {
+ uint8_t tmp = val;
+ *(uint8_t *)(&ftr[84]) = tmp;
+}
+
+
+
+#define VHD_DYN_COOKIE (uint64_t) ( (uint64_t)'c' \
+ | ((uint64_t)'x'<<(8*1)) \
+ | ((uint64_t)'s'<<(8*2)) \
+ | ((uint64_t)'p'<<(8*3)) \
+ | ((uint64_t)'a'<<(8*4)) \
+ | ((uint64_t)'r'<<(8*5)) \
+ | ((uint64_t)'s'<<(8*6)) \
+ | ((uint64_t)'e'<<(8*7)))
+
+#define VHD_DYN_OFFSET ((uint64_t)-1)
+#define VHD_DYN_HDR_VER_1 (0x00010000)
+
+// Parent locator codes (our own)
+#define VHD_DYN_PLE_ABS ( (uint64_t)'u' \
+ | ((uint64_t)'x'<<(8*1)) \
+ | ((uint64_t)'n'<<(8*2)) \
+ | ((uint64_t)'L'<<(8*3)))
+#define VHD_DYN_PLE_REL ( (uint64_t)'k' \
+ | ((uint64_t)'x'<<(8*1)) \
+ | ((uint64_t)'n'<<(8*2)) \
+ | ((uint64_t)'L'<<(8*3)))
+
+
+static inline uint64_t vhd_get_dhdr_cookie(char *hdr) { // dynamic header bytes 0-7, copied as-is (no byte swap)
+ uint64_t tmp = *(uint64_t *)(&hdr[0]);
+ return (tmp);
+}
+static inline void vhd_set_dhdr_cookie(char *hdr, uint64_t val) {
+ uint64_t tmp = val;
+ *(uint64_t *)(&hdr[0]) = tmp;
+}
+
+static inline uint64_t vhd_get_dhdr_dataoff(char *hdr) { // dynamic header bytes 8-15, byte-swapped; NOTE(review): the unconditional swab presumably assumes a little-endian host -- confirm
+ uint64_t tmp = *(uint64_t *)(&hdr[8]);
+ return __arch__swab64(tmp);
+}
+static inline void vhd_set_dhdr_dataoff(char *hdr, uint64_t val) {
+ uint64_t tmp = val;
+ *(uint64_t *)(&hdr[8]) = __arch__swab64(tmp);
+}
+
+static inline uint64_t vhd_get_dhdr_tbloff(char *hdr) { // dynamic header bytes 16-23, byte-swapped
+ uint64_t tmp = *(uint64_t *)(&hdr[16]);
+ return __arch__swab64(tmp);
+}
+static inline void vhd_set_dhdr_tbloff(char *hdr, uint64_t val) {
+ uint64_t tmp = val;
+ *(uint64_t *)(&hdr[16]) = __arch__swab64(tmp);
+}
+
+static inline uint32_t vhd_get_dhdr_hdrver(char *hdr) { // dynamic header bytes 24-27, byte-swapped
+ uint32_t tmp = *(uint32_t *)(&hdr[24]);
+ return __arch__swab32(tmp);
+}
+static inline void vhd_set_dhdr_hdrver(char *hdr, uint32_t val) {
+ uint32_t tmp = val;
+ *(uint32_t *)(&hdr[24]) = __arch__swab32(tmp);
+}
+
+static inline uint32_t vhd_get_dhdr_tbl_entries(char *hdr) { // dynamic header bytes 28-31, byte-swapped; number of BAT entries
+ uint32_t tmp = *(uint32_t *)(&hdr[28]);
+ return __arch__swab32(tmp);
+}
+static inline void vhd_set_dhdr_tbl_entries(char *hdr, uint32_t val) {
+ uint32_t tmp = val;
+ *(uint32_t *)(&hdr[28]) = __arch__swab32(tmp);
+}
+
+static inline uint32_t vhd_get_dhdr_blksz(char *hdr) { // dynamic header bytes 32-35, byte-swapped
+ uint32_t tmp = *(uint32_t *)(&hdr[32]);
+ return __arch__swab32(tmp);
+}
+static inline void vhd_set_dhdr_blksz(char *hdr, uint32_t val) {
+ uint32_t tmp = val;
+ *(uint32_t *)(&hdr[32]) = __arch__swab32(tmp);
+}
+
+#define VHD_DHDR_CHKSUM_OFF (36)
+static inline uint32_t vhd_get_dhdr_chksum(char *hdr) { // dynamic header bytes 36-39 (VHD_DHDR_CHKSUM_OFF), byte-swapped
+ uint32_t tmp = *(uint32_t *)(&hdr[36]);
+ return __arch__swab32(tmp);
+}
+static inline void vhd_set_dhdr_chksum(char *hdr, uint32_t val) {
+ uint32_t tmp = val;
+ *(uint32_t *)(&hdr[36]) = __arch__swab32(tmp);
+}
+
+static inline uint8_t *vhd_get_dhdr_puid(char *hdr) { // returns a pointer into the header itself (byte 40 onward), not a copy
+ return (uint8_t *)&hdr[40];
+}
+static inline void vhd_set_dhdr_puid(char *hdr, uint8_t *val) { // copies 16 bytes from val into header bytes 40-55
+ memcpy(&hdr[40], val, 16);
+}
+
+static inline uint32_t vhd_get_dhdr_ptimestamp(char *hdr) { // dynamic header bytes 56-59, byte-swapped
+ uint32_t tmp = *(uint32_t *)(&hdr[56]);
+ return __arch__swab32(tmp);
+}
+static inline void vhd_set_dhdr_ptimestamp(char *hdr, uint32_t val) {
+ uint32_t tmp = val;
+ *(uint32_t *)(&hdr[56]) = __arch__swab32(tmp);
+}
+
+// Load parent locator entry number idx from the dynamic header into *ple.
+// Entries are 24 bytes each and start at header offset 576; every field is
+// passed through __arch__swab32/__arch__swab64 on the way in.
+static inline void vhd_get_dhdr_ple(char *hdr, ple_t *ple, int idx) {
+    char *tmp = &hdr[576+24*idx];
+
+    ple->code = __arch__swab32(*(uint32_t *)tmp);
+    ple->data_space = __arch__swab32(*(uint32_t *)(tmp+4));
+    ple->data_len = __arch__swab32(*(uint32_t *)(tmp+8));
+    // Load the reserved word as well, so that *ple is completely
+    // initialised even when the caller hands in an uninitialised ple_t.
+    ple->reserved = __arch__swab32(*(uint32_t *)(tmp+12));
+    ple->data_off = __arch__swab64(*(uint64_t *)(tmp+16));
+}
+
+static inline void vhd_set_dhdr_ple(char *hdr, ple_t *ple, int idx) { // store *ple into 24-byte locator slot idx (slots start at header offset 576)
+ char *tmp = &hdr[576+24*idx];
+
+ *(uint32_t *)(tmp) = __arch__swab32(ple->code);
+ *(uint32_t *)(tmp+4) = __arch__swab32(ple->data_space);
+ *(uint32_t *)(tmp+8) = __arch__swab32(ple->data_len);
+ *(uint64_t *)(tmp+16) = __arch__swab64(ple->data_off); // bytes 12-15 of the slot (ple->reserved) are left untouched
+}
+
+
+
+#endif /* __VHD_FOOTER_H */
diff -r 75c61490cc06 tools/vdisk/vhd_utils.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/vdisk/vhd_utils.c Thu Jun 21 13:05:31 2007 -0400
@@ -0,0 +1,964 @@
+// Copyright (c) 2003-2007, Virtual Iron Software, Inc.
+//
+// Portions have been modified by Virtual Iron Software, Inc.
+// (c) 2007. This file and the modifications can be redistributed and/or
+// modified under the terms and conditions of the GNU General Public
+// License, version 2.1 and not any later version of the GPL, as published
+// by the Free Software Foundation.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/stddef.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <time.h>
+#include <string.h>
+#include <unistd.h>
+#include <ctype.h>
+
+#include "vdisk.h"
+#include "vdisk_utils.h"
+#include "vhd.h"
+#include "vhd_footer.h"
+
+
+// Taken from Microsoft's VHD spec (hence notations...)
+//
+// Derive the packed CHS geometry word (see VHD_GEOM) for a disk of sz bytes.
+// sz is assumed to be a whole number of 512-byte sectors.  Disks with more
+// than 65535 * 16 * 255 sectors get the geometry of exactly that many.
+static uint32_t
+vhd_chs(ssize_t sz)
+{
+    // Clamp in 64 bits *before* narrowing: converting the sector count to
+    // 32 bits first wraps for images of 2TB and more and yields a tiny,
+    // wrong geometry instead of the maximum one.
+    uint64_t allSectors = (uint64_t)sz >> 9;
+    uint32_t totalSectors;
+    int sectorsPerTrack, heads, cylinderTimesHeads, cylinders;
+
+    if (allSectors > 65535 * 16 * 255)
+        totalSectors = 65535 * 16 * 255;
+    else
+        totalSectors = (uint32_t)allSectors;
+
+    if (totalSectors >= 65535 * 16 * 63) {
+        sectorsPerTrack = 255;
+        heads = 16;
+        cylinderTimesHeads = totalSectors / sectorsPerTrack;
+    } else {
+        sectorsPerTrack = 17;
+        cylinderTimesHeads = totalSectors / sectorsPerTrack;
+
+        heads = (cylinderTimesHeads + 1023) / 1024;
+
+        if (heads < 4)
+            heads = 4;
+
+        if (cylinderTimesHeads >= (heads * 1024) || heads > 16) {
+            sectorsPerTrack = 31;
+            heads = 16;
+            cylinderTimesHeads = totalSectors / sectorsPerTrack;
+        }
+
+        if (cylinderTimesHeads >= (heads * 1024)) {
+            sectorsPerTrack = 63;
+            heads = 16;
+            cylinderTimesHeads = totalSectors / sectorsPerTrack;
+        }
+    }
+    cylinders = cylinderTimesHeads / heads;
+
+    return (VHD_GEOM(cylinders, heads, sectorsPerTrack));
+}
+
+// Compute the VHD checksum: the bitwise complement of the sum of the sz
+// bytes at ptr, each taken as an unsigned value.  When excl is non-NULL the
+// four bytes it points at (the stored checksum field, expected to lie inside
+// the summed range) are taken back out of the sum.
+// Returns 0 if ptr is NULL.
+uint32_t
+vhd_chksum(char *ptr, size_t sz, char *excl)
+{
+    uint32_t chksum = 0;
+    size_t i;
+
+    if (ptr == NULL)
+        return (0);
+
+    for (i = 0; i < sz; i++)
+        chksum += (uint8_t)ptr[i];
+
+    if (excl != NULL) {
+        // Subtract 4 bytes of checksum.  They were added as unsigned
+        // bytes above, so they must be removed as unsigned bytes too:
+        // plain char may be signed, which would skew the result for
+        // any checksum byte >= 0x80.
+        for (i = 0; i < 4; i++)
+            chksum -= (uint8_t)excl[i];
+    }
+
+    return (~chksum);
+}
+
+
+static char * // returns ctime()'s shared buffer with the trailing newline removed, or NULL on failure
+vhd_time(uint32_t *file_time, uint32_t *cur_time, int f2c) // f2c != 0: *file_time -> *cur_time; f2c == 0: *cur_time -> *file_time
+{
+ time_t tm, tm_1970, tm_2000;
+ char *timestr;
+ struct tm epoch_1970 = {0,0,0,1,0,70,0,0,0}; // NOTE(review): positional init presumably relies on sec,min,hour,mday,mon,year member order (1 Jan 1970) -- confirm for the target libc
+ struct tm epoch_2000 = {0,0,0,1,0,100,0,0,0}; // same layout, year 100 = 2000
+
+
+ tm_1970 = mktime(&epoch_1970);
+ tm_2000 = mktime(&epoch_2000);
+
+ if (f2c) {
+ if (file_time == NULL) {
+ VIDDBG(0, "Invalid time\n");
+ if (cur_time != NULL)
+ *cur_time = 0;
+ return NULL;
+ }
+
+ tm = *file_time + (tm_2000-tm_1970); // shift forward by the distance between the two epochs
+ timestr = ctime(&tm);
+ if (timestr == NULL) {
+ VIDDBG(0, "Couldn't convert time (0x%x)\n", *file_time);
+ return NULL;
+ }
+ if (cur_time != NULL)
+ *cur_time = tm;
+ timestr[strlen(timestr)-1] = '\0'; // drop the newline that ctime() appends
+ } else {
+ if (cur_time == NULL) {
+ VIDDBG(0, "Invalid time\n");
+ if (file_time != NULL)
+ *file_time = 0;
+ return NULL;
+ }
+ tm = *cur_time - (tm_2000-tm_1970); // shift back by the distance between the two epochs
+ timestr = ctime(&tm); // note: this formats the shifted value, not *cur_time
+ if (timestr == NULL) {
+ VIDDBG(0, "Couldn't convert time (0x%x)\n", *cur_time);
+ return NULL;
+ }
+ if (file_time != NULL)
+ *file_time = tm;
+ timestr[strlen(timestr)-1] = '\0'; // drop the newline that ctime() appends
+ }
+
+ return timestr;
+}
+
+
+// Read the parent locator data described by ple from vf's file descriptor.
+// Returns a NUL-terminated copy of the first data_len bytes, allocated with
+// vdisk_malloc(), or NULL on invalid input, allocation failure or I/O error.
+char *
+vhd_get_parent_name(vd_file_t *vf, ple_t *ple)
+{
+    char *cp, *buf, *pool;
+    ssize_t bytes;
+
+    if ((vf == NULL) || (ple == NULL) || (ple->data_len == 0)) {
+        VIDDBG(0, "Invalid data\n");
+        return (NULL);
+    }
+
+    // The name is copied out of a data_space-sized read, so a length
+    // beyond that space would run past the data that was read.
+    if (ple->data_len > ple->data_space) {
+        VIDDBG(0, "Invalid data\n");
+        return (NULL);
+    }
+
+    // The file is opened with O_DIRECT, so we need to
+    // align buffer on 512-byte boundary
+    pool = buf = vdisk_malloc((size_t)ple->data_space+512);
+    if (buf == NULL) {
+        perror("malloc");
+        return (NULL);
+    }
+    while((addr_t)buf & 511) buf++;
+
+    if (lseek(vf->fd, ple->data_off, SEEK_SET) != ple->data_off) {
+        perror("lseek");
+        vdisk_free(pool);
+        return NULL;
+    }
+
+    bytes = read(vf->fd, buf, (size_t)ple->data_space);
+    if ((bytes < 0) || ((size_t)bytes != (size_t)ple->data_space)) {
+        perror("read");
+        vdisk_free(pool);
+        VIDDBG(0, "fd = %d\n", vf->fd);
+        return NULL;
+    }
+
+    cp = vdisk_malloc((size_t)ple->data_len+1);
+    if (cp == NULL) {
+        perror("malloc");
+        vdisk_free(pool);
+        return (NULL);
+    }
+
+    // Terminate the copy that is returned, not the scratch buffer that
+    // is about to be freed.
+    strncpy(cp, buf, ple->data_len);
+    cp[ple->data_len] = 0;
+    vdisk_free(pool);
+
+    //XXX: for codes W2Ru and W2ku we need to convert from UTF-16 to ASCII
+    return cp;
+}
+
+// Print a human-readable dump of a VHD file's footer and, for dynamic and
+// differencing disks, of its dynamic header; differencing disks also get
+// their parent ID, parent timestamp and every non-empty parent locator
+// entry.  Returns 0 on success or the error reported by vdisk_size().
+int
+vhd_print_header(vd_file_t *vf)
+{
+    char *cp;
+    uint64_t v64;
+    uint32_t v32;
+    vhd_file_t *vhd = (vhd_file_t *)(vf->vdf);
+    size_t sz, max_sz;
+    int i;
+    int err;
+
+    // Figure out max file size
+
+    err = vdisk_size(vf->fd, &sz);
+    if (err) {
+        VIDDBG(0, "Couldn't get file size\n");
+        return (err);
+    }
+
+    if (vhd_get_ftr_type(vhd->ftr) == VHD_TYPE_FIXED)
+        max_sz = sz;
+    else {
+        uint64_t unmapped_blocks = 0;
+        size_t new_bytes;
+
+        // Count blocks that haven't been allocated
+        for (i=0; i< vhd_get_dhdr_tbl_entries(vhd->dhdr); i++)
+            if (__arch__swab32(vhd->bat[i]) ==
+                VHD_BAT_INVALID_ENTRY)
+                unmapped_blocks++;
+
+        // XXX: Assume that block size is in 512-byte chunks
+        new_bytes = unmapped_blocks * (vhd->sectormap_sz +
+            vhd_get_dhdr_blksz(vhd->dhdr));
+        max_sz = sz + new_bytes;
+    }
+
+    printf("FILE %s:\n", vf->name);
+    printf("\tMaximum file size:\t0x%016zx\n\n", max_sz);
+
+
+    v64 = vhd_get_ftr_cookie(vhd->ftr);
+    cp = (char *)&v64;
+    printf("\tCookie:\t\t\t0x%016" PRIx64 " (\"%c%c%c%c%c%c%c%c\")\n", v64,
+        cp[0], cp[1], cp[2], cp[3], cp[4], cp[5], cp[6], cp[7]);
+
+    printf("\tFeatures:\t\t0x%08x\n", vhd_get_ftr_features(vhd->ftr));
+    printf("\tFile format version:\t0x%08x\n",
+        vhd_get_ftr_fformat(vhd->ftr));
+    printf("\tData Offset:\t\t0x%016" PRIx64 "\n",
+        vhd_get_ftr_dataoff(vhd->ftr));
+
+    v32 = vhd_get_ftr_timestamp(vhd->ftr);
+    printf("\ttimestamp:\t\t0x%08x (%s)\n", v32, vhd_time(&v32, NULL, 1));
+
+    printf("\tCreator App:\t\t0x%08x\n", vhd_get_ftr_cr_app(vhd->ftr));
+    printf("\tCreator Ver:\t\t0x%08x\n", vhd_get_ftr_cr_ver(vhd->ftr));
+    printf("\tCreator Host OS:\t0x%08x\n", vhd_get_ftr_cr_hostos(vhd->ftr));
+    printf("\tOriginal size:\t\t0x%016" PRIx64 "\n",
+        vhd_get_ftr_orig_sz(vhd->ftr));
+    printf("\tCurrent size:\t\t0x%016" PRIx64 "\n",
+        vhd_get_ftr_cur_sz(vhd->ftr));
+    printf("\tGeometry:\t\t0x%08x\n", vhd_get_ftr_geom(vhd->ftr));
+    printf("\tType:\t\t\t0x%08x\n", vhd_get_ftr_type(vhd->ftr));
+    printf("\tChecksum:\t\t0x%08x\n", vhd_get_ftr_chksum(vhd->ftr));
+
+    printf("\tUnique ID:\t\t");
+    cp = (char *)vhd_get_ftr_uid(vhd->ftr);
+    for (i=0;i<16;i++)
+        printf("%02x", (*cp++) & 0xff);
+
+    printf("\n\tSaved state:\t\t0x%08x\n",
+        vhd_get_ftr_saved_state(vhd->ftr));
+    if ((vhd_get_ftr_type(vhd->ftr) == VHD_TYPE_DYNAMIC ) ||
+        (vhd_get_ftr_type(vhd->ftr) == VHD_TYPE_DIFF )) {
+
+        printf(" Dynamic Header:\n");
+
+        v64 = vhd_get_dhdr_cookie(vhd->dhdr);
+        cp = (char *)&v64;
+        printf("\t Cookie:\t\t0x%016" PRIx64
+            " (\"%c%c%c%c%c%c%c%c\")\n",
+            v64, cp[0], cp[1], cp[2], cp[3], cp[4], cp[5], cp[6],
+            cp[7]);
+        printf("\t Data Offset:\t\t0x%016" PRIx64 "\n",
+            vhd_get_dhdr_dataoff(vhd->dhdr));
+        printf("\t Table Offset:\t\t0x%016" PRIx64 "\n",
+            vhd_get_dhdr_tbloff(vhd->dhdr));
+        printf("\t Max Table Entries:\t0x%08x\n",
+            vhd_get_dhdr_tbl_entries(vhd->dhdr));
+        printf("\t Block Size:\t\t0x%08x\n",
+            vhd_get_dhdr_blksz(vhd->dhdr));
+        printf("\t Checksum:\t\t0x%08x\n",
+            vhd_get_dhdr_chksum(vhd->dhdr));
+    }
+
+    if (vhd_get_ftr_type(vhd->ftr) == VHD_TYPE_DIFF ) {
+
+        printf("\t Parent Unique ID:\t");
+        cp = (char *)vhd_get_dhdr_puid(vhd->dhdr);
+        for (i=0;i<16;i++)
+            printf("%02x", (*cp++) & 0xff);
+        v32 = vhd_get_dhdr_ptimestamp(vhd->dhdr);
+        printf("\n\t Parent Timestamp:\t0x%08x (%s)\n",
+            v32, vhd_time(&v32, NULL, 1));
+
+        for (i=0;i<8;i++) {
+            ple_t ple;
+
+            vhd_get_dhdr_ple(vhd->dhdr, &ple, i);
+            if (ple.code != 0) {
+                printf("\t Parent Locator Entry %d:\n", i);
+
+                cp = (char *)&ple.code;
+                printf("\t\tPlatform Code:\t0x%08x "
+                    "(\"%c%c%c%c\")\n",
+                    ple.code, cp[3], cp[2], cp[1], cp[0]);
+                printf("\t\tData Space:\t0x%08x\n",
+                    ple.data_space);
+                printf("\t\tData Length:\t0x%08x\n",
+                    ple.data_len);
+                printf("\t\tData Offset:\t0x%016" PRIx64" \n",
+                    ple.data_off);
+                cp = vhd_get_parent_name(vf, &ple);
+                if (cp == NULL) {
+                    VIDDBG(0, "Can't locate parent info "
+                        "in file\n");
+                    continue;
+                }
+                printf("\t\tParent Locator:\t%s\n", cp);
+                // The name comes from vdisk_malloc(), so give it
+                // back through the matching vdisk_free().
+                vdisk_free(cp);
+            }
+        }
+    }
+
+    return (0);
+}
+
+// Parse the VHD-specific command line options into a freshly allocated
+// vhd_args_t and validate them against the requested operations
+// (VDISK_OP_CREATE / VDISK_OP_MODIFY).
+// On success stores the structure in *args and returns 0; the caller owns
+// it and its parent string.  On any error prints the usage line, frees
+// everything allocated here and returns -1.
+int
+vhd_parse_args(int argc, int operations, char *argv[], void **args)
+{
+    // getopt() returns int: stored in a plain char, its -1 end marker
+    // never compares equal where char is unsigned and the loop would
+    // not terminate.
+    int c;
+    int i;
+    extern char *optarg;
+    vhd_args_t *vhd_args;
+
+    void vhd_usage() {
+        fprintf(stderr, "VHD-specific options: "
+            "-S <size(MB)> [-f|-d [-p <parent>]] [-B <size(B)>]"
+            " [-u UUID] [-t]\n");
+    }
+
+    vhd_args = malloc(sizeof(vhd_args_t));
+    if (vhd_args == NULL) {
+        VIDDBG(0, "Can't allocate arguments\n");
+        return (-1);
+    }
+
+    memset(vhd_args, 0, sizeof(vhd_args_t));
+    vhd_args->type = VHD_TYPE_NONE;
+    vhd_args->blocksz = 0x200000; // 2MB
+
+    while (1) {
+
+        c = getopt(argc, argv, "S:fdstB:u:p:");
+        if (c == -1)
+            break;
+
+        switch (c) {
+        case 'f':
+            vhd_args->type = VHD_TYPE_FIXED;
+            vhd_args->args_mask |= VHD_ARG_TYPE;
+            break;
+        case 's': // 's' for "sparse"
+            VIDDBG(0, "'-s' option is obsolete. Use '-d' instead\n");
+            /* fallthrough */
+        case 'd':
+            vhd_args->type = VHD_TYPE_DYNAMIC;
+            vhd_args->args_mask |= VHD_ARG_TYPE;
+            break;
+        case 'S':
+            vhd_args->vhd_sz = atol(optarg) * 1024 * 1024;
+            vhd_args->args_mask |= VHD_ARG_SZ;
+            break;
+        case 't':
+            vhd_args->args_mask |= VHD_ARG_TIME;
+            break;
+        case 'p':
+            vhd_args->args_mask |= VHD_ARG_PARENT;
+            // Drop the copy made for an earlier -p, if any
+            free(vhd_args->parent);
+            vhd_args->parent = malloc(strlen(optarg)+1);
+            if (vhd_args->parent == NULL) {
+                VIDDBG(0, "Out of memory\n");
+                goto fail;
+            }
+            strncpy(vhd_args->parent, optarg, strlen(optarg)+1);
+            break;
+        case 'B':
+            vhd_args->blocksz = atol(optarg);
+            // Zero passes the alignment test below but would divide
+            // by zero in the size check after the loop
+            if (vhd_args->blocksz == 0) {
+                VIDDBG(0, "block size must not be zero\n");
+                goto fail;
+            }
+            // Must be in 512 byte chunks
+            if (vhd_args->blocksz & 511) {
+                VIDDBG(0,
+                    "block size must be divisible by 512\n");
+                goto fail;
+            }
+            vhd_args->args_mask |= VHD_ARG_BLOCKSZ;
+            break;
+        case 'u':
+            if ((optarg == NULL) || (strlen(optarg) != 32)) {
+                VIDDBG(0, "UUID is a 16-byte (32-character)"
+                    " string\n");
+                goto fail;
+            }
+
+            // Nibbles are OR-ed in below, so start from zero even
+            // if -u is given more than once
+            memset(vhd_args->uuid, 0, sizeof(vhd_args->uuid));
+
+            // Convert UUID characters to hex
+            for(i=0;i<32;i++) {
+                uint8_t val;
+
+                val = optarg[i];
+                if (!isxdigit(val)) {
+                    VIDDBG(0, "Invalid character in UUID "
+                        "string ('%c')\n", optarg[i]);
+                    // Common exit also releases ->parent
+                    goto fail;
+                }
+                if (isalpha(val)) {
+                    val = tolower(val);
+                    val -= ('a' - 0xa);
+                } else
+                    val -= '0';
+
+                // two hex numbers per byte
+                vhd_args->uuid[i>>1] |= (val << (4*((i&1)^1)));
+            }
+            vhd_args->args_mask |= VHD_ARG_UUID;
+            break;
+        default:
+            goto fail;
+        }
+    }
+
+    if ((vhd_args->parent != NULL) && (vhd_args->type == VHD_TYPE_FIXED)) {
+        VIDDBG(0, "Fixed VHD cannot have a parent\n");
+        goto fail;
+    }
+
+    if (operations & VDISK_OP_CREATE) {
+        if (vhd_args->parent == NULL) {
+            if ((vhd_args->vhd_sz == 0) ||
+                (vhd_args->type == VHD_TYPE_NONE))
+                goto fail;
+        }
+    }
+
+    if (vhd_args->vhd_sz % vhd_args->blocksz) {
+        VIDDBG(0, "File size must be multiple of block size\n");
+        goto fail;
+    }
+
+    if (operations & VDISK_OP_MODIFY) {
+        if (vhd_args->args_mask & VHD_ARG_PARENT) {
+            if (vhd_args->type == VHD_TYPE_FIXED) {
+                VIDDBG(0, "Fixed VHDs can't have parents\n");
+                goto fail;
+            }
+        }
+        if (vhd_args->args_mask & (VHD_ARG_SZ | VHD_ARG_BLOCKSZ)) {
+            VIDDBG(0, "Can't modify VHD's size or block size\n");
+            goto fail;
+        }
+    }
+
+    if (vhd_args->parent != NULL) {
+        vhd_args->type = VHD_TYPE_DIFF;
+        if (vhd_args->args_mask & (VHD_ARG_SZ | VHD_ARG_BLOCKSZ)) {
+            VIDDBG(0, "Differencing VHD's size and block size "
+                "are inherited from parent\n");
+            goto fail;
+        }
+    }
+
+
+    *args = vhd_args;
+    return (0);
+
+fail:
+    free(vhd_args->parent);
+    free(vhd_args);
+    vhd_usage();
+    return (-1);
+}
+
+// Store differencing file's parent information
+//
+// Copies the parent's unique ID and timestamp into vhd's dynamic header,
+// clears all eight parent locator entries and fills entry 0 with a locator
+// for parentname (absolute or relative, judged by a leading '/').  The
+// dynamic header is then written at file offset VHD_FTR_SZ and the name,
+// including its NUL, at the locator's data offset.  If data is non-NULL it
+// receives the offset just past the space reserved for the name.
+// Returns 0 on success or an errno value on failure.
+static int
+vhd_store_parent(int vfd, vhd_file_t *vhd, vhd_file_t *pvhd,
+    char *parentname, loff_t *data)
+{
+    uint32_t bat_sz;
+    ple_t ple;
+    int i;
+    int err;
+    ssize_t bytes;
+    off_t pos;
+    size_t name_sz = strlen(parentname) + 1;    // name plus its NUL
+
+    vhd_set_dhdr_puid(vhd->dhdr, vhd_get_ftr_uid(pvhd->ftr));
+    vhd_set_dhdr_ptimestamp(vhd->dhdr,
+        vhd_get_ftr_timestamp(pvhd->ftr));
+
+    memset(&ple, 0, sizeof(ple_t));
+    for (i=0;i<8;i++)
+        vhd_set_dhdr_ple(vhd->dhdr, &ple, i);
+
+    if (parentname[0] == '/')
+        ple.code = VHD_DYN_PLE_ABS;
+    else
+        ple.code = VHD_DYN_PLE_REL;
+
+    // XXX: The spec says this is number of 512b sectors,
+    // but file created by MS's Virtual PC tool seems to
+    // think this is number of bytes, aligned at 512b
+    ple.data_space = (name_sz + 512)
+        & (~511);
+    ple.data_len = name_sz;
+
+    bat_sz = vhd_get_dhdr_tbl_entries(vhd->dhdr) << 2;
+
+    ple.data_off = VHD_DHDR_SZ + VHD_FTR_SZ +
+        bat_sz +
+        ((bat_sz & 511) ? (512-(bat_sz&511)) : 0) +
+        512; // XXX: see comment in vhd_create_vdisk()
+    vhd_set_dhdr_ple(vhd->dhdr, &ple, 0);
+
+    // Recalculate checksum
+    vhd_set_dhdr_chksum(vhd->dhdr,
+        vhd_chksum(vhd->dhdr, VHD_DHDR_SZ,
+            &vhd->dhdr[VHD_DHDR_CHKSUM_OFF]));
+
+    if (lseek(vfd, VHD_FTR_SZ, SEEK_SET) != VHD_FTR_SZ) {
+        err = errno;
+        VIDDBG(0, "lseek: %s", strerror(err));
+        return (err);
+    }
+
+    // Write the dynamic header.  A short write leaves errno untouched
+    // (possibly 0), so map it to EIO rather than report success.
+    bytes = write(vfd, vhd->dhdr, VHD_DHDR_SZ);
+    if (bytes != VHD_DHDR_SZ) {
+        err = (bytes < 0) ? errno : EIO;
+        VIDDBG(0, "write: %s", strerror(err));
+        return (err);
+    }
+
+    // Write PLE
+    pos = lseek(vfd, ple.data_off, SEEK_SET);
+    if ((pos == (off_t)-1) || ((uint64_t)pos != ple.data_off)) {
+        err = (pos == (off_t)-1) ? errno : EIO;
+        VIDDBG(0, "lseek: %s", strerror(err));
+        return (err);
+    }
+    bytes = write(vfd, parentname, name_sz);
+    if ((bytes < 0) || ((size_t)bytes != name_sz)) {
+        err = (bytes < 0) ? errno : EIO;
+        VIDDBG(0, "write: %s", strerror(err));
+        return (err);
+    }
+
+    if (data != NULL)
+        *data = (loff_t)ple.data_off + (loff_t)ple.data_space;
+
+    return (0);
+}
+
+
+// Apply the modifications selected in args (a vhd_args_t) to the vdisk:
+// a new UUID (VHD_ARG_UUID), a new parent (VHD_ARG_PARENT) and/or a fresh
+// timestamp (VHD_ARG_TIME).  The file is reopened without O_DIRECT, the
+// footer checksum is recomputed and the footer is written back at the end
+// of the file and, for non-fixed disks, at the front as well.
+// Returns 0 on success, -1 or an errno value on failure.
+int
+vhd_modify_vdisk(struct vdisk_dev *vdisk, void *args)
+{
+    vhd_args_t *vhd_args = args;
+    vd_file_t *vf = NULL;
+    vhd_file_t *vhd;
+    size_t sz;
+    ssize_t bytes;
+    int err;
+    int store_footer = 0;
+    struct list_head *ptr;
+    int stop = 0;
+
+
+    // XXX: We always make a single pass
+    list_for_each(ptr, &vdisk->vdf_head) {
+
+        vf = list_entry(ptr, vd_file_t, vdf_list);
+        if ((vf == NULL) || (vf->vdf == NULL)) {
+            VIDDBG(0, "Can't access vdisk's structures\n");
+            return (-1);
+        }
+        vhd = (vhd_file_t *)vf->vdf;
+
+        // Close and reopen file (it may have been open O_DIRECT)
+        err = vdisk_close(vf->fd);
+        if (err) {
+            VIDDBG(0, "Can't close %s:%d\n", vf->name, err);
+            return (err);
+        }
+
+        vf->fd = open(vf->name, O_RDWR, 0644);
+        if (vf->fd == -1) {
+            err = errno;
+            // strerror() yields a string, so it needs %s, not %d
+            VIDDBG(0, "Can't open %s:%s\n", vf->name,
+                strerror(err));
+            return (err);
+        }
+
+        // Update UUID
+        if (vhd_args->args_mask & VHD_ARG_UUID) {
+
+            vhd_set_ftr_uid(vhd->ftr, vhd_args->uuid);
+
+            store_footer = 1;
+            stop = 1;
+        }
+
+        // Change parent name
+        if (vhd_args->args_mask & VHD_ARG_PARENT) {
+            vhd_file_t *pvhd;
+            struct vdisk_dev parent;
+            vd_file_t *pvf;
+
+            // Open parent file
+            err = vdisk_init(&parent, vhd_args->parent, NULL, 0);
+            if (err) {
+                VIDDBG(0, "Failed to initialize state for "
+                    "parent %s\n", vhd_args->parent);
+                return (err);
+            }
+            pvf = list_entry(parent.vdf_head.next, vd_file_t,
+                vdf_list);
+            pvhd = (vhd_file_t *)pvf->vdf;
+
+            // Update dynamic header and parent data
+            err = vhd_store_parent(vf->fd, vhd, pvhd,
+                vhd_args->parent, NULL);
+            if (err) {
+                VIDDBG(0, "Failed to store parent name (%s)\n",
+                    vhd_args->parent);
+                vdisk_fini(&parent);
+                return (err);
+            }
+            vdisk_fini(&parent);
+
+            store_footer = 1;
+            stop = 1;
+        }
+
+        // Update timestamp
+        if (vhd_args->args_mask & VHD_ARG_TIME) {
+            uint32_t curtime, ftime;
+            time_t now = time(NULL);
+
+            if (now == (time_t)-1) {
+                err = errno;
+                perror("time");
+                return (err);
+            }
+            curtime = now;
+            (void)vhd_time(&ftime, &curtime, 0);
+            vhd_set_ftr_timestamp(vhd->ftr, ftime);
+
+            // The timestamp lives in the footer: without this the
+            // new value is never written to the file.
+            store_footer = 1;
+            stop = 1;
+        }
+
+        // Recompute footer's checksum
+        vhd_set_ftr_chksum(vhd->ftr,
+            vhd_chksum(vhd->ftr, VHD_FTR_SZ,
+                &vhd->ftr[VHD_FTR_CHKSUM_OFF]));
+
+        // Write the footer back if needed
+        if (store_footer) {
+
+            err = vdisk_size(vf->fd, &sz);
+            if (err != 0) {
+                VIDDBG(0, "Can't determine vdisk's size\n");
+                return (-1);
+            }
+
+            // errno is saved before perror() in the error paths
+            // below, since perror() may itself change it.
+            if (lseek(vf->fd, (sz-VHD_FTR_SZ), SEEK_SET) !=
+                (sz - VHD_FTR_SZ)) {
+                err = errno;
+                perror("lseek");
+                return (err);
+            }
+            bytes = write(vf->fd, vhd->ftr, VHD_FTR_SZ);
+            if (bytes != VHD_FTR_SZ) {
+                err = (bytes < 0) ? errno : EIO;
+                perror("write");
+                return (err);
+            }
+
+            // For non-fixed disks write footer at front as well
+            if (vhd_get_ftr_type(vhd->ftr) != VHD_TYPE_FIXED) {
+                if (lseek(vf->fd, 0, SEEK_SET) != 0) {
+                    err = errno;
+                    perror("lseek");
+                    return (err);
+                }
+                bytes = write(vf->fd, vhd->ftr, VHD_FTR_SZ);
+                if (bytes != VHD_FTR_SZ) {
+                    err = (bytes < 0) ? errno : EIO;
+                    perror("write");
+                    return (err);
+                }
+            }
+        }
+
+        if (stop)
+            break;
+    }
+
+    // An empty file list leaves vf unset; there is nothing to sync then
+    if (vf == NULL) {
+        VIDDBG(0, "Can't access vdisk's structures\n");
+        return (-1);
+    }
+
+    if (fsync(vf->fd))
+        VIDDBG(0, "fsync: %s\n", strerror(errno));
+
+    return (0);
+}
+
+int
+vhd_create_vdisk(char *filename, void *args)
+{
+ vhd_args_t *vhd_args = args;
+ vhd_file_t vhd;
+ uint32_t curtime, ftime;
+ int vfd = -1;
+ ssize_t bytes;
+ int i;
+ int err = 0;
+ char *hdr_pool = NULL, *ftr_pool = NULL;
+ struct vdisk_dev parent;
+
+ vfd = open(filename, O_CREAT|O_EXCL|O_RDWR, 0644);
+ if (vfd == -1) {
+ if (errno == EEXIST) {
+ size_t sz;
+
+ // File already exists
+ if (vhd_args->type != VHD_TYPE_FIXED) {
+ VIDDBG(0, "Raw files can only be converted to "
+ "fixed VHD format\n");
+ return (EINVAL);
+ }
+
+ vfd = open(filename, O_RDWR, 0644);
+ if (vfd == -1) {
+ err = errno;
+ VIDDBG(0, "vfd open(%s, O_RDWR) failed: %s\n",
+ filename, strerror(err));
+ return (err);
+ }
+
+ err = vdisk_size(vfd, &sz);
+ if (err) {
+ VIDDBG(0, "vdisk_size(%s) failed: %s\n",
+ filename, strerror(err));
+ return (err);
+ }
+
+ if (vhd_args->vhd_sz < sz) {
+ VIDDBG(0, "WARNING: Truncating %s (%ld bytes) "
+ "to %ld bytes\n",
+ filename, sz, vhd_args->vhd_sz);
+
+ err = ftruncate(vfd, vhd_args->vhd_sz);
+ if (err == -1) {
+ err = errno;
+ VIDDBG(0, "ftruncate(%s, %ld): %s\n",
+ filename, vhd_args->vhd_sz,
+ strerror(err));
+ return (err);
+ }
+ }
+ } else {
+ err = errno;
+ VIDDBG(0, "vfd open(%s, O_CREAT|O_EXCL|O_RDWR) "
+ "failed: %s\n", filename, strerror(err));
+ return (err);
+ }
+ }
+
+ parent.vdfd = NULL;
+
+ memset((char *)&vhd, 0, sizeof(vhd));
+ ftr_pool = vhd.ftr = vdisk_malloc(VHD_FTR_SZ+512);
+ if (vhd.ftr == NULL) {
+ VIDDBG(0, "Couldn't allocate VHD footer\n");
+ close(vfd);
+ return (ENOMEM);
+ }
+ while ((addr_t)vhd.ftr & 511) vhd.ftr++;
+
+ vhd_set_ftr_cookie(vhd.ftr, VHD_COOKIE);
+ vhd_set_ftr_features(vhd.ftr, VHD_FEATURES_RSVD);
+ vhd_set_ftr_fformat(vhd.ftr, VHD_FORMAT_VER_1);
+ vhd_set_ftr_type(vhd.ftr, vhd_args->type);
+
+ curtime = time(NULL);
+ if (curtime == -1) {
+ err = errno;
+ perror("time");
+ goto out;
+ }
+ (void)vhd_time(&ftime, &curtime, 0);
+ vhd_set_ftr_timestamp(vhd.ftr, ftime);
+
+ vhd_set_ftr_cr_app(vhd.ftr, VHD_CREATOR_APP);
+ vhd_set_ftr_cr_ver(vhd.ftr, VHD_CREATOR_VER_1);
+ vhd_set_ftr_cr_hostos(vhd.ftr, VHD_CREATOR_HOST_OS);
+ vhd_set_ftr_orig_sz(vhd.ftr, vhd_args->vhd_sz);
+ vhd_set_ftr_cur_sz(vhd.ftr, vhd_args->vhd_sz);
+ vhd_set_ftr_geom(vhd.ftr, vhd_chs(vhd_args->vhd_sz));
+
+ vhd_set_ftr_uid(vhd.ftr, vhd_args->uuid);
+
+ if (vhd_args->type == VHD_TYPE_FIXED)
+ vhd_set_ftr_dataoff(vhd.ftr, VHD_FIXED_OFFSET);
+ else if ((vhd_args->type == VHD_TYPE_DYNAMIC) ||
+ (vhd_args->type == VHD_TYPE_DIFF))
+ vhd_set_ftr_dataoff(vhd.ftr, VHD_FTR_SZ);
+ else
+ ASSERT(0);
+
+ vhd_set_ftr_chksum(vhd.ftr, vhd_chksum(vhd.ftr, VHD_FTR_SZ,
+ &vhd.ftr[VHD_FTR_CHKSUM_OFF]));
+
+ // Create dynamic header
+ if ((vhd_args->type == VHD_TYPE_DYNAMIC) ||
+ (vhd_args->type == VHD_TYPE_DIFF)) {
+
+ uint32_t bat_entry, bat_sz;
+ loff_t data;
+ vhd_file_t *pvhd = NULL;
+ vd_file_t *pvf;
+
+ if (vhd_args->type == VHD_TYPE_DIFF) {
+ // Read parent data
+ err = vdisk_init(&parent, vhd_args->parent, NULL, 0);
+ if (err) {
+ VIDDBG(0, "Failed to initialize state for "
+ "parent %s\n", vhd_args->parent);
+ return (err);
+ }
+ pvf = list_entry(parent.vdf_head.next,
+ vd_file_t, vdf_list);
+ pvhd = (vhd_file_t *)pvf->vdf;
+
+ // Update footer fields inherited from parent
+ vhd_set_ftr_orig_sz(vhd.ftr,
+ vhd_get_ftr_orig_sz(pvhd->ftr));
+ vhd_set_ftr_cur_sz(vhd.ftr,
+ vhd_get_ftr_cur_sz(pvhd->ftr));
+ vhd_set_ftr_geom(vhd.ftr,
+ vhd_get_ftr_geom(pvhd->ftr));
+
+ vhd_args->vhd_sz = vhd_get_ftr_cur_sz(vhd.ftr);
+ }
+
+ hdr_pool = vhd.dhdr = vdisk_malloc(VHD_DHDR_SZ+512);
+ if (vhd.dhdr == NULL) {
+ vdisk_free(ftr_pool);
+ VIDDBG(0, "Couldn't allocate dynamic header\n");
+ err = ENOMEM;
+ goto out;
+ }
+ while ((addr_t)vhd.dhdr & 511) vhd.dhdr++;
+
+ vhd_set_dhdr_cookie(vhd.dhdr, VHD_DYN_COOKIE);
+ vhd_set_dhdr_dataoff(vhd.dhdr, VHD_DYN_OFFSET);
+ vhd_set_dhdr_tbloff(vhd.dhdr, VHD_FTR_SZ+VHD_DHDR_SZ);
+ vhd_set_dhdr_hdrver(vhd.dhdr, VHD_DYN_HDR_VER_1);
+ vhd_set_dhdr_tbl_entries(vhd.dhdr,
+ vhd_args->vhd_sz/vhd_args->blocksz);
+ vhd_set_dhdr_blksz(vhd.dhdr, vhd_args->blocksz);
+
+ vhd_set_dhdr_chksum(vhd.dhdr,
+ vhd_chksum(vhd.dhdr, VHD_DHDR_SZ,
+ &vhd.dhdr[VHD_DHDR_CHKSUM_OFF]));
+
+ // Write the copy of the footer first
+ bytes = write(vfd, vhd.ftr, VHD_FTR_SZ);
+ if (bytes != VHD_FTR_SZ) {
+ perror("write");
+ err = errno;
+ goto out;
+ }
+
+ // Write the dynamic header
+ bytes = write(vfd, vhd.dhdr, VHD_DHDR_SZ);
+ if (bytes != VHD_DHDR_SZ) {
+ perror("write");
+ vdisk_free(vhd.dhdr);
+ close(vfd);
+ return (errno);
+ }
+
+ // Initialize BAT
+ // XXX: Make it faster perhaps?
+ bat_entry = VHD_BAT_INVALID_ENTRY;
+ for (i=0; i< vhd_get_dhdr_tbl_entries(vhd.dhdr); i++) {
+ bytes = write(vfd, &bat_entry, 4);
+ if (bytes != 4) {
+ err = errno;
+ perror("write");
+ goto out;
+ }
+ }
+
+ // BAT must end on sector boundary (512 bytes)
+ bat_entry = 0;
+ bat_sz = vhd_get_dhdr_tbl_entries(vhd.dhdr) << 2;
+ if (bat_sz & 511) {
+ for (i=0; i<512-(bat_sz&511);i++) {
+ // Write 1 byte at a time
+ bytes = write(vfd, &bat_entry, 1);
+ if (bytes != 1) {
+ perror("write");
+ err = errno;
+ goto out;
+ }
+ }
+ }
+
+ // XXX: It appears that there is a 512B block
+ // at the end of BAT, which is not mentioned in the spec
+ for (i=0; i<512>>2; i++) {
+ bytes = write(vfd, &bat_entry, 4);
+ if (bytes != 4) {
+ err = errno;
+ perror("write");
+ goto out;
+ }
+ }
+
+ if (vhd_args->type == VHD_TYPE_DIFF) {
+ // This will store dynamic header again, but that's OK
+ err = vhd_store_parent(vfd, &vhd, pvhd,
+ vhd_args->parent, &data);
+ if (err) {
+ VIDDBG(0, "Failed to store parent name (%s)\n",
+ vhd_args->parent);
+ return (err);
+ }
+
+ bytes = lseek(vfd, data, SEEK_SET);
+ if (bytes != data) {
+ err = errno;
+ perror("lseek");
+ goto out;
+ }
+ }
+ } else {
+ // for fixed disk, seek to the end of the file
+ if (lseek(vfd, vhd_args->vhd_sz, SEEK_SET) !=
+ vhd_args->vhd_sz) {
+ perror("lseek");
+ err = errno;
+ goto out;
+ }
+ }
+
+ // Write footer. For fixed disks allocate whole filesize
+ bytes = write(vfd, vhd.ftr, VHD_FTR_SZ);
+ if (bytes != VHD_FTR_SZ) {
+ perror("write");
+ err = errno;
+ goto out;
+ }
+
+out:
+ if (parent.vdfd != NULL)
+ vdisk_fini(&parent);
+
+ if (ftr_pool)
+ vdisk_free(ftr_pool);
+ if (hdr_pool)
+ vdisk_free(hdr_pool);
+ if (vfd != -1) {
+ if (fsync(vfd))
+ VIDDBG(0, "fsync: %s\n", strerror(errno));
+ close(vfd);
+ }
+
+ return (err);
+}
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|