# HG changeset patch
# User cl349@xxxxxxxxxxxxxxxxxxxx
# Node ID 522bc50588eda1c0bba0562a16fe8edd1a715f09
# Parent 6783e59e1c45c858d76d0e101ac6f9a5a8fef4a7
# Parent df11e0709383f518b0c09ba50ec8d051170bb3c0
Merge: rework the grant-table reference allocator (spinlock-protected
free list with free-shortage callbacks), simplify blkback to a single
VBD per interface, move blkfront to a per-device blkfront_info, shrink
phys-to-machine table entries to 32 bits, and enable USB in the x86_64
dom0 defconfig.
diff -r 6783e59e1c45 -r 522bc50588ed .hgignore
--- a/.hgignore Tue Aug 23 18:25:51 2005
+++ b/.hgignore Tue Aug 23 18:27:22 2005
@@ -147,6 +147,7 @@
^tools/xcs/xcsdump$
^tools/xcutils/xc_restore$
^tools/xcutils/xc_save$
+^tools/xenstat/xentop/xentop$
^tools/xenstore/testsuite/tmp/.*$
^tools/xenstore/xen$
^tools/xenstore/xenstored$
diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64
--- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64 Tue Aug 23 18:25:51 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64 Tue Aug 23 18:27:22 2005
@@ -807,7 +807,107 @@
#
CONFIG_USB_ARCH_HAS_HCD=y
CONFIG_USB_ARCH_HAS_OHCI=y
-# CONFIG_USB is not set
+CONFIG_USB=y
+# CONFIG_USB_DEBUG is not set
+
+#
+# Miscellaneous USB options
+#
+# CONFIG_USB_DEVICEFS is not set
+# CONFIG_USB_BANDWIDTH is not set
+# CONFIG_USB_DYNAMIC_MINORS is not set
+# CONFIG_USB_OTG is not set
+
+#
+# USB Host Controller Drivers
+#
+# CONFIG_USB_EHCI_HCD is not set
+CONFIG_USB_OHCI_HCD=y
+# CONFIG_USB_OHCI_BIG_ENDIAN is not set
+CONFIG_USB_OHCI_LITTLE_ENDIAN=y
+CONFIG_USB_UHCI_HCD=y
+# CONFIG_USB_SL811_HCD is not set
+
+#
+# USB Device Class drivers
+#
+# CONFIG_USB_BLUETOOTH_TTY is not set
+# CONFIG_USB_ACM is not set
+# CONFIG_USB_PRINTER is not set
+
+#
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
+#
+# CONFIG_USB_STORAGE is not set
+
+#
+# USB Input Devices
+#
+CONFIG_USB_HID=y
+CONFIG_USB_HIDINPUT=y
+# CONFIG_HID_FF is not set
+# CONFIG_USB_HIDDEV is not set
+# CONFIG_USB_AIPTEK is not set
+# CONFIG_USB_WACOM is not set
+# CONFIG_USB_KBTAB is not set
+# CONFIG_USB_POWERMATE is not set
+# CONFIG_USB_MTOUCH is not set
+# CONFIG_USB_EGALAX is not set
+# CONFIG_USB_XPAD is not set
+# CONFIG_USB_ATI_REMOTE is not set
+
+#
+# USB Imaging devices
+#
+# CONFIG_USB_MDC800 is not set
+# CONFIG_USB_MICROTEK is not set
+
+#
+# USB Multimedia devices
+#
+# CONFIG_USB_DABUSB is not set
+
+#
+# Video4Linux support is needed for USB Multimedia device support
+#
+
+#
+# USB Network Adapters
+#
+# CONFIG_USB_CATC is not set
+# CONFIG_USB_KAWETH is not set
+# CONFIG_USB_PEGASUS is not set
+# CONFIG_USB_RTL8150 is not set
+# CONFIG_USB_USBNET is not set
+CONFIG_USB_MON=y
+
+#
+# USB port drivers
+#
+
+#
+# USB Serial Converter support
+#
+# CONFIG_USB_SERIAL is not set
+
+#
+# USB Miscellaneous drivers
+#
+# CONFIG_USB_EMI62 is not set
+# CONFIG_USB_EMI26 is not set
+# CONFIG_USB_AUERSWALD is not set
+# CONFIG_USB_RIO500 is not set
+# CONFIG_USB_LEGOTOWER is not set
+# CONFIG_USB_LCD is not set
+# CONFIG_USB_LED is not set
+# CONFIG_USB_CYTHERM is not set
+# CONFIG_USB_PHIDGETKIT is not set
+# CONFIG_USB_PHIDGETSERVO is not set
+# CONFIG_USB_IDMOUSE is not set
+
+#
+# USB ATM/DSL drivers
+#
#
# USB Gadget Support
diff -r 6783e59e1c45 -r 522bc50588ed
linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c Tue Aug 23
18:25:51 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c Tue Aug 23
18:27:22 2005
@@ -149,12 +149,12 @@
if (cpu_is_offline(cpu)) {
local_irq_disable();
+#if defined(CONFIG_XEN) && defined(CONFIG_HOTPLUG_CPU)
/* Ack it. From this point on until
we get woken up, we're not allowed
to take any locks. In particular,
don't printk. */
__get_cpu_var(cpu_state) = CPU_DEAD;
-#if defined(CONFIG_XEN) && defined(CONFIG_HOTPLUG_CPU)
/* Tell hypervisor to take vcpu down. */
HYPERVISOR_vcpu_down(cpu);
#endif
diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c Tue Aug 23 18:25:51 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c Tue Aug 23 18:27:22 2005
@@ -1575,19 +1575,20 @@
/* Make sure we have a correctly sized P->M table. */
if (max_pfn != xen_start_info.nr_pages) {
phys_to_machine_mapping = alloc_bootmem_low_pages(
- max_pfn * sizeof(unsigned long));
+ max_pfn * sizeof(unsigned int));
if (max_pfn > xen_start_info.nr_pages) {
/* set to INVALID_P2M_ENTRY */
memset(phys_to_machine_mapping, ~0,
- max_pfn * sizeof(unsigned long));
+ max_pfn * sizeof(unsigned int));
memcpy(phys_to_machine_mapping,
- (unsigned long *)xen_start_info.mfn_list,
- xen_start_info.nr_pages * sizeof(unsigned long));
+ (unsigned int *)xen_start_info.mfn_list,
+ xen_start_info.nr_pages * sizeof(unsigned int));
} else {
memcpy(phys_to_machine_mapping,
- (unsigned long *)xen_start_info.mfn_list,
- max_pfn * sizeof(unsigned long));
+ (unsigned int *)xen_start_info.mfn_list,
+ max_pfn * sizeof(unsigned int));
+ /* N.B. below relies on sizeof(int) == sizeof(long). */
if (HYPERVISOR_dom_mem_op(
MEMOP_decrease_reservation,
(unsigned long *)xen_start_info.mfn_list + max_pfn,
@@ -1597,11 +1598,11 @@
free_bootmem(
__pa(xen_start_info.mfn_list),
PFN_PHYS(PFN_UP(xen_start_info.nr_pages *
- sizeof(unsigned long))));
+ sizeof(unsigned int))));
}
pfn_to_mfn_frame_list = alloc_bootmem_low_pages(PAGE_SIZE);
- for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
+ for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned int)), j++ )
{
pfn_to_mfn_frame_list[j] =
virt_to_mfn(&phys_to_machine_mapping[i]);
diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c Tue Aug 23 18:25:51 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c Tue Aug 23 18:27:22 2005
@@ -281,7 +281,7 @@
siginfo_t info;
/* Set the "privileged fault" bit to something sane. */
- error_code &= 3;
+ error_code &= ~4;
error_code |= (regs->xcs & 2) << 1;
if (regs->eflags & X86_EFLAGS_VM)
error_code |= 4;
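
The one-line change above is easy to misread, so here is the page-fault
error-code layout it manipulates (architectural x86 bit assignments;
this annotation is editorial and not part of the patch):

    /*
     * x86 page-fault error code, low bits:
     *   bit 0  P     0 = not-present fault, 1 = protection fault
     *   bit 1  W/R   1 = faulting access was a write
     *   bit 2  U/S   1 = fault taken in user mode
     *   bit 3  RSVD  reserved-bit violation in a paging structure
     *   bit 4  I/D   fault on an instruction fetch
     *
     * Old: error_code &= 3;    clears U/S, but also RSVD and I/D.
     * New: error_code &= ~4;   clears only U/S; bit 2 is then rebuilt
     *      from the guest's saved CS (ring 2/3 counts as user mode
     *      under Xen), so the higher diagnostic bits now survive.
     */
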
diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/arch/xen/i386/mm/init.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c Tue Aug 23 18:25:51 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c Tue Aug 23 18:27:22 2005
@@ -348,9 +348,12 @@
{
unsigned long vaddr;
pgd_t *pgd_base = (pgd_t *)xen_start_info.pt_base;
+ int i;
swapper_pg_dir = pgd_base;
init_mm.pgd = pgd_base;
+ for (i = 0; i < NR_CPUS; i++)
+ per_cpu(cur_pgd, i) = pgd_base;
/* Enable PSE if available */
if (cpu_has_pse) {
diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c Tue Aug 23 18:25:51 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c Tue Aug 23 18:27:22 2005
@@ -36,6 +36,8 @@
{
}
+#ifdef __i386__
+
void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
{
return NULL;
@@ -44,6 +46,8 @@
void __init bt_iounmap(void *addr, unsigned long size)
{
}
+
+#endif /* __i386__ */
#else
@@ -58,7 +62,7 @@
extern unsigned long max_low_pfn;
unsigned long mfn = address >> PAGE_SHIFT;
unsigned long pfn = mfn_to_pfn(mfn);
- return ((pfn < max_low_pfn) && (pfn_to_mfn(pfn) == mfn));
+ return ((pfn < max_low_pfn) && (phys_to_machine_mapping[pfn] == mfn));
}
/*
@@ -126,10 +130,12 @@
return NULL;
area->phys_addr = phys_addr;
addr = (void __iomem *) area->addr;
+ flags |= _PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED;
+#ifdef __x86_64__
+ flags |= _PAGE_USER;
+#endif
if (direct_remap_area_pages(&init_mm, (unsigned long) addr, phys_addr,
- size, __pgprot(_PAGE_PRESENT | _PAGE_RW |
- _PAGE_DIRTY | _PAGE_ACCESSED
- | flags), domid)) {
+ size, __pgprot(flags), domid)) {
vunmap((void __force *) addr);
return NULL;
}
@@ -218,6 +224,8 @@
kfree(p);
}
+#ifdef __i386__
+
void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
{
unsigned long offset, last_addr;
@@ -288,6 +296,8 @@
--nrpages;
}
}
+
+#endif /* __i386__ */
#endif /* CONFIG_XEN_PHYSDEV_ACCESS */
@@ -346,7 +356,7 @@
* Fill in the machine address: PTE ptr is done later by
* __direct_remap_area_pages().
*/
- v->val = (machine_addr & PAGE_MASK) | pgprot_val(prot);
+ v->val = pte_val_ma(pfn_pte_ma(machine_addr >> PAGE_SHIFT, prot));
machine_addr += PAGE_SIZE;
address += PAGE_SIZE;
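
The final hunk swaps an open-coded PTE composition for the
pfn_pte_ma()/pte_val_ma() helpers. A minimal sketch of what those
helpers amount to on non-PAE i386, with simplified stand-in types (an
illustration of the intent, not the in-tree definitions, which also
handle the PAE and x86_64 layouts):

    #define PAGE_SHIFT 12

    typedef struct { unsigned long pte_low; } pte_t;
    typedef struct { unsigned long pgprot; } pgprot_t;

    static inline pte_t pfn_pte_ma(unsigned long mfn, pgprot_t prot)
    {
        /* the frame field holds a *machine* frame number, untranslated */
        pte_t pte = { (mfn << PAGE_SHIFT) | prot.pgprot };
        return pte;
    }

    static inline unsigned long pte_val_ma(pte_t pte)
    {
        return pte.pte_low;   /* raw bits; no machine->pseudo-phys fixup */
    }

Routing the machine address through the pte abstraction is the point of
the change: the open-coded OR only worked while a PTE was exactly
(frame << PAGE_SHIFT) | flags.
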
diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c Tue Aug 23 18:25:51 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c Tue Aug 23 18:27:22 2005
@@ -40,38 +40,82 @@
EXPORT_SYMBOL(gnttab_end_foreign_transfer);
EXPORT_SYMBOL(gnttab_alloc_grant_references);
EXPORT_SYMBOL(gnttab_free_grant_references);
+EXPORT_SYMBOL(gnttab_free_grant_reference);
EXPORT_SYMBOL(gnttab_claim_grant_reference);
EXPORT_SYMBOL(gnttab_release_grant_reference);
EXPORT_SYMBOL(gnttab_grant_foreign_access_ref);
EXPORT_SYMBOL(gnttab_grant_foreign_transfer_ref);
-static grant_ref_t gnttab_free_list[NR_GRANT_ENTRIES];
+#define NR_GRANT_ENTRIES (NR_GRANT_FRAMES * PAGE_SIZE / sizeof(grant_entry_t))
+#define GNTTAB_LIST_END (NR_GRANT_ENTRIES + 1)
+
+static grant_ref_t gnttab_list[NR_GRANT_ENTRIES];
+static int gnttab_free_count = NR_GRANT_ENTRIES;
static grant_ref_t gnttab_free_head;
+static spinlock_t gnttab_list_lock = SPIN_LOCK_UNLOCKED;
static grant_entry_t *shared;
-/*
- * Lock-free grant-entry allocator
- */
-
-static inline int
-get_free_entry(
- void)
-{
- grant_ref_t fh, nfh = gnttab_free_head;
- do { if ( unlikely((fh = nfh) == NR_GRANT_ENTRIES) ) return -1; }
- while ( unlikely((nfh = cmpxchg(&gnttab_free_head, fh,
- gnttab_free_list[fh])) != fh) );
- return fh;
+static struct gnttab_free_callback *gnttab_free_callback_list = NULL;
+
+static int
+get_free_entries(int count)
+{
+ unsigned long flags;
+ int ref;
+ grant_ref_t head;
+ spin_lock_irqsave(&gnttab_list_lock, flags);
+ if (gnttab_free_count < count) {
+ spin_unlock_irqrestore(&gnttab_list_lock, flags);
+ return -1;
+ }
+ ref = head = gnttab_free_head;
+ gnttab_free_count -= count;
+ while (count-- > 1)
+ head = gnttab_list[head];
+ gnttab_free_head = gnttab_list[head];
+ gnttab_list[head] = GNTTAB_LIST_END;
+ spin_unlock_irqrestore(&gnttab_list_lock, flags);
+ return ref;
+}
+
+#define get_free_entry() get_free_entries(1)
+
+static void
+do_free_callbacks(void)
+{
+ struct gnttab_free_callback *callback = gnttab_free_callback_list, *next;
+ gnttab_free_callback_list = NULL;
+ while (callback) {
+ next = callback->next;
+ if (gnttab_free_count >= callback->count) {
+ callback->next = NULL;
+ callback->fn(callback->arg);
+ } else {
+ callback->next = gnttab_free_callback_list;
+ gnttab_free_callback_list = callback;
+ }
+ callback = next;
+ }
}
static inline void
-put_free_entry(
- grant_ref_t ref)
-{
- grant_ref_t fh, nfh = gnttab_free_head;
- do { gnttab_free_list[ref] = fh = nfh; wmb(); }
- while ( unlikely((nfh = cmpxchg(&gnttab_free_head, fh, ref)) != fh) );
+check_free_callbacks(void)
+{
+ if (unlikely(gnttab_free_callback_list))
+ do_free_callbacks();
+}
+
+static void
+put_free_entry(grant_ref_t ref)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&gnttab_list_lock, flags);
+ gnttab_list[ref] = gnttab_free_head;
+ gnttab_free_head = ref;
+ gnttab_free_count++;
+ check_free_callbacks();
+ spin_unlock_irqrestore(&gnttab_list_lock, flags);
}
/*
@@ -79,8 +123,7 @@
*/
int
-gnttab_grant_foreign_access(
- domid_t domid, unsigned long frame, int readonly)
+gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly)
{
int ref;
@@ -96,8 +139,8 @@
}
void
-gnttab_grant_foreign_access_ref(
- grant_ref_t ref, domid_t domid, unsigned long frame, int readonly)
+gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
+ unsigned long frame, int readonly)
{
shared[ref].frame = frame;
shared[ref].domid = domid;
@@ -107,7 +150,7 @@
int
-gnttab_query_foreign_access( grant_ref_t ref )
+gnttab_query_foreign_access(grant_ref_t ref)
{
u16 nflags;
@@ -117,7 +160,7 @@
}
void
-gnttab_end_foreign_access( grant_ref_t ref, int readonly )
+gnttab_end_foreign_access(grant_ref_t ref, int readonly)
{
u16 flags, nflags;
@@ -132,8 +175,7 @@
}
int
-gnttab_grant_foreign_transfer(
- domid_t domid, unsigned long pfn )
+gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn)
{
int ref;
@@ -149,8 +191,8 @@
}
void
-gnttab_grant_foreign_transfer_ref(
- grant_ref_t ref, domid_t domid, unsigned long pfn )
+gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
+ unsigned long pfn)
{
shared[ref].frame = pfn;
shared[ref].domid = domid;
@@ -159,8 +201,7 @@
}
unsigned long
-gnttab_end_foreign_transfer(
- grant_ref_t ref)
+gnttab_end_foreign_transfer(grant_ref_t ref)
{
unsigned long frame = 0;
u16 flags;
@@ -189,59 +230,79 @@
}
void
-gnttab_free_grant_references( u16 count, grant_ref_t head )
-{
- /* TODO: O(N)...? */
- grant_ref_t to_die = 0, next = head;
- int i;
-
- for ( i = 0; i < count; i++ )
- {
- to_die = next;
- next = gnttab_free_list[next];
- put_free_entry( to_die );
+gnttab_free_grant_reference(grant_ref_t ref)
+{
+
+ put_free_entry(ref);
+}
+
+void
+gnttab_free_grant_references(grant_ref_t head)
+{
+ grant_ref_t ref;
+ unsigned long flags;
+ int count = 1;
+ if (head == GNTTAB_LIST_END)
+ return;
+ spin_lock_irqsave(&gnttab_list_lock, flags);
+ ref = head;
+ while (gnttab_list[ref] != GNTTAB_LIST_END) {
+ ref = gnttab_list[ref];
+ count++;
}
-}
-
-int
-gnttab_alloc_grant_references( u16 count,
- grant_ref_t *head,
- grant_ref_t *terminal )
-{
- int i;
- grant_ref_t h = gnttab_free_head;
-
- for ( i = 0; i < count; i++ )
- if ( unlikely(get_free_entry() == -1) )
- goto not_enough_refs;
+ gnttab_list[ref] = gnttab_free_head;
+ gnttab_free_head = head;
+ gnttab_free_count += count;
+ check_free_callbacks();
+ spin_unlock_irqrestore(&gnttab_list_lock, flags);
+}
+
+int
+gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
+{
+ int h = get_free_entries(count);
+
+ if (h == -1)
+ return -ENOSPC;
*head = h;
- *terminal = gnttab_free_head;
return 0;
-
-not_enough_refs:
- gnttab_free_head = h;
- return -ENOSPC;
-}
-
-int
-gnttab_claim_grant_reference( grant_ref_t *private_head,
- grant_ref_t terminal )
-{
- grant_ref_t g;
- if ( unlikely((g = *private_head) == terminal) )
+}
+
+int
+gnttab_claim_grant_reference(grant_ref_t *private_head)
+{
+ grant_ref_t g = *private_head;
+ if (unlikely(g == GNTTAB_LIST_END))
return -ENOSPC;
- *private_head = gnttab_free_list[g];
+ *private_head = gnttab_list[g];
return g;
}
void
-gnttab_release_grant_reference( grant_ref_t *private_head,
- grant_ref_t release )
-{
- gnttab_free_list[release] = *private_head;
+gnttab_release_grant_reference(grant_ref_t *private_head, grant_ref_t release)
+{
+ gnttab_list[release] = *private_head;
*private_head = release;
+}
+
+void
+gnttab_request_free_callback(struct gnttab_free_callback *callback,
+ void (*fn)(void *), void *arg, u16 count)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&gnttab_list_lock, flags);
+ if (callback->next)
+ goto out;
+ callback->fn = fn;
+ callback->arg = arg;
+ callback->count = count;
+ callback->next = gnttab_free_callback_list;
+ gnttab_free_callback_list = callback;
+ check_free_callbacks();
+ out:
+ spin_unlock_irqrestore(&gnttab_list_lock, flags);
}
/*
@@ -252,8 +313,9 @@
static struct proc_dir_entry *grant_pde;
-static int grant_ioctl(struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long data)
+static int
+grant_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
+ unsigned long data)
{
int ret;
privcmd_hypercall_t hypercall;
@@ -291,8 +353,9 @@
ioctl: grant_ioctl,
};
-static int grant_read(char *page, char **start, off_t off,
- int count, int *eof, void *data)
+static int
+grant_read(char *page, char **start, off_t off, int count, int *eof,
+ void *data)
{
int len;
unsigned int i;
@@ -321,8 +384,9 @@
return len;
}
-static int grant_write(struct file *file, const char __user *buffer,
- unsigned long count, void *data)
+static int
+grant_write(struct file *file, const char __user *buffer, unsigned long count,
+ void *data)
{
/* TODO: implement this */
return -ENOSYS;
@@ -330,7 +394,8 @@
#endif /* CONFIG_PROC_FS */
-int gnttab_resume(void)
+int
+gnttab_resume(void)
{
gnttab_setup_table_t setup;
unsigned long frames[NR_GRANT_FRAMES];
@@ -349,7 +414,8 @@
return 0;
}
-int gnttab_suspend(void)
+int
+gnttab_suspend(void)
{
int i;
@@ -359,7 +425,8 @@
return 0;
}
-static int __init gnttab_init(void)
+static int __init
+gnttab_init(void)
{
int i;
@@ -368,7 +435,7 @@
shared = (grant_entry_t *)fix_to_virt(FIX_GNTTAB_END);
for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
- gnttab_free_list[i] = i + 1;
+ gnttab_list[i] = i + 1;
#ifdef CONFIG_PROC_FS
/*
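
Summing up the gnttab.c rework: the cmpxchg-based lock-free allocator
becomes a spinlock-protected free list threaded through gnttab_list[],
where gnttab_list[ref] names the next free reference and GNTTAB_LIST_END
terminates a chain, and callers that find the pool short can park a
gnttab_free_callback to be run once enough references return. A
stripped-down user-space model of the list threading (illustrative only;
the lock and the callback list are omitted):

    #define NR       8
    #define LIST_END (NR + 1)

    static int list[NR];                   /* list[ref] = next free ref */
    static int free_head;
    static int free_count = NR;

    static void init_list(void)                      /* cf. gnttab_init */
    {
        for (int i = 0; i < NR; i++)
            list[i] = i + 1;
    }

    static int get_free_entries(int count)      /* cf. get_free_entries */
    {
        int ref, head;
        if (free_count < count)
            return -1;         /* caller may register a free callback */
        ref = head = free_head;
        free_count -= count;
        while (count-- > 1)    /* walk to the last ref being handed out */
            head = list[head];
        free_head = list[head];     /* detach the chain from the pool */
        list[head] = LIST_END;      /* terminate it for the caller */
        return ref;            /* caller iterates the chain via list[] */
    }

    static void put_free_entry(int ref)          /* cf. put_free_entry */
    {
        list[ref] = free_head;      /* push back on the front; the real */
        free_head = ref;            /* code then runs any callbacks that */
        free_count++;               /* can now be satisfied */
    }
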
diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile Tue Aug 23 18:25:51 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile Tue Aug 23 18:27:22 2005
@@ -44,7 +44,7 @@
c-obj-$(CONFIG_MODULES) += module.o
-#obj-y += topology.o
+obj-y += topology.o
c-obj-y += intel_cacheinfo.o
bootflag-y += ../../../i386/kernel/bootflag.o
diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c Tue Aug 23 18:25:51 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c Tue Aug 23 18:27:22 2005
@@ -778,21 +778,21 @@
/* Make sure we have a large enough P->M table. */
if (end_pfn > xen_start_info.nr_pages) {
phys_to_machine_mapping = alloc_bootmem(
- max_pfn * sizeof(unsigned long));
+ max_pfn * sizeof(u32));
memset(phys_to_machine_mapping, ~0,
- max_pfn * sizeof(unsigned long));
+ max_pfn * sizeof(u32));
memcpy(phys_to_machine_mapping,
- (unsigned long *)xen_start_info.mfn_list,
- xen_start_info.nr_pages * sizeof(unsigned long));
+ (u32 *)xen_start_info.mfn_list,
+ xen_start_info.nr_pages * sizeof(u32));
free_bootmem(
__pa(xen_start_info.mfn_list),
PFN_PHYS(PFN_UP(xen_start_info.nr_pages *
- sizeof(unsigned long))));
+ sizeof(u32))));
}
pfn_to_mfn_frame_list = alloc_bootmem(PAGE_SIZE);
- for ( i=0, j=0; i < end_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
+ for ( i=0, j=0; i < end_pfn; i+=(PAGE_SIZE/sizeof(u32)), j++ )
{
pfn_to_mfn_frame_list[j] =
virt_to_mfn(&phys_to_machine_mapping[i]);
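
The u32 switch halves the phys-to-machine table, since machine frame
numbers fit in 32 bits at these memory sizes. Back-of-envelope figures,
assuming a dom0 with 4 GiB of pseudo-physical memory (illustrative
numbers, not from the patch):

    end_pfn                       = 4 GiB / 4 KiB  = 1,048,576 entries
    before: unsigned long (8 B)   = 8 MiB of bootmem for the table
    after:  u32 (4 B)             = 4 MiB of bootmem for the table

    entries per pfn_to_mfn_frame_list frame:
        PAGE_SIZE / sizeof(u32)   = 4096 / 4       = 1024 (was 512)

which matches the doubled loop stride in the final hunk.
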
diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/arch/xen/x86_64/mm/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/Makefile Tue Aug 23 18:25:51 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/Makefile Tue Aug 23 18:27:22 2005
@@ -6,10 +6,10 @@
CFLAGS += -Iarch/$(XENARCH)/mm
-obj-y := init.o fault.o ioremap.o pageattr.o
+obj-y := init.o fault.o pageattr.o
c-obj-y := extable.o
-i386-obj-y := hypervisor.o
+i386-obj-y := hypervisor.o ioremap.o
#obj-y := init.o fault.o ioremap.o extable.o pageattr.o
#c-obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c Tue Aug 23 18:25:51 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c Tue Aug 23 18:27:22 2005
@@ -559,6 +559,11 @@
void __init xen_init_pt(void)
{
+ int i;
+
+ for (i = 0; i < NR_CPUS; i++)
+ per_cpu(cur_pgd, i) = init_mm.pgd;
+
memcpy((void *)init_level4_pgt,
(void *)xen_start_info.pt_base, PAGE_SIZE);
diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Tue Aug 23 18:25:51 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Tue Aug 23 18:27:22 2005
@@ -65,9 +65,6 @@
static PEND_RING_IDX pending_prod, pending_cons;
#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-static kmem_cache_t *buffer_head_cachep;
-#else
static request_queue_t *plugged_queue;
static inline void flush_plugged_queue(void)
{
@@ -80,7 +77,6 @@
plugged_queue = NULL;
}
}
-#endif
/* When using grant tables to map a frame for device access then the
* handle returned must be used to unmap the frame. This is needed to
@@ -184,11 +180,7 @@
blkif_t *blkif;
struct list_head *ent;
- daemonize(
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
- "xenblkd"
-#endif
- );
+ daemonize("xenblkd");
for ( ; ; )
{
@@ -215,11 +207,7 @@
}
/* Push the batch through to disc. */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
- run_task_queue(&tq_disk);
-#else
flush_plugged_queue();
-#endif
}
}
@@ -268,13 +256,6 @@
}
}
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-static void end_block_io_op(struct buffer_head *bh, int uptodate)
-{
- __end_block_io_op(bh->b_private, uptodate);
- kmem_cache_free(buffer_head_cachep, bh);
-}
-#else
static int end_block_io_op(struct bio *bio, unsigned int done, int error)
{
if ( bio->bi_size != 0 )
@@ -283,7 +264,6 @@
bio_put(bio);
return error;
}
-#endif
/******************************************************************************
@@ -357,13 +337,9 @@
unsigned long buf; unsigned int nsec;
} seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
unsigned int nseg;
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
- struct buffer_head *bh;
-#else
struct bio *bio = NULL, *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
int nbio = 0;
request_queue_t *q;
-#endif
/* Check that number of segments is sane. */
nseg = req->nr_segments;
@@ -434,49 +410,6 @@
pending_req->operation = operation;
pending_req->status = BLKIF_RSP_OKAY;
pending_req->nr_pages = nseg;
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-
- atomic_set(&pending_req->pendcnt, nseg);
- pending_cons++;
- blkif_get(blkif);
-
- for ( i = 0; i < nseg; i++ )
- {
- bh = kmem_cache_alloc(buffer_head_cachep, GFP_KERNEL);
- if ( unlikely(bh == NULL) )
- {
- __end_block_io_op(pending_req, 0);
- continue;
- }
-
- memset(bh, 0, sizeof (struct buffer_head));
-
- init_waitqueue_head(&bh->b_wait);
- bh->b_size = seg[i].nsec << 9;
- bh->b_dev = preq.dev;
- bh->b_rdev = preq.dev;
- bh->b_rsector = (unsigned long)preq.sector_number;
- bh->b_data = (char *)MMAP_VADDR(pending_idx, i) +
- (seg[i].buf & ~PAGE_MASK);
- bh->b_page = virt_to_page(MMAP_VADDR(pending_idx, i));
- bh->b_end_io = end_block_io_op;
- bh->b_private = pending_req;
-
- bh->b_state = (1 << BH_Mapped) | (1 << BH_Lock) |
- (1 << BH_Req) | (1 << BH_Launder);
- if ( operation == WRITE )
- bh->b_state |= (1 << BH_JBD) | (1 << BH_Req) | (1 << BH_Uptodate);
-
- atomic_set(&bh->b_count, 1);
-
- /* Dispatch a single request. We'll flush it to disc later. */
- generic_make_request(operation, bh);
-
- preq.sector_number += seg[i].nsec;
- }
-
-#else
for ( i = 0; i < nseg; i++ )
{
@@ -526,8 +459,6 @@
for ( i = 0; i < nbio; i++ )
submit_bio(operation, biolist[i]);
-#endif
-
return;
bad_descriptor:
@@ -595,12 +526,6 @@
if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
BUG();
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
- buffer_head_cachep = kmem_cache_create(
- "buffer_head_cache", sizeof(struct buffer_head),
- 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
-#endif
-
blkif_xenbus_init();
memset( pending_grant_handles, BLKBACK_INVALID_HANDLE, MMAP_PAGES );
diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/drivers/xen/blkback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Tue Aug 23 18:25:51 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Tue Aug 23 18:27:22 2005
@@ -5,7 +5,6 @@
#include <linux/config.h>
#include <linux/version.h>
#include <linux/module.h>
-#include <linux/rbtree.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
@@ -30,12 +29,13 @@
#define DPRINTK(_f, _a...) ((void)0)
#endif
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-typedef struct rb_root rb_root_t;
-typedef struct rb_node rb_node_t;
-#else
-struct block_device;
-#endif
+struct vbd {
+ blkif_vdev_t handle; /* what the domain refers to this vbd as */
+ unsigned char readonly; /* Non-zero -> read-only */
+ unsigned char type; /* VDISK_xxx */
+ blkif_pdev_t pdevice; /* phys device that this vbd maps to */
+ struct block_device *bdev;
+};
typedef struct blkif_st {
/* Unique identifier for this interface. */
@@ -48,25 +48,18 @@
/* Comms information. */
blkif_back_ring_t blk_ring;
/* VBDs attached to this interface. */
- rb_root_t vbd_rb; /* Mapping from 16-bit vdevices to VBDs.*/
- spinlock_t vbd_lock; /* Protects VBD mapping. */
+ struct vbd vbd;
/* Private fields. */
enum { DISCONNECTED, CONNECTED } status;
- /*
- * DISCONNECT response is deferred until pending requests are ack'ed.
- * We therefore need to store the id from the original request.
- */
- u8 disconnect_rspid;
#ifdef CONFIG_XEN_BLKDEV_TAP_BE
/* Is this a blktap frontend */
unsigned int is_blktap;
#endif
- struct blkif_st *hash_next;
struct list_head blkdev_list;
spinlock_t blk_ring_lock;
atomic_t refcnt;
- struct work_struct work;
+ struct work_struct free_work;
u16 shmem_handle;
unsigned long shmem_vaddr;
grant_ref_t shmem_ref;
@@ -77,30 +70,25 @@
void blkif_connect(blkif_be_connect_t *connect);
int blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id);
void blkif_disconnect_complete(blkif_t *blkif);
-blkif_t *blkif_find(domid_t domid);
-void free_blkif(blkif_t *blkif);
+blkif_t *alloc_blkif(domid_t domid);
+void free_blkif_callback(blkif_t *blkif);
int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn);
#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
#define blkif_put(_b) \
do { \
if ( atomic_dec_and_test(&(_b)->refcnt) ) \
- free_blkif(_b); \
+ free_blkif_callback(_b); \
} while (0)
-struct vbd;
-void vbd_free(blkif_t *blkif, struct vbd *vbd);
-
-/* Creates inactive vbd. */
-struct vbd *vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, blkif_pdev_t pdevice, int readonly);
-int vbd_is_active(struct vbd *vbd);
-void vbd_activate(blkif_t *blkif, struct vbd *vbd);
+/* Create a vbd. */
+int vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, blkif_pdev_t pdevice,
+ int readonly);
+void vbd_free(struct vbd *vbd);
unsigned long vbd_size(struct vbd *vbd);
unsigned int vbd_info(struct vbd *vbd);
unsigned long vbd_secsize(struct vbd *vbd);
-void vbd_destroy(blkif_be_vbd_destroy_t *delete);
-void destroy_all_vbds(blkif_t *blkif);
struct phys_req {
unsigned short dev;
diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/drivers/xen/blkback/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Tue Aug 23 18:25:51 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Tue Aug 23 18:27:22 2005
@@ -9,27 +9,11 @@
#include "common.h"
#include <asm-xen/evtchn.h>
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-#define VMALLOC_VMADDR(x) ((unsigned long)(x))
-#endif
+static kmem_cache_t *blkif_cachep;
-#define BLKIF_HASHSZ 1024
-#define BLKIF_HASH(_d) (((int)(_d))&(BLKIF_HASHSZ-1))
-
-static kmem_cache_t *blkif_cachep;
-static blkif_t *blkif_hash[BLKIF_HASHSZ];
-
-blkif_t *blkif_find(domid_t domid)
+blkif_t *alloc_blkif(domid_t domid)
{
- blkif_t *blkif = blkif_hash[BLKIF_HASH(domid)];
-
- while (blkif) {
- if (blkif->domid == domid) {
- blkif_get(blkif);
- return blkif;
- }
- blkif = blkif->hash_next;
- }
+ blkif_t *blkif;
blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
if (!blkif)
@@ -38,12 +22,9 @@
memset(blkif, 0, sizeof(*blkif));
blkif->domid = domid;
blkif->status = DISCONNECTED;
- spin_lock_init(&blkif->vbd_lock);
spin_lock_init(&blkif->blk_ring_lock);
atomic_set(&blkif->refcnt, 1);
- blkif->hash_next = blkif_hash[BLKIF_HASH(domid)];
- blkif_hash[BLKIF_HASH(domid)] = blkif;
return blkif;
}
@@ -55,7 +36,7 @@
op.flags = GNTMAP_host_map;
op.ref = shared_page;
op.dom = blkif->domid;
-
+
BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) );
if (op.handle < 0) {
@@ -91,7 +72,7 @@
if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
return -ENOMEM;
- err = map_frontend_page(blkif, VMALLOC_VMADDR(vma->addr), shared_page);
+ err = map_frontend_page(blkif, (unsigned long)vma->addr, shared_page);
if (err) {
vfree(vma->addr);
return err;
@@ -123,10 +104,10 @@
return 0;
}
-void free_blkif(blkif_t *blkif)
+static void free_blkif(void *arg)
{
- blkif_t **pblkif;
evtchn_op_t op = { .cmd = EVTCHNOP_close };
+ blkif_t *blkif = (blkif_t *)arg;
op.u.close.port = blkif->evtchn;
op.u.close.dom = DOMID_SELF;
@@ -134,6 +115,8 @@
op.u.close.port = blkif->remote_evtchn;
op.u.close.dom = blkif->domid;
HYPERVISOR_event_channel_op(&op);
+
+ vbd_free(&blkif->vbd);
if (blkif->evtchn)
unbind_evtchn_from_irqhandler(blkif->evtchn, blkif);
@@ -143,20 +126,17 @@
vfree(blkif->blk_ring.sring);
}
- pblkif = &blkif_hash[BLKIF_HASH(blkif->domid)];
- while ( *pblkif != blkif )
- {
- BUG_ON(!*pblkif);
- pblkif = &(*pblkif)->hash_next;
- }
- *pblkif = blkif->hash_next;
- destroy_all_vbds(blkif);
kmem_cache_free(blkif_cachep, blkif);
+}
+
+void free_blkif_callback(blkif_t *blkif)
+{
+ INIT_WORK(&blkif->free_work, free_blkif, (void *)blkif);
+ schedule_work(&blkif->free_work);
}
void __init blkif_interface_init(void)
{
blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t),
0, 0, NULL, NULL);
- memset(blkif_hash, 0, sizeof(blkif_hash));
}
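
The teardown above is now deferred on purpose: the last blkif_put() can
presumably arrive from the block-I/O completion path, where the
sleeping work in free_blkif() (HYPERVISOR_event_channel_op, vbd_free,
vfree, kmem_cache_free) must not run, so free_blkif_callback() only
queues a work item. The shape of the pattern, condensed from the hunks
above (2.6-era workqueue API, as this file already uses):

    #define blkif_put(_b)                                  \
        do {                                               \
            if (atomic_dec_and_test(&(_b)->refcnt))        \
                free_blkif_callback(_b);   /* any context */ \
        } while (0)

    void free_blkif_callback(blkif_t *blkif)
    {
        /* safe anywhere: just schedule; free_blkif() then runs from
         * keventd in process context */
        INIT_WORK(&blkif->free_work, free_blkif, (void *)blkif);
        schedule_work(&blkif->free_work);
    }
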
diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c Tue Aug 23 18:25:51 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c Tue Aug 23 18:27:22 2005
@@ -2,10 +2,6 @@
* blkback/vbd.c
*
* Routines for managing virtual block devices (VBDs).
- *
- * NOTE: vbd_lock protects updates to the rb_tree against concurrent lookups
- * in vbd_translate. All other lookups are implicitly protected because the
- * only caller (the control message dispatch routine) serializes the calls.
*
* Copyright (c) 2003-2005, Keir Fraser & Steve Hand
*/
@@ -13,28 +9,13 @@
#include "common.h"
#include <asm-xen/xenbus.h>
-struct vbd {
- blkif_vdev_t handle; /* what the domain refers to this vbd as */
- unsigned char readonly; /* Non-zero -> read-only */
- unsigned char type; /* VDISK_xxx */
- blkif_pdev_t pdevice; /* phys device that this vbd maps to */
- struct block_device *bdev;
-
- int active;
- rb_node_t rb; /* for linking into R-B tree lookup struct */
-};
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
static inline dev_t vbd_map_devnum(blkif_pdev_t cookie)
-{ return MKDEV(cookie>>8, cookie&0xff); }
+{
+ return MKDEV(BLKIF_MAJOR(cookie), BLKIF_MINOR(cookie));
+}
#define vbd_sz(_v) ((_v)->bdev->bd_part ? \
(_v)->bdev->bd_part->nr_sects : (_v)->bdev->bd_disk->capacity)
#define bdev_put(_b) blkdev_put(_b)
-#else
-#define vbd_sz(_v) (blk_size[MAJOR((_v)->pdevice)][MINOR((_v)->pdevice)]*2)
-#define bdev_put(_b) ((void)0)
-#define bdev_hardsect_size(_b) 512
-#endif
unsigned long vbd_size(struct vbd *vbd)
{
@@ -51,45 +32,32 @@
return bdev_hardsect_size(vbd->bdev);
}
-int vbd_is_active(struct vbd *vbd)
+int vbd_create(blkif_t *blkif, blkif_vdev_t handle,
+ blkif_pdev_t pdevice, int readonly)
{
- return vbd->active;
-}
+ struct vbd *vbd;
-struct vbd *vbd_create(blkif_t *blkif, blkif_vdev_t handle,
- blkif_pdev_t pdevice, int readonly)
-{
- struct vbd *vbd;
-
- if ( unlikely((vbd = kmalloc(sizeof(struct vbd), GFP_KERNEL)) == NULL) )
- {
- DPRINTK("vbd_create: out of memory\n");
- return ERR_PTR(-ENOMEM);
- }
-
+ vbd = &blkif->vbd;
vbd->handle = handle;
vbd->readonly = readonly;
vbd->type = 0;
- vbd->active = 0;
vbd->pdevice = pdevice;
- /* FIXME: Who frees vbd on failure? --RR */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
vbd->bdev = open_by_devnum(
vbd_map_devnum(vbd->pdevice),
vbd->readonly ? FMODE_READ : FMODE_WRITE);
if ( IS_ERR(vbd->bdev) )
{
DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice);
- return ERR_PTR(-ENOENT);
+ return -ENOENT;
}
if ( (vbd->bdev->bd_disk == NULL) )
{
DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice);
- bdev_put(vbd->bdev);
- return ERR_PTR(-ENOENT);
+ vbd_free(vbd);
+ return -ENOENT;
}
if ( vbd->bdev->bd_disk->flags & GENHD_FL_CD )
@@ -97,121 +65,27 @@
if ( vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE )
vbd->type |= VDISK_REMOVABLE;
-#else
- if ( (blk_size[MAJOR(vbd->pdevice)] == NULL) || (vbd_sz(vbd) == 0) )
- {
- DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice);
- return ERR_PTR(-ENOENT);
- }
-#endif
-
DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
handle, blkif->domid);
- return vbd;
+ return 0;
}
-void vbd_activate(blkif_t *blkif, struct vbd *vbd)
+void vbd_free(struct vbd *vbd)
{
- rb_node_t **rb_p, *rb_parent = NULL;
- struct vbd *i;
- BUG_ON(vbd_is_active(vbd));
-
- /* Find where to put it. */
- rb_p = &blkif->vbd_rb.rb_node;
- while ( *rb_p != NULL )
- {
- rb_parent = *rb_p;
- i = rb_entry(rb_parent, struct vbd, rb);
- if ( vbd->handle < i->handle )
- {
- rb_p = &rb_parent->rb_left;
- }
- else if ( vbd->handle > i->handle )
- {
- rb_p = &rb_parent->rb_right;
- }
- else
- {
- /* We never create two of same vbd, so not possible. */
- BUG();
- }
- }
-
- /* Now we're active. */
- vbd->active = 1;
- blkif_get(blkif);
-
- spin_lock(&blkif->vbd_lock);
- rb_link_node(&vbd->rb, rb_parent, rb_p);
- rb_insert_color(&vbd->rb, &blkif->vbd_rb);
- spin_unlock(&blkif->vbd_lock);
-}
-
-void vbd_free(blkif_t *blkif, struct vbd *vbd)
-{
- if (vbd_is_active(vbd)) {
- spin_lock(&blkif->vbd_lock);
- rb_erase(&vbd->rb, &blkif->vbd_rb);
- spin_unlock(&blkif->vbd_lock);
- blkif_put(blkif);
- }
- bdev_put(vbd->bdev);
- kfree(vbd);
-}
-
-void destroy_all_vbds(blkif_t *blkif)
-{
- struct vbd *vbd;
- rb_node_t *rb;
-
- spin_lock(&blkif->vbd_lock);
-
- while ( (rb = blkif->vbd_rb.rb_node) != NULL )
- {
- vbd = rb_entry(rb, struct vbd, rb);
- rb_erase(rb, &blkif->vbd_rb);
- spin_unlock(&blkif->vbd_lock);
- bdev_put(vbd->bdev);
- kfree(vbd);
- spin_lock(&blkif->vbd_lock);
- blkif_put(blkif);
- }
-
- spin_unlock(&blkif->vbd_lock);
+ if (vbd->bdev)
+ bdev_put(vbd->bdev);
+ vbd->bdev = NULL;
}
int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation)
{
- struct vbd *vbd;
- rb_node_t *rb;
- int rc = -EACCES;
+ struct vbd *vbd = &blkif->vbd;
+ int rc = -EACCES;
- /* Take the vbd_lock because another thread could be updating the tree. */
- spin_lock(&blkif->vbd_lock);
-
- rb = blkif->vbd_rb.rb_node;
- while ( rb != NULL )
- {
- vbd = rb_entry(rb, struct vbd, rb);
- if ( req->dev < vbd->handle )
- rb = rb->rb_left;
- else if ( req->dev > vbd->handle )
- rb = rb->rb_right;
- else
- goto found;
- }
-
- DPRINTK("vbd_translate; domain %u attempted to access "
- "non-existent VBD.\n", blkif->domid);
- rc = -ENODEV;
- goto out;
-
- found:
-
- if ( (operation == WRITE) && vbd->readonly )
+ if ((operation == WRITE) && vbd->readonly)
goto out;
- if ( unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd)) )
+ if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd)))
goto out;
req->dev = vbd->pdevice;
@@ -219,6 +93,5 @@
rc = 0;
out:
- spin_unlock(&blkif->vbd_lock);
return rc;
}
diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Tue Aug 23 18:25:51 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Tue Aug 23 18:27:22 2005
@@ -26,7 +26,6 @@
/* our communications channel */
blkif_t *blkif;
- struct vbd *vbd;
long int frontend_id;
long int pdev;
@@ -47,8 +46,6 @@
if (be->watch.node)
unregister_xenbus_watch(&be->watch);
unregister_xenbus_watch(&be->backend_watch);
- if (be->vbd)
- vbd_free(be->blkif, be->vbd);
if (be->blkif)
blkif_put(be->blkif);
if (be->frontpath)
@@ -72,7 +69,7 @@
device_unregister(&be->dev->dev);
return;
}
- if (vbd_is_active(be->vbd))
+ if (be->blkif->status == CONNECTED)
return;
err = xenbus_gather(be->frontpath, "grant-id", "%lu", &sharedmfn,
@@ -85,9 +82,8 @@
}
/* Domains must use same shared frame for all vbds. */
- if (be->blkif->status == CONNECTED &&
- (evtchn != be->blkif->remote_evtchn ||
- sharedmfn != be->blkif->shmem_frame)) {
+ if (evtchn != be->blkif->remote_evtchn ||
+ sharedmfn != be->blkif->shmem_frame) {
xenbus_dev_error(be->dev, err,
"Shared frame/evtchn %li/%u not same as"
" old %li/%u",
@@ -105,7 +101,7 @@
}
err = xenbus_printf(be->dev->nodename, "sectors", "%lu",
- vbd_size(be->vbd));
+ vbd_size(&be->blkif->vbd));
if (err) {
xenbus_dev_error(be->dev, err, "writing %s/sectors",
be->dev->nodename);
@@ -114,33 +110,28 @@
/* FIXME: use a typename instead */
err = xenbus_printf(be->dev->nodename, "info", "%u",
- vbd_info(be->vbd));
+ vbd_info(&be->blkif->vbd));
if (err) {
xenbus_dev_error(be->dev, err, "writing %s/info",
be->dev->nodename);
goto abort;
}
err = xenbus_printf(be->dev->nodename, "sector-size", "%lu",
- vbd_secsize(be->vbd));
+ vbd_secsize(&be->blkif->vbd));
if (err) {
xenbus_dev_error(be->dev, err, "writing %s/sector-size",
be->dev->nodename);
goto abort;
}
- /* First vbd? We need to map the shared frame, irq etc. */
- if (be->blkif->status != CONNECTED) {
- err = blkif_map(be->blkif, sharedmfn, evtchn);
- if (err) {
- xenbus_dev_error(be->dev, err,
- "mapping shared-frame %lu port %u",
- sharedmfn, evtchn);
- goto abort;
- }
- }
-
- /* We're ready, activate. */
- vbd_activate(be->blkif, be->vbd);
+ /* Map the shared frame, irq etc. */
+ err = blkif_map(be->blkif, sharedmfn, evtchn);
+ if (err) {
+ xenbus_dev_error(be->dev, err,
+ "mapping shared-frame %lu port %u",
+ sharedmfn, evtchn);
+ goto abort;
+ }
xenbus_transaction_end(0);
xenbus_dev_ok(be->dev);
@@ -228,20 +219,16 @@
p = strrchr(be->frontpath, '/') + 1;
handle = simple_strtoul(p, NULL, 0);
- be->blkif = blkif_find(be->frontend_id);
+ be->blkif = alloc_blkif(be->frontend_id);
if (IS_ERR(be->blkif)) {
err = PTR_ERR(be->blkif);
be->blkif = NULL;
goto device_fail;
}
- be->vbd = vbd_create(be->blkif, handle, be->pdev,
- be->readonly);
- if (IS_ERR(be->vbd)) {
- err = PTR_ERR(be->vbd);
- be->vbd = NULL;
+ err = vbd_create(be->blkif, handle, be->pdev, be->readonly);
+ if (err)
goto device_fail;
- }
frontend_changed(&be->watch, be->frontpath);
}
diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Tue Aug 23 18:25:51 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Tue Aug 23 18:27:22 2005
@@ -63,25 +63,16 @@
/* Control whether runtime update of vbds is enabled. */
#define ENABLE_VBD_UPDATE 1
-#define BLKIF_STATE_CLOSED 0
-#define BLKIF_STATE_DISCONNECTED 1
-#define BLKIF_STATE_CONNECTED 2
-
-static unsigned int blkif_state = BLKIF_STATE_CLOSED;
-static unsigned int blkif_evtchn = 0;
-static unsigned int blkif_vbds = 0;
-static unsigned int blkif_vbds_connected = 0;
-
-static blkif_front_ring_t blk_ring;
+#define BLKIF_STATE_DISCONNECTED 0
+#define BLKIF_STATE_CONNECTED 1
+
+static unsigned int blkif_state = BLKIF_STATE_DISCONNECTED;
#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
-static domid_t rdomid = 0;
-static grant_ref_t gref_head, gref_terminal;
#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
(BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE)
#define GRANTREF_INVALID (1<<15)
-static int shmem_ref;
static struct blk_shadow {
blkif_request_t req;
@@ -92,7 +83,7 @@
static int recovery = 0; /* Recovery in progress: protected by blkif_io_lock */
-static void kick_pending_request_queues(void);
+static void kick_pending_request_queues(struct blkfront_info *info);
static int __init xlblk_init(void);
@@ -119,7 +110,7 @@
/* Kernel-specific definitions used in the common code */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-#define DISABLE_SCATTERGATHER()
+#define DISABLE_SCATTERGATHER()
#else
static int sg_operation = -1;
#define DISABLE_SCATTERGATHER() (sg_operation = -1)
@@ -138,11 +129,11 @@
}
-static inline void flush_requests(void)
+static inline void flush_requests(struct blkfront_info *info)
{
DISABLE_SCATTERGATHER();
- RING_PUSH_REQUESTS(&blk_ring);
- notify_via_evtchn(blkif_evtchn);
+ RING_PUSH_REQUESTS(&info->ring);
+ notify_via_evtchn(info->evtchn);
}
@@ -152,30 +143,39 @@
module_init(xlblk_init);
-static struct xlbd_disk_info *head_waiting = NULL;
-static void kick_pending_request_queues(void)
-{
- struct xlbd_disk_info *di;
- while ( ((di = head_waiting) != NULL) && !RING_FULL(&blk_ring) )
- {
- head_waiting = di->next_waiting;
- di->next_waiting = NULL;
- /* Re-enable calldowns. */
- blk_start_queue(di->rq);
- /* Kick things off immediately. */
- do_blkif_request(di->rq);
- }
+static void kick_pending_request_queues(struct blkfront_info *info)
+{
+ if (!RING_FULL(&info->ring)) {
+ /* Re-enable calldowns. */
+ blk_start_queue(info->rq);
+ /* Kick things off immediately. */
+ do_blkif_request(info->rq);
+ }
+}
+
+static void blkif_restart_queue(void *arg)
+{
+ struct blkfront_info *info = (struct blkfront_info *)arg;
+ spin_lock_irq(&blkif_io_lock);
+ kick_pending_request_queues(info);
+ spin_unlock_irq(&blkif_io_lock);
+}
+
+static void blkif_restart_queue_callback(void *arg)
+{
+ struct blkfront_info *info = (struct blkfront_info *)arg;
+ schedule_work(&info->work);
}
int blkif_open(struct inode *inode, struct file *filep)
{
- struct gendisk *gd = inode->i_bdev->bd_disk;
- struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data;
-
- /* Update of usage count is protected by per-device semaphore. */
- di->mi->usage++;
-
- return 0;
+ // struct gendisk *gd = inode->i_bdev->bd_disk;
+ // struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data;
+
+ /* Update of usage count is protected by per-device semaphore. */
+ // di->mi->usage++;
+
+ return 0;
}
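
The global head_waiting list of stalled disks is gone: blkfront now
simply stops the request queue when the ring or the grant-reference
pool is exhausted and restarts it through the work item above. When
blkif_queue_request() (later in this file) cannot reserve
BLKIF_MAX_SEGMENTS_PER_REQUEST references, it parks a free callback;
the resulting flow, condensed as a comment-form summary of code that
appears below (no new mechanism):

    /*
     * blkif_queue_request():
     *     if (gnttab_alloc_grant_references(MAX_SEGS, &gref_head) < 0) {
     *         gnttab_request_free_callback(&info->callback,
     *                                      blkif_restart_queue_callback,
     *                                      info, MAX_SEGS);
     *         return 1;     // do_blkif_request() requeues + stops queue
     *     }
     *
     * later, once >= MAX_SEGS references are back in the pool:
     * blkif_restart_queue_callback(info)  // called under the gnttab lock
     *     -> schedule_work(&info->work)   // defer to process context
     * blkif_restart_queue(info)
     *     -> spin_lock_irq(&blkif_io_lock);
     *        kick_pending_request_queues(info);  // blk_start_queue() +
     *        spin_unlock_irq(&blkif_io_lock);    // do_blkif_request()
     *
     * (MAX_SEGS is shorthand for BLKIF_MAX_SEGMENTS_PER_REQUEST.)
     */
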
@@ -192,8 +192,8 @@
int i;
DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
- command, (long)argument, inode->i_rdev);
-
+ command, (long)argument, inode->i_rdev);
+
switch ( command )
{
case HDIO_GETGEO:
@@ -219,7 +219,7 @@
/*
* blkif_queue_request
*
- * request block io
+ * request block io
*
* id: for guest use only.
* operation: BLKIF_OP_{READ,WRITE,PROBE}
@@ -228,7 +228,7 @@
*/
static int blkif_queue_request(struct request *req)
{
- struct xlbd_disk_info *di = req->rq_disk->private_data;
+ struct blkfront_info *info = req->rq_disk->private_data;
unsigned long buffer_ma;
blkif_request_t *ring_req;
struct bio *bio;
@@ -237,20 +237,28 @@
unsigned long id;
unsigned int fsect, lsect;
int ref;
-
- if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) )
+ grant_ref_t gref_head;
+
+ if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
return 1;
+ if (gnttab_alloc_grant_references(BLKIF_MAX_SEGMENTS_PER_REQUEST,
+ &gref_head) < 0) {
+ gnttab_request_free_callback(&info->callback,
+ blkif_restart_queue_callback, info,
+ BLKIF_MAX_SEGMENTS_PER_REQUEST);
+ return 1;
+ }
+
/* Fill out a communications ring structure. */
- ring_req = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt);
+ ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
id = GET_ID_FROM_FREELIST();
blk_shadow[id].request = (unsigned long)req;
ring_req->id = id;
- ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE :
- BLKIF_OP_READ;
+ ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE : BLKIF_OP_READ;
ring_req->sector_number = (blkif_sector_t)req->sector;
- ring_req->handle = di->handle;
+ ring_req->handle = info->handle;
ring_req->nr_segments = 0;
rq_for_each_bio(bio, req)
@@ -263,31 +271,34 @@
fsect = bvec->bv_offset >> 9;
lsect = fsect + (bvec->bv_len >> 9) - 1;
/* install a grant reference. */
- ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
+ ref = gnttab_claim_grant_reference(&gref_head);
ASSERT( ref != -ENOSPC );
gnttab_grant_foreign_access_ref(
ref,
- rdomid,
+ info->backend_id,
buffer_ma >> PAGE_SHIFT,
rq_data_dir(req) );
blk_shadow[id].frame[ring_req->nr_segments] =
buffer_ma >> PAGE_SHIFT;
- ring_req->frame_and_sects[ring_req->nr_segments++] =
+ ring_req->frame_and_sects[ring_req->nr_segments] =
blkif_fas_from_gref(ref, fsect, lsect);
+
+ ring_req->nr_segments++;
}
}
- blk_ring.req_prod_pvt++;
-
+ info->ring.req_prod_pvt++;
+
/* Keep a private copy so we can reissue requests when recovering. */
pickle_request(&blk_shadow[id], ring_req);
+ gnttab_free_grant_references(gref_head);
+
return 0;
}
-
/*
* do_blkif_request
@@ -295,24 +306,26 @@
*/
void do_blkif_request(request_queue_t *rq)
{
- struct xlbd_disk_info *di;
+ struct blkfront_info *info = NULL;
struct request *req;
int queued;
- DPRINTK("Entered do_blkif_request\n");
+ DPRINTK("Entered do_blkif_request\n");
queued = 0;
while ( (req = elv_next_request(rq)) != NULL )
{
+ info = req->rq_disk->private_data;
+
if ( !blk_fs_request(req) )
{
end_request(req, 0);
continue;
}
- if ( RING_FULL(&blk_ring) )
- goto wait;
+ if (RING_FULL(&info->ring))
+ goto wait;
DPRINTK("do_blk_req %p: cmd %p, sec %lx, (%u/%li) buffer:%p [%s]\n",
req, req->cmd, req->sector, req->current_nr_sectors,
@@ -320,25 +333,19 @@
rq_data_dir(req) ? "write" : "read");
blkdev_dequeue_request(req);
- if ( blkif_queue_request(req) )
- {
+ if (blkif_queue_request(req)) {
+ blk_requeue_request(rq, req);
wait:
- di = req->rq_disk->private_data;
- if ( di->next_waiting == NULL )
- {
- di->next_waiting = head_waiting;
- head_waiting = di;
- /* Avoid pointless unplugs. */
- blk_stop_queue(rq);
- }
- break;
+ /* Avoid pointless unplugs. */
+ blk_stop_queue(rq);
+ break;
}
queued++;
}
if ( queued != 0 )
- flush_requests();
+ flush_requests(info);
}
@@ -347,25 +354,24 @@
struct request *req;
blkif_response_t *bret;
RING_IDX i, rp;
- unsigned long flags;
-
- spin_lock_irqsave(&blkif_io_lock, flags);
-
- if ( unlikely(blkif_state == BLKIF_STATE_CLOSED) ||
- unlikely(recovery) )
- {
+ unsigned long flags;
+ struct blkfront_info *info = (struct blkfront_info *)dev_id;
+
+ spin_lock_irqsave(&blkif_io_lock, flags);
+
+ if (unlikely(info->connected != BLKIF_STATE_CONNECTED || recovery)) {
spin_unlock_irqrestore(&blkif_io_lock, flags);
return IRQ_HANDLED;
}
-
- rp = blk_ring.sring->rsp_prod;
+
+ rp = info->ring.sring->rsp_prod;
rmb(); /* Ensure we see queued responses up to 'rp'. */
- for ( i = blk_ring.rsp_cons; i != rp; i++ )
+ for ( i = info->ring.rsp_cons; i != rp; i++ )
{
unsigned long id;
- bret = RING_GET_RESPONSE(&blk_ring, i);
+ bret = RING_GET_RESPONSE(&info->ring, i);
id = bret->id;
req = (struct request *)blk_shadow[id].request;
@@ -382,7 +388,7 @@
bret->status);
if ( unlikely(end_that_request_first
- (req,
+ (req,
(bret->status == BLKIF_RSP_OKAY),
req->hard_nr_sectors)) )
BUG();
@@ -394,9 +400,9 @@
}
}
- blk_ring.rsp_cons = i;
-
- kick_pending_request_queues();
+ info->ring.rsp_cons = i;
+
+ kick_pending_request_queues(info);
spin_unlock_irqrestore(&blkif_io_lock, flags);
@@ -425,31 +431,31 @@
static void kick_pending_request_queues(void)
{
/* We kick pending request queues if the ring is reasonably empty. */
- if ( (nr_pending != 0) &&
- (RING_PENDING_REQUESTS(&blk_ring) < (BLK_RING_SIZE >> 1)) )
+ if ( (nr_pending != 0) &&
+ (RING_PENDING_REQUESTS(&info->ring) < (BLK_RING_SIZE >> 1)) )
{
/* Attempt to drain the queue, but bail if the ring becomes full. */
- while ( (nr_pending != 0) && !RING_FULL(&blk_ring) )
+ while ( (nr_pending != 0) && !RING_FULL(&info->ring) )
do_blkif_request(pending_queues[--nr_pending]);
}
}
int blkif_open(struct inode *inode, struct file *filep)
{
- short xldev = inode->i_rdev;
+ short xldev = inode->i_rdev;
struct gendisk *gd = get_gendisk(xldev);
xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
- short minor = MINOR(xldev);
+ short minor = MINOR(xldev);
if ( gd->part[minor].nr_sects == 0 )
- {
+ {
/*
* Device either doesn't exist, or has zero capacity; we use a few
* cheesy heuristics to return the relevant error code
*/
if ( (gd->sizes[minor >> gd->minor_shift] != 0) ||
((minor & (gd->max_p - 1)) != 0) )
- {
+ {
/*
* We have a real device, but no such partition, or we just have a
* partition number so guess this is the problem.
@@ -458,16 +464,16 @@
}
else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE )
{
- /* This is a removable device => assume that media is missing. */
+ /* This is a removable device => assume that media is missing. */
return -ENOMEDIUM; /* media not present (this is a guess) */
- }
+ }
else
- {
+ {
/* Just go for the general 'no such device' error. */
return -ENODEV; /* no such device */
}
}
-
+
/* Update of usage count is protected by per-device semaphore. */
disk->usage++;
@@ -496,24 +502,24 @@
{
kdev_t dev = inode->i_rdev;
struct hd_geometry *geo = (struct hd_geometry *)argument;
- struct gendisk *gd;
- struct hd_struct *part;
+ struct gendisk *gd;
+ struct hd_struct *part;
int i;
unsigned short cylinders;
byte heads, sectors;
/* NB. No need to check permissions. That is done for us. */
-
+
DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
- command, (long) argument, dev);
-
+ command, (long) argument, dev);
+
gd = get_gendisk(dev);
- part = &gd->part[MINOR(dev)];
+ part = &gd->part[MINOR(dev)];
switch ( command )
{
case BLKGETSIZE:
- DPRINTK_IOCTL(" BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects);
+ DPRINTK_IOCTL(" BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects);
return put_user(part->nr_sects, (unsigned long *) argument);
case BLKGETSIZE64:
@@ -526,7 +532,7 @@
return blkif_revalidate(dev);
case BLKSSZGET:
- return hardsect_size[MAJOR(dev)][MINOR(dev)];
+ return hardsect_size[MAJOR(dev)][MINOR(dev)];
case BLKBSZGET: /* get block size */
DPRINTK_IOCTL(" BLKBSZGET: %x\n", BLKBSZGET);
@@ -552,7 +558,7 @@
values consistent with the size of the device */
heads = 0xff;
- sectors = 0x3f;
+ sectors = 0x3f;
cylinders = part->nr_sects / (heads * sectors);
if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT;
@@ -562,7 +568,7 @@
return 0;
- case HDIO_GETGEO_BIG:
+ case HDIO_GETGEO_BIG:
DPRINTK_IOCTL(" HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG);
if (!argument) return -EINVAL;
@@ -570,7 +576,7 @@
values consistent with the size of the device */
heads = 0xff;
- sectors = 0x3f;
+ sectors = 0x3f;
cylinders = part->nr_sects / (heads * sectors);
if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT;
@@ -594,7 +600,7 @@
WPRINTK("ioctl %08x not supported by XL blkif\n", command);
return -ENOSYS;
}
-
+
return 0;
}
@@ -614,7 +620,7 @@
xl_disk_t *disk;
unsigned long capacity;
int i, rc = 0;
-
+
if ( (bd = bdget(dev)) == NULL )
return -EINVAL;
@@ -662,7 +668,7 @@
/*
* blkif_queue_request
*
- * request block io
+ * request block io
*
* id: for guest use only.
* operation: BLKIF_OP_{READ,WRITE,PROBE}
@@ -696,7 +702,7 @@
buffer_ma &= PAGE_MASK;
- if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) )
+ if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
return 1;
switch ( operation )
@@ -704,7 +710,7 @@
case BLKIF_OP_READ:
case BLKIF_OP_WRITE:
- gd = get_gendisk(device);
+ gd = get_gendisk(device);
/*
* Update the sector_number we'll pass down as appropriate; note that
@@ -714,10 +720,10 @@
sector_number += gd->part[MINOR(device)].start_sect;
/*
- * If this unit doesn't consist of virtual partitions then we clear
+ * If this unit doesn't consist of virtual partitions then we clear
* the partn bits from the device number.
*/
- if ( !(gd->flags[MINOR(device)>>gd->minor_shift] &
+ if ( !(gd->flags[MINOR(device)>>gd->minor_shift] &
GENHD_FL_VIRT_PARTNS) )
device &= ~(gd->max_p - 1);
@@ -725,20 +731,20 @@
(sg_dev == device) &&
(sg_next_sect == sector_number) )
{
- req = RING_GET_REQUEST(&blk_ring,
- blk_ring.req_prod_pvt - 1);
+ req = RING_GET_REQUEST(&info->ring,
+ info->ring.req_prod_pvt - 1);
bh = (struct buffer_head *)id;
-
+
bh->b_reqnext = (struct buffer_head *)blk_shadow[req->id].request;
blk_shadow[req->id].request = (unsigned long)id;
/* install a grant reference. */
- ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
+ ref = gnttab_claim_grant_reference(&gref_head);
ASSERT( ref != -ENOSPC );
gnttab_grant_foreign_access_ref(
ref,
- rdomid,
+ info->backend_id,
buffer_ma >> PAGE_SHIFT,
( operation == BLKIF_OP_WRITE ? 1 : 0 ) );
@@ -757,7 +763,7 @@
return 0;
}
- else if ( RING_FULL(&blk_ring) )
+ else if ( RING_FULL(&info->ring) )
{
return 1;
}
@@ -774,7 +780,7 @@
}
/* Fill out a communications ring structure. */
- req = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt);
+ req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
xid = GET_ID_FROM_FREELIST();
blk_shadow[xid].request = (unsigned long)id;
@@ -782,15 +788,15 @@
req->id = xid;
req->operation = operation;
req->sector_number = (blkif_sector_t)sector_number;
- req->handle = handle;
+ req->handle = handle;
req->nr_segments = 1;
/* install a grant reference. */
- ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
+ ref = gnttab_claim_grant_reference(&gref_head);
ASSERT( ref != -ENOSPC );
gnttab_grant_foreign_access_ref(
ref,
- rdomid,
+ info->backend_id,
buffer_ma >> PAGE_SHIFT,
( operation == BLKIF_OP_WRITE ? 1 : 0 ) );
@@ -798,11 +804,11 @@
req->frame_and_sects[0] = blkif_fas_from_gref(ref, fsect, lsect);
- /* Keep a private copy so we can reissue requests when recovering. */
+ /* Keep a private copy so we can reissue requests when recovering. */
pickle_request(&blk_shadow[xid], req);
- blk_ring.req_prod_pvt++;
-
+ info->ring.req_prod_pvt++;
+
return 0;
}
@@ -817,13 +823,13 @@
struct buffer_head *bh, *next_bh;
int rw, nsect, full, queued = 0;
- DPRINTK("Entered do_blkif_request\n");
+ DPRINTK("Entered do_blkif_request\n");
while ( !rq->plugged && !list_empty(&rq->queue_head))
{
- if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL )
+ if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL )
goto out;
-
+
DPRINTK("do_blkif_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n",
req, req->cmd, req->sector,
req->current_nr_sectors, req->nr_sectors, req->bh);
@@ -844,16 +850,16 @@
full = blkif_queue_request(
(unsigned long)bh,
- (rw == READ) ? BLKIF_OP_READ : BLKIF_OP_WRITE,
+ (rw == READ) ? BLKIF_OP_READ : BLKIF_OP_WRITE,
bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev);
if ( full )
- {
+ {
bh->b_reqnext = next_bh;
pending_queues[nr_pending++] = rq;
if ( unlikely(nr_pending >= MAX_PENDING) )
BUG();
- goto out;
+ goto out;
}
queued++;
@@ -861,7 +867,7 @@
/* Dequeue the buffer head from the request. */
nsect = bh->b_size >> 9;
bh = req->bh = next_bh;
-
+
if ( bh != NULL )
{
/* There's another buffer head to do. Update the request. */
@@ -891,27 +897,27 @@
static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
{
- RING_IDX i, rp;
- unsigned long flags;
+ RING_IDX i, rp;
+ unsigned long flags;
struct buffer_head *bh, *next_bh;
-
- spin_lock_irqsave(&io_request_lock, flags);
-
- if ( unlikely(blkif_state == BLKIF_STATE_CLOSED || recovery) )
+
+ spin_lock_irqsave(&io_request_lock, flags);
+
+ if ( unlikely(info->connected != BLKIF_STATE_CONNECTED || recovery) )
{
spin_unlock_irqrestore(&io_request_lock, flags);
return;
}
- rp = blk_ring.sring->rsp_prod;
+ rp = info->ring.sring->rsp_prod;
rmb(); /* Ensure we see queued responses up to 'rp'. */
- for ( i = blk_ring.rsp_cons; i != rp; i++ )
+ for ( i = info->ring.rsp_cons; i != rp; i++ )
{
unsigned long id;
blkif_response_t *bret;
-
- bret = RING_GET_RESPONSE(&blk_ring, i);
+
+ bret = RING_GET_RESPONSE(&info->ring, i);
id = bret->id;
bh = (struct buffer_head *)blk_shadow[id].request;
@@ -943,8 +949,8 @@
}
}
- blk_ring.rsp_cons = i;
-
+ info->ring.rsp_cons = i;
+
kick_pending_request_queues();
spin_unlock_irqrestore(&io_request_lock, flags);
@@ -954,24 +960,24 @@
/***************************** COMMON CODE *******************************/
-static void blkif_free(void)
+static void blkif_free(struct blkfront_info *info)
{
/* Prevent new requests being issued until we fix things up. */
spin_lock_irq(&blkif_io_lock);
- blkif_state = BLKIF_STATE_DISCONNECTED;
+ info->connected = BLKIF_STATE_DISCONNECTED;
spin_unlock_irq(&blkif_io_lock);
/* Free resources associated with old device channel. */
- if ( blk_ring.sring != NULL )
- {
- free_page((unsigned long)blk_ring.sring);
- blk_ring.sring = NULL;
- }
- unbind_evtchn_from_irqhandler(blkif_evtchn, NULL);
- blkif_evtchn = 0;
-}
-
-static void blkif_recover(void)
+ if ( info->ring.sring != NULL )
+ {
+ free_page((unsigned long)info->ring.sring);
+ info->ring.sring = NULL;
+ }
+ unbind_evtchn_from_irqhandler(info->evtchn, NULL);
+ info->evtchn = 0;
+}
+
+static void blkif_recover(struct blkfront_info *info)
{
int i;
blkif_request_t *req;
@@ -987,7 +993,7 @@
memset(&blk_shadow, 0, sizeof(blk_shadow));
for ( i = 0; i < BLK_RING_SIZE; i++ )
blk_shadow[i].req.id = i+1;
- blk_shadow_free = blk_ring.req_prod_pvt;
+ blk_shadow_free = info->ring.req_prod_pvt;
blk_shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
/* Stage 3: Find pending requests and requeue them. */
@@ -999,7 +1005,7 @@
/* Grab a request slot and unpickle shadow state into it. */
req = RING_GET_REQUEST(
- &blk_ring, blk_ring.req_prod_pvt);
+ &info->ring, info->ring.req_prod_pvt);
unpickle_request(req, &copy[i]);
/* We get a new request id, and must reset the shadow state. */
@@ -1012,7 +1018,7 @@
if ( req->frame_and_sects[j] & GRANTREF_INVALID )
gnttab_grant_foreign_access_ref(
blkif_gref_from_fas(req->frame_and_sects[j]),
- rdomid,
+ info->backend_id,
blk_shadow[req->id].frame[j],
rq_data_dir((struct request *)
blk_shadow[req->id].request));
@@ -1020,32 +1026,31 @@
}
blk_shadow[req->id].req = *req;
- blk_ring.req_prod_pvt++;
+ info->ring.req_prod_pvt++;
}
kfree(copy);
recovery = 0;
- /* blk_ring->req_prod will be set when we flush_requests().*/
+ /* info->ring->req_prod will be set when we flush_requests().*/
wmb();
/* Kicks things back into life. */
- flush_requests();
+ flush_requests(info);
/* Now safe to let other people use the interface. */
- blkif_state = BLKIF_STATE_CONNECTED;
-}
-
-static void blkif_connect(u16 evtchn, domid_t domid)
+ info->connected = BLKIF_STATE_CONNECTED;
+}
+
+static void blkif_connect(struct blkfront_info *info, u16 evtchn)
{
int err = 0;
- blkif_evtchn = evtchn;
- rdomid = domid;
+ info->evtchn = evtchn;
err = bind_evtchn_to_irqhandler(
- blkif_evtchn, blkif_int, SA_SAMPLE_RANDOM, "blkif", NULL);
+ info->evtchn, blkif_int, SA_SAMPLE_RANDOM, "blkif", info);
if ( err != 0 )
{
WPRINTK("bind_evtchn_to_irqhandler failed (err=%d)\n", err);
@@ -1059,17 +1064,6 @@
{ "" }
};
-struct blkfront_info
-{
- /* We watch the backend */
- struct xenbus_watch watch;
- int vdevice;
- u16 handle;
- int connected;
- struct xenbus_device *dev;
- char *backend;
-};
-
static void watch_for_status(struct xenbus_watch *watch, const char *node)
{
struct blkfront_info *info;
@@ -1081,35 +1075,33 @@
node += strlen(watch->node);
/* FIXME: clean up when error on the other end. */
- if (info->connected)
+ if (info->connected == BLKIF_STATE_CONNECTED)
return;
- err = xenbus_gather(watch->node,
+ err = xenbus_gather(watch->node,
"sectors", "%lu", §ors,
"info", "%u", &binfo,
"sector-size", "%lu", §or_size,
NULL);
if (err) {
- xenbus_dev_error(info->dev, err, "reading backend fields");
+ xenbus_dev_error(info->xbdev, err, "reading backend fields");
return;
}
- xlvbd_add(sectors, info->vdevice, info->handle, binfo, sector_size);
- info->connected = 1;
-
- /* First to connect? blkif is now connected. */
- if (blkif_vbds_connected++ == 0)
- blkif_state = BLKIF_STATE_CONNECTED;
-
- xenbus_dev_ok(info->dev);
+ xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);
+ info->connected = BLKIF_STATE_CONNECTED;
+
+ blkif_state = BLKIF_STATE_CONNECTED;
+
+ xenbus_dev_ok(info->xbdev);
/* Kick pending requests. */
spin_lock_irq(&blkif_io_lock);
- kick_pending_request_queues();
+ kick_pending_request_queues(info);
spin_unlock_irq(&blkif_io_lock);
}
-static int setup_blkring(struct xenbus_device *dev, unsigned int backend_id)
+static int setup_blkring(struct xenbus_device *dev, struct blkfront_info *info)
{
blkif_sring_t *sring;
evtchn_op_t op = { .cmd = EVTCHNOP_alloc_unbound };
@@ -1121,25 +1113,28 @@
return -ENOMEM;
}
SHARED_RING_INIT(sring);
- FRONT_RING_INIT(&blk_ring, sring, PAGE_SIZE);
-
- shmem_ref = gnttab_claim_grant_reference(&gref_head,
- gref_terminal);
- ASSERT(shmem_ref != -ENOSPC);
- gnttab_grant_foreign_access_ref(shmem_ref,
- backend_id,
- virt_to_mfn(blk_ring.sring),
- 0);
-
- op.u.alloc_unbound.dom = backend_id;
+ FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
+
+ err = gnttab_grant_foreign_access(info->backend_id,
+ virt_to_mfn(info->ring.sring), 0);
+ if (err == -ENOSPC) {
+ free_page((unsigned long)info->ring.sring);
+ info->ring.sring = 0;
+ xenbus_dev_error(dev, err, "granting access to ring page");
+ return err;
+ }
+ info->grant_id = err;
+
+ op.u.alloc_unbound.dom = info->backend_id;
err = HYPERVISOR_event_channel_op(&op);
if (err) {
- free_page((unsigned long)blk_ring.sring);
- blk_ring.sring = 0;
+ gnttab_end_foreign_access(info->grant_id, 0);
+ free_page((unsigned long)info->ring.sring);
+ info->ring.sring = 0;
xenbus_dev_error(dev, err, "allocating event channel");
return err;
}
- blkif_connect(op.u.alloc_unbound.port, backend_id);
+ blkif_connect(info, op.u.alloc_unbound.port);
return 0;
}
@@ -1149,11 +1144,11 @@
{
char *backend;
const char *message;
- int err, backend_id;
+ int err;
backend = NULL;
err = xenbus_gather(dev->nodename,
- "backend-id", "%i", &backend_id,
+ "backend-id", "%i", &info->backend_id,
"backend", NULL, &backend,
NULL);
if (XENBUS_EXIST_ERR(err))
@@ -1168,12 +1163,10 @@
goto out;
}
- /* First device? We create shared ring, alloc event channel. */
- if (blkif_vbds == 0) {
- err = setup_blkring(dev, backend_id);
- if (err)
- goto out;
- }
+ /* Create shared ring, alloc event channel. */
+ err = setup_blkring(dev, info);
+ if (err)
+ goto out;
err = xenbus_transaction_start(dev->nodename);
if (err) {
@@ -1181,13 +1174,13 @@
goto destroy_blkring;
}
- err = xenbus_printf(dev->nodename, "grant-id","%u", shmem_ref);
+ err = xenbus_printf(dev->nodename, "grant-id","%u", info->grant_id);
if (err) {
message = "writing grant-id";
goto abort_transaction;
}
err = xenbus_printf(dev->nodename,
- "event-channel", "%u", blkif_evtchn);
+ "event-channel", "%u", info->evtchn);
if (err) {
message = "writing event-channel";
goto abort_transaction;
@@ -1220,8 +1213,7 @@
/* Have to do this *outside* transaction. */
xenbus_dev_error(dev, err, "%s", message);
destroy_blkring:
- if (blkif_vbds == 0)
- blkif_free();
+ blkif_free(info);
goto out;
}
@@ -1250,9 +1242,11 @@
xenbus_dev_error(dev, err, "allocating info structure");
return err;
}
- info->dev = dev;
+ info->xbdev = dev;
info->vdevice = vdevice;
- info->connected = 0;
+ info->connected = BLKIF_STATE_DISCONNECTED;
+ info->mi = NULL;
+ INIT_WORK(&info->work, blkif_restart_queue, (void *)info);
/* Front end dir is a number, which is used as the id. */
info->handle = simple_strtoul(strrchr(dev->nodename,'/')+1, NULL, 0);
@@ -1266,7 +1260,6 @@
/* Call once in case entries already there. */
watch_for_status(&info->watch, info->watch.node);
- blkif_vbds++;
return 0;
}
@@ -1277,15 +1270,13 @@
if (info->backend)
unregister_xenbus_watch(&info->watch);
- if (info->connected) {
- xlvbd_del(info->handle);
- blkif_vbds_connected--;
- }
+ if (info->mi)
+ xlvbd_del(info);
+
+ blkif_free(info);
+
kfree(info->backend);
kfree(info);
-
- if (--blkif_vbds == 0)
- blkif_free();
return 0;
}
@@ -1298,10 +1289,8 @@
kfree(info->backend);
info->backend = NULL;
- if (--blkif_vbds == 0) {
- recovery = 1;
- blkif_free();
- }
+ recovery = 1;
+ blkif_free(info);
return 0;
}
@@ -1314,8 +1303,7 @@
/* FIXME: Check geometry hasn't changed here... */
err = talk_to_backend(dev, info);
if (!err) {
- if (blkif_vbds++ == 0)
- blkif_recover();
+ blkif_recover(info);
}
return err;
}
@@ -1363,11 +1351,6 @@
{
int i;
- /* A grant for every ring slot, plus one for the ring itself. */
- if (gnttab_alloc_grant_references(MAXIMUM_OUTSTANDING_BLOCK_REQS + 1,
- &gref_head, &gref_terminal) < 0)
- return 1;
-
if ( (xen_start_info.flags & SIF_INITDOMAIN) ||
(xen_start_info.flags & SIF_BLK_BE_DOMAIN) )
return 0;
@@ -1391,6 +1374,6 @@
{
int i;
for ( i = 0; i < s->req.nr_segments; i++ )
- gnttab_release_grant_reference(
- &gref_head, blkif_gref_from_fas(s->req.frame_and_sects[i]));
-}
+ gnttab_free_grant_reference(
+ blkif_gref_from_fas(s->req.frame_and_sects[i]));
+}
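The reworked setup_blkring() above fixes a strict ordering for bringing up each device: allocate and initialise the shared ring, grant the backend access to the ring page, then allocate the event channel, unwinding in reverse on failure. A minimal sketch of that sequence, assuming the helpers shown in this patch and eliding the xenbus error reporting:

    /* Sketch only: condenses the setup_blkring() flow above.  Assumes
     * info->backend_id was already read from the store and that the
     * blkif ring macros from the io/ring.h header are in scope. */
    static int example_ring_setup(struct blkfront_info *info)
    {
        blkif_sring_t *sring = (blkif_sring_t *)__get_free_page(GFP_KERNEL);
        int err;

        if (sring == NULL)
            return -ENOMEM;
        SHARED_RING_INIT(sring);                        /* shared indexes */
        FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); /* private state  */

        /* One grant reference covers the whole ring page. */
        err = gnttab_grant_foreign_access(info->backend_id,
                                          virt_to_mfn(info->ring.sring), 0);
        if (err < 0) {
            free_page((unsigned long)info->ring.sring);
            info->ring.sring = NULL;
            return err;
        }
        info->grant_id = err;  /* the reference itself is the return value */
        return 0;
    }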
diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/drivers/xen/blkfront/block.h
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Tue Aug 23 18:25:51 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Tue Aug 23 18:27:22 2005
@@ -46,6 +46,7 @@
#include <linux/major.h>
#include <linux/devfs_fs_kernel.h>
#include <asm-xen/hypervisor.h>
+#include <asm-xen/xenbus.h>
#include <asm-xen/xen-public/xen.h>
#include <asm-xen/xen-public/io/blkif.h>
#include <asm-xen/xen-public/io/ring.h>
@@ -79,11 +80,20 @@
#define DPRINTK_IOCTL(_f, _a...) ((void)0)
#endif
-struct xlbd_type_info {
- int partn_shift;
- int disks_per_major;
- char *devname;
- char *diskname;
+struct xlbd_type_info
+{
+ int partn_shift;
+ int disks_per_major;
+ char *devname;
+ char *diskname;
+};
+
+struct xlbd_major_info
+{
+ int major;
+ int index;
+ int usage;
+ struct xlbd_type_info *type;
};
/*
@@ -91,26 +101,27 @@
* hang in private_data off the gendisk structure. We may end up
* putting all kinds of interesting stuff here :-)
*/
-struct xlbd_major_info {
- int major;
- int index;
- int usage;
- struct xlbd_type_info *type;
+struct blkfront_info
+{
+ struct xenbus_device *xbdev;
+ /* We watch the backend */
+ struct xenbus_watch watch;
+ dev_t dev;
+ int vdevice;
+ blkif_vdev_t handle;
+ int connected;
+ char *backend;
+ int backend_id;
+ int grant_id;
+ blkif_front_ring_t ring;
+ unsigned int evtchn;
+ struct xlbd_major_info *mi;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+ request_queue_t *rq;
+#endif
+ struct work_struct work;
+ struct gnttab_free_callback callback;
};
-
-struct xlbd_disk_info {
- int xd_device;
- blkif_vdev_t handle;
- struct xlbd_major_info *mi;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
- struct xlbd_disk_info *next_waiting;
- request_queue_t *rq;
-#endif
-};
-
-typedef struct xen_block {
- int usage;
-} xen_block_t;
extern spinlock_t blkif_io_lock;
@@ -123,7 +134,7 @@
extern void do_blkif_request (request_queue_t *rq);
/* Virtual block-device subsystem. */
-int xlvbd_add(blkif_sector_t capacity, int device, blkif_vdev_t handle,
- u16 info, u16 sector_size);
-void xlvbd_del(blkif_vdev_t handle);
+int xlvbd_add(blkif_sector_t capacity, int device,
+ u16 vdisk_info, u16 sector_size, struct blkfront_info *info);
+void xlvbd_del(struct blkfront_info *info);
#endif /* __XEN_DRIVERS_BLOCK_H__ */
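Since all per-device state now lives in struct blkfront_info, the gendisk's private_data can carry it directly (xlvbd_alloc_gendisk() below stores it there). A hypothetical accessor, purely for illustration:

    /* Hypothetical helper, not part of this patch: recover the
     * per-device state that xlvbd_alloc_gendisk() stashes in
     * gd->private_data. */
    static inline struct blkfront_info *disk_to_info(struct gendisk *gd)
    {
        return (struct blkfront_info *)gd->private_data;
    }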
diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c Tue Aug 23 18:25:51 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c Tue Aug 23 18:27:22 2005
@@ -43,325 +43,269 @@
#define NUM_SCSI_MAJORS 9
#define NUM_VBD_MAJORS 1
-struct lvdisk
-{
- blkif_sector_t capacity; /* 0: Size in terms of 512-byte sectors. */
- blkif_vdev_t handle; /* 8: Device number (opaque 16 bit value). */
- u16 info;
- dev_t dev;
- struct list_head list;
+static struct xlbd_type_info xlbd_ide_type = {
+ .partn_shift = 6,
+ .disks_per_major = 2,
+ .devname = "ide",
+ .diskname = "hd",
};
-static struct xlbd_type_info xlbd_ide_type = {
- .partn_shift = 6,
- .disks_per_major = 2,
- .devname = "ide",
- .diskname = "hd",
+static struct xlbd_type_info xlbd_scsi_type = {
+ .partn_shift = 4,
+ .disks_per_major = 16,
+ .devname = "sd",
+ .diskname = "sd",
};
-static struct xlbd_type_info xlbd_scsi_type = {
- .partn_shift = 4,
- .disks_per_major = 16,
- .devname = "sd",
- .diskname = "sd",
+static struct xlbd_type_info xlbd_vbd_type = {
+ .partn_shift = 4,
+ .disks_per_major = 16,
+ .devname = "xvd",
+ .diskname = "xvd",
};
-static struct xlbd_type_info xlbd_vbd_type = {
- .partn_shift = 4,
- .disks_per_major = 16,
- .devname = "xvd",
- .diskname = "xvd",
-};
-
static struct xlbd_major_info *major_info[NUM_IDE_MAJORS + NUM_SCSI_MAJORS +
- NUM_VBD_MAJORS];
-
-#define XLBD_MAJOR_IDE_START 0
-#define XLBD_MAJOR_SCSI_START (NUM_IDE_MAJORS)
-#define XLBD_MAJOR_VBD_START (NUM_IDE_MAJORS + NUM_SCSI_MAJORS)
-
-#define XLBD_MAJOR_IDE_RANGE XLBD_MAJOR_IDE_START ... XLBD_MAJOR_SCSI_START - 1
-#define XLBD_MAJOR_SCSI_RANGE XLBD_MAJOR_SCSI_START ... XLBD_MAJOR_VBD_START - 1
-#define XLBD_MAJOR_VBD_RANGE XLBD_MAJOR_VBD_START ... XLBD_MAJOR_VBD_START + NUM_VBD_MAJORS - 1
+ NUM_VBD_MAJORS];
+
+#define XLBD_MAJOR_IDE_START 0
+#define XLBD_MAJOR_SCSI_START (NUM_IDE_MAJORS)
+#define XLBD_MAJOR_VBD_START (NUM_IDE_MAJORS + NUM_SCSI_MAJORS)
+
+#define XLBD_MAJOR_IDE_RANGE XLBD_MAJOR_IDE_START ... XLBD_MAJOR_SCSI_START - 1
+#define XLBD_MAJOR_SCSI_RANGE XLBD_MAJOR_SCSI_START ... XLBD_MAJOR_VBD_START - 1
+#define XLBD_MAJOR_VBD_RANGE XLBD_MAJOR_VBD_START ... XLBD_MAJOR_VBD_START + NUM_VBD_MAJORS - 1
/* Information about our VBDs. */
#define MAX_VBDS 64
static LIST_HEAD(vbds_list);
-#define MAJOR_XEN(dev) ((dev)>>8)
-#define MINOR_XEN(dev) ((dev) & 0xff)
-
-static struct block_device_operations xlvbd_block_fops =
-{
- .owner = THIS_MODULE,
- .open = blkif_open,
- .release = blkif_release,
- .ioctl = blkif_ioctl,
+static struct block_device_operations xlvbd_block_fops =
+{
+ .owner = THIS_MODULE,
+ .open = blkif_open,
+ .release = blkif_release,
+ .ioctl = blkif_ioctl,
};
spinlock_t blkif_io_lock = SPIN_LOCK_UNLOCKED;
-static struct lvdisk *xlvbd_device_alloc(void)
-{
- struct lvdisk *disk;
-
- disk = kmalloc(sizeof(*disk), GFP_KERNEL);
- if (disk != NULL) {
- memset(disk, 0, sizeof(*disk));
- INIT_LIST_HEAD(&disk->list);
- }
- return disk;
-}
-
-static void xlvbd_device_free(struct lvdisk *disk)
-{
- list_del(&disk->list);
- kfree(disk);
-}
-
-static struct xlbd_major_info *xlbd_alloc_major_info(
- int major, int minor, int index)
-{
- struct xlbd_major_info *ptr;
-
- ptr = kmalloc(sizeof(struct xlbd_major_info), GFP_KERNEL);
- if (ptr == NULL)
- return NULL;
-
- memset(ptr, 0, sizeof(struct xlbd_major_info));
-
- ptr->major = major;
-
- switch (index) {
- case XLBD_MAJOR_IDE_RANGE:
- ptr->type = &xlbd_ide_type;
- ptr->index = index - XLBD_MAJOR_IDE_START;
- break;
- case XLBD_MAJOR_SCSI_RANGE:
- ptr->type = &xlbd_scsi_type;
- ptr->index = index - XLBD_MAJOR_SCSI_START;
- break;
- case XLBD_MAJOR_VBD_RANGE:
- ptr->type = &xlbd_vbd_type;
- ptr->index = index - XLBD_MAJOR_VBD_START;
- break;
- }
-
- printk("Registering block device major %i\n", ptr->major);
- if (register_blkdev(ptr->major, ptr->type->devname)) {
- WPRINTK("can't get major %d with name %s\n",
- ptr->major, ptr->type->devname);
- kfree(ptr);
- return NULL;
- }
-
- devfs_mk_dir(ptr->type->devname);
- major_info[index] = ptr;
- return ptr;
-}
-
-static struct xlbd_major_info *xlbd_get_major_info(int device)
-{
- int major, minor, index;
-
- major = MAJOR_XEN(device);
- minor = MINOR_XEN(device);
-
- switch (major) {
- case IDE0_MAJOR: index = 0; break;
- case IDE1_MAJOR: index = 1; break;
- case IDE2_MAJOR: index = 2; break;
- case IDE3_MAJOR: index = 3; break;
- case IDE4_MAJOR: index = 4; break;
- case IDE5_MAJOR: index = 5; break;
- case IDE6_MAJOR: index = 6; break;
- case IDE7_MAJOR: index = 7; break;
- case IDE8_MAJOR: index = 8; break;
- case IDE9_MAJOR: index = 9; break;
- case SCSI_DISK0_MAJOR: index = 10; break;
- case SCSI_DISK1_MAJOR ... SCSI_DISK7_MAJOR:
- index = 11 + major - SCSI_DISK1_MAJOR;
- break;
- case SCSI_CDROM_MAJOR: index = 18; break;
- default: index = 19; break;
- }
-
- return ((major_info[index] != NULL) ? major_info[index] :
- xlbd_alloc_major_info(major, minor, index));
-}
-
-static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
-{
- request_queue_t *rq;
-
- rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
- if (rq == NULL)
- return -1;
-
- elevator_init(rq, "noop");
-
- /* Hard sector size and max sectors impersonate the equiv. hardware. */
- blk_queue_hardsect_size(rq, sector_size);
- blk_queue_max_sectors(rq, 512);
-
- /* Each segment in a request is up to an aligned page in size. */
- blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
- blk_queue_max_segment_size(rq, PAGE_SIZE);
-
- /* Ensure a merged request will fit in a single I/O ring slot. */
- blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
- blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
-
- /* Make sure buffer addresses are sector-aligned. */
- blk_queue_dma_alignment(rq, 511);
-
- gd->queue = rq;
-
- return 0;
-}
-
-static struct gendisk *xlvbd_alloc_gendisk(
- struct xlbd_major_info *mi, int minor, blkif_sector_t capacity,
- int device, blkif_vdev_t handle, u16 info, u16 sector_size)
-{
- struct gendisk *gd;
- struct xlbd_disk_info *di;
- int nr_minors = 1;
-
- di = kmalloc(sizeof(struct xlbd_disk_info), GFP_KERNEL);
- if (di == NULL)
- return NULL;
- memset(di, 0, sizeof(*di));
- di->mi = mi;
- di->xd_device = device;
- di->handle = handle;
-
- if ((minor & ((1 << mi->type->partn_shift) - 1)) == 0)
- nr_minors = 1 << mi->type->partn_shift;
-
- gd = alloc_disk(nr_minors);
- if (gd == NULL)
- goto out;
-
- if (nr_minors > 1)
- sprintf(gd->disk_name, "%s%c", mi->type->diskname,
- 'a' + mi->index * mi->type->disks_per_major +
- (minor >> mi->type->partn_shift));
- else
- sprintf(gd->disk_name, "%s%c%d", mi->type->diskname,
- 'a' + mi->index * mi->type->disks_per_major +
- (minor >> mi->type->partn_shift),
- minor & ((1 << mi->type->partn_shift) - 1));
-
- gd->major = mi->major;
- gd->first_minor = minor;
- gd->fops = &xlvbd_block_fops;
- gd->private_data = di;
- set_capacity(gd, capacity);
-
- if (xlvbd_init_blk_queue(gd, sector_size)) {
- del_gendisk(gd);
- goto out;
- }
-
- di->rq = gd->queue;
-
- if (info & VDISK_READONLY)
- set_disk_ro(gd, 1);
-
- if (info & VDISK_REMOVABLE)
- gd->flags |= GENHD_FL_REMOVABLE;
-
- if (info & VDISK_CDROM)
- gd->flags |= GENHD_FL_CD;
-
- add_disk(gd);
-
- return gd;
-
-out:
- kfree(di);
- return NULL;
-}
-
-int xlvbd_add(blkif_sector_t capacity, int device, blkif_vdev_t handle,
- u16 info, u16 sector_size)
-{
- struct lvdisk *new;
- struct block_device *bd;
- struct gendisk *gd;
- struct xlbd_major_info *mi;
-
- mi = xlbd_get_major_info(device);
- if (mi == NULL)
- return -EPERM;
-
- new = xlvbd_device_alloc();
- if (new == NULL)
- return -ENOMEM;
- new->capacity = capacity;
- new->info = info;
- new->handle = handle;
- new->dev = MKDEV(MAJOR_XEN(device), MINOR_XEN(device));
-
- bd = bdget(new->dev);
- if (bd == NULL)
- goto out;
-
- gd = xlvbd_alloc_gendisk(mi, MINOR_XEN(device), capacity, device, handle,
- info, sector_size);
- if (gd == NULL)
- goto out_bd;
-
- list_add(&new->list, &vbds_list);
-out_bd:
- bdput(bd);
-out:
- return 0;
-}
-
-static int xlvbd_device_del(struct lvdisk *disk)
-{
- struct block_device *bd;
- struct gendisk *gd;
- struct xlbd_disk_info *di;
- int ret = 0, unused;
- request_queue_t *rq;
-
- bd = bdget(disk->dev);
- if (bd == NULL)
- return -1;
-
- gd = get_gendisk(disk->dev, &unused);
- di = gd->private_data;
-
-#if 0 /* This is wrong: hda and hdb share same major, for example. */
- if (di->mi->usage != 0) {
- WPRINTK("disk removal failed: used [dev=%x]\n", disk->dev);
- ret = -1;
- goto out;
- }
-#endif
-
- rq = gd->queue;
- del_gendisk(gd);
- put_disk(gd);
- blk_cleanup_queue(rq);
-
- xlvbd_device_free(disk);
- bdput(bd);
- return ret;
-}
-
-void xlvbd_del(blkif_vdev_t handle)
-{
- struct lvdisk *i;
-
- list_for_each_entry(i, &vbds_list, list) {
- if (i->handle == handle) {
- xlvbd_device_del(i);
- return;
- }
+static struct xlbd_major_info *
+xlbd_alloc_major_info(int major, int minor, int index)
+{
+ struct xlbd_major_info *ptr;
+
+ ptr = kmalloc(sizeof(struct xlbd_major_info), GFP_KERNEL);
+ if (ptr == NULL)
+ return NULL;
+
+ memset(ptr, 0, sizeof(struct xlbd_major_info));
+
+ ptr->major = major;
+
+ switch (index) {
+ case XLBD_MAJOR_IDE_RANGE:
+ ptr->type = &xlbd_ide_type;
+ ptr->index = index - XLBD_MAJOR_IDE_START;
+ break;
+ case XLBD_MAJOR_SCSI_RANGE:
+ ptr->type = &xlbd_scsi_type;
+ ptr->index = index - XLBD_MAJOR_SCSI_START;
+ break;
+ case XLBD_MAJOR_VBD_RANGE:
+ ptr->type = &xlbd_vbd_type;
+ ptr->index = index - XLBD_MAJOR_VBD_START;
+ break;
}
- BUG();
-}
+
+ printk("Registering block device major %i\n", ptr->major);
+ if (register_blkdev(ptr->major, ptr->type->devname)) {
+ WPRINTK("can't get major %d with name %s\n",
+ ptr->major, ptr->type->devname);
+ kfree(ptr);
+ return NULL;
+ }
+
+ devfs_mk_dir(ptr->type->devname);
+ major_info[index] = ptr;
+ return ptr;
+}
+
+static struct xlbd_major_info *
+xlbd_get_major_info(int vdevice)
+{
+ struct xlbd_major_info *mi;
+ int major, minor, index;
+
+ major = BLKIF_MAJOR(vdevice);
+ minor = BLKIF_MINOR(vdevice);
+
+ switch (major) {
+ case IDE0_MAJOR: index = 0; break;
+ case IDE1_MAJOR: index = 1; break;
+ case IDE2_MAJOR: index = 2; break;
+ case IDE3_MAJOR: index = 3; break;
+ case IDE4_MAJOR: index = 4; break;
+ case IDE5_MAJOR: index = 5; break;
+ case IDE6_MAJOR: index = 6; break;
+ case IDE7_MAJOR: index = 7; break;
+ case IDE8_MAJOR: index = 8; break;
+ case IDE9_MAJOR: index = 9; break;
+ case SCSI_DISK0_MAJOR: index = 10; break;
+ case SCSI_DISK1_MAJOR ... SCSI_DISK7_MAJOR:
+ index = 11 + major - SCSI_DISK1_MAJOR;
+ break;
+ case SCSI_CDROM_MAJOR: index = 18; break;
+ default: index = 19; break;
+ }
+
+ mi = ((major_info[index] != NULL) ? major_info[index] :
+ xlbd_alloc_major_info(major, minor, index));
+ mi->usage++;
+ return mi;
+}
+
+static void
+xlbd_put_major_info(struct xlbd_major_info *mi)
+{
+ mi->usage--;
+ /* XXX: release major if 0 */
+}
+
+static int
+xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
+{
+ request_queue_t *rq;
+
+ rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
+ if (rq == NULL)
+ return -1;
+
+ elevator_init(rq, "noop");
+
+ /* Hard sector size and max sectors impersonate the equiv. hardware. */
+ blk_queue_hardsect_size(rq, sector_size);
+ blk_queue_max_sectors(rq, 512);
+
+ /* Each segment in a request is up to an aligned page in size. */
+ blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
+ blk_queue_max_segment_size(rq, PAGE_SIZE);
+
+ /* Ensure a merged request will fit in a single I/O ring slot. */
+ blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+ blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+
+ /* Make sure buffer addresses are sector-aligned. */
+ blk_queue_dma_alignment(rq, 511);
+
+ gd->queue = rq;
+
+ return 0;
+}
+
+static int
+xlvbd_alloc_gendisk(int minor, blkif_sector_t capacity, int vdevice,
+ u16 vdisk_info, u16 sector_size,
+ struct blkfront_info *info)
+{
+ struct gendisk *gd;
+ struct xlbd_major_info *mi;
+ int nr_minors = 1;
+ int err = -ENODEV;
+
+ mi = xlbd_get_major_info(vdevice);
+ if (mi == NULL)
+ goto out;
+ info->mi = mi;
+
+ if ((minor & ((1 << mi->type->partn_shift) - 1)) == 0)
+ nr_minors = 1 << mi->type->partn_shift;
+
+ gd = alloc_disk(nr_minors);
+ if (gd == NULL)
+ goto out;
+
+ if (nr_minors > 1)
+ sprintf(gd->disk_name, "%s%c", mi->type->diskname,
+ 'a' + mi->index * mi->type->disks_per_major +
+ (minor >> mi->type->partn_shift));
+ else
+ sprintf(gd->disk_name, "%s%c%d", mi->type->diskname,
+ 'a' + mi->index * mi->type->disks_per_major +
+ (minor >> mi->type->partn_shift),
+ minor & ((1 << mi->type->partn_shift) - 1));
+
+ gd->major = mi->major;
+ gd->first_minor = minor;
+ gd->fops = &xlvbd_block_fops;
+ gd->private_data = info;
+ set_capacity(gd, capacity);
+
+ if (xlvbd_init_blk_queue(gd, sector_size)) {
+ del_gendisk(gd);
+ goto out;
+ }
+
+ info->rq = gd->queue;
+
+ if (vdisk_info & VDISK_READONLY)
+ set_disk_ro(gd, 1);
+
+ if (vdisk_info & VDISK_REMOVABLE)
+ gd->flags |= GENHD_FL_REMOVABLE;
+
+ if (vdisk_info & VDISK_CDROM)
+ gd->flags |= GENHD_FL_CD;
+
+ add_disk(gd);
+
+ return 0;
+
+ out:
+ if (mi)
+ xlbd_put_major_info(mi);
+ return err;
+}
+
+int
+xlvbd_add(blkif_sector_t capacity, int vdevice, u16 vdisk_info,
+ u16 sector_size, struct blkfront_info *info)
+{
+ struct block_device *bd;
+ int err = 0;
+
+ info->dev = MKDEV(BLKIF_MAJOR(vdevice), BLKIF_MINOR(vdevice));
+
+ bd = bdget(info->dev);
+ if (bd == NULL)
+ return -ENODEV;
+
+ err = xlvbd_alloc_gendisk(BLKIF_MINOR(vdevice), capacity, vdevice,
+ vdisk_info, sector_size, info);
+
+ bdput(bd);
+ return err;
+}
+
+void
+xlvbd_del(struct blkfront_info *info)
+{
+ struct block_device *bd;
+ struct gendisk *gd;
+ int unused;
+ request_queue_t *rq;
+
+ bd = bdget(info->dev);
+ if (bd == NULL)
+ return;
+
+ gd = get_gendisk(info->dev, &unused);
+ rq = gd->queue;
+
+ del_gendisk(gd);
+ put_disk(gd);
+ xlbd_put_major_info(info->mi);
+ info->mi = NULL;
+ blk_cleanup_queue(rq);
+
+ bdput(bd);
+}
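The disk-name arithmetic in xlvbd_alloc_gendisk() is easy to check in isolation. A stand-alone demo using the IDE parameters above (partn_shift = 6, disks_per_major = 2); the minor values are invented for the example:

    /* Stand-alone demo of the disk-name computation used by
     * xlvbd_alloc_gendisk() above, for the IDE type. */
    #include <stdio.h>

    int main(void)
    {
        const int partn_shift = 6;     /* 64 minors per disk        */
        const int disks_per_major = 2; /* hda/hdb share one major   */
        const int index = 0;           /* first IDE major           */
        int minors[] = { 0, 1, 64 };

        for (int i = 0; i < 3; i++) {
            int minor = minors[i];
            int partn = minor & ((1 << partn_shift) - 1);
            char base = 'a' + index * disks_per_major
                            + (minor >> partn_shift);
            if (partn == 0)
                printf("minor %3d -> hd%c (whole disk)\n", minor, base);
            else
                printf("minor %3d -> hd%c%d (partition)\n", minor, base, partn);
        }
        return 0;   /* prints hda, hda1, hdb */
    }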
diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Tue Aug 23 18:25:51 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Tue Aug 23 18:27:22 2005
@@ -102,12 +102,12 @@
#endif
#ifdef CONFIG_XEN_NETDEV_GRANT_TX
-static grant_ref_t gref_tx_head, gref_tx_terminal;
+static grant_ref_t gref_tx_head;
static grant_ref_t grant_tx_ref[NETIF_TX_RING_SIZE + 1];
#endif
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
-static grant_ref_t gref_rx_head, gref_rx_terminal;
+static grant_ref_t gref_rx_head;
static grant_ref_t grant_rx_ref[NETIF_RX_RING_SIZE + 1];
#endif
@@ -441,8 +441,8 @@
np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.id = id;
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
- if (unlikely((ref = gnttab_claim_grant_reference(&gref_rx_head,
- gref_rx_terminal)) < 0)) {
+ ref = gnttab_claim_grant_reference(&gref_rx_head);
+ if (unlikely(ref < 0)) {
printk(KERN_ALERT "#### netfront can't claim rx reference\n");
BUG();
}
@@ -537,8 +537,8 @@
tx->id = id;
#ifdef CONFIG_XEN_NETDEV_GRANT_TX
- if (unlikely((ref = gnttab_claim_grant_reference(&gref_tx_head,
- gref_tx_terminal)) < 0)) {
+ ref = gnttab_claim_grant_reference(&gref_tx_head);
+ if (unlikely(ref < 0)) {
printk(KERN_ALERT "#### netfront can't claim tx grant reference\n");
BUG();
}
@@ -929,8 +929,7 @@
msg->handle = np->handle;
msg->tx_shmem_frame = virt_to_mfn(np->tx);
#ifdef CONFIG_XEN_NETDEV_GRANT_TX
- msg->tx_shmem_ref = (u32)gnttab_claim_grant_reference(&gref_tx_head,
- gref_tx_terminal);
+ msg->tx_shmem_ref = (u32)gnttab_claim_grant_reference(&gref_tx_head);
if(msg->tx_shmem_ref < 0) {
printk(KERN_ALERT "#### netfront can't claim tx_shmem reference\n");
BUG();
@@ -941,8 +940,7 @@
msg->rx_shmem_frame = virt_to_mfn(np->rx);
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
- msg->rx_shmem_ref = (u32)gnttab_claim_grant_reference(&gref_rx_head,
- gref_rx_terminal);
+ msg->rx_shmem_ref = (u32)gnttab_claim_grant_reference(&gref_rx_head);
if(msg->rx_shmem_ref < 0) {
printk(KERN_ALERT "#### netfront can't claim rx_shmem reference\n");
BUG();
@@ -1420,7 +1418,7 @@
#ifdef CONFIG_XEN_NETDEV_GRANT_TX
/* A grant for every ring slot, plus one for the ring itself */
if (gnttab_alloc_grant_references(NETIF_TX_RING_SIZE + 1,
- &gref_tx_head, &gref_tx_terminal) < 0) {
+ &gref_tx_head) < 0) {
printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
return 1;
}
@@ -1429,7 +1427,7 @@
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
/* A grant for every ring slot, plus one for the ring itself */
if (gnttab_alloc_grant_references(NETIF_RX_RING_SIZE + 1,
- &gref_rx_head, &gref_rx_terminal) < 0) {
+ &gref_rx_head) < 0) {
printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
return 1;
}
@@ -1457,10 +1455,10 @@
static void netif_exit(void)
{
#ifdef CONFIG_XEN_NETDEV_GRANT_TX
- gnttab_free_grant_references(NETIF_TX_RING_SIZE + 1, gref_tx_head);
+ gnttab_free_grant_references(gref_tx_head);
#endif
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
- gnttab_free_grant_references(NETIF_RX_RING_SIZE + 1, gref_rx_head);
+ gnttab_free_grant_references(gref_rx_head);
#endif
}
diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c
--- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Tue Aug 23 18:25:51 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Tue Aug 23 18:27:22 2005
@@ -167,7 +167,7 @@
if (ret)
goto batch_err;
- u.val = (mfn << PAGE_SHIFT) | pgprot_val(vma->vm_page_prot);
+ u.val = pte_val_ma(pfn_pte_ma(mfn, vma->vm_page_prot));
u.ptr = ptep;
if ( unlikely(HYPERVISOR_mmu_update(&u, 1, NULL, m.dom) < 0) )
diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h Tue Aug 23 18:25:51 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h Tue Aug 23 18:27:22 2005
@@ -60,9 +60,13 @@
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
+#define INVALID_P2M_ENTRY (~0U)
+#define FOREIGN_FRAME(m) ((m) | 0x80000000U)
extern unsigned int *phys_to_machine_mapping;
-#define pfn_to_mfn(_pfn) ((unsigned long)(phys_to_machine_mapping[(_pfn)]))
-#define mfn_to_pfn(_mfn) ((unsigned long)(machine_to_phys_mapping[(_mfn)]))
+#define pfn_to_mfn(pfn) \
+((unsigned long)phys_to_machine_mapping[(unsigned int)(pfn)] & 0x7FFFFFFFUL)
+#define mfn_to_pfn(mfn) \
+((unsigned long)machine_to_phys_mapping[(unsigned int)(mfn)])
/* Definitions for machine and pseudophysical addresses. */
#ifdef CONFIG_X86_PAE
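The new scheme keeps the foreign marker in bit 31 of the 32-bit p2m entry and has pfn_to_mfn() mask it off, rather than relying on the bit being shifted out. A stand-alone demo of the masking behaviour (table contents invented for illustration):

    /* Stand-alone demo of the FOREIGN_FRAME convention introduced above. */
    #include <stdio.h>

    #define INVALID_P2M_ENTRY (~0U)
    #define FOREIGN_FRAME(m)  ((m) | 0x80000000U)

    static unsigned int phys_to_machine_mapping[] = {
        0x1234, FOREIGN_FRAME(0x5678), INVALID_P2M_ENTRY
    };

    #define pfn_to_mfn(pfn) \
    ((unsigned long)phys_to_machine_mapping[(unsigned int)(pfn)] & 0x7FFFFFFFUL)

    int main(void)
    {
        for (unsigned int pfn = 0; pfn < 3; pfn++)
            printf("pfn %u: raw entry %08x, pfn_to_mfn -> %lx\n",
                   pfn, phys_to_machine_mapping[pfn], pfn_to_mfn(pfn));
        return 0;   /* the foreign bit is masked; pte_pfn() below compares
                     * the raw table entry instead, so FOREIGN_FRAME still
                     * forces !pfn_valid() as required. */
    }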
diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h Tue Aug 23 18:25:51 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h Tue Aug 23 18:27:22 2005
@@ -63,17 +63,15 @@
*
* NB2. When deliberately mapping foreign pages into the p2m table, you *must*
* use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we
- * require. In all the cases we care about, the high bit gets shifted out
- * (e.g., phys_to_machine()) so behaviour there is correct.
+ * require. In all the cases we care about, the FOREIGN_FRAME bit is
+ * masked (e.g., pfn_to_mfn()) so behaviour there is correct.
*/
-#define INVALID_P2M_ENTRY (~0U)
-#define FOREIGN_FRAME(_m) ((_m) | (1UL<<((sizeof(unsigned long)*8)-1)))
#define pte_mfn(_pte) ((_pte).pte_low >> PAGE_SHIFT)
#define pte_pfn(_pte) \
({ \
unsigned long mfn = pte_mfn(_pte); \
unsigned long pfn = mfn_to_pfn(mfn); \
- if ((pfn >= max_mapnr) || (pfn_to_mfn(pfn) != mfn)) \
+ if ((pfn >= max_mapnr) || (phys_to_machine_mapping[pfn] != mfn))\
pfn = max_mapnr; /* special: force !pfn_valid() */ \
pfn; \
})
diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-3level.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-3level.h Tue Aug 23 18:25:51 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-3level.h Tue Aug 23 18:27:22 2005
@@ -150,15 +150,13 @@
return !pte.pte_low && !pte.pte_high;
}
-#define INVALID_P2M_ENTRY (~0U)
-#define FOREIGN_FRAME(_m) ((_m) | (1UL<<((sizeof(unsigned long)*8)-1)))
#define pte_mfn(_pte) ( ((_pte).pte_low >> PAGE_SHIFT) |\
(((_pte).pte_high & 0xfff) << (32-PAGE_SHIFT)) )
#define pte_pfn(_pte) \
({ \
unsigned long mfn = pte_mfn(_pte); \
unsigned long pfn = mfn_to_pfn(mfn); \
- if ((pfn >= max_mapnr) || (pfn_to_mfn(pfn) != mfn)) \
+ if ((pfn >= max_mapnr) || (phys_to_machine_mapping[pfn] != mfn))\
pfn = max_mapnr; /* special: force !pfn_valid() */ \
pfn; \
})
diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h Tue Aug 23 18:25:51 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h Tue Aug 23 18:27:22 2005
@@ -62,9 +62,13 @@
#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
+#define INVALID_P2M_ENTRY (~0U)
+#define FOREIGN_FRAME(m) ((m) | 0x80000000U)
extern u32 *phys_to_machine_mapping;
-#define pfn_to_mfn(_pfn) ((unsigned long) phys_to_machine_mapping[(unsigned int)(_pfn)])
-#define mfn_to_pfn(_mfn) ((unsigned long) machine_to_phys_mapping[(unsigned int)(_mfn)])
+#define pfn_to_mfn(pfn) \
+((unsigned long)phys_to_machine_mapping[(unsigned int)(pfn)] & 0x7FFFFFFFUL)
+#define mfn_to_pfn(mfn) \
+((unsigned long)machine_to_phys_mapping[(unsigned int)(mfn)])
/* Definitions for machine and pseudophysical addresses. */
typedef unsigned long paddr_t;
diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h Tue Aug 23 18:25:51 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h Tue Aug 23 18:27:22 2005
@@ -300,17 +300,15 @@
*
* NB2. When deliberately mapping foreign pages into the p2m table, you *must*
* use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we
- * require. In all the cases we care about, the high bit gets shifted out
- * (e.g., phys_to_machine()) so behaviour there is correct.
- */
-#define INVALID_P2M_ENTRY (~0U)
-#define FOREIGN_FRAME(_m) ((_m) | (1UL<<((sizeof(unsigned long)*8)-1)))
+ * require. In all the cases we care about, the FOREIGN_FRAME bit is
+ * masked (e.g., pfn_to_mfn()) so behaviour there is correct.
+ */
#define pte_mfn(_pte) (((_pte).pte & PTE_MASK) >> PAGE_SHIFT)
#define pte_pfn(_pte) \
({ \
unsigned long mfn = pte_mfn(_pte); \
unsigned pfn = mfn_to_pfn(mfn); \
- if ((pfn >= max_mapnr) || (pfn_to_mfn(pfn) != mfn)) \
+ if ((pfn >= max_mapnr) || (phys_to_machine_mapping[pfn] != mfn))\
pfn = max_mapnr; /* special: force !pfn_valid() */ \
pfn; \
})
diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/include/asm-xen/gnttab.h
--- a/linux-2.6-xen-sparse/include/asm-xen/gnttab.h Tue Aug 23 18:25:51 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/gnttab.h Tue Aug 23 18:27:22 2005
@@ -19,54 +19,46 @@
/* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */
#define NR_GRANT_FRAMES 4
-#define NR_GRANT_ENTRIES (NR_GRANT_FRAMES * PAGE_SIZE / sizeof(grant_entry_t))
-int
-gnttab_grant_foreign_access(
- domid_t domid, unsigned long frame, int readonly);
+struct gnttab_free_callback {
+ struct gnttab_free_callback *next;
+ void (*fn)(void *);
+ void *arg;
+ u16 count;
+};
-void
-gnttab_end_foreign_access(
- grant_ref_t ref, int readonly);
+int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
+ int readonly);
-int
-gnttab_grant_foreign_transfer(
- domid_t domid, unsigned long pfn);
+void gnttab_end_foreign_access(grant_ref_t ref, int readonly);
-unsigned long
-gnttab_end_foreign_transfer(
- grant_ref_t ref);
+int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn);
-int
-gnttab_query_foreign_access(
- grant_ref_t ref );
+unsigned long gnttab_end_foreign_transfer(grant_ref_t ref);
+
+int gnttab_query_foreign_access(grant_ref_t ref);
/*
* operations on reserved batches of grant references
*/
-int
-gnttab_alloc_grant_references(
- u16 count, grant_ref_t *pprivate_head, grant_ref_t *private_terminal );
+int gnttab_alloc_grant_references(u16 count, grant_ref_t *pprivate_head);
-void
-gnttab_free_grant_references(
- u16 count, grant_ref_t private_head );
+void gnttab_free_grant_reference(grant_ref_t ref);
-int
-gnttab_claim_grant_reference( grant_ref_t *pprivate_head, grant_ref_t terminal
-);
+void gnttab_free_grant_references(grant_ref_t head);
-void
-gnttab_release_grant_reference(
- grant_ref_t *private_head, grant_ref_t release );
+int gnttab_claim_grant_reference(grant_ref_t *pprivate_head);
-void
-gnttab_grant_foreign_access_ref(
- grant_ref_t ref, domid_t domid, unsigned long frame, int readonly);
+void gnttab_release_grant_reference(grant_ref_t *private_head,
+ grant_ref_t release);
-void
-gnttab_grant_foreign_transfer_ref(
- grant_ref_t, domid_t domid, unsigned long pfn);
+void gnttab_request_free_callback(struct gnttab_free_callback *callback,
+ void (*fn)(void *), void *arg, u16 count);
+void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
+ unsigned long frame, int readonly);
+
+void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid,
+ unsigned long pfn);
#endif /* __ASM_GNTTAB_H__ */
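The reshaped interface drops the explicit 'terminal' reference: a driver allocates a private pool once, then claims and releases references with single-argument calls, as netfront and blkfront now do. A sketch of the lifecycle under those assumptions (RING_SIZE is a stand-in for the driver's ring size; this is not stand-alone code):

    /* Sketch of the reworked reference-pool lifecycle, using only the
     * functions declared above.  One reference per outstanding request
     * plus one for the shared ring page, as in the drivers. */
    static grant_ref_t pool_head;

    static int example_pool_init(domid_t backend, unsigned long ring_mfn)
    {
        int ref;

        if (gnttab_alloc_grant_references(RING_SIZE + 1, &pool_head) < 0)
            return -ENOSPC;
        ref = gnttab_claim_grant_reference(&pool_head);  /* one-arg form */
        if (ref < 0) {
            gnttab_free_grant_references(pool_head);     /* count arg gone */
            return ref;
        }
        gnttab_grant_foreign_access_ref(ref, backend, ring_mfn, 0);
        return ref;
    }

    static void example_pool_exit(grant_ref_t ring_ref)
    {
        gnttab_end_foreign_access(ring_ref, 0);
        gnttab_release_grant_reference(&pool_head, ring_ref);
        gnttab_free_grant_references(pool_head);
    }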
diff -r 6783e59e1c45 -r 522bc50588ed tools/xenstat/xentop/Makefile
--- a/tools/xenstat/xentop/Makefile Tue Aug 23 18:25:51 2005
+++ b/tools/xenstat/xentop/Makefile Tue Aug 23 18:27:22 2005
@@ -28,7 +28,7 @@
CFLAGS += -DGCC_PRINTF -Wall -Werror -I$(XEN_LIBXENSTAT)
LDFLAGS += -L$(XEN_LIBXENSTAT)
-LDLIBS += -lxenstat -lcurses
+LDLIBS += -lxenstat -lncurses
all: xentop
diff -r 6783e59e1c45 -r 522bc50588ed xen/arch/x86/io_apic.c
--- a/xen/arch/x86/io_apic.c Tue Aug 23 18:25:51 2005
+++ b/xen/arch/x86/io_apic.c Tue Aug 23 18:27:22 2005
@@ -1751,8 +1751,30 @@
pin = (address - 0x10) >> 1;
+ *(u32 *)&rte = val;
rte.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
- *(int *)&rte = val;
+
+ /*
+ * What about weird destination types?
+ * SMI: Ignore? Ought to be set up by the BIOS.
+ * NMI: Ignore? Watchdog functionality is Xen's concern.
+ * INIT: Definitely ignore: probably a guest OS bug.
+ * ExtINT: Ignore? Linux only asserts this at start of day.
+ * For now, print a message and return an error. We can fix up on demand.
+ */
+ if ( rte.delivery_mode > dest_LowestPrio )
+ {
+ printk("ERROR: Attempt to write weird IOAPIC destination mode!\n");
+ printk(" APIC=%d/%d, lo-reg=%x\n", apicid, pin, val);
+ return -EINVAL;
+ }
+
+ /*
+ * The guest does not know physical APIC arrangement (flat vs. cluster).
+ * Apply genapic conventions for this platform.
+ */
+ rte.delivery_mode = INT_DELIVERY_MODE;
+ rte.dest_mode = INT_DEST_MODE;
if ( rte.vector >= FIRST_DEVICE_VECTOR )
{
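Note the ordering change at the top of this hunk: the guest-supplied word is copied into the RTE before the destination field is fixed up, where the old order let the full-word store clobber the freshly written logical destination. A stand-alone illustration of why the old order discarded the field (struct layout invented for the demo):

    #include <stdio.h>
    #include <string.h>

    struct fake_rte { unsigned int dest : 8, rest : 24; };

    int main(void)
    {
        struct fake_rte rte;
        unsigned int val = 0x00000055;  /* guest-written low word */

        /* Old order: set dest, then overwrite the whole word -- dest lost. */
        memset(&rte, 0, sizeof(rte));
        rte.dest = 0x0f;
        memcpy(&rte, &val, sizeof(val));
        printf("old order: dest = %#x (clobbered)\n", rte.dest);

        /* New order: copy the word first, then fix up dest. */
        memcpy(&rte, &val, sizeof(val));
        rte.dest = 0x0f;
        printf("new order: dest = %#x (preserved)\n", rte.dest);
        return 0;
    }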
diff -r 6783e59e1c45 -r 522bc50588ed xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Tue Aug 23 18:25:51 2005
+++ b/xen/arch/x86/mm.c Tue Aug 23 18:27:22 2005
@@ -444,7 +444,7 @@
if ( unlikely(l1e_get_flags(l1e) & L1_DISALLOW_MASK) )
{
- MEM_LOG("Bad L1 flags %x\n", l1e_get_flags(l1e) & L1_DISALLOW_MASK);
+ MEM_LOG("Bad L1 flags %x", l1e_get_flags(l1e) & L1_DISALLOW_MASK);
return 0;
}
@@ -490,7 +490,7 @@
if ( unlikely((l2e_get_flags(l2e) & L2_DISALLOW_MASK)) )
{
- MEM_LOG("Bad L2 flags %x\n", l2e_get_flags(l2e) & L2_DISALLOW_MASK);
+ MEM_LOG("Bad L2 flags %x", l2e_get_flags(l2e) & L2_DISALLOW_MASK);
return 0;
}
@@ -523,7 +523,7 @@
if ( unlikely((l3e_get_flags(l3e) & L3_DISALLOW_MASK)) )
{
- MEM_LOG("Bad L3 flags %x\n", l3e_get_flags(l3e) & L3_DISALLOW_MASK);
+ MEM_LOG("Bad L3 flags %x", l3e_get_flags(l3e) & L3_DISALLOW_MASK);
return 0;
}
@@ -557,7 +557,7 @@
if ( unlikely((l4e_get_flags(l4e) & L4_DISALLOW_MASK)) )
{
- MEM_LOG("Bad L4 flags %x\n", l4e_get_flags(l4e) & L4_DISALLOW_MASK);
+ MEM_LOG("Bad L4 flags %x", l4e_get_flags(l4e) & L4_DISALLOW_MASK);
return 0;
}
@@ -1025,7 +1025,7 @@
unlikely(o != l1e_get_intpte(ol1e)) )
{
MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte
- ": saw %" PRIpte "\n",
+ ": saw %" PRIpte,
l1e_get_intpte(ol1e),
l1e_get_intpte(nl1e),
o);
@@ -1051,7 +1051,7 @@
{
if ( unlikely(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) )
{
- MEM_LOG("Bad L1 flags %x\n",
+ MEM_LOG("Bad L1 flags %x",
l1e_get_flags(nl1e) & L1_DISALLOW_MASK);
return 0;
}
@@ -1113,7 +1113,7 @@
{
if ( unlikely(l2e_get_flags(nl2e) & L2_DISALLOW_MASK) )
{
- MEM_LOG("Bad L2 flags %x\n",
+ MEM_LOG("Bad L2 flags %x",
l2e_get_flags(nl2e) & L2_DISALLOW_MASK);
return 0;
}
@@ -1175,7 +1175,7 @@
{
if ( unlikely(l3e_get_flags(nl3e) & L3_DISALLOW_MASK) )
{
- MEM_LOG("Bad L3 flags %x\n",
+ MEM_LOG("Bad L3 flags %x",
l3e_get_flags(nl3e) & L3_DISALLOW_MASK);
return 0;
}
@@ -1237,7 +1237,7 @@
{
if ( unlikely(l4e_get_flags(nl4e) & L4_DISALLOW_MASK) )
{
- MEM_LOG("Bad L4 flags %x\n",
+ MEM_LOG("Bad L4 flags %x",
l4e_get_flags(nl4e) & L4_DISALLOW_MASK);
return 0;
}
@@ -1598,7 +1598,7 @@
percpu_info[cpu].foreign = dom_io;
break;
default:
- MEM_LOG("Dom %u cannot set foreign dom\n", d->domain_id);
+ MEM_LOG("Dom %u cannot set foreign dom", d->domain_id);
okay = 0;
break;
}
@@ -1831,7 +1831,7 @@
case MMUEXT_FLUSH_CACHE:
if ( unlikely(!IS_CAPABLE_PHYSDEV(d)) )
{
- MEM_LOG("Non-physdev domain tried to FLUSH_CACHE.\n");
+ MEM_LOG("Non-physdev domain tried to FLUSH_CACHE.");
okay = 0;
}
else
@@ -1845,7 +1845,7 @@
if ( shadow_mode_external(d) )
{
MEM_LOG("ignoring SET_LDT hypercall from external "
- "domain %u\n", d->domain_id);
+ "domain %u", d->domain_id);
okay = 0;
break;
}
@@ -1916,7 +1916,7 @@
unlikely(IS_XEN_HEAP_FRAME(page)) )
{
MEM_LOG("Transferee has no reservation headroom (%d,%d), or "
- "page is in Xen heap (%lx), or dom is dying (%ld).\n",
+ "page is in Xen heap (%lx), or dom is dying (%ld).",
e->tot_pages, e->max_pages, op.mfn, e->domain_flags);
okay = 0;
goto reassign_fail;
@@ -1937,7 +1937,7 @@
unlikely(_nd != _d) )
{
MEM_LOG("Bad page values %lx: ed=%p(%u), sd=%p,"
- " caf=%08x, taf=%" PRtype_info "\n",
+ " caf=%08x, taf=%" PRtype_info,
page_to_pfn(page), d, d->domain_id,
unpickle_domptr(_nd), x, page->u.inuse.type_info);
okay = 0;
@@ -2301,7 +2301,7 @@
if ( ((type_info & PGT_type_mask) != PGT_l1_page_table) ||
!get_page_type(page, type_info & (PGT_type_mask|PGT_va_mask)) )
{
- DPRINTK("Grant map attempted to update a non-L1 page\n");
+ MEM_LOG("Grant map attempted to update a non-L1 page");
rc = GNTST_general_error;
goto failed;
}
@@ -2363,7 +2363,7 @@
if ( ((type_info & PGT_type_mask) != PGT_l1_page_table) ||
!get_page_type(page, type_info & (PGT_type_mask|PGT_va_mask)) )
{
- DPRINTK("Grant map attempted to update a non-L1 page\n");
+ MEM_LOG("Grant map attempted to update a non-L1 page");
rc = GNTST_general_error;
goto failed;
}
@@ -2378,7 +2378,7 @@
/* Check that the virtual address supplied is actually mapped to frame. */
if ( unlikely((l1e_get_intpte(ol1e) >> PAGE_SHIFT) != frame) )
{
- DPRINTK("PTE entry %lx for address %lx doesn't match frame %lx\n",
+ MEM_LOG("PTE entry %lx for address %lx doesn't match frame %lx",
(unsigned long)l1e_get_intpte(ol1e), addr, frame);
put_page_type(page);
rc = GNTST_general_error;
@@ -2388,7 +2388,7 @@
/* Delete pagetable entry. */
if ( unlikely(__put_user(0, (intpte_t *)va)))
{
- DPRINTK("Cannot delete PTE entry at %p.\n", va);
+ MEM_LOG("Cannot delete PTE entry at %p", va);
put_page_type(page);
rc = GNTST_general_error;
goto failed;
@@ -2452,7 +2452,7 @@
if ( unlikely(__get_user(ol1e.l1, &pl1e->l1) != 0) )
{
- DPRINTK("Could not find PTE entry for address %lx\n", addr);
+ MEM_LOG("Could not find PTE entry for address %lx", addr);
return GNTST_general_error;
}
@@ -2462,7 +2462,7 @@
*/
if ( unlikely(l1e_get_pfn(ol1e) != frame) )
{
- DPRINTK("PTE entry %lx for address %lx doesn't match frame %lx\n",
+ MEM_LOG("PTE entry %lx for address %lx doesn't match frame %lx",
l1e_get_pfn(ol1e), addr, frame);
return GNTST_general_error;
}
@@ -2470,7 +2470,7 @@
/* Delete pagetable entry. */
if ( unlikely(__put_user(0, &pl1e->l1)) )
{
- DPRINTK("Cannot delete PTE entry at %p.\n", (unsigned long *)pl1e);
+ MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
return GNTST_general_error;
}
@@ -2930,7 +2930,7 @@
if ( unlikely(!get_page_from_l1e(nl1e, d)) )
{
- MEM_LOG("ptwr: Could not re-validate l1 page\n");
+ MEM_LOG("ptwr: Could not re-validate l1 page");
/*
* Make the remaining p.t's consistent before crashing, so the
* reference counts are correct.
@@ -3056,7 +3056,7 @@
/* Aligned access only, thank you. */
if ( !access_ok(addr, bytes) || ((addr & (bytes-1)) != 0) )
{
- MEM_LOG("ptwr_emulate: Unaligned or bad size ptwr access (%d, %lx)\n",
+ MEM_LOG("ptwr_emulate: Unaligned or bad size ptwr access (%d, %lx)",
bytes, addr);
return X86EMUL_UNHANDLEABLE;
}
@@ -3089,7 +3089,7 @@
if (__copy_from_user(&pte, &linear_pg_table[l1_linear_offset(addr)],
sizeof(pte)))
{
- MEM_LOG("ptwr_emulate: Cannot read thru linear_pg_table\n");
+ MEM_LOG("ptwr_emulate: Cannot read thru linear_pg_table");
return X86EMUL_UNHANDLEABLE;
}
@@ -3102,7 +3102,7 @@
(page_get_owner(page) != d) )
{
MEM_LOG("ptwr_emulate: Page is mistyped or bad pte "
- "(%lx, %" PRtype_info ")\n",
+ "(%lx, %" PRtype_info ")",
l1e_get_pfn(pte), page->u.inuse.type_info);
return X86EMUL_UNHANDLEABLE;
}
diff -r 6783e59e1c45 -r 522bc50588ed xen/arch/x86/vmx.c
--- a/xen/arch/x86/vmx.c Tue Aug 23 18:25:51 2005
+++ b/xen/arch/x86/vmx.c Tue Aug 23 18:27:22 2005
@@ -1712,9 +1712,6 @@
default:
__vmx_bug(&regs); /* should not happen */
}
-
- vmx_intr_assist(v);
- return;
}
asmlinkage void load_cr2(void)
diff -r 6783e59e1c45 -r 522bc50588ed xen/arch/x86/vmx_io.c
--- a/xen/arch/x86/vmx_io.c Tue Aug 23 18:25:51 2005
+++ b/xen/arch/x86/vmx_io.c Tue Aug 23 18:27:22 2005
@@ -631,12 +631,14 @@
return ((eflags & X86_EFLAGS_IF) == 0);
}
-void vmx_intr_assist(struct vcpu *v)
+asmlinkage void vmx_intr_assist(void)
{
int intr_type = 0;
- int highest_vector = find_highest_pending_irq(v, &intr_type);
+ int highest_vector;
unsigned long intr_fields, eflags, interruptibility, cpu_exec_control;
-
+ struct vcpu *v = current;
+
+ highest_vector = find_highest_pending_irq(v, &intr_type);
__vmread(CPU_BASED_VM_EXEC_CONTROL, &cpu_exec_control);
if (highest_vector == -1) {
@@ -712,9 +714,6 @@
/* We can't resume the guest if we're waiting on I/O */
ASSERT(!test_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags));
-
- /* We always check for interrupts before resuming guest */
- vmx_intr_assist(d);
}
#endif /* CONFIG_VMX */
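With the vcpu now derived from 'current', vmx_intr_assist() no longer needs a parameter and can be called once from the assembly resume path instead of from each C exit handler. The intended per-resume ordering, condensed into a sketch (the real call sites are the entry.S hunks below):

    /* Sketch of the assumed control flow on every VMX guest re-entry
     * after this patch; see the entry.S changes below for the real code. */
    void example_vmx_resume_path(void)
    {
        vmx_intr_assist();  /* check pending interrupts exactly once */
        load_cr2();         /* restore the guest's %cr2 */
        /* VMX_RESTORE_ALL_NOSEGREGS and VMRESUME follow in assembly. */
    }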
diff -r 6783e59e1c45 -r 522bc50588ed xen/arch/x86/x86_32/entry.S
--- a/xen/arch/x86/x86_32/entry.S Tue Aug 23 18:25:51 2005
+++ b/xen/arch/x86/x86_32/entry.S Tue Aug 23 18:27:22 2005
@@ -140,6 +140,7 @@
jnz 2f
/* vmx_restore_all_guest */
+ call vmx_intr_assist
call load_cr2
.endif
VMX_RESTORE_ALL_NOSEGREGS
diff -r 6783e59e1c45 -r 522bc50588ed xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c Tue Aug 23 18:25:51 2005
+++ b/xen/arch/x86/x86_32/traps.c Tue Aug 23 18:27:22 2005
@@ -1,5 +1,6 @@
#include <xen/config.h>
+#include <xen/domain_page.h>
#include <xen/init.h>
#include <xen/sched.h>
#include <xen/lib.h>
@@ -86,24 +87,33 @@
void show_page_walk(unsigned long addr)
{
- l2_pgentry_t pmd;
- l1_pgentry_t *pte;
-
- if ( addr < PAGE_OFFSET )
- return;
+ unsigned long pfn = read_cr3() >> PAGE_SHIFT;
+ intpte_t *ptab, ent;
printk("Pagetable walk from %08lx:\n", addr);
-
- pmd = idle_pg_table_l2[l2_linear_offset(addr)];
- printk(" L2 = %"PRIpte" %s\n", l2e_get_intpte(pmd),
- (l2e_get_flags(pmd) & _PAGE_PSE) ? "(2/4MB)" : "");
- if ( !(l2e_get_flags(pmd) & _PAGE_PRESENT) ||
- (l2e_get_flags(pmd) & _PAGE_PSE) )
- return;
-
- pte = __va(l2e_get_paddr(pmd));
- pte += l1_table_offset(addr);
- printk(" L1 = %"PRIpte"\n", l1e_get_intpte(*pte));
+
+#ifdef CONFIG_X86_PAE
+ ptab = map_domain_page(pfn);
+ ent = ptab[l3_table_offset(addr)];
+ printk(" L3 = %"PRIpte"\n", ent);
+ unmap_domain_page(ptab);
+ if ( !(ent & _PAGE_PRESENT) )
+ return;
+ pfn = ent >> PAGE_SHIFT;
+#endif
+
+ ptab = map_domain_page(pfn);
+ ent = ptab[l2_table_offset(addr)];
+ printk(" L2 = %"PRIpte" %s\n", ent, (ent & _PAGE_PSE) ? "(PSE)" : "");
+ unmap_domain_page(ptab);
+ if ( !(ent & _PAGE_PRESENT) || (ent & _PAGE_PSE) )
+ return;
+ pfn = ent >> PAGE_SHIFT;
+
+ ptab = map_domain_page(pfn);
+ ent = ptab[l1_table_offset(addr)];
+ printk(" L1 = %"PRIpte"\n", ent);
+ unmap_domain_page(ptab);
}
#define DOUBLEFAULT_STACK_SIZE 1024
diff -r 6783e59e1c45 -r 522bc50588ed xen/arch/x86/x86_64/entry.S
--- a/xen/arch/x86/x86_64/entry.S Tue Aug 23 18:25:51 2005
+++ b/xen/arch/x86/x86_64/entry.S Tue Aug 23 18:27:22 2005
@@ -233,6 +233,7 @@
jnz 2f
/* vmx_restore_all_guest */
+ call vmx_intr_assist
call load_cr2
.endif
/*
diff -r 6783e59e1c45 -r 522bc50588ed xen/include/asm-x86/vmx.h
--- a/xen/include/asm-x86/vmx.h Tue Aug 23 18:25:51 2005
+++ b/xen/include/asm-x86/vmx.h Tue Aug 23 18:27:22 2005
@@ -31,7 +31,7 @@
extern void vmx_asm_vmexit_handler(struct cpu_user_regs);
extern void vmx_asm_do_resume(void);
extern void vmx_asm_do_launch(void);
-extern void vmx_intr_assist(struct vcpu *d);
+extern void vmx_intr_assist(void);
extern void arch_vmx_do_launch(struct vcpu *);
extern void arch_vmx_do_resume(struct vcpu *);
@@ -355,7 +355,7 @@
}
/* Make sure that xen intercepts any FP accesses from current */
-static inline void vmx_stts()
+static inline void vmx_stts(void)
{
unsigned long cr0;
diff -r 6783e59e1c45 -r 522bc50588ed xen/include/public/io/blkif.h
--- a/xen/include/public/io/blkif.h Tue Aug 23 18:25:51 2005
+++ b/xen/include/public/io/blkif.h Tue Aug 23 18:27:22 2005
@@ -58,6 +58,9 @@
#define BLKIF_RSP_ERROR -1 /* non-specific 'error' */
#define BLKIF_RSP_OKAY 0 /* non-specific 'okay' */
+#define BLKIF_MAJOR(dev) ((dev)>>8)
+#define BLKIF_MINOR(dev) ((dev) & 0xff)
+
/*
* Generate blkif ring structures and types.
*/
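The new BLKIF_MAJOR()/BLKIF_MINOR() macros make the 16-bit virtual-device encoding explicit: major in the high byte, minor in the low byte. A quick stand-alone check (the device value is an invented example):

    #include <stdio.h>

    #define BLKIF_MAJOR(dev) ((dev)>>8)
    #define BLKIF_MINOR(dev) ((dev) & 0xff)

    int main(void)
    {
        int vdevice = 0x0340;   /* e.g. major 3 (IDE0), minor 64 (hdb) */
        printf("major %d, minor %d\n",
               BLKIF_MAJOR(vdevice), BLKIF_MINOR(vdevice));
        return 0;
    }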
diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/arch/xen/x86_64/mm/ioremap.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/ioremap.c Tue Aug 23 18:25:51 2005
+++ /dev/null Tue Aug 23 18:27:22 2005
@@ -1,499 +0,0 @@
-/*
- * arch/x86_64/mm/ioremap.c
- *
- * Re-map IO memory to kernel address space so that we can access it.
- * This is needed for high PCI addresses that aren't mapped in the
- * 640k-1MB IO memory area on PC's
- *
- * (C) Copyright 1995 1996 Linus Torvalds
- */
-
-#include <linux/vmalloc.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <asm/io.h>
-#include <asm/fixmap.h>
-#include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
-#include <asm/pgtable.h>
-#include <asm/pgalloc.h>
-
-/*
- * Reuse arch/xen/i386/mm/ioremap.c. Need to merge later
- */
-#ifndef CONFIG_XEN_PHYSDEV_ACCESS
-
-void * __ioremap(unsigned long phys_addr, unsigned long size,
- unsigned long flags)
-{
- return NULL;
-}
-
-void *ioremap_nocache (unsigned long phys_addr, unsigned long size)
-{
- return NULL;
-}
-
-void iounmap(volatile void __iomem *addr)
-{
-}
-
-void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
-{
- return NULL;
-}
-
-void __init bt_iounmap(void *addr, unsigned long size)
-{
-}
-
-#else
-
-#if defined(__i386__)
-/*
- * Does @address reside within a non-highmem page that is local to this virtual
- * machine (i.e., not an I/O page, nor a memory page belonging to another VM).
- * See the comment that accompanies pte_pfn() in pgtable-2level.h to understand
- * why this works.
- */
-static inline int is_local_lowmem(unsigned long address)
-{
- extern unsigned long max_low_pfn;
- unsigned long mfn = address >> PAGE_SHIFT;
- unsigned long pfn = mfn_to_pfn(mfn);
- return ((pfn < max_low_pfn) && (pfn_to_mfn(pfn) == mfn));
-}
-#elif defined(__x86_64__)
-/*
- *
- */
-static inline int is_local_lowmem(unsigned long address)
-{
- return 0;
-}
-#endif
-
-/*
- * Generic mapping function (not visible outside):
- */
-
-/*
- * Remap an arbitrary physical address space into the kernel virtual
- * address space. Needed when the kernel wants to access high addresses
- * directly.
- *
- * NOTE! We need to allow non-page-aligned mappings too: we will obviously
- * have to convert them into an offset in a page-aligned mapping, but the
- * caller shouldn't need to know that small detail.
- */
-void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags)
-{
- void __iomem * addr;
- struct vm_struct * area;
- unsigned long offset, last_addr;
- domid_t domid = DOMID_IO;
-
- /* Don't allow wraparound or zero size */
- last_addr = phys_addr + size - 1;
- if (!size || last_addr < phys_addr)
- return NULL;
-
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
- /*
- * Don't remap the low PCI/ISA area, it's always mapped..
- */
- if (phys_addr >= 0x0 && last_addr < 0x100000)
- return isa_bus_to_virt(phys_addr);
-#endif
-
- /*
- * Don't allow anybody to remap normal RAM that we're using..
- */
- if (is_local_lowmem(phys_addr)) {
- char *t_addr, *t_end;
- struct page *page;
-
- t_addr = bus_to_virt(phys_addr);
- t_end = t_addr + (size - 1);
-
- for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++)
- if(!PageReserved(page))
- return NULL;
-
- domid = DOMID_LOCAL;
- }
-
- /*
- * Mappings have to be page-aligned
- */
- offset = phys_addr & ~PAGE_MASK;
- phys_addr &= PAGE_MASK;
- size = PAGE_ALIGN(last_addr+1) - phys_addr;
-
- /*
- * Ok, go for it..
- */
- area = get_vm_area(size, VM_IOREMAP | (flags << 20));
- if (!area)
- return NULL;
- area->phys_addr = phys_addr;
- addr = (void __iomem *) area->addr;
- if (direct_remap_area_pages(&init_mm, (unsigned long) addr, phys_addr,
- size, __pgprot(_PAGE_PRESENT | _PAGE_RW |
- _PAGE_DIRTY | _PAGE_ACCESSED
-#if defined(__x86_64__)
- | _PAGE_USER
-#endif
- | flags), domid)) {
- vunmap((void __force *) addr);
- return NULL;
- }
- return (void __iomem *) (offset + (char __iomem *)addr);
-}
-
-
-/**
- * ioremap_nocache - map bus memory into CPU space
- * @offset: bus address of the memory
- * @size: size of the resource to map
- *
- * ioremap_nocache performs a platform specific sequence of operations to
- * make bus memory CPU accessible via the readb/readw/readl/writeb/
- * writew/writel functions and the other mmio helpers. The returned
- * address is not guaranteed to be usable directly as a virtual
- * address.
- *
- * This version of ioremap ensures that the memory is marked uncachable
- * on the CPU as well as honouring existing caching rules from things like
- * the PCI bus. Note that there are other caches and buffers on many
- * busses. In particular driver authors should read up on PCI writes
- *
- * It's useful if some control registers are in such an area and
- * write combining or read caching is not desirable:
- *
- * Must be freed with iounmap.
- */
-
-void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
-{
- unsigned long last_addr;
- void __iomem *p = __ioremap(phys_addr, size, _PAGE_PCD);
- if (!p)
- return p;
-
- /* Guaranteed to be > phys_addr, as per __ioremap() */
- last_addr = phys_addr + size - 1;
-
- if (is_local_lowmem(last_addr)) {
- struct page *ppage = virt_to_page(bus_to_virt(phys_addr));
- unsigned long npages;
-
- phys_addr &= PAGE_MASK;
-
- /* This might overflow and become zero.. */
- last_addr = PAGE_ALIGN(last_addr);
-
- /* .. but that's ok, because modulo-2**n arithmetic will make
- * the page-aligned "last - first" come out right.
- */
- npages = (last_addr - phys_addr) >> PAGE_SHIFT;
-
- if (change_page_attr(ppage, npages, PAGE_KERNEL_NOCACHE) < 0) {
- iounmap(p);
- p = NULL;
- }
- global_flush_tlb();
- }
-
- return p;
-}
-
-void iounmap(volatile void __iomem *addr)
-{
- struct vm_struct *p;
- if ((void __force *) addr <= high_memory)
- return;
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
- if ((unsigned long) addr >= fix_to_virt(FIX_ISAMAP_BEGIN))
- return;
-#endif
- p = remove_vm_area((void *) (PAGE_MASK & (unsigned long __force) addr));
- if (!p) {
- printk("__iounmap: bad address %p\n", addr);
- return;
- }
-
- if ((p->flags >> 20) && is_local_lowmem(p->phys_addr)) {
- /* p->size includes the guard page, but cpa doesn't like that */
- change_page_attr(virt_to_page(bus_to_virt(p->phys_addr)),
- (p->size - PAGE_SIZE) >> PAGE_SHIFT,
- PAGE_KERNEL);
- global_flush_tlb();
- }
- kfree(p);
-}
-
-#if defined(__i386__)
-void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
-{
- unsigned long offset, last_addr;
- unsigned int nrpages;
- enum fixed_addresses idx;
-
- /* Don't allow wraparound or zero size */
- last_addr = phys_addr + size - 1;
- if (!size || last_addr < phys_addr)
- return NULL;
-
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
- /*
- * Don't remap the low PCI/ISA area, it's always mapped..
- */
- if (phys_addr >= 0x0 && last_addr < 0x100000)
- return isa_bus_to_virt(phys_addr);
-#endif
-
- /*
- * Mappings have to be page-aligned
- */
- offset = phys_addr & ~PAGE_MASK;
- phys_addr &= PAGE_MASK;
- size = PAGE_ALIGN(last_addr) - phys_addr;
-
- /*
- * Mappings have to fit in the FIX_BTMAP area.
- */
- nrpages = size >> PAGE_SHIFT;
- if (nrpages > NR_FIX_BTMAPS)
- return NULL;
-
- /*
- * Ok, go for it..
- */
- idx = FIX_BTMAP_BEGIN;
- while (nrpages > 0) {
- set_fixmap(idx, phys_addr);
- phys_addr += PAGE_SIZE;
- --idx;
- --nrpages;
- }
- return (void*) (offset + fix_to_virt(FIX_BTMAP_BEGIN));
-}
-
-void __init bt_iounmap(void *addr, unsigned long size)
-{
- unsigned long virt_addr;
- unsigned long offset;
- unsigned int nrpages;
- enum fixed_addresses idx;
-
- virt_addr = (unsigned long)addr;
- if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN))
- return;
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
- if (virt_addr >= fix_to_virt(FIX_ISAMAP_BEGIN))
- return;
-#endif
- offset = virt_addr & ~PAGE_MASK;
- nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT;
-
- idx = FIX_BTMAP_BEGIN;
- while (nrpages > 0) {
- clear_fixmap(idx);
- --idx;
- --nrpages;
- }
-}
-#endif /* defined(__i386__) */
-
-#endif /* CONFIG_XEN_PHYSDEV_ACCESS */
-
-/* These hacky macros avoid phys->machine translations. */
-#define __direct_pte(x) ((pte_t) { (x) } )
-#define __direct_mk_pte(page_nr,pgprot) \
- __direct_pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot))
-#define direct_mk_pte_phys(physpage, pgprot) \
- __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot)
-
-static inline void direct_remap_area_pte(pte_t *pte,
- unsigned long address,
- unsigned long size,
- mmu_update_t **v)
-{
- unsigned long end;
-
- address &= ~PMD_MASK;
- end = address + size;
- if (end > PMD_SIZE)
- end = PMD_SIZE;
- if (address >= end)
- BUG();
-
- do {
- (*v)->ptr = virt_to_machine(pte);
- (*v)++;
- address += PAGE_SIZE;
- pte++;
- } while (address && (address < end));
-}
-
-static inline int direct_remap_area_pmd(struct mm_struct *mm,
- pmd_t *pmd,
- unsigned long address,
- unsigned long size,
- mmu_update_t **v)
-{
- unsigned long end;
-
- address &= ~PGDIR_MASK;
- end = address + size;
- if (end > PGDIR_SIZE)
- end = PGDIR_SIZE;
- if (address >= end)
- BUG();
- do {
- pte_t *pte = (mm == &init_mm) ?
- pte_alloc_kernel(mm, pmd, address) :
- pte_alloc_map(mm, pmd, address);
- if (!pte)
- return -ENOMEM;
- direct_remap_area_pte(pte, address, end - address, v);
- pte_unmap(pte);
- address = (address + PMD_SIZE) & PMD_MASK;
- pmd++;
- } while (address && (address < end));
- return 0;
-}
-
-int __direct_remap_area_pages(struct mm_struct *mm,
- unsigned long address,
- unsigned long size,
- mmu_update_t *v)
-{
- pgd_t * dir;
- unsigned long end = address + size;
- int error;
-
-#if defined(__i386__)
- dir = pgd_offset(mm, address);
-#elif defined (__x86_64)
- dir = (mm == &init_mm) ?
- pgd_offset_k(address):
- pgd_offset(mm, address);
-#endif
- if (address >= end)
- BUG();
- spin_lock(&mm->page_table_lock);
- do {
- pud_t *pud;
- pmd_t *pmd;
-
- error = -ENOMEM;
- pud = pud_alloc(mm, dir, address);
- if (!pud)
- break;
- pmd = pmd_alloc(mm, pud, address);
- if (!pmd)
- break;
- error = 0;
- direct_remap_area_pmd(mm, pmd, address, end - address, &v);
- address = (address + PGDIR_SIZE) & PGDIR_MASK;
- dir++;
-
- } while (address && (address < end));
- spin_unlock(&mm->page_table_lock);
- return error;
-}
-
-
-int direct_remap_area_pages(struct mm_struct *mm,
- unsigned long address,
- unsigned long machine_addr,
- unsigned long size,
- pgprot_t prot,
- domid_t domid)
-{
- int i;
- unsigned long start_address;
-#define MAX_DIRECTMAP_MMU_QUEUE 130
- mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *v = u;
-
- start_address = address;
-
- flush_cache_all();
-
- for (i = 0; i < size; i += PAGE_SIZE) {
- if ((v - u) == MAX_DIRECTMAP_MMU_QUEUE) {
- /* Fill in the PTE pointers. */
- __direct_remap_area_pages(mm,
- start_address,
- address-start_address,
- u);
-
- if (HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0)
- return -EFAULT;
- v = u;
- start_address = address;
- }
-
- /*
- * Fill in the machine address: PTE ptr is done later by
- * __direct_remap_area_pages().
- */
- v->val = (machine_addr & PAGE_MASK) | pgprot_val(prot);
-
- machine_addr += PAGE_SIZE;
- address += PAGE_SIZE;
- v++;
- }
-
- if (v != u) {
- /* get the ptep's filled in */
- __direct_remap_area_pages(mm,
- start_address,
- address-start_address,
- u);
- if (unlikely(HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0))
- return -EFAULT;
- }
-
- flush_tlb_all();
-
- return 0;
-}
-
-EXPORT_SYMBOL(direct_remap_area_pages);
-
-static int lookup_pte_fn(
- pte_t *pte, struct page *pte_page, unsigned long addr, void *data)
-{
- unsigned long *ptep = (unsigned long *)data;
- if (ptep) *ptep = (pfn_to_mfn(page_to_pfn(pte_page)) << PAGE_SHIFT)
- | ((unsigned long)pte & ~PAGE_MASK);
- return 0;
-}
-
-int create_lookup_pte_addr(struct mm_struct *mm,
- unsigned long address,
- unsigned long *ptep)
-{
- return generic_page_range(mm, address, PAGE_SIZE, lookup_pte_fn, ptep);
-}
-
-EXPORT_SYMBOL(create_lookup_pte_addr);
-
-static int noop_fn(
- pte_t *pte, struct page *pte_page, unsigned long addr, void *data)
-{
- return 0;
-}
-
-int touch_pte_range(struct mm_struct *mm,
- unsigned long address,
- unsigned long size)
-{
- return generic_page_range(mm, address, size, noop_fn, NULL);
-}
-
-EXPORT_SYMBOL(touch_pte_range);
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog