# HG changeset patch
# User cl349@xxxxxxxxxxxxxxxxxxxx
# Node ID 0255f48b757fc4a69846356e8f42e9a4ed410c8c
# Parent 64cd054aa1432b44c66e72c0c0179827aa5772a9
Temporarily remove NetBSD and FreeBSD sparse trees to avoid user confusion.
The NetBSD and FreeBSD trees currently don't build against the final Xen 3.0 API.
Signed-off-by: Christian Limpach <Christian.Limpach@xxxxxxxxxxxx>
diff -r 64cd054aa143 -r 0255f48b757f buildconfigs/mk.netbsd-2.0-xenU
--- a/buildconfigs/mk.netbsd-2.0-xenU Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,45 +0,0 @@
-
-OS = netbsd
-
-NETBSD_RELEASE ?= 2.0
-NETBSD_CVSSNAP ?= 20050309
-
-EXTRAVERSION = xenU
-
-FULLVERSION = $(NETBSD_VER)-$(EXTRAVERSION)
-
-NETBSD_DIR = $(OS)-$(FULLVERSION)
-
-.PHONY: build clean mrproper mkpatch
-
-include buildconfigs/Rules.mk
-
-build: $(OS)-$(EXTRAVERSION)
-
-netbsd-%-tools.tar.bz2:
- @echo "Cannot find netbsd-$(NETBSD_VER)-tools.tar.gz in path
$(NETBSD_SRC_PATH)"
- wget
http://www.cl.cam.ac.uk/Research/SRG/netos/xen/downloads/netbsd-$*-tools.tar.bz2
-O./$@
-
-netbsd-%-tools: netbsd-%-tools.tar.bz2
- tar -jxf $<
- touch $@ # update timestamp to avoid rebuild
-
-$(NETBSD_DIR)/.valid: ref-$(OS)-$(NETBSD_VER)/.valid-ref
- $(RM) -rf $(NETBSD_DIR)
- cp -al $(<D) $(NETBSD_DIR)
- # Apply arch-xen patches
- ( cd netbsd-$(NETBSD_VER)-xen-sparse ; \
- ./mkbuildtree ../$(NETBSD_DIR) )
- @touch $(NETBSD_DIR)/.valid
-
-# build the specified netbsd tree
-netbsd-xen%: $(NETBSD_DIR)/.valid netbsd-$(NETBSD_RELEASE)-tools
- $(MAKE) -C netbsd-$(FULLVERSION) config
- $(MAKE) -C netbsd-$(FULLVERSION) netbsd
- $(MAKE) -C netbsd-$(FULLVERSION) INSTALL_PATH=$(DESTDIR)
INSTALL_NAME=boot/netbsd-$(NETBSD_VER)-xen$* install
-
-clean::
- $(MAKE) -C netbsd-$(FULLVERSION) clean
-
-delete:
- rm -rf tmp-$(OS)-$(NETBSD_VER) $(NETBSD_DIR)
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/conf/Makefile.i386-xen
--- a/freebsd-5.3-xen-sparse/conf/Makefile.i386-xen Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,51 +0,0 @@
-# Makefile.i386 -- with config changes.
-# Copyright 1990 W. Jolitz
-# from: @(#)Makefile.i386 7.1 5/10/91
-# $FreeBSD: src/sys/conf/Makefile.i386,v 1.259 2003/04/15 21:29:11 phk Exp $
-#
-# Makefile for FreeBSD
-#
-# This makefile is constructed from a machine description:
-# config machineid
-# Most changes should be made in the machine description
-# /sys/i386/conf/``machineid''
-# after which you should do
-# config machineid
-# Generic makefile changes should be made in
-# /sys/conf/Makefile.i386
-# after which config should be rerun for all machines.
-#
-
-# Which version of config(8) is required.
-%VERSREQ= 500013
-
-STD8X16FONT?= iso
-
-
-
-.if !defined(S)
-.if exists(./@/.)
-S= ./@
-.else
-S= ../../..
-.endif
-.endif
-.include "$S/conf/kern.pre.mk"
-M= i386-xen
-MKMODULESENV+= MACHINE=i386-xen
-INCLUDES+= -I../../include/xen-public
-%BEFORE_DEPEND
-
-%OBJS
-
-%FILES.c
-
-%FILES.s
-
-%FILES.m
-
-%CLEAN
-
-%RULES
-
-.include "$S/conf/kern.post.mk"
diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/conf/files.i386-xen
--- a/freebsd-5.3-xen-sparse/conf/files.i386-xen Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,295 +0,0 @@
-# This file tells config what files go into building a kernel,
-# files marked standard are always included.
-#
-# $FreeBSD: src/sys/conf/files.i386,v 1.457 2003/12/03 23:06:30 imp Exp $
-#
-# The long compile-with and dependency lines are required because of
-# limitations in config: backslash-newline doesn't work in strings, and
-# dependency lines other than the first are silently ignored.
-#
-linux_genassym.o optional compat_linux \
- dependency "$S/i386/linux/linux_genassym.c" \
- compile-with "${CC} ${CFLAGS:N-fno-common} -c ${.IMPSRC}" \
- no-obj no-implicit-rule \
- clean "linux_genassym.o"
-#
-linux_assym.h optional compat_linux \
- dependency "$S/kern/genassym.sh linux_genassym.o" \
- compile-with "sh $S/kern/genassym.sh linux_genassym.o > ${.TARGET}" \
- no-obj no-implicit-rule before-depend \
- clean "linux_assym.h"
-#
-svr4_genassym.o optional compat_svr4
\
- dependency "$S/i386/svr4/svr4_genassym.c" \
- compile-with "${CC} ${CFLAGS:N-fno-common} -c ${.IMPSRC}" \
- no-obj no-implicit-rule \
- clean "svr4_genassym.o"
-#
-svr4_assym.h optional compat_svr4 \
- dependency "$S/kern/genassym.sh svr4_genassym.o" \
- compile-with "sh $S/kern/genassym.sh svr4_genassym.o > ${.TARGET}" \
- no-obj no-implicit-rule before-depend \
- clean "svr4_assym.h"
-#
-font.h optional sc_dflt_font \
- compile-with "uudecode <
/usr/share/syscons/fonts/${SC_DFLT_FONT}-8x16.fnt && file2c 'static u_char
dflt_font_16[16*256] = {' '};' < ${SC_DFLT_FONT}-8x16 > font.h && uudecode <
/usr/share/syscons/fonts/${SC_DFLT_FONT}-8x14.fnt && file2c 'static u_char
dflt_font_14[14*256] = {' '};' < ${SC_DFLT_FONT}-8x14 >> font.h && uudecode <
/usr/share/syscons/fonts/${SC_DFLT_FONT}-8x8.fnt && file2c 'static u_char
dflt_font_8[8*256] = {' '};' < ${SC_DFLT_FONT}-8x8 >> font.h"
\
- no-obj no-implicit-rule before-depend \
- clean "font.h ${SC_DFLT_FONT}-8x14 ${SC_DFLT_FONT}-8x16
${SC_DFLT_FONT}-8x8"
-#
-atkbdmap.h optional atkbd_dflt_keymap \
- compile-with "/usr/sbin/kbdcontrol -L ${ATKBD_DFLT_KEYMAP} | sed -e
's/^static keymap_t.* = /static keymap_t key_map = /' -e 's/^static
accentmap_t.* = /static accentmap_t accent_map = /' > atkbdmap.h"
\
- no-obj no-implicit-rule before-depend \
- clean "atkbdmap.h"
-#
-ukbdmap.h optional ukbd_dflt_keymap \
- compile-with "/usr/sbin/kbdcontrol -L ${UKBD_DFLT_KEYMAP} | sed -e
's/^static keymap_t.* = /static keymap_t key_map = /' -e 's/^static
accentmap_t.* = /static accentmap_t accent_map = /' > ukbdmap.h"
\
- no-obj no-implicit-rule before-depend \
- clean "ukbdmap.h"
-#
-msysosak.o optional fla \
- dependency "$S/contrib/dev/fla/i386/msysosak.o.uu" \
- compile-with "uudecode < $S/contrib/dev/fla/i386/msysosak.o.uu" \
- no-implicit-rule
-#
-trlld.o optional oltr
\
- dependency "$S/contrib/dev/oltr/i386-elf.trlld.o.uu" \
- compile-with "uudecode < $S/contrib/dev/oltr/i386-elf.trlld.o.uu"
\
- no-implicit-rule
-#
-hal.o optional ath_hal \
- dependency "$S/contrib/dev/ath/freebsd/i386-elf.hal.o.uu" \
- compile-with "uudecode <
$S/contrib/dev/ath/freebsd/i386-elf.hal.o.uu" \
- no-implicit-rule
-#
-#
-compat/linux/linux_file.c optional compat_linux
-compat/linux/linux_getcwd.c optional compat_linux
-compat/linux/linux_ioctl.c optional compat_linux
-compat/linux/linux_ipc.c optional compat_linux
-compat/linux/linux_mib.c optional compat_linux
-compat/linux/linux_misc.c optional compat_linux
-compat/linux/linux_signal.c optional compat_linux
-compat/linux/linux_socket.c optional compat_linux
-compat/linux/linux_stats.c optional compat_linux
-compat/linux/linux_sysctl.c optional compat_linux
-compat/linux/linux_uid16.c optional compat_linux
-compat/linux/linux_util.c optional compat_linux
-compat/pecoff/imgact_pecoff.c optional pecoff_support
-compat/svr4/imgact_svr4.c optional compat_svr4
-compat/svr4/svr4_fcntl.c optional compat_svr4
-compat/svr4/svr4_filio.c optional compat_svr4
-compat/svr4/svr4_ioctl.c optional compat_svr4
-compat/svr4/svr4_ipc.c optional compat_svr4
-compat/svr4/svr4_misc.c optional compat_svr4
-compat/svr4/svr4_resource.c optional compat_svr4
-compat/svr4/svr4_signal.c optional compat_svr4
-compat/svr4/svr4_socket.c optional compat_svr4
-compat/svr4/svr4_sockio.c optional compat_svr4
-compat/svr4/svr4_stat.c optional compat_svr4
-compat/svr4/svr4_stream.c optional compat_svr4
-compat/svr4/svr4_syscallnames.c optional compat_svr4
-compat/svr4/svr4_sysent.c optional compat_svr4
-compat/svr4/svr4_sysvec.c optional compat_svr4
-compat/svr4/svr4_termios.c optional compat_svr4
-compat/svr4/svr4_ttold.c optional compat_svr4
-contrib/dev/fla/fla.c optional fla
-contrib/dev/oltr/if_oltr.c optional oltr
-contrib/dev/oltr/trlldbm.c optional oltr
-contrib/dev/oltr/trlldhm.c optional oltr
-contrib/dev/oltr/trlldmac.c optional oltr
-bf_enc.o optional ipsec ipsec_esp \
- dependency "$S/crypto/blowfish/arch/i386/bf_enc.S
$S/crypto/blowfish/arch/i386/bf_enc_586.S
$S/crypto/blowfish/arch/i386/bf_enc_686.S" \
- compile-with "${CC} -c -I$S/crypto/blowfish/arch/i386 ${ASM_CFLAGS}
${WERROR} ${.IMPSRC}" \
- no-implicit-rule
-crypto/des/arch/i386/des_enc.S optional ipsec ipsec_esp
-crypto/des/des_ecb.c optional netsmbcrypto
-crypto/des/arch/i386/des_enc.S optional netsmbcrypto
-crypto/des/des_setkey.c optional netsmbcrypto
-bf_enc.o optional crypto \
- dependency "$S/crypto/blowfish/arch/i386/bf_enc.S
$S/crypto/blowfish/arch/i386/bf_enc_586.S
$S/crypto/blowfish/arch/i386/bf_enc_686.S" \
- compile-with "${CC} -c -I$S/crypto/blowfish/arch/i386 ${ASM_CFLAGS}
${WERROR} ${.IMPSRC}" \
- no-implicit-rule
-crypto/des/arch/i386/des_enc.S optional crypto
-crypto/des/des_ecb.c optional crypto
-crypto/des/des_setkey.c optional crypto
-dev/ar/if_ar.c optional ar
-dev/ar/if_ar_pci.c optional ar pci
-dev/cx/csigma.c optional cx
-dev/cx/cxddk.c optional cx
-dev/cx/if_cx.c optional cx
-dev/dgb/dgb.c count dgb
-dev/fb/fb.c optional fb
-dev/fb/fb.c optional vga
-dev/fb/splash.c optional splash
-dev/fb/vga.c optional vga
-dev/kbd/atkbd.c optional atkbd
-dev/kbd/atkbdc.c optional atkbdc
-dev/kbd/kbd.c optional atkbd
-dev/kbd/kbd.c optional kbd
-dev/kbd/kbd.c optional sc
-dev/kbd/kbd.c optional ukbd
-dev/kbd/kbd.c optional vt
-dev/mem/memutil.c standard
-dev/random/nehemiah.c standard
-dev/ppc/ppc.c optional ppc
-dev/ppc/ppc_puc.c optional ppc puc pci
-dev/sio/sio.c optional sio
-dev/sio/sio_isa.c optional sio isa
-dev/syscons/schistory.c optional sc
-dev/syscons/scmouse.c optional sc
-dev/syscons/scterm.c optional sc
-dev/syscons/scterm-dumb.c optional sc
-dev/syscons/scterm-sc.c optional sc
-dev/syscons/scvesactl.c optional sc vga vesa
-dev/syscons/scvgarndr.c optional sc vga
-dev/syscons/scvidctl.c optional sc
-dev/syscons/scvtb.c optional sc
-dev/syscons/syscons.c optional sc
-dev/syscons/sysmouse.c optional sc
-dev/uart/uart_cpu_i386.c optional uart
-geom/geom_bsd.c standard
-geom/geom_bsd_enc.c standard
-geom/geom_mbr.c standard
-geom/geom_mbr_enc.c standard
-i386/acpica/OsdEnvironment.c optional acpi
-i386/acpica/acpi_machdep.c optional acpi
-i386/acpica/acpi_wakeup.c optional acpi
-acpi_wakecode.h optional acpi
\
- dependency "$S/i386/acpica/acpi_wakecode.S" \
- compile-with "${MAKE} -f $S/i386/acpica/Makefile
MAKESRCPATH=$S/i386/acpica" \
- no-obj no-implicit-rule before-depend \
- clean "acpi_wakecode.h acpi_wakecode.o acpi_wakecode.bin"
-#
-i386/acpica/madt.c optional acpi apic
-i386/bios/mca_machdep.c optional mca
-i386/bios/smapi.c optional smapi
-i386/bios/smapi_bios.S optional smapi
-i386/bios/smbios.c optional smbios
-i386/bios/vpd.c optional vpd
-i386/i386/apic_vector.s optional apic
-i386/i386/atomic.c standard \
- compile-with "${CC} -c ${CFLAGS}
${DEFINED_PROF:S/^$/-fomit-frame-pointer/} ${.IMPSRC}"
-i386/i386/autoconf.c standard
-i386/i386/busdma_machdep.c standard
-i386-xen/i386-xen/critical.c standard
-i386/i386/db_disasm.c optional ddb
-i386-xen/i386-xen/db_interface.c optional ddb
-i386/i386/db_trace.c optional ddb
-i386/i386/i386-gdbstub.c optional ddb
-i386/i386/dump_machdep.c standard
-i386/i386/elf_machdep.c standard
-i386-xen/i386-xen/exception.s standard
-i386-xen/i386-xen/i686_mem.c standard
-i386/i386/identcpu.c standard
-i386/i386/in_cksum.c optional inet
-i386-xen/i386-xen/initcpu.c standard
-i386-xen/i386-xen/intr_machdep.c standard
-i386-xen/i386-xen/io_apic.c optional apic
-i386/i386/legacy.c standard
-i386-xen/i386-xen/locore.s standard no-obj
-i386-xen/i386-xen/machdep.c standard
-i386/i386/mem.c standard
-i386-xen/i386-xen/mp_clock.c optional smp
-i386-xen/i386-xen/mp_machdep.c optional smp
-i386/i386/mpboot.s optional smp
-i386-xen/i386-xen/mptable.c optional apic
-i386-xen/i386-xen/local_apic.c optional apic
-i386/i386/mptable_pci.c optional apic pci
-i386/i386/nexus.c standard
-i386/i386/uio_machdep.c standard
-i386/i386/perfmon.c optional perfmon
-i386/i386/perfmon.c optional perfmon profiling-routine
-i386-xen/i386-xen/pmap.c standard
-i386-xen/i386-xen/support.s standard
-i386-xen/i386-xen/swtch.s standard
-i386-xen/i386-xen/sys_machdep.c standard
-i386-xen/i386-xen/trap.c standard
-i386/i386/tsc.c standard
-i386-xen/i386-xen/vm_machdep.c standard
-i386-xen/i386-xen/clock.c standard
-
-# xen specific arch-dep files
-i386-xen/i386-xen/hypervisor.c standard
-i386-xen/i386-xen/xen_machdep.c standard
-i386-xen/i386-xen/xen_bus.c standard
-i386-xen/i386-xen/evtchn.c standard
-i386-xen/i386-xen/ctrl_if.c standard
-i386-xen/i386-xen/gnttab.c standard
-
-
-i386/isa/asc.c count asc
-i386/isa/ctx.c optional ctx
-i386/isa/cy.c count cy
-i386/isa/elink.c optional ep
-i386/isa/elink.c optional ie
-i386/isa/gpib.c optional gp
-i386/isa/gsc.c count gsc
-i386/isa/istallion.c optional stli nowerror
-i386/isa/loran.c optional loran
-i386/isa/mse.c optional mse
-i386/isa/nmi.c standard
-
-# drivers
-i386-xen/xen/misc/npx.c optional npx
-i386-xen/xen/misc/evtchn_dev.c standard
-i386-xen/xen/char/console.c standard
-i386-xen/xen/netfront/xn_netfront.c standard
-i386-xen/xen/blkfront/xb_blkfront.c standard
-
-
-
-i386/isa/pcf.c optional pcf
-i386/isa/pcvt/pcvt_drv.c optional vt
-i386/isa/pcvt/pcvt_ext.c optional vt
-i386/isa/pcvt/pcvt_kbd.c optional vt
-i386/isa/pcvt/pcvt_out.c optional vt
-i386/isa/pcvt/pcvt_sup.c optional vt
-i386/isa/pcvt/pcvt_vtf.c optional vt
-i386/isa/pmtimer.c optional pmtimer
-i386/isa/prof_machdep.c optional profiling-routine
-i386/isa/spic.c optional spic
-i386/isa/spigot.c count spigot
-i386/isa/spkr.c optional speaker
-i386/isa/stallion.c optional stl nowerror
-i386/isa/vesa.c optional vga vesa
-i386/isa/wt.c count wt
-i386/linux/imgact_linux.c optional compat_linux
-i386/linux/linux_dummy.c optional compat_linux
-i386/linux/linux_locore.s optional compat_linux \
- dependency "linux_assym.h"
-i386/linux/linux_machdep.c optional compat_linux
-i386/linux/linux_ptrace.c optional compat_linux
-i386/linux/linux_sysent.c optional compat_linux
-i386/linux/linux_sysvec.c optional compat_linux
-i386/pci/pci_cfgreg.c optional pci
-i386/pci/pci_bus.c optional pci
-i386/svr4/svr4_locore.s optional compat_svr4
\
- dependency "svr4_assym.h" \
- warning "COMPAT_SVR4 is broken and should be avoided"
-i386/svr4/svr4_machdep.c optional compat_svr4
-isa/atkbd_isa.c optional atkbd
-isa/atkbdc_isa.c optional atkbdc
-isa/fd.c optional fdc
-isa/psm.c optional psm
-isa/syscons_isa.c optional sc
-isa/vga_isa.c optional vga
-kern/imgact_aout.c optional compat_aout
-kern/imgact_gzip.c optional gzip
-libkern/divdi3.c standard
-libkern/moddi3.c standard
-libkern/qdivrem.c standard
-libkern/ucmpdi2.c standard
-libkern/udivdi3.c standard
-libkern/umoddi3.c standard
-libkern/flsl.c standard
-libkern/ffsl.c standard
-
-pci/cy_pci.c optional cy pci
-pci/agp_intel.c optional agp
-pci/agp_via.c optional agp
-pci/agp_sis.c optional agp
-pci/agp_ali.c optional agp
-pci/agp_amd.c optional agp
-pci/agp_i810.c optional agp
-pci/agp_nvidia.c optional agp
-
diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/conf/kern.mk
--- a/freebsd-5.3-xen-sparse/conf/kern.mk Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,100 +0,0 @@
-# $FreeBSD: src/sys/conf/kern.mk,v 1.42 2004/05/14 13:35:46 cognet Exp $
-
-#
-# Warning flags for compiling the kernel and components of the kernel.
-#
-# Note that the newly added -Wcast-qual is responsible for generating
-# most of the remaining warnings. Warnings introduced with -Wall will
-# also pop up, but are easier to fix.
-.if ${CC} == "icc"
-#CWARNFLAGS= -w2 # use this if you are terribly bored
-CWARNFLAGS=
-.else
-CWARNFLAGS?= -Wall -Wredundant-decls -Wnested-externs -Wstrict-prototypes \
- -Wmissing-prototypes -Wpointer-arith -Winline -Wcast-qual \
- -fformat-extensions
-.endif
-# -std=c99 anonymous unions are non-compliant
-#
-# The following flags are next up for working on:
-# -W
-
-#
-# On the i386, do not align the stack to 16-byte boundaries. Otherwise GCC
-# 2.95 adds code to the entry and exit point of every function to align the
-# stack to 16-byte boundaries -- thus wasting approximately 12 bytes of stack
-# per function call. While the 16-byte alignment may benefit micro
benchmarks,
-# it is probably an overall loss as it makes the code bigger (less efficient
-# use of code cache tag lines) and uses more stack (less efficient use of data
-# cache tag lines)
-#
-.if ${MACHINE_ARCH} == "i386" && ${CC} != "icc"
-CFLAGS+= -mno-align-long-strings -mpreferred-stack-boundary=2
-INLINE_LIMIT?= 8000
-.endif
-
-#
-# On the alpha, make sure that we don't use floating-point registers and
-# allow the use of BWX etc instructions (only needed for low-level i/o).
-# Also, reserve register t7 to point at per-cpu global variables.
-#
-.if ${MACHINE_ARCH} == "alpha"
-CFLAGS+= -mno-fp-regs -ffixed-8 -Wa,-mev6
-INLINE_LIMIT?= 15000
-.endif
-
-.if ${MACHINE_ARCH} == "arm"
-INLINE_LIMIT?= 8000
-.endif
-#
-# For IA-64, we use r13 for the kernel globals pointer and we only use
-# a very small subset of float registers for integer divides.
-#
-.if ${MACHINE_ARCH} == "ia64"
-CFLAGS+= -ffixed-r13 -mfixed-range=f32-f127 -mno-sdata
-INLINE_LIMIT?= 15000
-.endif
-
-#
-# For sparc64 we want medlow code model, and we tell gcc to use floating
-# point emulation. This avoids using floating point registers for integer
-# operations which it has a tendency to do.
-#
-.if ${MACHINE_ARCH} == "sparc64"
-CFLAGS+= -mcmodel=medlow -msoft-float
-INLINE_LIMIT?= 15000
-.endif
-
-#
-# For AMD64, use a medium model for now. We'll switch to "kernel"
-# once pmap is ready. Be excessively careful to not generate FPU code.
-#
-.if ${MACHINE_ARCH} == "amd64"
-CFLAGS+= -mcmodel=kernel -mno-red-zone \
- -mfpmath=387 -mno-sse -mno-sse2 -mno-mmx -mno-3dnow \
- -msoft-float -fno-asynchronous-unwind-tables
-INLINE_LIMIT?= 8000
-.endif
-
-#
-# For PowerPC we tell gcc to use floating point emulation. This avoids using
-# floating point registers for integer operations which it has a tendency to
do.
-#
-.if ${MACHINE_ARCH} == "powerpc"
-CFLAGS+= -msoft-float
-INLINE_LIMIT?= 15000
-.endif
-
-#
-# GCC 3.0 and above like to do certain optimizations based on the
-# assumption that the program is linked against libc. Stop this.
-#
-.if ${CC} == "icc"
-CFLAGS+= -nolib_inline
-.else
-CFLAGS+= -ffreestanding
-.endif
-
-.if ${CC} == "icc"
-CFLAGS+= -restrict
-.endif
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/conf/ldscript.i386-xen
--- a/freebsd-5.3-xen-sparse/conf/ldscript.i386-xen Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,134 +0,0 @@
-/* $FreeBSD: src/sys/conf/ldscript.i386,v 1.9 2003/12/03 07:40:03 phk Exp $ */
-OUTPUT_FORMAT("elf32-i386-freebsd", "elf32-i386-freebsd", "elf32-i386-freebsd")
-OUTPUT_ARCH(i386)
-ENTRY(btext)
-SEARCH_DIR(/usr/lib);
-SECTIONS
-{
- /* Read-only sections, merged into text segment: */
- . = kernbase + SIZEOF_HEADERS;
- .interp : { *(.interp) }
- .hash : { *(.hash) }
- .dynsym : { *(.dynsym) }
- .dynstr : { *(.dynstr) }
- .gnu.version : { *(.gnu.version) }
- .gnu.version_d : { *(.gnu.version_d) }
- .gnu.version_r : { *(.gnu.version_r) }
- .rel.text :
- { *(.rel.text) *(.rel.gnu.linkonce.t*) }
- .rela.text :
- { *(.rela.text) *(.rela.gnu.linkonce.t*) }
- .rel.data :
- { *(.rel.data) *(.rel.gnu.linkonce.d*) }
- .rela.data :
- { *(.rela.data) *(.rela.gnu.linkonce.d*) }
- .rel.rodata :
- { *(.rel.rodata) *(.rel.gnu.linkonce.r*) }
- .rela.rodata :
- { *(.rela.rodata) *(.rela.gnu.linkonce.r*) }
- .rel.got : { *(.rel.got) }
- .rela.got : { *(.rela.got) }
- .rel.ctors : { *(.rel.ctors) }
- .rela.ctors : { *(.rela.ctors) }
- .rel.dtors : { *(.rel.dtors) }
- .rela.dtors : { *(.rela.dtors) }
- .rel.init : { *(.rel.init) }
- .rela.init : { *(.rela.init) }
- .rel.fini : { *(.rel.fini) }
- .rela.fini : { *(.rela.fini) }
- .rel.bss : { *(.rel.bss) }
- .rela.bss : { *(.rela.bss) }
- .rel.plt : { *(.rel.plt) }
- .rela.plt : { *(.rela.plt) }
- .init : { *(.init) } =0x9090
- .plt : { *(.plt) }
- .text :
- {
- *(.text)
- *(.stub)
- /* .gnu.warning sections are handled specially by elf32.em. */
- *(.gnu.warning)
- *(.gnu.linkonce.t*)
- } =0x9090
- _etext = .;
- PROVIDE (etext = .);
- .fini : { *(.fini) } =0x9090
- .rodata : { *(.rodata) *(.gnu.linkonce.r*) }
- .rodata1 : { *(.rodata1) }
- /* Adjust the address for the data segment. We want to adjust up to
- the same address within the page on the next page up. */
- . = ALIGN(0x1000) + (. & (0x1000 - 1)) ;
- .data :
- {
- *(.data)
- *(.gnu.linkonce.d*)
- CONSTRUCTORS
- }
- .data1 : { *(.data1) }
- . = ALIGN(32 / 8);
- _start_ctors = .;
- PROVIDE (start_ctors = .);
- .ctors :
- {
- *(.ctors)
- }
- _stop_ctors = .;
- PROVIDE (stop_ctors = .);
- .dtors :
- {
- *(.dtors)
- }
- .got : { *(.got.plt) *(.got) }
- .dynamic : { *(.dynamic) }
- /* We want the small data sections together, so single-instruction offsets
- can access them all, and initialized data all before uninitialized, so
- we can shorten the on-disk segment size. */
- .sdata : { *(.sdata) }
- _edata = .;
- PROVIDE (edata = .);
- __bss_start = .;
- .sbss : { *(.sbss) *(.scommon) }
- .bss :
- {
- *(.dynbss)
- *(.bss)
- *(COMMON)
- }
- . = ALIGN(32 / 8);
- _end = . ;
- PROVIDE (end = .);
- /* Stabs debugging sections. */
- .stab 0 : { *(.stab) }
- .stabstr 0 : { *(.stabstr) }
- .stab.excl 0 : { *(.stab.excl) }
- .stab.exclstr 0 : { *(.stab.exclstr) }
- .stab.index 0 : { *(.stab.index) }
- .stab.indexstr 0 : { *(.stab.indexstr) }
- .comment 0 : { *(.comment) }
- /* DWARF debug sections.
- Symbols in the DWARF debugging sections are relative to the beginning
- of the section so we begin them at 0. */
- /* DWARF 1 */
- .debug 0 : { *(.debug) }
- .line 0 : { *(.line) }
- /* GNU DWARF 1 extensions */
- .debug_srcinfo 0 : { *(.debug_srcinfo) }
- .debug_sfnames 0 : { *(.debug_sfnames) }
- /* DWARF 1.1 and DWARF 2 */
- .debug_aranges 0 : { *(.debug_aranges) }
- .debug_pubnames 0 : { *(.debug_pubnames) }
- /* DWARF 2 */
- .debug_info 0 : { *(.debug_info) }
- .debug_abbrev 0 : { *(.debug_abbrev) }
- .debug_line 0 : { *(.debug_line) }
- .debug_frame 0 : { *(.debug_frame) }
- .debug_str 0 : { *(.debug_str) }
- .debug_loc 0 : { *(.debug_loc) }
- .debug_macinfo 0 : { *(.debug_macinfo) }
- /* SGI/MIPS DWARF 2 extensions */
- .debug_weaknames 0 : { *(.debug_weaknames) }
- .debug_funcnames 0 : { *(.debug_funcnames) }
- .debug_typenames 0 : { *(.debug_typenames) }
- .debug_varnames 0 : { *(.debug_varnames) }
- /* These must appear regardless of . */
-}
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/conf/options.i386-xen
--- a/freebsd-5.3-xen-sparse/conf/options.i386-xen Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,162 +0,0 @@
-# $FreeBSD: src/sys/conf/options.i386,v 1.204 2003/12/03 23:06:30 imp Exp $
-# Options specific to the i386 platform kernels
-
-AUTO_EOI_1 opt_auto_eoi.h
-AUTO_EOI_2 opt_auto_eoi.h
-BROKEN_KEYBOARD_RESET opt_reset.h
-COMPAT_OLDISA
-I586_PMC_GUPROF opt_i586_guprof.h
-MAXMEM
-MPTABLE_FORCE_HTT
-NO_MIXED_MODE
-PERFMON
-DISABLE_PSE opt_pmap.h
-DISABLE_PG_G opt_pmap.h
-PMAP_SHPGPERPROC opt_pmap.h
-PPC_PROBE_CHIPSET opt_ppc.h
-PPC_DEBUG opt_ppc.h
-POWERFAIL_NMI opt_trap.h
-MP_WATCHDOG opt_mp_watchdog.h
-
-
-
-# Options for emulators. These should only be used at config time, so
-# they are handled like options for static filesystems
-# (see src/sys/conf/options), except for broken debugging options.
-COMPAT_AOUT opt_dontuse.h
-IBCS2 opt_dontuse.h
-COMPAT_LINUX opt_dontuse.h
-COMPAT_SVR4 opt_dontuse.h
-DEBUG_SVR4 opt_svr4.h
-PECOFF_SUPPORT opt_dontuse.h
-PECOFF_DEBUG opt_pecoff.h
-
-# Change KVM size. Changes things all over the kernel.
-KVA_PAGES opt_global.h
-XEN opt_global.h
-XENDEV opt_xen.h
-NOXENDEBUG opt_xen.h
-# Physical address extensions and support for >4G ram. As above.
-PAE opt_global.h
-
-CLK_CALIBRATION_LOOP opt_clock.h
-CLK_USE_I8254_CALIBRATION opt_clock.h
-CLK_USE_TSC_CALIBRATION opt_clock.h
-TIMER_FREQ opt_clock.h
-
-CPU_ATHLON_SSE_HACK opt_cpu.h
-CPU_BLUELIGHTNING_3X opt_cpu.h
-CPU_BLUELIGHTNING_FPU_OP_CACHE opt_cpu.h
-CPU_BTB_EN opt_cpu.h
-CPU_CYRIX_NO_LOCK opt_cpu.h
-CPU_DIRECT_MAPPED_CACHE opt_cpu.h
-CPU_DISABLE_5X86_LSSER opt_cpu.h
-CPU_DISABLE_CMPXCHG opt_global.h # XXX global, unlike other CPU_*
-CPU_DISABLE_SSE opt_cpu.h
-CPU_ELAN opt_cpu.h
-CPU_ELAN_XTAL opt_cpu.h
-CPU_ELAN_PPS opt_cpu.h
-CPU_ENABLE_SSE opt_cpu.h
-CPU_FASTER_5X86_FPU opt_cpu.h
-CPU_GEODE opt_cpu.h
-CPU_I486_ON_386 opt_cpu.h
-CPU_IORT opt_cpu.h
-CPU_L2_LATENCY opt_cpu.h
-CPU_LOOP_EN opt_cpu.h
-CPU_PPRO2CELERON opt_cpu.h
-CPU_RSTK_EN opt_cpu.h
-CPU_SOEKRIS opt_cpu.h
-CPU_SUSP_HLT opt_cpu.h
-CPU_UPGRADE_HW_CACHE opt_cpu.h
-CPU_WT_ALLOC opt_cpu.h
-CYRIX_CACHE_REALLY_WORKS opt_cpu.h
-CYRIX_CACHE_WORKS opt_cpu.h
-NO_F00F_HACK opt_cpu.h
-NO_MEMORY_HOLE opt_cpu.h
-
-# The CPU type affects the endian conversion functions all over the kernel.
-I386_CPU opt_global.h
-I486_CPU opt_global.h
-I586_CPU opt_global.h
-I686_CPU opt_global.h
-
-VGA_ALT_SEQACCESS opt_vga.h
-VGA_DEBUG opt_vga.h
-VGA_NO_FONT_LOADING opt_vga.h
-VGA_NO_MODE_CHANGE opt_vga.h
-VGA_SLOW_IOACCESS opt_vga.h
-VGA_WIDTH90 opt_vga.h
-
-VESA
-VESA_DEBUG opt_vesa.h
-
-PSM_HOOKRESUME opt_psm.h
-PSM_RESETAFTERSUSPEND opt_psm.h
-PSM_DEBUG opt_psm.h
-
-ATKBD_DFLT_KEYMAP opt_atkbd.h
-
-# pcvt(4) has a bunch of options
-FAT_CURSOR opt_pcvt.h
-XSERVER opt_pcvt.h
-PCVT_24LINESDEF opt_pcvt.h
-PCVT_CTRL_ALT_DEL opt_pcvt.h
-PCVT_META_ESC opt_pcvt.h
-PCVT_NSCREENS opt_pcvt.h
-PCVT_PRETTYSCRNS opt_pcvt.h
-PCVT_SCANSET opt_pcvt.h
-PCVT_SCREENSAVER opt_pcvt.h
-PCVT_USEKBDSEC opt_pcvt.h
-PCVT_VT220KEYB opt_pcvt.h
-PCVT_GREENSAVER opt_pcvt.h
-
-# Video spigot
-SPIGOT_UNSECURE opt_spigot.h
-
-# Enables NETGRAPH support for Cronyx adapters
-NETGRAPH_CRONYX opt_ng_cronyx.h
-
-# -------------------------------
-# isdn4bsd: passive ISA cards
-# -------------------------------
-TEL_S0_8 opt_i4b.h
-TEL_S0_16 opt_i4b.h
-TEL_S0_16_3 opt_i4b.h
-AVM_A1 opt_i4b.h
-USR_STI opt_i4b.h
-ITKIX1 opt_i4b.h
-ELSA_PCC16 opt_i4b.h
-# -------------------------------
-# isdn4bsd: passive ISA PnP cards
-# -------------------------------
-CRTX_S0_P opt_i4b.h
-DRN_NGO opt_i4b.h
-TEL_S0_16_3_P opt_i4b.h
-SEDLBAUER opt_i4b.h
-DYNALINK opt_i4b.h
-ASUSCOM_IPAC opt_i4b.h
-ELSA_QS1ISA opt_i4b.h
-SIEMENS_ISURF2 opt_i4b.h
-EICON_DIVA opt_i4b.h
-COMPAQ_M610 opt_i4b.h
-# -------------------------------
-# isdn4bsd: passive PCI cards
-# -------------------------------
-ELSA_QS1PCI opt_i4b.h
-# -------------------------------
-# isdn4bsd: misc options
-# -------------------------------
-# temporary workaround for SMP machines
-I4B_SMP_WORKAROUND opt_i4b.h
-# enable VJ compression code for ipr i/f
-IPR_VJ opt_i4b.h
-IPR_LOG opt_i4b.h
-
-# Device options
-DEV_ACPI opt_acpi.h
-DEV_APIC opt_apic.h
-DEV_NPX opt_npx.h
-
-# -------------------------------
-# EOF
-# -------------------------------
diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/fbsdxensetup
--- a/freebsd-5.3-xen-sparse/fbsdxensetup Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,44 +0,0 @@
-#!/bin/csh -f
-
-setenv XENROOT `pwd`
-cd $XENROOT
-if ( ! -d freebsd-5.3-xen-sparse ) then
- echo "Please run this script from the root of the Xen source tree"
- exit 1
-endif
-rm -rf $XENROOT/fbsdtmp $XENROOT/freebsd-5.3-xenU
-mkdir -p $XENROOT/fbsdtmp
-cd $XENROOT/fbsdtmp
-echo "step 1"
-wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.aa
-wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.ab
-wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.ac
-wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.ad
-wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.ae
-wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.af
-wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.ag
-wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.ah
-wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.ai
-wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.aj
-wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.ak
-wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.al
-mkdir -p foo
-cat ssys.?? | tar --unlink -xpzf - -C foo/
-mkdir -p $XENROOT/freebsd-5.3-xenU
-mv foo/sys/* $XENROOT/freebsd-5.3-xenU
-cd $XENROOT
-rm -rf $XENROOT/fbsdtmp
-echo "step 2"
-mkdir -p $XENROOT/freebsd-5.3-xenU/i386-xen/include
-cd $XENROOT/freebsd-5.3-xenU/i386-xen/include/
-foreach file (../../i386/include/*)
- ln -s $file
-end
-echo "step 3"
-cd $XENROOT/freebsd-5.3-xen-sparse
-echo "step 4"
-./mkbuildtree ../freebsd-5.3-xenU
-echo "step 5"
-cd $XENROOT/freebsd-5.3-xenU/i386-xen/include
-ln -s $XENROOT/xen/include/public xen-public
-echo "done"
diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/Makefile
--- a/freebsd-5.3-xen-sparse/i386-xen/Makefile Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,40 +0,0 @@
-# $FreeBSD: src/sys/i386/Makefile,v 1.11 2002/06/21 06:18:02 mckusick Exp $
-# @(#)Makefile 8.1 (Berkeley) 6/11/93
-
-# Makefile for i386 links, tags file
-
-# SYS is normally set in Make.tags.inc
-# SYS=/sys
-SYS=/nsys
-
-TAGDIR= i386
-
-.include "../kern/Make.tags.inc"
-
-all:
- @echo "make links or tags only"
-
-# Directories in which to place i386 tags links
-DI386= apm i386 ibcs2 include isa linux
-
-links::
- -for i in ${COMMDIR1}; do \
- (cd $$i && { rm -f tags; ln -s ../${TAGDIR}/tags tags; }) done
- -for i in ${COMMDIR2}; do \
- (cd $$i && { rm -f tags; ln -s ../../${TAGDIR}/tags tags; }) done
- -for i in ${DI386}; do \
- (cd $$i && { rm -f tags; ln -s ../tags tags; }) done
-
-SI386= ${SYS}/i386/apm/*.[ch] \
- ${SYS}/i386/i386/*.[ch] ${SYS}/i386/ibcs2/*.[ch] \
- ${SYS}/i386/include/*.[ch] ${SYS}/i386/isa/*.[ch] \
- ${SYS}/i386/linux/*.[ch]
-AI386= ${SYS}/i386/i386/*.s
-
-tags::
- -ctags -wdt ${COMM} ${SI386}
- egrep "^ENTRY(.*)|^ALTENTRY(.*)" ${AI386} | \
- sed "s;\([^:]*\):\([^(]*\)(\([^, )]*\)\(.*\);\3 \1 /^\2(\3\4$$/;" \
- >> tags
- sort -o tags tags
- chmod 444 tags
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/conf/GENERIC
--- a/freebsd-5.3-xen-sparse/i386-xen/conf/GENERIC Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,273 +0,0 @@
-#
-# GENERIC -- Generic kernel configuration file for FreeBSD/i386
-#
-# For more information on this file, please read the handbook section on
-# Kernel Configuration Files:
-#
-#
http://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html
-#
-# The handbook is also available locally in /usr/share/doc/handbook
-# if you've installed the doc distribution, otherwise always see the
-# FreeBSD World Wide Web server (http://www.FreeBSD.org/) for the
-# latest information.
-#
-# An exhaustive list of options and more detailed explanations of the
-# device lines is also present in the ../../conf/NOTES and NOTES files.
-# If you are in doubt as to the purpose or necessity of a line, check first
-# in NOTES.
-#
-# $FreeBSD: src/sys/i386/conf/GENERIC,v 1.394.2.3 2004/01/26 19:42:11 nectar
Exp $
-
-machine i386
-cpu I486_CPU
-cpu I586_CPU
-cpu I686_CPU
-ident GENERIC
-
-#To statically compile in device wiring instead of /boot/device.hints
-#hints "GENERIC.hints" #Default places to look for devices.
-
-#makeoptions DEBUG=-g #Build kernel with gdb(1) debug symbols
-
-options SCHED_4BSD #4BSD scheduler
-options INET #InterNETworking
-options INET6 #IPv6 communications protocols
-options FFS #Berkeley Fast Filesystem
-options SOFTUPDATES #Enable FFS soft updates support
-options UFS_ACL #Support for access control lists
-options UFS_DIRHASH #Improve performance on big directories
-options MD_ROOT #MD is a potential root device
-options NFSCLIENT #Network Filesystem Client
-options NFSSERVER #Network Filesystem Server
-options NFS_ROOT #NFS usable as /, requires NFSCLIENT
-options MSDOSFS #MSDOS Filesystem
-options CD9660 #ISO 9660 Filesystem
-options PROCFS #Process filesystem (requires PSEUDOFS)
-options PSEUDOFS #Pseudo-filesystem framework
-options COMPAT_43 #Compatible with BSD 4.3 [KEEP THIS!]
-options COMPAT_FREEBSD4 #Compatible with FreeBSD4
-options SCSI_DELAY=15000 #Delay (in ms) before probing SCSI
-options KTRACE #ktrace(1) support
-options SYSVSHM #SYSV-style shared memory
-options SYSVMSG #SYSV-style message queues
-options SYSVSEM #SYSV-style semaphores
-options _KPOSIX_PRIORITY_SCHEDULING #Posix P1003_1B real-time extensions
-options KBD_INSTALL_CDEV # install a CDEV entry in /dev
-options AHC_REG_PRETTY_PRINT # Print register bitfields in debug
- # output. Adds ~128k to driver.
-options AHD_REG_PRETTY_PRINT # Print register bitfields in debug
- # output. Adds ~215k to driver.
-options PFIL_HOOKS # pfil(9) framework
-
-# Debugging for use in -current
-#options DDB #Enable the kernel debugger
-#options INVARIANTS #Enable calls of extra sanity checking
-options INVARIANT_SUPPORT #Extra sanity checks of internal
structures, required by INVARIANTS
-#options WITNESS #Enable checks to detect deadlocks and
cycles
-#options WITNESS_SKIPSPIN #Don't run witness on spinlocks for
speed
-
-# To make an SMP kernel, the next two are needed
-options SMP # Symmetric MultiProcessor Kernel
-device apic # I/O APIC
-
-device isa
-device eisa
-device pci
-
-# Floppy drives
-device fdc
-
-# ATA and ATAPI devices
-device ata
-device atadisk # ATA disk drives
-device ataraid # ATA RAID drives
-device atapicd # ATAPI CDROM drives
-device atapifd # ATAPI floppy drives
-device atapist # ATAPI tape drives
-options ATA_STATIC_ID #Static device numbering
-
-# SCSI Controllers
-device ahb # EISA AHA1742 family
-device ahc # AHA2940 and onboard AIC7xxx devices
-device ahd # AHA39320/29320 and onboard AIC79xx devices
-device amd # AMD 53C974 (Tekram DC-390(T))
-device isp # Qlogic family
-device mpt # LSI-Logic MPT-Fusion
-#device ncr # NCR/Symbios Logic
-device sym # NCR/Symbios Logic (newer chipsets + those of
`ncr')
-device trm # Tekram DC395U/UW/F DC315U adapters
-
-device adv # Advansys SCSI adapters
-device adw # Advansys wide SCSI adapters
-device aha # Adaptec 154x SCSI adapters
-device aic # Adaptec 15[012]x SCSI adapters, AIC-6[23]60.
-device bt # Buslogic/Mylex MultiMaster SCSI adapters
-
-device ncv # NCR 53C500
-device nsp # Workbit Ninja SCSI-3
-device stg # TMC 18C30/18C50
-
-# SCSI peripherals
-device scbus # SCSI bus (required for SCSI)
-device ch # SCSI media changers
-device da # Direct Access (disks)
-device sa # Sequential Access (tape etc)
-device cd # CD
-device pass # Passthrough device (direct SCSI access)
-device ses # SCSI Environmental Services (and SAF-TE)
-
-# RAID controllers interfaced to the SCSI subsystem
-device amr # AMI MegaRAID
-device asr # DPT SmartRAID V, VI and Adaptec SCSI RAID
-device ciss # Compaq Smart RAID 5*
-device dpt # DPT Smartcache III, IV - See NOTES for options
-device iir # Intel Integrated RAID
-device ips # IBM (Adaptec) ServeRAID
-device mly # Mylex AcceleRAID/eXtremeRAID
-
-# RAID controllers
-device aac # Adaptec FSA RAID
-device aacp # SCSI passthrough for aac (requires CAM)
-device ida # Compaq Smart RAID
-device mlx # Mylex DAC960 family
-device pst # Promise Supertrak SX6000
-device twe # 3ware ATA RAID
-
-# atkbdc0 controls both the keyboard and the PS/2 mouse
-device atkbdc # AT keyboard controller
-device atkbd # AT keyboard
-device psm # PS/2 mouse
-
-device vga # VGA video card driver
-
-device splash # Splash screen and screen saver support
-
-# syscons is the default console driver, resembling an SCO console
-device sc
-
-# Enable this for the pcvt (VT220 compatible) console driver
-#device vt
-#options XSERVER # support for X server on a vt console
-#options FAT_CURSOR # start with block cursor
-
-device agp # support several AGP chipsets
-
-# Floating point support - do not disable.
-device npx
-
-# Power management support (see NOTES for more options)
-#device apm
-# Add suspend/resume support for the i8254.
-device pmtimer
-
-# PCCARD (PCMCIA) support
-# Pcmcia and cardbus bridge support
-device cbb # cardbus (yenta) bridge
-#device pcic # ExCA ISA and PCI bridges
-device pccard # PC Card (16-bit) bus
-device cardbus # CardBus (32-bit) bus
-
-# Serial (COM) ports
-device sio # 8250, 16[45]50 based serial ports
-
-# Parallel port
-device ppc
-device ppbus # Parallel port bus (required)
-device lpt # Printer
-device plip # TCP/IP over parallel
-device ppi # Parallel port interface device
-#device vpo # Requires scbus and da
-
-# If you've got a "dumb" serial or parallel PCI card that is
-# supported by the puc(4) glue driver, uncomment the following
-# line to enable it (connects to the sio and/or ppc drivers):
-#device puc
-
-# PCI Ethernet NICs.
-device de # DEC/Intel DC21x4x (``Tulip'')
-device em # Intel PRO/1000 adapter Gigabit Ethernet Card
-device txp # 3Com 3cR990 (``Typhoon'')
-device vx # 3Com 3c590, 3c595 (``Vortex'')
-
-# PCI Ethernet NICs that use the common MII bus controller code.
-# NOTE: Be sure to keep the 'device miibus' line in order to use these NICs!
-device miibus # MII bus support
-device bfe # Broadcom BCM440x 10/100 ethernet
-device bge # Broadcom BCM570xx Gigabit Ethernet
-device dc # DEC/Intel 21143 and various workalikes
-device fxp # Intel EtherExpress PRO/100B (82557, 82558)
-device pcn # AMD Am79C97x PCI 10/100 (precedence over
'lnc')
-device re # RealTek 8139C+/8169/8169S/8110S
-device rl # RealTek 8129/8139
-device sf # Adaptec AIC-6915 (``Starfire'')
-device sis # Silicon Integrated Systems SiS 900/SiS 7016
-device sk # SysKonnect SK-984x and SK-982x gigabit
ethernet
-device ste # Sundance ST201 (D-Link DFE-550TX)
-device ti # Alteon Networks Tigon I/II gigabit ethernet
-device tl # Texas Instruments ThunderLAN
-device tx # SMC EtherPower II (83c170 ``EPIC'')
-device vr # VIA Rhine, Rhine II
-device wb # Winbond W89C840F
-device xl # 3Com 3c90x (``Boomerang'', ``Cyclone'')
-
-# ISA Ethernet NICs. pccard nics included.
-device cs # Crystal Semiconductor CS89x0 NIC
-# 'device ed' requires 'device miibus'
-device ed # NE[12]000, SMC Ultra, 3c503, DS8390 cards
-device ex # Intel EtherExpress Pro/10 and Pro/10+
-device ep # Etherlink III based cards
-device fe # Fujitsu MB8696x based cards
-device ie # EtherExpress 8/16, 3C507, StarLAN 10 etc.
-device lnc # NE2100, NE32-VL Lance Ethernet cards
-device sn # SMC's 9000 series of ethernet chips
-device xe # Xircom pccard ethernet
-
-# ISA devices that use the old ISA shims
-#device le
-
-# Wireless NIC cards
-device wlan # 802.11 support
-device an # Aironet 4500/4800 802.11 wireless NICs.
-device awi # BayStack 660 and others
-device wi # WaveLAN/Intersil/Symbol 802.11 wireless NICs.
-#device wl # Older non 802.11 Wavelan wireless NIC.
-
-# Pseudo devices - the number indicates how many units to allocate.
-device random # Entropy device
-device loop # Network loopback
-device ether # Ethernet support
-device sl # Kernel SLIP
-device ppp # Kernel PPP
-device tun # Packet tunnel.
-device pty # Pseudo-ttys (telnet etc)
-device md # Memory "disks"
-device gif # IPv6 and IPv4 tunneling
-device faith # IPv6-to-IPv4 relaying (translation)
-
-# The `bpf' device enables the Berkeley Packet Filter.
-# Be aware of the administrative consequences of enabling this!
-device bpf # Berkeley packet filter
-
-# USB support
-device uhci # UHCI PCI->USB interface
-device ohci # OHCI PCI->USB interface
-device usb # USB Bus (required)
-#device udbp # USB Double Bulk Pipe devices
-device ugen # Generic
-device uhid # "Human Interface Devices"
-device ukbd # Keyboard
-device ulpt # Printer
-device umass # Disks/Mass storage - Requires scbus and da
-device ums # Mouse
-device urio # Diamond Rio 500 MP3 player
-device uscanner # Scanners
-# USB Ethernet, requires mii
-device aue # ADMtek USB ethernet
-device axe # ASIX Electronics USB ethernet
-device cue # CATC USB ethernet
-device kue # Kawasaki LSI USB ethernet
-
-# FireWire support
-device firewire # FireWire bus code
-device sbp # SCSI over FireWire (Requires scbus and da)
-device fwe # Ethernet over FireWire (non-standard!)
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/conf/GENERIC.hints
--- a/freebsd-5.3-xen-sparse/i386-xen/conf/GENERIC.hints Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,93 +0,0 @@
-# $FreeBSD: src/sys/i386/conf/GENERIC.hints,v 1.11 2002/12/05 22:49:47 jhb Exp
$
-hint.fdc.0.at="isa"
-hint.fdc.0.port="0x3F0"
-hint.fdc.0.irq="6"
-hint.fdc.0.drq="2"
-hint.fd.0.at="fdc0"
-hint.fd.0.drive="0"
-hint.fd.1.at="fdc0"
-hint.fd.1.drive="1"
-hint.ata.0.at="isa"
-hint.ata.0.port="0x1F0"
-hint.ata.0.irq="14"
-hint.ata.1.at="isa"
-hint.ata.1.port="0x170"
-hint.ata.1.irq="15"
-hint.adv.0.at="isa"
-hint.adv.0.disabled="1"
-hint.bt.0.at="isa"
-hint.bt.0.disabled="1"
-hint.aha.0.at="isa"
-hint.aha.0.disabled="1"
-hint.aic.0.at="isa"
-hint.aic.0.disabled="1"
-hint.atkbdc.0.at="isa"
-hint.atkbdc.0.port="0x060"
-hint.atkbd.0.at="atkbdc"
-hint.atkbd.0.irq="1"
-hint.atkbd.0.flags="0x1"
-hint.psm.0.at="atkbdc"
-hint.psm.0.irq="12"
-hint.vga.0.at="isa"
-hint.sc.0.at="isa"
-hint.sc.0.flags="0x100"
-hint.vt.0.at="isa"
-hint.vt.0.disabled="1"
-hint.apm.0.disabled="1"
-hint.apm.0.flags="0x20"
-hint.pcic.0.at="isa"
-# hint.pcic.0.irq="10" # Default to polling
-hint.pcic.0.port="0x3e0"
-hint.pcic.0.maddr="0xd0000"
-hint.pcic.1.at="isa"
-hint.pcic.1.irq="11"
-hint.pcic.1.port="0x3e2"
-hint.pcic.1.maddr="0xd4000"
-hint.pcic.1.disabled="1"
-hint.sio.0.at="isa"
-hint.sio.0.port="0x3F8"
-hint.sio.0.flags="0x10"
-hint.sio.0.irq="4"
-hint.sio.1.at="isa"
-hint.sio.1.port="0x2F8"
-hint.sio.1.irq="3"
-hint.sio.2.at="isa"
-hint.sio.2.disabled="1"
-hint.sio.2.port="0x3E8"
-hint.sio.2.irq="5"
-hint.sio.3.at="isa"
-hint.sio.3.disabled="1"
-hint.sio.3.port="0x2E8"
-hint.sio.3.irq="9"
-hint.ppc.0.at="isa"
-hint.ppc.0.irq="7"
-hint.ed.0.at="isa"
-hint.ed.0.disabled="1"
-hint.ed.0.port="0x280"
-hint.ed.0.irq="10"
-hint.ed.0.maddr="0xd8000"
-hint.cs.0.at="isa"
-hint.cs.0.disabled="1"
-hint.cs.0.port="0x300"
-hint.sn.0.at="isa"
-hint.sn.0.disabled="1"
-hint.sn.0.port="0x300"
-hint.sn.0.irq="10"
-hint.ie.0.at="isa"
-hint.ie.0.disabled="1"
-hint.ie.0.port="0x300"
-hint.ie.0.irq="10"
-hint.ie.0.maddr="0xd0000"
-hint.fe.0.at="isa"
-hint.fe.0.disabled="1"
-hint.fe.0.port="0x300"
-hint.le.0.at="isa"
-hint.le.0.disabled="1"
-hint.le.0.port="0x300"
-hint.le.0.irq="5"
-hint.le.0.maddr="0xd0000"
-hint.lnc.0.at="isa"
-hint.lnc.0.disabled="1"
-hint.lnc.0.port="0x280"
-hint.lnc.0.irq="10"
-hint.lnc.0.drq="0"
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/conf/Makefile
--- a/freebsd-5.3-xen-sparse/i386-xen/conf/Makefile Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,3 +0,0 @@
-# $FreeBSD: src/sys/i386/conf/Makefile,v 1.9 2003/02/26 23:36:58 ru Exp $
-
-.include "${.CURDIR}/../../conf/makeLINT.mk"
diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/conf/NOTES
--- a/freebsd-5.3-xen-sparse/i386-xen/conf/NOTES Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,1115 +0,0 @@
-#
-# NOTES -- Lines that can be cut/pasted into kernel and hints configs.
-#
-# This file contains machine dependent kernel configuration notes. For
-# machine independent notes, look in /sys/conf/NOTES.
-#
-# $FreeBSD: src/sys/i386/conf/NOTES,v 1.1108 2003/12/04 19:57:56 phk Exp $
-#
-
-#
-# This directive is mandatory; it defines the architecture to be
-# configured for; in this case, the 386 family based IBM-PC and
-# compatibles.
-#
-machine i386
-
-#
-# We want LINT to cover profiling as well
-profile 2
-
-
-#####################################################################
-# SMP OPTIONS:
-#
-# The apic device enables the use of the I/O APIC for interrupt delivery.
-# The apic device can be used in both UP and SMP kernels, but is required
-# for SMP kernels. Thus, the apic device is not strictly an SMP option,
-# but it is a prerequisite for SMP.
-#
-# Notes:
-#
-# Be sure to disable 'cpu I386_CPU' for SMP kernels.
-#
-# By default, mixed mode is used to route IRQ0 from the AT timer via
-# the 8259A master PIC through the ExtINT pin on the first I/O APIC.
-# This can be disabled via the NO_MIXED_MODE option. In that case,
-# IRQ0 will be routed via an intpin on the first I/O APIC. Not all
-# motherboards hook IRQ0 up to the first I/O APIC even though their
-# MP table or MADT may claim to do so. That is why mixed mode is
-# enabled by default.
-#
-# HTT CPUs should only be used if they are enabled in the BIOS. For
-# the ACPI case, ACPI only correctly tells us about any HTT CPUs if
-# they are enabled. However, most HTT systems do not list HTT CPUs
-# in the MP Table if they are enabled, thus we guess at the HTT CPUs
-# for the MP Table case. However, we shouldn't try to guess and use
-# these CPUs if HTTT is disabled. Thus, HTT guessing is only enabled
-# for the MP Table if the user explicitly asks for it via the
-# MPTABLE_FORCE_HTT option. Do NOT use this option if you have HTT
-# disabled in your BIOS.
-#
-
-# Mandatory:
-device apic # I/O apic
-
-# Optional:
-options MPTABLE_FORCE_HTT # Enable HTT CPUs with the MP
Table
-options NO_MIXED_MODE # Disable use of mixed mode
-
-
-#####################################################################
-# CPU OPTIONS
-
-#
-# You must specify at least one CPU (the one you intend to run on);
-# deleting the specification for CPUs you don't need to use may make
-# parts of the system run faster.
-# I386_CPU is mutually exclusive with the other CPU types.
-#
-#cpu I386_CPU
-cpu I486_CPU
-cpu I586_CPU # aka Pentium(tm)
-cpu I686_CPU # aka Pentium Pro(tm)
-
-#
-# Options for CPU features.
-#
-# CPU_ATHLON_SSE_HACK tries to enable SSE instructions when the BIOS has
-# forgotten to enable them.
-#
-# CPU_BLUELIGHTNING_FPU_OP_CACHE enables FPU operand cache on IBM
-# BlueLightning CPU. It works only with Cyrix FPU, and this option
-# should not be used with Intel FPU.
-#
-# CPU_BLUELIGHTNING_3X enables triple-clock mode on IBM Blue Lightning
-# CPU if CPU supports it. The default is double-clock mode on
-# BlueLightning CPU box.
-#
-# CPU_BTB_EN enables branch target buffer on Cyrix 5x86 (NOTE 1).
-#
-# CPU_DIRECT_MAPPED_CACHE sets L1 cache of Cyrix 486DLC CPU in direct
-# mapped mode. Default is 2-way set associative mode.
-#
-# CPU_CYRIX_NO_LOCK enables weak locking for the entire address space
-# of Cyrix 6x86 and 6x86MX CPUs by setting the NO_LOCK bit of CCR1.
-# Otherwise, the NO_LOCK bit of CCR1 is cleared. (NOTE 3)
-#
-# CPU_DISABLE_5X86_LSSER disables load store serialize (i.e. enables
-# reorder). This option should not be used if you use memory mapped
-# I/O device(s).
-#
-# CPU_ELAN enables support for AMDs ElanSC520 CPU.
-# CPU_ELAN_XTAL sets the clock crystal frequency in Hz
-# CPU_ELAN_PPS enables precision timestamp code.
-#
-# CPU_SOEKRIS enables support www.soekris.com hardware.
-#
-# CPU_ENABLE_SSE enables SSE/MMX2 instructions support. This is default
-# on I686_CPU and above.
-# CPU_DISABLE_SSE explicitly prevent I686_CPU from turning on SSE.
-#
-# CPU_FASTER_5X86_FPU enables faster FPU exception handler.
-#
-# CPU_I486_ON_386 enables CPU cache on i486 based CPU upgrade products
-# for i386 machines.
-#
-# CPU_IORT defines I/O clock delay time (NOTE 1). Default values of
-# I/O clock delay time on Cyrix 5x86 and 6x86 are 0 and 7,respectively
-# (no clock delay).
-#
-# CPU_L2_LATENCY specifed the L2 cache latency value. This option is used
-# only when CPU_PPRO2CELERON is defined and Mendocino Celeron is detected.
-# The default value is 5.
-#
-# CPU_LOOP_EN prevents flushing the prefetch buffer if the destination
-# of a jump is already present in the prefetch buffer on Cyrix 5x86(NOTE
-# 1).
-#
-# CPU_PPRO2CELERON enables L2 cache of Mendocino Celeron CPUs. This option
-# is useful when you use Socket 8 to Socket 370 converter, because most Pentium
-# Pro BIOSs do not enable L2 cache of Mendocino Celeron CPUs.
-#
-# CPU_RSTK_EN enables return stack on Cyrix 5x86 (NOTE 1).
-#
-# CPU_SUSP_HLT enables suspend on HALT. If this option is set, CPU
-# enters suspend mode following execution of HALT instruction.
-#
-# CPU_UPGRADE_HW_CACHE eliminates unneeded cache flush instruction(s).
-#
-# CPU_WT_ALLOC enables write allocation on Cyrix 6x86/6x86MX and AMD
-# K5/K6/K6-2 cpus.
-#
-# CYRIX_CACHE_WORKS enables CPU cache on Cyrix 486 CPUs with cache
-# flush at hold state.
-#
-# CYRIX_CACHE_REALLY_WORKS enables (1) CPU cache on Cyrix 486 CPUs
-# without cache flush at hold state, and (2) write-back CPU cache on
-# Cyrix 6x86 whose revision < 2.7 (NOTE 2).
-#
-# NO_F00F_HACK disables the hack that prevents Pentiums (and ONLY
-# Pentiums) from locking up when a LOCK CMPXCHG8B instruction is
-# executed. This option is only needed if I586_CPU is also defined,
-# and should be included for any non-Pentium CPU that defines it.
-#
-# NO_MEMORY_HOLE is an optimisation for systems with AMD K6 processors
-# which indicates that the 15-16MB range is *definitely* not being
-# occupied by an ISA memory hole.
-#
-# CPU_DISABLE_CMPXCHG disables the CMPXCHG instruction on > i386 IA32
-# machines. VmWare seems to emulate this instruction poorly, causing
-# the guest OS to run very slowly. Enabling this with a SMP kernel
-# will cause the kernel to be unusable.
-#
-# NOTE 1: The options, CPU_BTB_EN, CPU_LOOP_EN, CPU_IORT,
-# CPU_LOOP_EN and CPU_RSTK_EN should not be used because of CPU bugs.
-# These options may crash your system.
-#
-# NOTE 2: If CYRIX_CACHE_REALLY_WORKS is not set, CPU cache is enabled
-# in write-through mode when revision < 2.7. If revision of Cyrix
-# 6x86 >= 2.7, CPU cache is always enabled in write-back mode.
-#
-# NOTE 3: This option may cause failures for software that requires
-# locked cycles in order to operate correctly.
-#
-options CPU_ATHLON_SSE_HACK
-options CPU_BLUELIGHTNING_FPU_OP_CACHE
-options CPU_BLUELIGHTNING_3X
-options CPU_BTB_EN
-options CPU_DIRECT_MAPPED_CACHE
-options CPU_DISABLE_5X86_LSSER
-options CPU_ELAN
-options CPU_SOEKRIS
-options CPU_ELAN_XTAL=32768000
-options CPU_ELAN_PPS
-options CPU_ENABLE_SSE
-#options CPU_DISABLE_SSE
-options CPU_FASTER_5X86_FPU
-options CPU_I486_ON_386
-options CPU_IORT
-options CPU_L2_LATENCY=5
-options CPU_LOOP_EN
-options CPU_PPRO2CELERON
-options CPU_RSTK_EN
-options CPU_SUSP_HLT
-options CPU_UPGRADE_HW_CACHE
-options CPU_WT_ALLOC
-options CYRIX_CACHE_WORKS
-options CYRIX_CACHE_REALLY_WORKS
-#options NO_F00F_HACK
-options CPU_DISABLE_CMPXCHG
-
-# Debug options
-options NPX_DEBUG # enable npx debugging (FPU/math emu)
- #new math emulator
-
-#
-# PERFMON causes the driver for Pentium/Pentium Pro performance counters
-# to be compiled. See perfmon(4) for more information.
-#
-options PERFMON
-
-
-#####################################################################
-# NETWORKING OPTIONS
-
-#
-# DEVICE_POLLING adds support for mixed interrupt-polling handling
-# of network device drivers, which has significant benefits in terms
-# of robustness to overloads and responsivity, as well as permitting
-# accurate scheduling of the CPU time between kernel network processing
-# and other activities. The drawback is a moderate (up to 1/HZ seconds)
-# potential increase in response times.
-# It is strongly recommended to use HZ=1000 or 2000 with DEVICE_POLLING
-# to achieve smoother behaviour.
-# Additionally, you can enable/disable polling at runtime with the
-# sysctl variable kern.polling.enable (defaults off), and select
-# the CPU fraction reserved to userland with the sysctl variable
-# kern.polling.user_frac (default 50, range 0..100).
-#
-# Only the "dc" "fxp" and "sis" devices support this mode of operation at
-# the time of this writing.
-
-options DEVICE_POLLING
-
-
-#####################################################################
-# CLOCK OPTIONS
-
-# The following options are used for debugging clock behavior only, and
-# should not be used for production systems.
-#
-# CLK_CALIBRATION_LOOP will run the clock calibration loop at startup
-# until the user presses a key.
-
-options CLK_CALIBRATION_LOOP
-
-# The following two options measure the frequency of the corresponding
-# clock relative to the RTC (onboard mc146818a).
-
-options CLK_USE_I8254_CALIBRATION
-options CLK_USE_TSC_CALIBRATION
-
-
-#####################################################################
-# MISCELLANEOUS DEVICES AND OPTIONS
-
-device speaker #Play IBM BASIC-style noises out your speaker
-hint.speaker.0.at="isa"
-hint.speaker.0.port="0x61"
-device gzip #Exec gzipped a.out's. REQUIRES COMPAT_AOUT!
-device apm_saver # Requires APM
-
-
-#####################################################################
-# HARDWARE BUS CONFIGURATION
-
-#
-# ISA bus
-#
-device isa
-
-#
-# Options for `isa':
-#
-# AUTO_EOI_1 enables the `automatic EOI' feature for the master 8259A
-# interrupt controller. This saves about 0.7-1.25 usec for each interrupt.
-# This option breaks suspend/resume on some portables.
-#
-# AUTO_EOI_2 enables the `automatic EOI' feature for the slave 8259A
-# interrupt controller. This saves about 0.7-1.25 usec for each interrupt.
-# Automatic EOI is documented not to work for for the slave with the
-# original i8259A, but it works for some clones and some integrated
-# versions.
-#
-# MAXMEM specifies the amount of RAM on the machine; if this is not
-# specified, FreeBSD will first read the amount of memory from the CMOS
-# RAM, so the amount of memory will initially be limited to 64MB or 16MB
-# depending on the BIOS. If the BIOS reports 64MB, a memory probe will
-# then attempt to detect the installed amount of RAM. If this probe
-# fails to detect >64MB RAM you will have to use the MAXMEM option.
-# The amount is in kilobytes, so for a machine with 128MB of RAM, it would
-# be 131072 (128 * 1024).
-#
-# BROKEN_KEYBOARD_RESET disables the use of the keyboard controller to
-# reset the CPU for reboot. This is needed on some systems with broken
-# keyboard controllers.
-
-options COMPAT_OLDISA #Use ISA shims and glue for old drivers
-options AUTO_EOI_1
-#options AUTO_EOI_2
-
-options MAXMEM=(128*1024)
-#options BROKEN_KEYBOARD_RESET
-
-#
-# EISA bus
-#
-# The EISA bus device is `eisa'. It provides auto-detection and
-# configuration support for all devices on the EISA bus.
-
-device eisa
-
-# By default, only 10 EISA slots are probed, since the slot numbers
-# above clash with the configuration address space of the PCI subsystem,
-# and the EISA probe is not very smart about this. This is sufficient
-# for most machines, but in particular the HP NetServer LC series comes
-# with an onboard AIC7770 dual-channel SCSI controller on EISA slot #11,
-# thus you need to bump this figure to 12 for them.
-options EISA_SLOTS=12
-
-#
-# MCA bus:
-#
-# The MCA bus device is `mca'. It provides auto-detection and
-# configuration support for all devices on the MCA bus.
-# No hints are required for MCA.
-
-device mca
-
-#
-# PCI bus & PCI options:
-#
-device pci
-
-#
-# AGP GART support
-device agp
-
-
-#####################################################################
-# HARDWARE DEVICE CONFIGURATION
-
-#
-# Mandatory devices:
-#
-
-# To include support for VGA VESA video modes
-options VESA
-
-# Turn on extra debugging checks and output for VESA support.
-options VESA_DEBUG
-
-# The pcvt console driver (vt220 compatible).
-device vt
-hint.vt.0.at="isa"
-options XSERVER # support for running an X server on vt
-options FAT_CURSOR # start with block cursor
-# This PCVT option is for keyboards such as those used on really old ThinkPads
-options PCVT_SCANSET=2
-# Other PCVT options are documented in pcvt(4).
-options PCVT_24LINESDEF
-options PCVT_CTRL_ALT_DEL
-options PCVT_META_ESC
-options PCVT_NSCREENS=9
-options PCVT_PRETTYSCRNS
-options PCVT_SCREENSAVER
-options PCVT_USEKBDSEC
-options PCVT_VT220KEYB
-options PCVT_GREENSAVER
-
-#
-# The Numeric Processing eXtension driver. In addition to this, you
-# may configure a math emulator (see above). If your machine has a
-# hardware FPU and the kernel configuration includes the npx device
-# *and* a math emulator compiled into the kernel, the hardware FPU
-# will be used, unless it is found to be broken or unless "flags" to
-# npx0 includes "0x08", which requests preference for the emulator.
-device npx
-hint.npx.0.flags="0x0"
-hint.npx.0.irq="13"
-
-#
-# `flags' for npx0:
-# 0x01 don't use the npx registers to optimize bcopy.
-# 0x02 don't use the npx registers to optimize bzero.
-# 0x04 don't use the npx registers to optimize copyin or copyout.
-# 0x08 use emulator even if hardware FPU is available.
-# The npx registers are normally used to optimize copying and zeroing when
-# all of the following conditions are satisfied:
-# I586_CPU is an option
-# the cpu is an i586 (perhaps not a Pentium)
-# the probe for npx0 succeeds
-# INT 16 exception handling works.
-# Then copying and zeroing using the npx registers is normally 30-100% faster.
-# The flags can be used to control cases where it doesn't work or is slower.
-# Setting them at boot time using userconfig works right (the optimizations
-# are not used until later in the bootstrap when npx0 is attached).
-# Flag 0x08 automatically disables the i586 optimized routines.
-#
-
-#
-# Optional devices:
-#
-
-# 3Dfx Voodoo Graphics, Voodoo II /dev/3dfx CDEV support. This will create
-# the /dev/3dfx0 device to work with glide implementations. This should get
-# linked to /dev/3dfx and /dev/voodoo. Note that this is not the same as
-# the tdfx DRI module from XFree86 and is completely unrelated.
-#
-# To enable Linuxulator support, one must also include COMPAT_LINUX in the
-# config as well, or you will not have the dependencies. The other option
-# is to load both as modules.
-
-device tdfx # Enable 3Dfx Voodoo support
-options TDFX_LINUX # Enable Linuxulator support
-
-#
-# ACPI support using the Intel ACPI Component Architecture reference
-# implementation.
-#
-# ACPI_DEBUG enables the use of the debug.acpi.level and debug.acpi.layer
-# kernel environment variables to select initial debugging levels for the
-# Intel ACPICA code. (Note that the Intel code must also have USE_DEBUGGER
-# defined when it is built).
-#
-# ACPI_MAX_THREADS sets the number of task threads started.
-#
-# ACPI_NO_SEMAPHORES makes the AcpiOs*Semaphore routines a no-op.
-#
-# ACPICA_PEDANTIC enables strict checking of AML. Our default is to
-# relax these checks to allow code generated by the Microsoft compiler
-# to still execute.
-#
-# Note that building ACPI into the kernel is deprecated; the module is
-# normally loaded automatically by the loader.
-#
-device acpi
-options ACPI_DEBUG
-options ACPI_MAX_THREADS=1
-#!options ACPI_NO_SEMAPHORES
-#!options ACPICA_PEDANTIC
-
-# DRM options:
-# mgadrm: AGP Matrox G200, G400, G450, G550
-# r128drm: ATI Rage 128
-# radeondrm: ATI Radeon up to 9000/9100
-# sisdrm: SiS 300/305,540,630
-# tdfxdrm: 3dfx Voodoo 3/4/5 and Banshee
-# DRM_DEBUG: include debug printfs, very slow
-#
-# mga requires AGP in the kernel, and it is recommended
-# for AGP r128 and radeon cards.
-
-device mgadrm
-device "r128drm"
-device radeondrm
-device sisdrm
-device tdfxdrm
-
-options DRM_DEBUG
-
-# M-systems DiskOnchip products see src/sys/contrib/dev/fla/README
-device fla
-hint.fla.0.at="isa"
-
-#
-# mse: Logitech and ATI InPort bus mouse ports
-
-device mse
-hint.mse.0.at="isa"
-hint.mse.0.port="0x23c"
-hint.mse.0.irq="5"
-
-#
-# Network interfaces:
-#
-
-# ar: Arnet SYNC/570i hdlc sync 2/4 port V.35/X.21 serial driver
-# (requires sppp)
-# ath: Atheros a/b/g WiFi adapters (requires ath_hal and wlan)
-# cx: Cronyx/Sigma multiport sync/async (with Cisco or PPP framing)
-# ed: Western Digital and SMC 80xx; Novell NE1000 and NE2000; 3Com 3C503
-# HP PC Lan+, various PC Card devices (refer to etc/defauls/pccard.conf)
-# (requires miibus)
-# el: 3Com 3C501 (slow!)
-# ie: AT&T StarLAN 10 and EN100; 3Com 3C507; unknown NI5210;
-# Intel EtherExpress
-# le: Digital Equipment EtherWorks 2 and EtherWorks 3 (DEPCA, DE100,
-# DE101, DE200, DE201, DE202, DE203, DE204, DE205, DE422)
-# lnc: Lance/PCnet cards (Isolan, Novell NE2100, NE32-VL, AMD Am7990 and
-# Am79C960)
-# oltr: Olicom ISA token-ring adapters OC-3115, OC-3117, OC-3118 and OC-3133
-# (no hints needed).
-# Olicom PCI token-ring adapters OC-3136, OC-3137, OC-3139, OC-3140,
-# OC-3141, OC-3540, OC-3250
-# rdp: RealTek RTL 8002-based pocket ethernet adapters
-# sbni: Granch SBNI12-xx ISA and PCI adapters
-# sr: RISCom/N2 hdlc sync 1/2 port V.35/X.21 serial driver (requires sppp)
-# wl: Lucent Wavelan (ISA card only).
-
-# Order for ISA/EISA devices is important here
-
-device ar
-hint.ar.0.at="isa"
-hint.ar.0.port="0x300"
-hint.ar.0.irq="10"
-hint.ar.0.maddr="0xd0000"
-device cx
-hint.cx.0.at="isa"
-hint.cx.0.port="0x240"
-hint.cx.0.irq="15"
-hint.cx.0.drq="7"
-device ed
-#options ED_NO_MIIBUS # Disable ed miibus support
-hint.ed.0.at="isa"
-hint.ed.0.port="0x280"
-hint.ed.0.irq="5"
-hint.ed.0.maddr="0xd8000"
-device el 1
-hint.el.0.at="isa"
-hint.el.0.port="0x300"
-hint.el.0.irq="9"
-device ie # Hints only required for Starlan
-hint.ie.2.at="isa"
-hint.ie.2.port="0x300"
-hint.ie.2.irq="5"
-hint.ie.2.maddr="0xd0000"
-device le 1
-hint.le.0.at="isa"
-hint.le.0.port="0x300"
-hint.le.0.irq="5"
-hint.le.0.maddr="0xd0000"
-device lnc
-hint.lnc.0.at="isa"
-hint.lnc.0.port="0x280"
-hint.lnc.0.irq="10"
-hint.lnc.0.drq="0"
-device rdp 1
-hint.rdp.0.at="isa"
-hint.rdp.0.port="0x378"
-hint.rdp.0.irq="7"
-hint.rdp.0.flags="2"
-device sbni
-hint.sbni.0.at="isa"
-hint.sbni.0.port="0x210"
-hint.sbni.0.irq="0xefdead"
-hint.sbni.0.flags="0"
-device sr
-hint.sr.0.at="isa"
-hint.sr.0.port="0x300"
-hint.sr.0.irq="5"
-hint.sr.0.maddr="0xd0000"
-device oltr
-hint.oltr.0.at="isa"
-device wl
-hint.wl.0.at="isa"
-hint.wl.0.port="0x300"
-options WLCACHE # enables the signal-strength cache
-options WLDEBUG # enables verbose debugging output
-
-device ath
-device ath_hal # Atheros HAL (includes binary component)
-#device wlan # 802.11 layer
-
-#
-# ATA raid adapters
-#
-device pst
-
-#
-# SCSI host adapters:
-#
-# ncv: NCR 53C500 based SCSI host adapters.
-# nsp: Workbit Ninja SCSI-3 based PC Card SCSI host adapters.
-# stg: TMC 18C30, 18C50 based SCSI host adapters.
-
-device ncv
-device nsp
-device stg
-hint.stg.0.at="isa"
-hint.stg.0.port="0x140"
-hint.stg.0.port="11"
-
-#
-# Adaptec FSA RAID controllers, including integrated DELL controllers,
-# the Dell PERC 2/QC and the HP NetRAID-4M
-device aac
-device aacp # SCSI Passthrough interface (optional, CAM required)
-
-#
-# IBM (now Adaptec) ServeRAID controllers
-device ips
-
-#
-# SafeNet crypto driver: can be moved to the MI NOTES as soon as
-# it's tested on a big-endian machine
-#
-device safe # SafeNet 1141
-options SAFE_DEBUG # enable debugging support:
hw.safe.debug
-options SAFE_RNDTEST # enable rndtest support
-
-#####################################################################
-
-#
-# Miscellaneous hardware:
-#
-# wt: Wangtek and Archive QIC-02/QIC-36 tape drives
-# ctx: Cortex-I frame grabber
-# apm: Laptop Advanced Power Management (experimental)
-# pmtimer: Timer device driver for power management events (APM or ACPI)
-# spigot: The Creative Labs Video Spigot video-acquisition board
-# dgb: Digiboard PC/Xi and PC/Xe series driver (ALPHA QUALITY!)
-# digi: Digiboard driver
-# gp: National Instruments AT-GPIB and AT-GPIB/TNT board, PCMCIA-GPIB
-# asc: GI1904-based hand scanners, e.g. the Trust Amiscan Grey
-# gsc: Genius GS-4500 hand scanner.
-# spic: Sony Programmable I/O controller (VAIO notebooks)
-# stl: Stallion EasyIO and EasyConnection 8/32 (cd1400 based)
-# stli: Stallion EasyConnection 8/64, ONboard, Brumby (intelligent)
-
-# Notes on APM
-# The flags takes the following meaning for apm0:
-# 0x0020 Statclock is broken.
-# If apm is omitted, some systems require sysctl kern.timecounter.method=1
-# for correct timekeeping.
-
-# Notes on the spigot:
-# The video spigot is at 0xad6. This port address can not be changed.
-# The irq values may only be 10, 11, or 15
-# I/O memory is an 8kb region. Possible values are:
-# 0a0000, 0a2000, ..., 0fffff, f00000, f02000, ..., ffffff
-# The start address must be on an even boundary.
-# Add the following option if you want to allow non-root users to be able
-# to access the spigot. This option is not secure because it allows users
-# direct access to the I/O page.
-# options SPIGOT_UNSECURE
-
-# Notes on the Specialix SI/XIO driver:
-# The host card is memory, not IO mapped.
-# The Rev 1 host cards use a 64K chunk, on a 32K boundary.
-# The Rev 2 host cards use a 32K chunk, on a 32K boundary.
-# The cards can use an IRQ of 11, 12 or 15.
-
-# Notes on the Sony Programmable I/O controller
-# This is a temporary driver that should someday be replaced by something
-# that hooks into the ACPI layer. The device is hooked to the PIIX4's
-# General Device 10 decoder, which means you have to fiddle with PCI
-# registers to map it in, even though it is otherwise treated here as
-# an ISA device. At the moment, the driver polls, although the device
-# is capable of generating interrupts. It largely undocumented.
-# The port location in the hint is where you WANT the device to be
-# mapped. 0x10a0 seems to be traditional. At the moment the jogdial
-# is the only thing truly supported, but aparently a fair percentage
-# of the Vaio extra features are controlled by this device.
-
-# Notes on the Stallion stl and stli drivers:
-# See src/i386/isa/README.stl for complete instructions.
-# This is version 0.0.5alpha, unsupported by Stallion.
-# The stl driver has a secondary IO port hard coded at 0x280. You need
-# to change src/i386/isa/stallion.c if you reconfigure this on the boards.
-# The "flags" and "msize" settings on the stli driver depend on the board:
-# EasyConnection 8/64 ISA: flags 23 msize 0x1000
-# EasyConnection 8/64 EISA: flags 24 msize 0x10000
-# EasyConnection 8/64 MCA: flags 25 msize 0x1000
-# ONboard ISA: flags 4 msize 0x10000
-# ONboard EISA: flags 7 msize 0x10000
-# ONboard MCA: flags 3 msize 0x10000
-# Brumby: flags 2 msize 0x4000
-# Stallion: flags 1 msize 0x10000
-
-# Notes on the Digiboard PC/Xi and PC/Xe series driver
-#
-# The NDGBPORTS option specifies the number of ports controlled by the
-# dgb(4) driver. The default value is 16 ports per device.
-#
-# The following flag values have special meanings in dgb:
-# 0x01 - alternate layout of pins
-# 0x02 - use the windowed PC/Xe in 64K mode
-
-device wt 1
-hint.wt.0.at="isa"
-hint.wt.0.port="0x300"
-hint.wt.0.irq="5"
-hint.wt.0.drq="1"
-device ctx
-hint.ctx.0.at="isa"
-hint.ctx.0.port="0x230"
-hint.ctx.0.maddr="0xd0000"
-device spigot 1
-hint.spigot.0.at="isa"
-hint.spigot.0.port="0xad6"
-hint.spigot.0.irq="15"
-hint.spigot.0.maddr="0xee000"
-device apm
-hint.apm.0.flags="0x20"
-device pmtimer # Adjust system timer at wakeup time
-device gp
-hint.gp.0.at="isa"
-hint.gp.0.port="0x2c0"
-device gsc 1
-hint.gsc.0.at="isa"
-hint.gsc.0.port="0x270"
-hint.gsc.0.drq="3"
-device dgb 1
-options NDGBPORTS=17
-hint.dgb.0.at="isa"
-hint.dgb.0.port="0x220"
-hint.dgb.0.maddr="0xfc000"
-device digi
-hint.digi.0.at="isa"
-hint.digi.0.port="0x104"
-hint.digi.0.maddr="0xd0000"
-# BIOS & FEP/OS components of device digi.
-device digi_CX
-device digi_CX_PCI
-device digi_EPCX
-device digi_EPCX_PCI
-device digi_Xe
-device digi_Xem
-device digi_Xr
-device asc 1
-hint.asc.0.at="isa"
-hint.asc.0.port="0x3EB"
-hint.asc.0.drq="3"
-hint.asc.0.irq="10"
-device spic
-hint.spic.0.at="isa"
-hint.spic.0.port="0x10a0"
-device stl
-hint.stl.0.at="isa"
-hint.stl.0.port="0x2a0"
-hint.stl.0.irq="10"
-device stli
-hint.stli.0.at="isa"
-hint.stli.0.port="0x2a0"
-hint.stli.0.maddr="0xcc000"
-hint.stli.0.flags="23"
-hint.stli.0.msize="0x1000"
-# You are unlikely to have the hardware for loran <phk@xxxxxxxxxxx>
-device loran
-hint.loran.0.at="isa"
-hint.loran.0.irq="5"
-# HOT1 Xilinx 6200 card (http://www.vcc.com/)
-device xrpu
-
-#
-# Laptop/Notebook options:
-#
-# See also:
-# apm under `Miscellaneous hardware'
-# above.
-
-# For older notebooks that signal a powerfail condition (external
-# power supply dropped, or battery state low) by issuing an NMI:
-
-options POWERFAIL_NMI # make it beep instead of panicing
-
-#
-# I2C Bus
-#
-# Philips i2c bus support is provided by the `iicbus' device.
-#
-# Supported interfaces:
-# pcf Philips PCF8584 ISA-bus controller
-#
-device pcf
-hint.pcf.0.at="isa"
-hint.pcf.0.port="0x320"
-hint.pcf.0.irq="5"
-
-#---------------------------------------------------------------------------
-# ISDN4BSD
-#
-# See /usr/share/examples/isdn/ROADMAP for an introduction to isdn4bsd.
-#
-# i4b passive ISDN cards support contains the following hardware drivers:
-#
-# isic - Siemens/Infineon ISDN ISAC/HSCX/IPAC chipset driver
-# iwic - Winbond W6692 PCI bus ISDN S/T interface controller
-# ifpi - AVM Fritz!Card PCI driver
-# ifpi2 - AVM Fritz!Card PCI version 2 driver
-# ihfc - Cologne Chip HFC ISA/ISA-PnP chipset driver
-# ifpnp - AVM Fritz!Card PnP driver
-# itjc - Siemens ISAC / TJNet Tiger300/320 chipset
-#
-# i4b active ISDN cards support contains the following hardware drivers:
-#
-# iavc - AVM B1 PCI, AVM B1 ISA, AVM T1
-#
-# Note that the ``options'' (if given) and ``device'' lines must BOTH
-# be uncommented to enable support for a given card !
-#
-# In addition to a hardware driver (and probably an option) the mandatory
-# ISDN protocol stack devices and the mandatory support device must be
-# enabled as well as one or more devices from the optional devices section.
-#
-#---------------------------------------------------------------------------
-# isic driver (Siemens/Infineon chipsets)
-#
-device isic
-#
-# ISA bus non-PnP Cards:
-# ----------------------
-#
-# Teles S0/8 or Niccy 1008
-options TEL_S0_8
-hint.isic.0.at="isa"
-hint.isic.0.maddr="0xd0000"
-hint.isic.0.irq="5"
-hint.isic.0.flags="1"
-#
-# Teles S0/16 or Creatix ISDN-S0 or Niccy 1016
-options TEL_S0_16
-hint.isic.0.at="isa"
-hint.isic.0.port="0xd80"
-hint.isic.0.maddr="0xd0000"
-hint.isic.0.irq="5"
-hint.isic.0.flags="2"
-#
-# Teles S0/16.3
-options TEL_S0_16_3
-hint.isic.0.at="isa"
-hint.isic.0.port="0xd80"
-hint.isic.0.irq="5"
-hint.isic.0.flags="3"
-#
-# AVM A1 or AVM Fritz!Card
-options AVM_A1
-hint.isic.0.at="isa"
-hint.isic.0.port="0x340"
-hint.isic.0.irq="5"
-hint.isic.0.flags="4"
-#
-# USRobotics Sportster ISDN TA intern
-options USR_STI
-hint.isic.0.at="isa"
-hint.isic.0.port="0x268"
-hint.isic.0.irq="5"
-hint.isic.0.flags="7"
-#
-# ITK ix1 Micro ( < V.3, non-PnP version )
-options ITKIX1
-hint.isic.0.at="isa"
-hint.isic.0.port="0x398"
-hint.isic.0.irq="10"
-hint.isic.0.flags="18"
-#
-# ELSA PCC-16
-options ELSA_PCC16
-hint.isic.0.at="isa"
-hint.isic.0.port="0x360"
-hint.isic.0.irq="10"
-hint.isic.0.flags="20"
-#
-# ISA bus PnP Cards:
-# ------------------
-#
-# Teles S0/16.3 PnP
-options TEL_S0_16_3_P
-#
-# Creatix ISDN-S0 P&P
-options CRTX_S0_P
-#
-# Dr. Neuhaus Niccy Go@
-options DRN_NGO
-#
-# Sedlbauer Win Speed
-options SEDLBAUER
-#
-# Dynalink IS64PH
-options DYNALINK
-#
-# ELSA QuickStep 1000pro ISA
-options ELSA_QS1ISA
-#
-# Siemens I-Surf 2.0
-options SIEMENS_ISURF2
-#
-# Asuscom ISDNlink 128K ISA
-options ASUSCOM_IPAC
-#
-# Eicon Diehl DIVA 2.0 and 2.02
-options EICON_DIVA
-#
-# Compaq Microcom 610 ISDN card (Compaq series PSB2222I)
-options COMPAQ_M610
-#
-# PCI bus Cards:
-# --------------
-#
-# Cyclades Cyclom-Y PCI serial driver
-device cy 1
-options CY_PCI_FASTINTR # Use with cy_pci unless irq is shared
-hint.cy.0.at="isa"
-hint.cy.0.irq="10"
-hint.cy.0.maddr="0xd4000"
-hint.cy.0.msize="0x2000"
-#
-#---------------------------------------------------------------------------
-# ELSA MicroLink ISDN/PCI (same as ELSA QuickStep 1000pro PCI)
-options ELSA_QS1PCI
-#
-#
-#---------------------------------------------------------------------------
-# ifpnp driver for AVM Fritz!Card PnP
-#
-# AVM Fritz!Card PnP
-device ifpnp
-#
-#---------------------------------------------------------------------------
-# ihfc driver for Cologne Chip ISA chipsets (experimental!)
-#
-# Teles 16.3c ISA PnP
-# AcerISDN P10 ISA PnP
-# TELEINT ISDN SPEED No.1
-device ihfc
-#
-#---------------------------------------------------------------------------
-# ifpi driver for AVM Fritz!Card PCI
-#
-# AVM Fritz!Card PCI
-device ifpi
-#
-#---------------------------------------------------------------------------
-# ifpi2 driver for AVM Fritz!Card PCI version 2
-#
-# AVM Fritz!Card PCI version 2
-device "ifpi2"
-#
-#---------------------------------------------------------------------------
-# iwic driver for Winbond W6692 chipset
-#
-# ASUSCOM P-IN100-ST-D (and other Winbond W6692 based cards)
-device iwic
-#
-#---------------------------------------------------------------------------
-# itjc driver for Simens ISAC / TJNet Tiger300/320 chipset
-#
-# Traverse Technologies NETjet-S
-# Teles PCI-TJ
-device itjc
-#
-#---------------------------------------------------------------------------
-# iavc driver (AVM active cards, needs i4bcapi driver!)
-#
-device iavc
-#
-# AVM B1 ISA bus (PnP mode not supported!)
-# ----------------------------------------
-hint.iavc.0.at="isa"
-hint.iavc.0.port="0x150"
-hint.iavc.0.irq="5"
-#
-#---------------------------------------------------------------------------
-# ISDN Protocol Stack - mandatory for all hardware drivers
-#
-# Q.921 / layer 2 - i4b passive cards D channel handling
-device "i4bq921"
-#
-# Q.931 / layer 3 - i4b passive cards D channel handling
-device "i4bq931"
-#
-# layer 4 - i4b common passive and active card handling
-device "i4b"
-#
-#---------------------------------------------------------------------------
-# ISDN devices - mandatory for all hardware drivers
-#
-# userland driver to do ISDN tracing (for passive cards only)
-device "i4btrc" 4
-#
-# userland driver to control the whole thing
-device "i4bctl"
-#
-#---------------------------------------------------------------------------
-# ISDN devices - optional
-#
-# userland driver for access to raw B channel
-device "i4brbch" 4
-#
-# userland driver for telephony
-device "i4btel" 2
-#
-# network driver for IP over raw HDLC ISDN
-device "i4bipr" 4
-# enable VJ header compression detection for ipr i/f
-options IPR_VJ
-# enable logging of the first n IP packets to isdnd (n=32 here)
-options IPR_LOG=32
-#
-# network driver for sync PPP over ISDN; requires an equivalent
-# number of sppp device to be configured
-device "i4bisppp" 4
-#
-# B-channel interface to the netgraph subsystem
-device "i4bing" 2
-#
-# CAPI driver needed for active ISDN cards (see iavc driver above)
-device "i4bcapi"
-#
-#---------------------------------------------------------------------------
-
-#
-# Set the number of PV entries per process. Increasing this can
-# stop panics related to heavy use of shared memory. However, that can
-# (combined with large amounts of physical memory) cause panics at
-# boot time due the kernel running out of VM space.
-#
-# If you're tweaking this, you might also want to increase the sysctls
-# "vm.v_free_min", "vm.v_free_reserved", and "vm.v_free_target".
-#
-# The value below is the one more than the default.
-#
-options PMAP_SHPGPERPROC=201
-
-#
-# Change the size of the kernel virtual address space. Due to
-# constraints in loader(8) on i386, this must be a multiple of 4.
-# 256 = 1 GB of kernel address space. Increasing this also causes
-# a reduction of the address space in user processes. 512 splits
-# the 4GB cpu address space in half (2GB user, 2GB kernel).
-#
-options KVA_PAGES=260
-
-
-#####################################################################
-# ABI Emulation
-
-# Enable iBCS2 runtime support for SCO and ISC binaries
-options IBCS2
-
-# Emulate spx device for client side of SVR3 local X interface
-options SPX_HACK
-
-# Enable Linux ABI emulation
-options COMPAT_LINUX
-
-# Enable i386 a.out binary support
-options COMPAT_AOUT
-
-# Enable the linux-like proc filesystem support (requires COMPAT_LINUX
-# and PSEUDOFS)
-options LINPROCFS
-
-#
-# SysVR4 ABI emulation
-#
-# The svr4 ABI emulator can be statically compiled into the kernel or loaded as
-# a KLD module.
-# The STREAMS network emulation code can also be compiled statically or as a
-# module. If loaded as a module, it must be loaded before the svr4 module
-# (the /usr/sbin/svr4 script does this for you). If compiling statically,
-# the `streams' device must be configured into any kernel which also
-# specifies COMPAT_SVR4. It is possible to have a statically-configured
-# STREAMS device and a dynamically loadable svr4 emulator; the /usr/sbin/svr4
-# script understands that it doesn't need to load the `streams' module under
-# those circumstances.
-# Caveat: At this time, `options KTRACE' is required for the svr4 emulator
-# (whether static or dynamic).
-#
-options COMPAT_SVR4 # build emulator statically
-options DEBUG_SVR4 # enable verbose debugging
-device streams # STREAMS network driver (required for svr4).
-
-
-#####################################################################
-# VM OPTIONS
-
-# Disable the 4 MByte page PSE CPU feature. The PSE feature allows the
-# kernel to use a 4 MByte pages to map the kernel instead of 4k pages.
-# This saves on the amount of memory needed for page tables needed to
-# map the kernel. You should only disable this feature as a temporary
-# workaround if you are having problems with it enabled.
-#
-#options DISABLE_PSE
-
-# Disable the global pages PGE CPU feature. The PGE feature allows pages
-# to be marked with the PG_G bit. TLB entries for these pages are not
-# flushed from the cache when %cr3 is reloaded. This can make context
-# switches less expensive. You should only disable this feature as a
-# temporary workaround if you are having problems with it enabled.
-#
-#options DISABLE_PG_G
-
-# KSTACK_PAGES is the number of memory pages to assign to the kernel
-# stack of each thread.
-
-options KSTACK_PAGES=3
-
-#####################################################################
-
-# More undocumented options for linting.
-# Note that documenting these are not considered an affront.
-
-options FB_INSTALL_CDEV # install a CDEV entry in /dev
-
-# PECOFF module (Win32 Execution Format)
-options PECOFF_SUPPORT
-options PECOFF_DEBUG
-
-options ENABLE_ALART
-options I4B_SMP_WORKAROUND
-options I586_PMC_GUPROF=0x70000
-options KBDIO_DEBUG=2
-options KBD_MAXRETRY=4
-options KBD_MAXWAIT=6
-options KBD_RESETDELAY=201
-
-options PSM_DEBUG=1
-
-options TIMER_FREQ=((14318182+6)/12)
-
-options VM_KMEM_SIZE
-options VM_KMEM_SIZE_MAX
-options VM_KMEM_SIZE_SCALE
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/conf/OLDCARD
--- a/freebsd-5.3-xen-sparse/i386-xen/conf/OLDCARD Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,17 +0,0 @@
-#
-# OLDCARD -- Generic kernel configuration file for FreeBSD/i386
-# using the OLDCARD pccard system.
-#
-# $FreeBSD: src/sys/i386/conf/OLDCARD,v 1.18 2003/02/15 02:39:13 ru Exp $
-
-include GENERIC
-
-ident OLDCARD
-
-# PCCARD (PCMCIA) support
-nodevice cbb # cardbus (yenta) bridge
-#nodevice pcic # ExCA ISA and PCI bridges
-nodevice pccard # PC Card (16-bit) bus
-nodevice cardbus # CardBus (32-bit) bus
-device card 1 # pccard bus
-device pcic # PCMCIA bridge
diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/conf/PAE
--- a/freebsd-5.3-xen-sparse/i386-xen/conf/PAE Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,99 +0,0 @@
-#
-# PAE -- Generic kernel configuration file for FreeBSD/i386 PAE
-#
-# $FreeBSD: src/sys/i386/conf/PAE,v 1.8 2003/11/03 22:49:19 jhb Exp $
-
-include GENERIC
-
-ident PAE-GENERIC
-
-# To make a PAE kernel, the next option is needed
-options PAE # Physical Address Extensions
Kernel
-
-# Compile acpi in statically since the module isn't built properly. Most
-# machines which support large amounts of memory require acpi.
-device acpi
-
-# Don't build modules with this kernel config, since they are not built with
-# the correct options headers.
-makeoptions NO_MODULES=yes
-
-# What follows is a list of drivers that are normally in GENERIC, but either
-# don't work or are untested with PAE. Be very careful before enabling any
-# of these drivers. Drivers which use DMA and don't handle 64 bit physical
-# address properly may cause data corruption when used in a machine with more
-# than 4 gigabytes of memory.
-
-nodevice ahb
-nodevice amd
-nodevice isp
-nodevice sym
-nodevice trm
-
-nodevice adv
-nodevice adw
-nodevice aha
-nodevice aic
-nodevice bt
-
-nodevice ncv
-nodevice nsp
-nodevice stg
-
-nodevice asr
-nodevice dpt
-nodevice iir
-nodevice mly
-
-nodevice amr
-nodevice ida
-nodevice mlx
-nodevice pst
-
-nodevice agp
-
-nodevice de
-nodevice txp
-nodevice vx
-
-nodevice dc
-nodevice pcn
-nodevice rl
-nodevice sf
-nodevice sis
-nodevice ste
-nodevice tl
-nodevice tx
-nodevice vr
-nodevice wb
-
-nodevice cs
-nodevice ed
-nodevice ex
-nodevice ep
-nodevice fe
-nodevice ie
-nodevice lnc
-nodevice sn
-nodevice xe
-
-nodevice wlan
-nodevice an
-nodevice awi
-nodevice wi
-
-nodevice uhci
-nodevice ohci
-nodevice usb
-nodevice ugen
-nodevice uhid
-nodevice ukbd
-nodevice ulpt
-nodevice umass
-nodevice ums
-nodevice urio
-nodevice uscanner
-nodevice aue
-nodevice axe
-nodevice cue
-nodevice kue
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/conf/XENCONF
--- a/freebsd-5.3-xen-sparse/i386-xen/conf/XENCONF Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,137 +0,0 @@
-#
-# GENERIC -- Generic kernel configuration file for FreeBSD/i386
-#
-# For more information on this file, please read the handbook section on
-# Kernel Configuration Files:
-#
-#
http://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html
-#
-# The handbook is also available locally in /usr/share/doc/handbook
-# if you've installed the doc distribution, otherwise always see the
-# FreeBSD World Wide Web server (http://www.FreeBSD.org/) for the
-# latest information.
-#
-# An exhaustive list of options and more detailed explanations of the
-# device lines is also present in the ../../conf/NOTES and NOTES files.
-# If you are in doubt as to the purpose or necessity of a line, check first
-# in NOTES.
-#
-# $FreeBSD: src/sys/i386/conf/GENERIC,v 1.394.2.3 2004/01/26 19:42:11 nectar
Exp $
-
-machine i386-xen
-cpu I686_CPU
-ident XEN
-
-#To statically compile in device wiring instead of /boot/device.hints
-#hints "GENERIC.hints" #Default places to look for devices.
-
-makeoptions DEBUG=-g #Build kernel with gdb(1) debug symbols
-
-options SCHED_4BSD #4BSD scheduler
-options INET #InterNETworking
-options INET6 #IPv6 communications protocols
-options FFS #Berkeley Fast Filesystem
-options SOFTUPDATES #Enable FFS soft updates support
-options UFS_ACL #Support for access control lists
-options UFS_DIRHASH #Improve performance on big directories
-options MD_ROOT #MD is a potential root device
-options NFSCLIENT #Network Filesystem Client
-options NFSSERVER #Network Filesystem Server
-# options NFS_ROOT #NFS usable as /, requires NFSCLIENT
-#options MSDOSFS #MSDOS Filesystem
-#options CD9660 #ISO 9660 Filesystem
-options PROCFS #Process filesystem (requires PSEUDOFS)
-options PSEUDOFS #Pseudo-filesystem framework
-options COMPAT_43 #Compatible with BSD 4.3 [KEEP THIS!]
-options COMPAT_FREEBSD4 #Compatible with FreeBSD4
-options SCSI_DELAY=15000 #Delay (in ms) before probing SCSI
-options KTRACE #ktrace(1) support
-options SYSVSHM #SYSV-style shared memory
-options SYSVMSG #SYSV-style message queues
-options SYSVSEM #SYSV-style semaphores
-options _KPOSIX_PRIORITY_SCHEDULING #Posix P1003_1B real-time extensions
-options KBD_INSTALL_CDEV # install a CDEV entry in /dev
-options CPU_DISABLE_SSE # don't turn on SSE framework
with Xen
-#options PFIL_HOOKS # pfil(9) framework
-
-# Debugging for use in -current
-options KDB #Enable the kernel debugger
-options INVARIANTS #Enable calls of extra sanity checking
-options INVARIANT_SUPPORT #Extra sanity checks of internal
structures, required by INVARIANTS
-#options WITNESS #Enable checks to detect deadlocks and
cycles
-#options WITNESS_SKIPSPIN #Don't run witness on spinlocks for
speed
-
-# To make an SMP kernel, the next two are needed
-#options SMP # Symmetric MultiProcessor Kernel
-#device apic # I/O APIC
-
-# SCSI peripherals
-device scbus # SCSI bus (required for SCSI)
-#device ch # SCSI media changers
-device da # Direct Access (disks)
-#device sa # Sequential Access (tape etc)
-#device cd # CD
-device pass # Passthrough device (direct SCSI access)
-#device ses # SCSI Environmental Services (and
SAF-TE)
-
-# atkbdc0 controls both the keyboard and the PS/2 mouse
-#device atkbdc # AT keyboard controller
-#device atkbd # AT keyboard
-#device psm # PS/2 mouse
-
-# device vga # VGA video card driver
-
-#device splash # Splash screen and screen saver support
-
-# syscons is the default console driver, resembling an SCO console
-#device sc
-
-# Enable this for the pcvt (VT220 compatible) console driver
-#device vt
-#options XSERVER # support for X server on a vt console
-#options FAT_CURSOR # start with block cursor
-
-#device agp # support several AGP chipsets
-
-# Floating point support - do not disable.
-device npx
-
-# Serial (COM) ports
-#device sio # 8250, 16[45]50 based serial ports
-
-# Parallel port
-#device ppc
-#device ppbus # Parallel port bus (required)
-#device lpt # Printer
-#device plip # TCP/IP over parallel
-#device ppi # Parallel port interface device
-#device vpo # Requires scbus and da
-
-# If you've got a "dumb" serial or parallel PCI card that is
-# supported by the puc(4) glue driver, uncomment the following
-# line to enable it (connects to the sio and/or ppc drivers):
-#device puc
-
-
-# Pseudo devices - the number indicates how many units to allocate.
-device random # Entropy device
-device loop # Network loopback
-device ether # Ethernet support
-device tun # Packet tunnel.
-device pty # Pseudo-ttys (telnet etc)
-device md # Memory "disks"
-device gif # IPv6 and IPv4 tunneling
-device faith # IPv6-to-IPv4 relaying (translation)
-
-# The `bpf' device enables the Berkeley Packet Filter.
-# Be aware of the administrative consequences of enabling this!
-device bpf # Berkeley packet filter
-
-#options BOOTP
-options XEN
-options MCLSHIFT=12 # this has to be enabled for Xen as we
can only have one cluster per page
-options MSIZE=256
-options DIAGNOSTIC
-options MAXMEM=(256*1024)
-options NOXENDEBUG=1 # Turn off Debugging printfs
-
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/conf/gethints.awk
--- a/freebsd-5.3-xen-sparse/i386-xen/conf/gethints.awk Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,116 +0,0 @@
-#! /usr/bin/awk -f
-#
-# This is a transition aid. It extracts old-style configuration information
-# from a config file and writes an equivalent device.hints file to stdout.
-# You can use that with loader(8) or statically compile it in with the
-# 'hints' directive. See how GENERIC and GENERIC.hints fit together for
-# a static example. You should use loader(8) if at all possible.
-#
-# $FreeBSD: src/sys/i386/conf/gethints.awk,v 1.2 2002/07/26 03:52:30 peter Exp
$
-
-# skip commented lines, empty lines and not "device" lines
-/^[ \t]*#/ || /^[ \t]*$/ || !/[ \t]*device/ { next; }
-
-# input format :
-# device <name><unit> at <controler>[?] [key [val]]...
-# possible keys are :
-# disable, port #, irq #, drq #, drive #, iomem #, iosiz #,
-# flags #, bus #, target #, unit #.
-# output format :
-# hint.<name>.<unit>.<key>=<val>
-# mapped keys are :
-# iomem -> maddr, iosiz -> msize.
-{
- gsub ("#.*", ""); # delete comments
- gsub ("\"", ""); # and double-quotes
- nameunit = $2; # <name><unit>
- at = $3; # at
- controler = $4; # <controler>[?]
- rest = 5; # optional keys begin at indice 5
- if (at != "at" || controler == "")
- next; # skip devices w/o controlers
- name = nameunit;
- sub ("[0-9]*$", "", name); # get the name
- unit = nameunit;
- sub ("^" name, "", unit); # and the unit
- sub ("\?$", "", controler);
- printf "hint.%s.%s.at=\"%s\"\n", name, unit, controler;
- # for each keys, if any ?
- for (key = $rest; rest <= NF; key = $(++rest)) {
- # skip auto-detect keys (the one w/ a ?)
- if (key == "port?" || key == "drq?" || key == "irq?" || \
- key == "iomem?" || key == "iosiz?")
- continue;
- # disable has no value, so, give it one
- if (key == "disable") {
- printf "hint.%s.%s.disabled=\"1\"\n", name, unit;
- continue;
- }
- # recognized keys
- if (key == "port" || key == "irq" || key == "drq" || \
- key == "drive" || key == "iomem" || key == "iosiz" || \
- key == "flags" || key == "bus" || key == "target" || \
- key == "unit") {
- val = $(++rest);
- if (val == "?") # has above
- continue;
- if (key == "port") {
- # map port macros to static values
- sub ("IO_AHA0", "0x330", val);
- sub ("IO_AHA1", "0x334", val);
- sub ("IO_ASC1", "0x3EB", val);
- sub ("IO_ASC2", "0x22B", val);
- sub ("IO_ASC3", "0x26B", val);
- sub ("IO_ASC4", "0x2AB", val);
- sub ("IO_ASC5", "0x2EB", val);
- sub ("IO_ASC6", "0x32B", val);
- sub ("IO_ASC7", "0x36B", val);
- sub ("IO_ASC8", "0x3AB", val);
- sub ("IO_BT0", "0x330", val);
- sub ("IO_BT1", "0x334", val);
- sub ("IO_CGA", "0x3D0", val);
- sub ("IO_COM1", "0x3F8", val);
- sub ("IO_COM2", "0x2F8", val);
- sub ("IO_COM3", "0x3E8", val);
- sub ("IO_COM4", "0x2E8", val);
- sub ("IO_DMA1", "0x000", val);
- sub ("IO_DMA2", "0x0C0", val);
- sub ("IO_DMAPG", "0x080", val);
- sub ("IO_FD1", "0x3F0", val);
- sub ("IO_FD2", "0x370", val);
- sub ("IO_GAME", "0x201", val);
- sub ("IO_GSC1", "0x270", val);
- sub ("IO_GSC2", "0x2E0", val);
- sub ("IO_GSC3", "0x370", val);
- sub ("IO_GSC4", "0x3E0", val);
- sub ("IO_ICU1", "0x020", val);
- sub ("IO_ICU2", "0x0A0", val);
- sub ("IO_KBD", "0x060", val);
- sub ("IO_LPT1", "0x378", val);
- sub ("IO_LPT2", "0x278", val);
- sub ("IO_LPT3", "0x3BC", val);
- sub ("IO_MDA", "0x3B0", val);
- sub ("IO_NMI", "0x070", val);
- sub ("IO_NPX", "0x0F0", val);
- sub ("IO_PMP1", "0x026", val);
- sub ("IO_PMP2", "0x178", val);
- sub ("IO_PPI", "0x061", val);
- sub ("IO_RTC", "0x070", val);
- sub ("IO_TIMER1", "0x040", val);
- sub ("IO_TIMER2", "0x048", val);
- sub ("IO_UHA0", "0x330", val);
- sub ("IO_VGA", "0x3C0", val);
- sub ("IO_WD1", "0x1F0", val);
- sub ("IO_WD2", "0x170", val);
- } else {
- # map key names
- sub ("iomem", "maddr", key);
- sub ("iosiz", "msize", key);
- }
- printf "hint.%s.%s.%s=\"%s\"\n", name, unit, key, val;
- continue;
- }
- printf ("unrecognized config token '%s:%s' on line %s\n",
- rest, key, NR); # > "/dev/stderr";
- }
-}
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,559 +0,0 @@
-/*-
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * William Jolitz and Don Ahn.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * from: @(#)clock.c 7.2 (Berkeley) 5/12/91
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/isa/clock.c,v 1.207 2003/11/13 10:02:12 phk
Exp $");
-
-/* #define DELAYDEBUG */
-/*
- * Routines to handle clock hardware.
- */
-
-/*
- * inittodr, settodr and support routines written
- * by Christoph Robitschko <chmr@xxxxxxxxxxxxxxxxxx>
- *
- * reintroduced and updated by Chris Stenton <chris@xxxxxxxxxxx> 8/10/94
- */
-
-#include "opt_clock.h"
-#include "opt_isa.h"
-#include "opt_mca.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/bus.h>
-#include <sys/lock.h>
-#include <sys/mutex.h>
-#include <sys/proc.h>
-#include <sys/time.h>
-#include <sys/timetc.h>
-#include <sys/kernel.h>
-#include <sys/limits.h>
-#include <sys/sysctl.h>
-#include <sys/cons.h>
-#include <sys/power.h>
-
-#include <machine/clock.h>
-#include <machine/cputypes.h>
-#include <machine/frame.h>
-#include <machine/intr_machdep.h>
-#include <machine/md_var.h>
-#include <machine/psl.h>
-#if defined(SMP)
-#include <machine/smp.h>
-#endif
-#include <machine/specialreg.h>
-
-#include <i386/isa/icu.h>
-#include <i386/isa/isa.h>
-#include <isa/rtc.h>
-#include <i386/isa/timerreg.h>
-
-/* XEN specific defines */
-#include <machine/xen_intr.h>
-#include <vm/vm.h> /* needed by machine/pmap.h */
-#include <vm/pmap.h> /* needed by machine/pmap.h */
-#include <machine/pmap.h> /* needed by xen-os.h */
-#include <machine/hypervisor-ifs.h>
-#include <machine/xen-os.h> /* needed by xenfunc.h */
-#include <machine/xenfunc.h>
-
-/*
- * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
- * can use a simple formula for leap years.
- */
-#define LEAPYEAR(y) (((u_int)(y) % 4 == 0) ? 1 : 0)
-#define DAYSPERYEAR (31+28+31+30+31+30+31+31+30+31+30+31)
-
-int adjkerntz; /* local offset from GMT in seconds */
-int clkintr_pending;
-int disable_rtc_set = 1; /* disable resettodr() if != 0 */
-int pscnt = 1;
-int psdiv = 1;
-int statclock_disable;
-#ifndef TIMER_FREQ
-#define TIMER_FREQ 1193182
-#endif
-u_int timer_freq = TIMER_FREQ;
-struct mtx clock_lock;
-
-
-static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
-
-/* Values for timerX_state: */
-#define RELEASED 0
-#define RELEASE_PENDING 1
-#define ACQUIRED 2
-#define ACQUIRE_PENDING 3
-
-/* Cached *multiplier* to convert TSC counts to microseconds.
- * (see the equation below).
- * Equal to 2^32 * (1 / (clocks per usec) ).
- * Initialized in time_init.
- */
-static unsigned long fast_gettimeoffset_quotient;
-
-/* These are peridically updated in shared_info, and then copied here. */
-static uint32_t shadow_tsc_stamp;
-static uint64_t shadow_system_time;
-static uint32_t shadow_time_version;
-static struct timeval shadow_tv;
-
-#define DEFINE_PER_CPU(type, name) \
- __typeof__(type) per_cpu__##name
-
-#define per_cpu(var, cpu) (*((void)cpu, &per_cpu__##var))
-
-
-static uint64_t processed_system_time;/* System time (ns) at last processing.
*/
-static DEFINE_PER_CPU(uint64_t, processed_system_time);
-
-
-#define NS_PER_TICK (1000000000ULL/hz)
-
-/* convert from cycles(64bits) => nanoseconds (64bits)
- * basic equation:
- * ns = cycles / (freq / ns_per_sec)
- * ns = cycles * (ns_per_sec / freq)
- * ns = cycles * (10^9 / (cpu_mhz * 10^6))
- * ns = cycles * (10^3 / cpu_mhz)
- *
- * Then we use scaling math (suggested by george@xxxxxxxxxx) to get:
- * ns = cycles * (10^3 * SC / cpu_mhz) / SC
- * ns = cycles * cyc2ns_scale / SC
- *
- * And since SC is a constant power of two, we can convert the div
- * into a shift.
- * -johnstul@xxxxxxxxxx "math is hard, lets go shopping!"
- */
-static unsigned long cyc2ns_scale;
-#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
-
-static inline void set_cyc2ns_scale(unsigned long cpu_mhz)
-{
- cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz;
-}
-
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
-{
- return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
-}
-
-/*
- * Reads a consistent set of time-base values from Xen, into a shadow data
- * area. Must be called with the xtime_lock held for writing.
- */
-static void __get_time_values_from_xen(void)
-{
- shared_info_t *s = HYPERVISOR_shared_info;
-
- do {
- shadow_time_version = s->time_version2;
- rmb();
- shadow_tv.tv_sec = s->wc_sec;
- shadow_tv.tv_usec = s->wc_usec;
- shadow_tsc_stamp = (uint32_t)s->tsc_timestamp;
- shadow_system_time = s->system_time;
- rmb();
- }
- while (shadow_time_version != s->time_version1);
-}
-
-#define TIME_VALUES_UP_TO_DATE \
- (shadow_time_version == HYPERVISOR_shared_info->time_version2)
-
-static void (*timer_func)(struct clockframe *frame) = hardclock;
-
-static unsigned xen_get_offset(void);
-static unsigned xen_get_timecount(struct timecounter *tc);
-
-static struct timecounter xen_timecounter = {
- xen_get_timecount, /* get_timecount */
- 0, /* no poll_pps */
- ~0u, /* counter_mask */
- 0, /* frequency */
- "ixen", /* name */
- 0 /* quality */
-};
-
-
-static void
-clkintr(struct clockframe *frame)
-{
- int64_t cpu_delta, delta;
- int cpu = smp_processor_id();
- long ticks = 0;
-
- do {
- __get_time_values_from_xen();
- delta = cpu_delta = (int64_t)shadow_system_time +
- (int64_t)xen_get_offset() * 1000;
- delta -= processed_system_time;
- cpu_delta -= per_cpu(processed_system_time, cpu);
- } while (!TIME_VALUES_UP_TO_DATE);
-
- if (unlikely(delta < 0) || unlikely(cpu_delta < 0)) {
- printk("Timer ISR: Time went backwards: %lld\n", delta);
- return;
- }
-
- /* Process elapsed ticks since last call. */
- while ( delta >= NS_PER_TICK )
- {
- ticks++;
- delta -= NS_PER_TICK;
- processed_system_time += NS_PER_TICK;
- }
- /* Local CPU jiffy work. */
- while (cpu_delta >= NS_PER_TICK) {
- cpu_delta -= NS_PER_TICK;
- per_cpu(processed_system_time, cpu) += NS_PER_TICK;
-#if 0
- update_process_times(user_mode(regs));
- profile_tick(CPU_PROFILING, regs);
-#endif
- }
- if (ticks > 0) {
- if (frame) timer_func(frame);
- }
-
- if (cpu != 0)
- return;
- /*
- * Take synchronised time from Xen once a minute if we're not
- * synchronised ourselves, and we haven't chosen to keep an independent
- * time base.
- */
-
- /* XXX TODO */
-}
-
-#include "opt_ddb.h"
-static uint32_t
-getit(void)
-{
- __get_time_values_from_xen();
- return shadow_tsc_stamp;
-}
-
-/*
- * Wait "n" microseconds.
- * Relies on timer 1 counting down from (timer_freq / hz)
- * Note: timer had better have been programmed before this is first used!
- */
-void
-DELAY(int n)
-{
- int delta, ticks_left;
- uint32_t tick, prev_tick;
-#ifdef DELAYDEBUG
- int getit_calls = 1;
- int n1;
- static int state = 0;
-
- if (state == 0) {
- state = 1;
- for (n1 = 1; n1 <= 10000000; n1 *= 10)
- DELAY(n1);
- state = 2;
- }
- if (state == 1)
- printf("DELAY(%d)...", n);
-#endif
- /*
- * Read the counter first, so that the rest of the setup overhead is
- * counted. Guess the initial overhead is 20 usec (on most systems it
- * takes about 1.5 usec for each of the i/o's in getit(). The loop
- * takes about 6 usec on a 486/33 and 13 usec on a 386/20. The
- * multiplications and divisions to scale the count take a while).
- *
- * However, if ddb is active then use a fake counter since reading
- * the i8254 counter involves acquiring a lock. ddb must not go
- * locking for many reasons, but it calls here for at least atkbd
- * input.
- */
- prev_tick = getit();
-
- n -= 0; /* XXX actually guess no initial overhead */
- /*
- * Calculate (n * (timer_freq / 1e6)) without using floating point
- * and without any avoidable overflows.
- */
- if (n <= 0)
- ticks_left = 0;
- else if (n < 256)
- /*
- * Use fixed point to avoid a slow division by 1000000.
- * 39099 = 1193182 * 2^15 / 10^6 rounded to nearest.
- * 2^15 is the first power of 2 that gives exact results
- * for n between 0 and 256.
- */
- ticks_left = ((u_int)n * 39099 + (1 << 15) - 1) >> 15;
- else
- /*
- * Don't bother using fixed point, although gcc-2.7.2
- * generates particularly poor code for the long long
- * division, since even the slow way will complete long
- * before the delay is up (unless we're interrupted).
- */
- ticks_left = ((u_int)n * (long long)timer_freq + 999999)
- / 1000000;
-
- while (ticks_left > 0) {
- tick = getit();
-#ifdef DELAYDEBUG
- ++getit_calls;
-#endif
- delta = tick - prev_tick;
- prev_tick = tick;
- if (delta < 0) {
- /*
- * Guard against timer0_max_count being wrong.
- * This shouldn't happen in normal operation,
- * but it may happen if set_timer_freq() is
- * traced.
- */
- /* delta += timer0_max_count; ??? */
- if (delta < 0)
- delta = 0;
- }
- ticks_left -= delta;
- }
-#ifdef DELAYDEBUG
- if (state == 1)
- printf(" %d calls to getit() at %d usec each\n",
- getit_calls, (n + 5) / getit_calls);
-#endif
-}
-
-
-int
-sysbeep(int pitch, int period)
-{
- return (0);
-}
-
-/*
- * Restore all the timers non-atomically (XXX: should be atomically).
- *
- * This function is called from pmtimer_resume() to restore all the timers.
- * This should not be necessary, but there are broken laptops that do not
- * restore all the timers on resume.
- */
-void
-timer_restore(void)
-{
- /* Get timebases for new environment. */
- __get_time_values_from_xen();
-
- /* Reset our own concept of passage of system time. */
- processed_system_time = shadow_system_time;
-}
-
-void
-startrtclock()
-{
- unsigned long long alarm;
- uint64_t __cpu_khz;
- uint32_t cpu_khz;
-
- __cpu_khz = HYPERVISOR_shared_info->cpu_freq;
- __cpu_khz /= 1000;
- cpu_khz = (uint32_t)__cpu_khz;
- printk("Xen reported: %lu.%03lu MHz processor.\n",
- cpu_khz / 1000, cpu_khz % 1000);
-
- /* (10^6 * 2^32) / cpu_hz = (10^3 * 2^32) / cpu_khz =
- (2^32 * 1 / (clocks/us)) */
- {
- unsigned long eax=0, edx=1000;
- __asm__("divl %2"
- :"=a" (fast_gettimeoffset_quotient), "=d" (edx)
- :"r" (cpu_khz),
- "0" (eax), "1" (edx));
- }
-
- set_cyc2ns_scale(cpu_khz/1000);
- timer_freq = tsc_freq = xen_timecounter.tc_frequency = cpu_khz * 1000;
- tc_init(&xen_timecounter);
-
-
- rdtscll(alarm);
-}
-
-/*
- * Initialize the time of day register, based on the time base which is, e.g.
- * from a filesystem.
- */
-void
-inittodr(time_t base)
-{
- int s, y;
- struct timespec ts;
-
- s = splclock();
- if (base) {
- ts.tv_sec = base;
- ts.tv_nsec = 0;
- tc_setclock(&ts);
- }
-
- y = time_second - shadow_tv.tv_sec;
- if (y <= -2 || y >= 2) {
- /* badly off, adjust it */
- ts.tv_sec = shadow_tv.tv_sec;
- ts.tv_nsec = shadow_tv.tv_usec * 1000;
- tc_setclock(&ts);
- }
- splx(s);
-}
-
-/*
- * Write system time back to RTC. Not supported for guest domains.
- */
-void
-resettodr()
-{
-}
-
-
-/*
- * Start clocks running.
- */
-void
-cpu_initclocks(void)
-{
- int diag;
- int time_irq = bind_virq_to_irq(VIRQ_TIMER);
-
- if ((diag = intr_add_handler("clk", time_irq,
- (driver_intr_t *)clkintr, NULL,
- INTR_TYPE_CLK | INTR_FAST, NULL))) {
- panic("failed to register clock interrupt: %d\n", diag);
- }
-
- /* should fast clock be enabled ? */
-
- /* initialize xen values */
- __get_time_values_from_xen();
- processed_system_time = shadow_system_time;
- per_cpu(processed_system_time, 0) = processed_system_time;
-
-}
-
-#ifdef SMP
-void
-ap_cpu_initclocks(void)
-{
- int irq;
- int cpu = smp_processor_id();
-
- per_cpu(processed_system_time, cpu) = shadow_system_time;
-
- irq = bind_virq_to_irq(VIRQ_TIMER);
- PCPU_SET(time_irq, irq);
- PANIC_IF(intr_add_handler("clk", irq, (driver_intr_t *)clkintr,
- NULL, INTR_TYPE_CLK | INTR_FAST, NULL));
-}
-#endif
-
-void
-cpu_startprofclock(void)
-{
-
- printf("cpu_startprofclock: profiling clock is not supported\n");
-}
-
-void
-cpu_stopprofclock(void)
-{
-
- printf("cpu_stopprofclock: profiling clock is not supported\n");
-}
-
-static uint32_t
-xen_get_timecount(struct timecounter *tc)
-{
- __get_time_values_from_xen();
- return shadow_tsc_stamp;
-}
-
-/*
- * Track behavior of cur_timer->get_offset() functionality in timer_tsc.c
- */
-#undef rdtsc
-#define rdtsc(low,high) \
- __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high))
-
-static uint32_t
-xen_get_offset(void)
-{
- register unsigned long eax, edx;
-
- /* Read the Time Stamp Counter */
-
- rdtsc(eax,edx);
-
- /* .. relative to previous jiffy (32 bits is enough) */
- eax -= shadow_tsc_stamp;
-
- /*
- * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
- * = (tsc_low delta) * (usecs_per_clock)
- * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
- *
- * Using a mull instead of a divl saves up to 31 clock cycles
- * in the critical path.
- */
-
- __asm__("mull %2"
- :"=a" (eax), "=d" (edx)
- :"rm" (fast_gettimeoffset_quotient),
- "0" (eax));
-
- /* our adjusted time offset in microseconds */
- return edx;
-}
-
-void
-idle_block(void)
-{
- if (HYPERVISOR_set_timer_op(processed_system_time + NS_PER_TICK) == 0)
- HYPERVISOR_block();
-}
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/i386-xen/critical.c
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/critical.c Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,46 +0,0 @@
-/*-
- * Copyright (c) 2002 Matthew Dillon. All Rights Reserved.
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
- * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
- * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/critical.c,v 1.12 2003/11/03 21:06:54
jhb Exp $");
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/proc.h>
-#include <machine/critical.h>
-#include <machine/psl.h>
-
-/*
- * cpu_critical_fork_exit() - cleanup after fork
- *
- * Enable interrupts in the saved copy of eflags.
- */
-void
-cpu_critical_fork_exit(void)
-{
- curthread->td_md.md_savecrit = 0;
-}
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/i386-xen/ctrl_if.c
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/ctrl_if.c Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,533 +0,0 @@
-/******************************************************************************
- * ctrl_if.c
- *
- * Management functions for special interface to the domain controller.
- *
- * Copyright (c) 2004, K A Fraser
- * Copyright (c) 2004, K M Macy
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/uio.h>
-#include <sys/bus.h>
-#include <sys/malloc.h>
-#include <sys/kernel.h>
-#include <sys/lock.h>
-#include <sys/mutex.h>
-#include <sys/selinfo.h>
-#include <sys/poll.h>
-#include <sys/conf.h>
-#include <sys/fcntl.h>
-#include <sys/ioccom.h>
-#include <sys/taskqueue.h>
-
-
-#include <machine/cpufunc.h>
-#include <machine/intr_machdep.h>
-#include <machine/xen-os.h>
-#include <machine/xen_intr.h>
-#include <machine/bus.h>
-#include <sys/rman.h>
-#include <machine/resource.h>
-#include <machine/synch_bitops.h>
-
-
-#include <machine/hypervisor-ifs.h>
-
-#include <machine/ctrl_if.h>
-#include <machine/evtchn.h>
-
-/*
- * Extra ring macros to sync a consumer index up to the public producer index.
- * Generally UNSAFE, but we use it for recovery and shutdown in some cases.
- */
-#define RING_DROP_PENDING_REQUESTS(_r) \
- do { \
- (_r)->req_cons = (_r)->sring->req_prod; \
- } while (0)
-#define RING_DROP_PENDING_RESPONSES(_r) \
- do { \
- (_r)->rsp_cons = (_r)->sring->rsp_prod; \
- } while (0)
-/*
- * Only used by initial domain which must create its own control-interface
- * event channel. This value is picked up by the user-space domain controller
- * via an ioctl.
- */
-int initdom_ctrlif_domcontroller_port = -1;
-
-static int ctrl_if_evtchn;
-static int ctrl_if_irq;
-static struct mtx ctrl_if_lock;
-static int * ctrl_if_wchan = &ctrl_if_evtchn;
-
-
-static ctrl_front_ring_t ctrl_if_tx_ring;
-static ctrl_back_ring_t ctrl_if_rx_ring;
-
-/* Incoming message requests. */
- /* Primary message type -> message handler. */
-static ctrl_msg_handler_t ctrl_if_rxmsg_handler[256];
- /* Primary message type -> callback in process context? */
-static unsigned long ctrl_if_rxmsg_blocking_context[256/sizeof(unsigned long)];
- /* Queue up messages to be handled in process context. */
-static ctrl_msg_t ctrl_if_rxmsg_deferred[CONTROL_RING_SIZE];
-static CONTROL_RING_IDX ctrl_if_rxmsg_deferred_prod;
-static CONTROL_RING_IDX ctrl_if_rxmsg_deferred_cons;
-
-/* Incoming message responses: message identifier -> message handler/id. */
-static struct {
- ctrl_msg_handler_t fn;
- unsigned long id;
-} ctrl_if_txmsg_id_mapping[CONTROL_RING_SIZE];
-
-/*
- * FreeBSD task queues don't allow you to requeue an already executing task.
- * Since ctrl_if_interrupt clears the TX_FULL condition and schedules any
- * waiting tasks, which themselves may need to schedule a new task
- * (due to new a TX_FULL condition), we ping-pong between these A/B task
queues.
- * The interrupt runs anything on the current queue and moves the index so that
- * future schedulings occur on the next queue. We should never get into a
- * situation where there is a task scheduleded on both the A & B queues.
- */
-TASKQUEUE_DECLARE(ctrl_if_txA);
-TASKQUEUE_DEFINE(ctrl_if_txA, NULL, NULL, {});
-TASKQUEUE_DECLARE(ctrl_if_txB);
-TASKQUEUE_DEFINE(ctrl_if_txB, NULL, NULL, {});
-struct taskqueue **taskqueue_ctrl_if_tx[2] = { &taskqueue_ctrl_if_txA,
- &taskqueue_ctrl_if_txB };
-static int ctrl_if_idx = 0;
-
-static struct task ctrl_if_rx_tasklet;
-static struct task ctrl_if_tx_tasklet;
- /* Passed to schedule_task(). */
-static struct task ctrl_if_rxmsg_deferred_task;
-
-
-
-#define get_ctrl_if() ((control_if_t *)((char *)HYPERVISOR_shared_info + 2048))
-
-static void
-ctrl_if_notify_controller(void)
-{
- notify_via_evtchn(ctrl_if_evtchn);
-}
-
-static void
-ctrl_if_rxmsg_default_handler(ctrl_msg_t *msg, unsigned long id)
-{
- msg->length = 0;
- ctrl_if_send_response(msg);
-}
-
-static void
-__ctrl_if_tx_tasklet(void *context __unused, int pending __unused)
-{
- ctrl_msg_t *msg;
- int was_full = RING_FULL(&ctrl_if_tx_ring);
- RING_IDX i, rp;
-
- i = ctrl_if_tx_ring.rsp_cons;
- rp = ctrl_if_tx_ring.sring->rsp_prod;
- rmb(); /* Ensure we see all requests up to 'rp'. */
-
- for ( ; i != rp; i++ )
- {
- msg = RING_GET_RESPONSE(&ctrl_if_tx_ring, i);
-
- /* Execute the callback handler, if one was specified. */
- if ( msg->id != 0xFF )
- {
- (*ctrl_if_txmsg_id_mapping[msg->id].fn)(
- msg, ctrl_if_txmsg_id_mapping[msg->id].id);
- smp_mb(); /* Execute, /then/ free. */
- ctrl_if_txmsg_id_mapping[msg->id].fn = NULL;
- }
-
- }
-
- /*
- * Step over the message in the ring /after/ finishing reading it. As
- * soon as the index is updated then the message may get blown away.
- */
- smp_mb();
- ctrl_if_tx_ring.rsp_cons = i;
-
- if ( was_full && !RING_FULL(&ctrl_if_tx_ring) )
- {
- wakeup(ctrl_if_wchan);
-
- /* bump idx so future enqueues will occur on the next taskq
- * process any currently pending tasks
- */
- ctrl_if_idx++;
- taskqueue_run(*taskqueue_ctrl_if_tx[(ctrl_if_idx-1) & 1]);
- }
-
-}
-
-static void
-__ctrl_if_rxmsg_deferred_task(void *context __unused, int pending __unused)
-{
- ctrl_msg_t *msg;
- CONTROL_RING_IDX dp;
-
- dp = ctrl_if_rxmsg_deferred_prod;
- rmb(); /* Ensure we see all deferred requests up to 'dp'. */
-
- while ( ctrl_if_rxmsg_deferred_cons != dp )
- {
- msg = &ctrl_if_rxmsg_deferred[MASK_CONTROL_IDX(
- ctrl_if_rxmsg_deferred_cons++)];
- (*ctrl_if_rxmsg_handler[msg->type])(msg, 0);
- }
-
-}
-
-static void
-__ctrl_if_rx_tasklet(void *context __unused, int pending __unused)
-{
- ctrl_msg_t msg, *pmsg;
- CONTROL_RING_IDX dp;
- RING_IDX rp, i;
-
- i = ctrl_if_rx_ring.req_cons;
- rp = ctrl_if_rx_ring.sring->req_prod;
- dp = ctrl_if_rxmsg_deferred_prod;
-
- rmb(); /* Ensure we see all requests up to 'rp'. */
-
- for ( ; i != rp; i++)
- {
- pmsg = RING_GET_REQUEST(&ctrl_if_rx_ring, i);
- memcpy(&msg, pmsg, offsetof(ctrl_msg_t, msg));
-
- if ( msg.length > sizeof(msg.msg))
- msg.length = sizeof(msg.msg);
- if ( msg.length != 0 )
- memcpy(msg.msg, pmsg->msg, msg.length);
- if ( test_bit(msg.type, &ctrl_if_rxmsg_blocking_context) )
- {
- memcpy(&ctrl_if_rxmsg_deferred[MASK_CONTROL_IDX(dp++)],
- &msg, offsetof(ctrl_msg_t, msg) + msg.length);
- }
- else
- {
- (*ctrl_if_rxmsg_handler[msg.type])(&msg, 0);
- }
- }
- ctrl_if_rx_ring.req_cons = i;
-
- if ( dp != ctrl_if_rxmsg_deferred_prod )
- {
- wmb();
- ctrl_if_rxmsg_deferred_prod = dp;
- taskqueue_enqueue(taskqueue_thread, &ctrl_if_rxmsg_deferred_task);
- }
-
-}
-
-static void
-ctrl_if_interrupt(void *ctrl_sc)
-/* (int irq, void *dev_id, struct pt_regs *regs) */
-{
-
-
- if ( RING_HAS_UNCONSUMED_RESPONSES(&ctrl_if_tx_ring) )
- taskqueue_enqueue(taskqueue_swi, &ctrl_if_tx_tasklet);
-
-
- if ( RING_HAS_UNCONSUMED_REQUESTS(&ctrl_if_rx_ring) )
- taskqueue_enqueue(taskqueue_swi, &ctrl_if_rx_tasklet);
-
-}
-
-int
-ctrl_if_send_message_noblock(
- ctrl_msg_t *msg,
- ctrl_msg_handler_t hnd,
- unsigned long id)
-{
- unsigned long flags;
- ctrl_msg_t *dmsg;
- int i;
-
- mtx_lock_irqsave(&ctrl_if_lock, flags);
-
- if ( RING_FULL(&ctrl_if_tx_ring) )
- {
- mtx_unlock_irqrestore(&ctrl_if_lock, flags);
- return EAGAIN;
- }
-
- msg->id = 0xFF;
- if ( hnd != NULL )
- {
- for ( i = 0; ctrl_if_txmsg_id_mapping[i].fn != NULL; i++ )
- continue;
- ctrl_if_txmsg_id_mapping[i].fn = hnd;
- ctrl_if_txmsg_id_mapping[i].id = id;
- msg->id = i;
- }
-
- dmsg = RING_GET_REQUEST(&ctrl_if_tx_ring,
- ctrl_if_tx_ring.req_prod_pvt);
- memcpy(dmsg, msg, sizeof(*msg));
- ctrl_if_tx_ring.req_prod_pvt++;
- RING_PUSH_REQUESTS(&ctrl_if_tx_ring);
-
- mtx_unlock_irqrestore(&ctrl_if_lock, flags);
-
- ctrl_if_notify_controller();
-
- return 0;
-}
-
-int
-ctrl_if_send_message_block(
- ctrl_msg_t *msg,
- ctrl_msg_handler_t hnd,
- unsigned long id,
- long wait_state)
-{
- int rc, sst = 0;
-
- /* Fast path. */
- if ( (rc = ctrl_if_send_message_noblock(msg, hnd, id)) != EAGAIN )
- goto done;
-
- for ( ; ; )
- {
-
- if ( (rc = ctrl_if_send_message_noblock(msg, hnd, id)) != EAGAIN )
- break;
-
- if ( sst != 0) {
- rc = EINTR;
- goto done;
- }
-
- sst = tsleep(ctrl_if_wchan, PWAIT|PCATCH, "ctlrwt", 10);
- }
- done:
-
- return rc;
-}
-
-int
-ctrl_if_enqueue_space_callback(struct task *task)
-{
-
- /* Fast path. */
- if ( !RING_FULL(&ctrl_if_tx_ring) )
- return 0;
-
- (void)taskqueue_enqueue(*taskqueue_ctrl_if_tx[(ctrl_if_idx & 1)], task);
-
- /*
- * We may race execution of the task queue, so return re-checked status. If
- * the task is not executed despite the ring being non-full then we will
- * certainly return 'not full'.
- */
- smp_mb();
- return RING_FULL(&ctrl_if_tx_ring);
-}
-
-void
-ctrl_if_send_response(ctrl_msg_t *msg)
-{
- unsigned long flags;
- ctrl_msg_t *dmsg;
-
- /*
- * NB. The response may the original request message, modified in-place.
- * In this situation we may have src==dst, so no copying is required.
- */
- mtx_lock_irqsave(&ctrl_if_lock, flags);
- dmsg = RING_GET_RESPONSE(&ctrl_if_rx_ring,
- ctrl_if_rx_ring.rsp_prod_pvt);
- if ( dmsg != msg )
- memcpy(dmsg, msg, sizeof(*msg));
-
- ctrl_if_rx_ring.rsp_prod_pvt++;
- RING_PUSH_RESPONSES(&ctrl_if_rx_ring);
-
- mtx_unlock_irqrestore(&ctrl_if_lock, flags);
-
- ctrl_if_notify_controller();
-}
-
-int
-ctrl_if_register_receiver(
- uint8_t type,
- ctrl_msg_handler_t hnd,
- unsigned int flags)
-{
- unsigned long _flags;
- int inuse;
-
- mtx_lock_irqsave(&ctrl_if_lock, _flags);
-
- inuse = (ctrl_if_rxmsg_handler[type] != ctrl_if_rxmsg_default_handler);
-
- if ( inuse )
- {
- printk("Receiver %p already established for control "
- "messages of type %d.\n", ctrl_if_rxmsg_handler[type], type);
- }
- else
- {
- ctrl_if_rxmsg_handler[type] = hnd;
- clear_bit(type, &ctrl_if_rxmsg_blocking_context);
- if ( flags == CALLBACK_IN_BLOCKING_CONTEXT )
- {
- set_bit(type, &ctrl_if_rxmsg_blocking_context);
- }
- }
-
- mtx_unlock_irqrestore(&ctrl_if_lock, _flags);
-
- return !inuse;
-}
-
-void
-ctrl_if_unregister_receiver(uint8_t type, ctrl_msg_handler_t hnd)
-{
- unsigned long flags;
-
- mtx_lock_irqsave(&ctrl_if_lock, flags);
-
- if ( ctrl_if_rxmsg_handler[type] != hnd )
- printk("Receiver %p is not registered for control "
- "messages of type %d.\n", hnd, type);
- else
- ctrl_if_rxmsg_handler[type] = ctrl_if_rxmsg_default_handler;
-
- mtx_unlock_irqrestore(&ctrl_if_lock, flags);
-
- /* Ensure that @hnd will not be executed after this function returns. */
- /* XXX need rx_tasklet_lock -- can cheat for now?*/
-#ifdef notyet
- tasklet_unlock_wait(&ctrl_if_rx_tasklet);
-#endif
-}
-
-void
-ctrl_if_suspend(void)
-{
- /* I'm not sure what the equivalent is - we aren't going to support
suspend
- * yet anyway
- */
-#ifdef notyet
- free_irq(ctrl_if_irq, NULL);
-#endif
- unbind_evtchn_from_irq(ctrl_if_evtchn);
-}
-
-#if 0
-/** Reset the control interface progress pointers.
- * Marks the queues empty if 'clear' non-zero.
- */
-static void
-ctrl_if_reset(int clear)
-{
- control_if_t *ctrl_if = get_ctrl_if();
-
- if (clear) {
- *ctrl_if = (control_if_t){};
- }
-
- ctrl_if_tx_resp_cons = ctrl_if->tx_resp_prod;
- ctrl_if_rx_req_cons = ctrl_if->rx_resp_prod;
-}
-
-#endif
-void
-ctrl_if_resume(void)
-{
- control_if_t *ctrl_if = get_ctrl_if();
-
- TRACE_ENTER;
- if ( xen_start_info->flags & SIF_INITDOMAIN )
- {
- /*
- * The initial domain must create its own domain-controller link.
- * The controller is probably not running at this point, but will
- * pick up its end of the event channel from
- */
- evtchn_op_t op;
- op.cmd = EVTCHNOP_bind_interdomain;
- op.u.bind_interdomain.dom1 = DOMID_SELF;
- op.u.bind_interdomain.dom2 = DOMID_SELF;
- op.u.bind_interdomain.port1 = 0;
- op.u.bind_interdomain.port2 = 0;
- if ( HYPERVISOR_event_channel_op(&op) != 0 )
- panic("event_channel_op failed\n");
- xen_start_info->domain_controller_evtchn = op.u.bind_interdomain.port1;
- initdom_ctrlif_domcontroller_port = op.u.bind_interdomain.port2;
- }
-
-
- /* Sync up with shared indexes. */
- FRONT_RING_ATTACH(&ctrl_if_tx_ring, &ctrl_if->tx_ring, CONTROL_RING_MEM);
- BACK_RING_ATTACH(&ctrl_if_rx_ring, &ctrl_if->rx_ring, CONTROL_RING_MEM);
-
- ctrl_if_evtchn = xen_start_info->domain_controller_evtchn;
- ctrl_if_irq = bind_evtchn_to_irq(ctrl_if_evtchn);
-
- /*
- * I have not taken the time to determine what the interrupt thread
priorities
- * correspond to - this interface is used for network and disk, network
would
- * seem higher priority, hence I'm using it
- */
-
- intr_add_handler("ctrl-if", ctrl_if_irq, (driver_intr_t*)ctrl_if_interrupt,
- NULL, INTR_TYPE_NET, NULL);
- TRACE_EXIT;
- /* XXX currently assuming not MPSAFE */
-}
-
-static void
-ctrl_if_init(void *dummy __unused)
-{
- control_if_t *ctrl_if = get_ctrl_if();
-
- int i;
-
- for ( i = 0; i < 256; i++ )
- ctrl_if_rxmsg_handler[i] = ctrl_if_rxmsg_default_handler;
-
- FRONT_RING_ATTACH(&ctrl_if_tx_ring, &ctrl_if->tx_ring, CONTROL_RING_MEM);
- BACK_RING_ATTACH(&ctrl_if_rx_ring, &ctrl_if->rx_ring, CONTROL_RING_MEM);
-
- mtx_init(&ctrl_if_lock, "ctrlif", NULL, MTX_SPIN | MTX_NOWITNESS);
-
- TASK_INIT(&ctrl_if_tx_tasklet, 0, __ctrl_if_tx_tasklet, NULL);
-
- TASK_INIT(&ctrl_if_rx_tasklet, 0, __ctrl_if_rx_tasklet, NULL);
-
- TASK_INIT(&ctrl_if_rxmsg_deferred_task, 0, __ctrl_if_rxmsg_deferred_task,
NULL);
-
-
- ctrl_if_resume();
-}
-
-/*
- * !! The following are DANGEROUS FUNCTIONS !!
- * Use with care [for example, see xencons_force_flush()].
- */
-
-int
-ctrl_if_transmitter_empty(void)
-{
- return (ctrl_if_tx_ring.sring->req_prod == ctrl_if_tx_ring.rsp_cons);
-}
-
-void
-ctrl_if_discard_responses(void)
-{
- RING_DROP_PENDING_RESPONSES(&ctrl_if_tx_ring);
-}
-
-SYSINIT(ctrl_if_init, SI_SUB_DRIVERS, SI_ORDER_FIRST, ctrl_if_init, NULL);
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/i386-xen/db_interface.c
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/db_interface.c Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,209 +0,0 @@
-/*
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- *
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@xxxxxxxxxx
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/db_interface.c,v 1.77 2003/11/08
03:01:26 alc Exp $");
-
-/*
- * Interface to new debugger.
- */
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/reboot.h>
-#include <sys/cons.h>
-#include <sys/pcpu.h>
-#include <sys/proc.h>
-#include <sys/smp.h>
-
-#include <machine/cpu.h>
-#ifdef SMP
-#include <machine/smptests.h> /** CPUSTOP_ON_DDBBREAK */
-#endif
-
-#include <vm/vm.h>
-#include <vm/pmap.h>
-
-#include <ddb/ddb.h>
-
-#include <machine/setjmp.h>
-#include <machine/xenfunc.h>
-
-
-static jmp_buf *db_nofault = 0;
-extern jmp_buf db_jmpbuf;
-
-extern void gdb_handle_exception(db_regs_t *, int, int);
-
-int db_active;
-db_regs_t ddb_regs;
-
-static __inline u_short
-rss(void)
-{
- u_short ss;
-#ifdef __GNUC__
- __asm __volatile("mov %%ss,%0" : "=r" (ss));
-#else
- ss = 0; /* XXXX Fix for other compilers. */
-#endif
- return ss;
-}
-
-/*
- * kdb_trap - field a TRACE or BPT trap
- */
-int
-kdb_trap(int type, int code, struct i386_saved_state *regs)
-{
- volatile int ddb_mode = !(boothowto & RB_GDB);
-
- disable_intr();
-
- if (ddb_mode) {
- /* we can't do much as a guest domain except print a
- * backtrace and die gracefuly. The reason is that we
- * can't get character input to make this work.
- */
- db_active = 1;
- db_print_backtrace();
- db_printf("************ Domain shutting down ************\n");
- HYPERVISOR_shutdown();
- } else {
- Debugger("kdb_trap");
- }
- return (1);
-}
-
-/*
- * Read bytes from kernel address space for debugger.
- */
-void
-db_read_bytes(vm_offset_t addr, size_t size, char *data)
-{
- char *src;
-
- db_nofault = &db_jmpbuf;
-
- src = (char *)addr;
- while (size-- > 0)
- *data++ = *src++;
-
- db_nofault = 0;
-}
-
-/*
- * Write bytes to kernel address space for debugger.
- */
-void
-db_write_bytes(vm_offset_t addr, size_t size, char *data)
-{
- char *dst;
-
- pt_entry_t *ptep0 = NULL;
- pt_entry_t oldmap0 = 0;
- vm_offset_t addr1;
- pt_entry_t *ptep1 = NULL;
- pt_entry_t oldmap1 = 0;
-
- db_nofault = &db_jmpbuf;
-
- if (addr > trunc_page((vm_offset_t)btext) - size &&
- addr < round_page((vm_offset_t)etext)) {
-
- ptep0 = pmap_pte(kernel_pmap, addr);
- oldmap0 = *ptep0;
- *ptep0 |= PG_RW;
-
- /* Map another page if the data crosses a page boundary. */
- if ((*ptep0 & PG_PS) == 0) {
- addr1 = trunc_page(addr + size - 1);
- if (trunc_page(addr) != addr1) {
- ptep1 = pmap_pte(kernel_pmap, addr1);
- oldmap1 = *ptep1;
- *ptep1 |= PG_RW;
- }
- } else {
- addr1 = trunc_4mpage(addr + size - 1);
- if (trunc_4mpage(addr) != addr1) {
- ptep1 = pmap_pte(kernel_pmap, addr1);
- oldmap1 = *ptep1;
- *ptep1 |= PG_RW;
- }
- }
-
- invltlb();
- }
-
- dst = (char *)addr;
-
- while (size-- > 0)
- *dst++ = *data++;
-
- db_nofault = 0;
-
- if (ptep0) {
- *ptep0 = oldmap0;
-
- if (ptep1)
- *ptep1 = oldmap1;
-
- invltlb();
- }
-}
-
-/*
- * XXX
- * Move this to machdep.c and allow it to be called if any debugger is
- * installed.
- */
-void
-Debugger(const char *msg)
-{
- static volatile u_int in_Debugger;
-
- /*
- * XXX
- * Do nothing if the console is in graphics mode. This is
- * OK if the call is for the debugger hotkey but not if the call
- * is a weak form of panicing.
- */
- if (cons_unavail && !(boothowto & RB_GDB))
- return;
-
- if (atomic_cmpset_acq_int(&in_Debugger, 0, 1)) {
- db_printf("Debugger(\"%s\")\n", msg);
- breakpoint();
- atomic_store_rel_int(&in_Debugger, 0);
- }
-}
-
-void
-db_show_mdpcpu(struct pcpu *pc)
-{
-
- db_printf("APIC ID = %d\n", pc->pc_apic_id);
- db_printf("currentldt = 0x%x\n", pc->pc_currentldt);
-}
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,666 +0,0 @@
-/******************************************************************************
- * evtchn.c
- *
- * Communication via Xen event channels.
- *
- * Copyright (c) 2002-2004, K A Fraser
- */
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/bus.h>
-#include <sys/malloc.h>
-#include <sys/kernel.h>
-#include <sys/lock.h>
-#include <sys/mutex.h>
-
-#include <machine/cpufunc.h>
-#include <machine/intr_machdep.h>
-#include <machine/xen-os.h>
-#include <machine/xen_intr.h>
-#include <machine/synch_bitops.h>
-#include <machine/evtchn.h>
-#include <machine/hypervisor.h>
-#include <machine/hypervisor-ifs.h>
-
-
-static struct mtx irq_mapping_update_lock;
-
-#define TODO printf("%s: not implemented!\n", __func__)
-
-/* IRQ <-> event-channel mappings. */
-static int evtchn_to_irq[NR_EVENT_CHANNELS];
-static int irq_to_evtchn[NR_IRQS];
-
-static int virq_to_irq[MAX_VIRT_CPUS][NR_VIRQS];
-static int ipi_to_evtchn[MAX_VIRT_CPUS][NR_VIRQS];
-
-
-/* Reference counts for bindings to IRQs. */
-static int irq_bindcount[NR_IRQS];
-
-#define VALID_EVTCHN(_chn) ((_chn) != -1)
-
-/*
- * Force a proper event-channel callback from Xen after clearing the
- * callback mask. We do this in a very simple manner, by making a call
- * down into Xen. The pending flag will be checked by Xen on return.
- */
-void force_evtchn_callback(void)
-{
- (void)HYPERVISOR_xen_version(0);
-}
-
-void
-evtchn_do_upcall(struct intrframe *frame)
-{
- unsigned long l1, l2;
- unsigned int l1i, l2i, port;
- int irq, owned;
- unsigned long flags;
- shared_info_t *s = HYPERVISOR_shared_info;
- vcpu_info_t *vcpu_info = &s->vcpu_data[smp_processor_id()];
-
- local_irq_save(flags);
-
- while ( s->vcpu_data[0].evtchn_upcall_pending )
- {
- s->vcpu_data[0].evtchn_upcall_pending = 0;
- /* NB. No need for a barrier here -- XCHG is a barrier on x86. */
- l1 = xen_xchg(&vcpu_info->evtchn_pending_sel, 0);
- while ( (l1i = ffs(l1)) != 0 )
- {
- l1i--;
- l1 &= ~(1 << l1i);
-
- l2 = s->evtchn_pending[l1i] & ~s->evtchn_mask[l1i];
- while ( (l2i = ffs(l2)) != 0 )
- {
- l2i--;
- l2 &= ~(1 << l2i);
-
- port = (l1i << 5) + l2i;
- irq = evtchn_to_irq[port];
-#ifdef SMP
- if (irq == PCPU_GET(cpuast))
- continue;
-#endif
- if ( (owned = mtx_owned(&sched_lock)) != 0 )
- mtx_unlock_spin_flags(&sched_lock, MTX_QUIET);
- if ( irq != -1 ) {
- struct intsrc *isrc = intr_lookup_source(irq);
- intr_execute_handlers(isrc, frame);
- } else {
- evtchn_device_upcall(port);
- }
- if ( owned )
- mtx_lock_spin_flags(&sched_lock, MTX_QUIET);
- }
- }
- }
-
- local_irq_restore(flags);
-
-}
-
-
-static int
-find_unbound_irq(void)
-{
- int irq;
-
- for ( irq = 0; irq < NR_IRQS; irq++ )
- if ( irq_bindcount[irq] == 0 )
- break;
-
- if ( irq == NR_IRQS )
- panic("No available IRQ to bind to: increase NR_IRQS!\n");
-
- return irq;
-}
-
-int
-bind_virq_to_irq(int virq)
-{
- evtchn_op_t op;
- int evtchn, irq;
-
- mtx_lock(&irq_mapping_update_lock);
-
- if ( (irq = PCPU_GET(virq_to_irq)[virq]) == -1 )
- {
- op.cmd = EVTCHNOP_bind_virq;
- op.u.bind_virq.virq = virq;
- if ( HYPERVISOR_event_channel_op(&op) != 0 )
- panic("Failed to bind virtual IRQ %d\n", virq);
- evtchn = op.u.bind_virq.port;
-
- irq = find_unbound_irq();
- evtchn_to_irq[evtchn] = irq;
- irq_to_evtchn[irq] = evtchn;
-
- PCPU_GET(virq_to_irq)[virq] = irq;
- }
-
- irq_bindcount[irq]++;
-
- mtx_unlock(&irq_mapping_update_lock);
-
- return irq;
-}
-
-void
-unbind_virq_from_irq(int virq)
-{
- evtchn_op_t op;
- int irq = PCPU_GET(virq_to_irq)[virq];
- int evtchn = irq_to_evtchn[irq];
-
- mtx_lock(&irq_mapping_update_lock);
-
- if ( --irq_bindcount[irq] == 0 )
- {
- op.cmd = EVTCHNOP_close;
- op.u.close.dom = DOMID_SELF;
- op.u.close.port = evtchn;
- if ( HYPERVISOR_event_channel_op(&op) != 0 )
- panic("Failed to unbind virtual IRQ %d\n", virq);
-
- evtchn_to_irq[evtchn] = -1;
- irq_to_evtchn[irq] = -1;
- PCPU_GET(virq_to_irq)[virq] = -1;
- }
-
- mtx_unlock(&irq_mapping_update_lock);
-}
-
-
-int
-bind_ipi_on_cpu_to_irq(int cpu, int ipi)
-{
- evtchn_op_t op;
- int evtchn, irq;
-
- mtx_lock(&irq_mapping_update_lock);
-
- if ( (evtchn = PCPU_GET(ipi_to_evtchn)[ipi]) == 0 )
- {
- op.cmd = EVTCHNOP_bind_ipi;
- op.u.bind_ipi.ipi_edom = cpu;
- if ( HYPERVISOR_event_channel_op(&op) != 0 )
- panic("Failed to bind virtual IPI %d on cpu %d\n", ipi, cpu);
- evtchn = op.u.bind_ipi.port;
-
- irq = find_unbound_irq();
- evtchn_to_irq[evtchn] = irq;
- irq_to_evtchn[irq] = evtchn;
-
- PCPU_GET(ipi_to_evtchn)[ipi] = evtchn;
- } else
- irq = evtchn_to_irq[evtchn];
-
- irq_bindcount[irq]++;
-
- mtx_unlock(&irq_mapping_update_lock);
-
- return irq;
-}
-
-void
-unbind_ipi_on_cpu_from_irq(int cpu, int ipi)
-{
- evtchn_op_t op;
- int evtchn = PCPU_GET(ipi_to_evtchn)[ipi];
- int irq = irq_to_evtchn[evtchn];
-
- mtx_lock(&irq_mapping_update_lock);
-
- if ( --irq_bindcount[irq] == 0 )
- {
- op.cmd = EVTCHNOP_close;
- op.u.close.dom = DOMID_SELF;
- op.u.close.port = evtchn;
- if ( HYPERVISOR_event_channel_op(&op) != 0 )
- panic("Failed to unbind virtual IPI %d on cpu %d\n", ipi, cpu);
-
- evtchn_to_irq[evtchn] = -1;
- irq_to_evtchn[irq] = -1;
- PCPU_GET(ipi_to_evtchn)[ipi] = 0;
- }
-
- mtx_unlock(&irq_mapping_update_lock);
-}
-
-int
-bind_evtchn_to_irq(int evtchn)
-{
- int irq;
-
- mtx_lock(&irq_mapping_update_lock);
-
- if ( (irq = evtchn_to_irq[evtchn]) == -1 )
- {
- irq = find_unbound_irq();
- evtchn_to_irq[evtchn] = irq;
- irq_to_evtchn[irq] = evtchn;
- }
-
- irq_bindcount[irq]++;
-
- mtx_unlock(&irq_mapping_update_lock);
-
- return irq;
-}
-
-void
-unbind_evtchn_from_irq(int evtchn)
-{
- int irq = evtchn_to_irq[evtchn];
-
- mtx_lock(&irq_mapping_update_lock);
-
- if ( --irq_bindcount[irq] == 0 )
- {
- evtchn_to_irq[evtchn] = -1;
- irq_to_evtchn[irq] = -1;
- }
-
- mtx_unlock(&irq_mapping_update_lock);
-}
-
-
-/*
- * Interface to generic handling in intr_machdep.c
- */
-
-
-/*------------ interrupt handling --------------------------------------*/
-#define TODO printf("%s: not implemented!\n", __func__)
-
- struct mtx xenpic_lock;
-
-struct xenpic_intsrc {
- struct intsrc xp_intsrc;
- uint8_t xp_vector;
- boolean_t xp_masked;
-};
-
-struct xenpic {
- struct pic xp_pic; /* this MUST be first */
- uint16_t xp_numintr;
- struct xenpic_intsrc xp_pins[0];
-};
-
-static void xenpic_enable_dynirq_source(struct intsrc *isrc);
-static void xenpic_disable_dynirq_source(struct intsrc *isrc, int);
-static void xenpic_eoi_source(struct intsrc *isrc);
-static void xenpic_enable_dynirq_intr(struct intsrc *isrc);
-static int xenpic_vector(struct intsrc *isrc);
-static int xenpic_source_pending(struct intsrc *isrc);
-static void xenpic_suspend(struct intsrc *isrc);
-static void xenpic_resume(struct intsrc *isrc);
-
-
-struct pic xenpic_template = {
- xenpic_enable_dynirq_source,
- xenpic_disable_dynirq_source,
- xenpic_eoi_source,
- xenpic_enable_dynirq_intr,
- xenpic_vector,
- xenpic_source_pending,
- xenpic_suspend,
- xenpic_resume
-};
-
-
-void
-xenpic_enable_dynirq_source(struct intsrc *isrc)
-{
- unsigned int irq;
- struct xenpic_intsrc *xp;
-
- xp = (struct xenpic_intsrc *)isrc;
-
- if (xp->xp_masked) {
- irq = xenpic_vector(isrc);
- unmask_evtchn(irq_to_evtchn[irq]);
- xp->xp_masked = FALSE;
- }
-}
-
-static void
-xenpic_disable_dynirq_source(struct intsrc *isrc, int foo)
-{
- unsigned int irq;
- struct xenpic_intsrc *xp;
-
- xp = (struct xenpic_intsrc *)isrc;
-
- if (!xp->xp_masked) {
- irq = xenpic_vector(isrc);
- mask_evtchn(irq_to_evtchn[irq]);
- xp->xp_masked = TRUE;
- }
-
-}
-
-static void
-xenpic_enable_dynirq_intr(struct intsrc *isrc)
-{
- unsigned int irq;
-
- irq = xenpic_vector(isrc);
- unmask_evtchn(irq_to_evtchn[irq]);
-}
-
-static void
-xenpic_eoi_source(struct intsrc *isrc)
-{
- unsigned int irq = xenpic_vector(isrc);
- clear_evtchn(irq_to_evtchn[irq]);
-}
-
-static int
-xenpic_vector(struct intsrc *isrc)
-{
- struct xenpic_intsrc *pin = (struct xenpic_intsrc *)isrc;
- return (pin->xp_vector);
-}
-
-static int
-xenpic_source_pending(struct intsrc *isrc)
-{
- TODO;
- return 0;
-}
-
-static void
-xenpic_suspend(struct intsrc *isrc)
-{
- TODO;
-}
-
-static void
-xenpic_resume(struct intsrc *isrc)
-{
- TODO;
-}
-
-#ifdef CONFIG_PHYSDEV
-/* required for support of physical devices */
-static inline void
-pirq_unmask_notify(int pirq)
-{
- physdev_op_t op;
- if ( unlikely(test_bit(pirq, &pirq_needs_unmask_notify[0])) )
- {
- op.cmd = PHYSDEVOP_IRQ_UNMASK_NOTIFY;
- (void)HYPERVISOR_physdev_op(&op);
- }
-}
-
-static inline void
-pirq_query_unmask(int pirq)
-{
- physdev_op_t op;
- op.cmd = PHYSDEVOP_IRQ_STATUS_QUERY;
- op.u.irq_status_query.irq = pirq;
- (void)HYPERVISOR_physdev_op(&op);
- clear_bit(pirq, &pirq_needs_unmask_notify[0]);
- if ( op.u.irq_status_query.flags & PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY )
- set_bit(pirq, &pirq_needs_unmask_notify[0]);
-}
-
-/*
- * On startup, if there is no action associated with the IRQ then we are
- * probing. In this case we should not share with others as it will confuse us.
- */
-#define probing_irq(_irq) (irq_desc[(_irq)].action == NULL)
-
-static unsigned int startup_pirq(unsigned int irq)
-{
- evtchn_op_t op;
- int evtchn;
-
- op.cmd = EVTCHNOP_bind_pirq;
- op.u.bind_pirq.pirq = irq;
- /* NB. We are happy to share unless we are probing. */
- op.u.bind_pirq.flags = probing_irq(irq) ? 0 : BIND_PIRQ__WILL_SHARE;
- if ( HYPERVISOR_event_channel_op(&op) != 0 )
- {
- if ( !probing_irq(irq) ) /* Some failures are expected when probing. */
- printk(KERN_INFO "Failed to obtain physical IRQ %d\n", irq);
- return 0;
- }
- evtchn = op.u.bind_pirq.port;
-
- pirq_query_unmask(irq_to_pirq(irq));
-
- evtchn_to_irq[evtchn] = irq;
- irq_to_evtchn[irq] = evtchn;
-
- unmask_evtchn(evtchn);
- pirq_unmask_notify(irq_to_pirq(irq));
-
- return 0;
-}
-
-static void shutdown_pirq(unsigned int irq)
-{
- evtchn_op_t op;
- int evtchn = irq_to_evtchn[irq];
-
- if ( !VALID_EVTCHN(evtchn) )
- return;
-
- mask_evtchn(evtchn);
-
- op.cmd = EVTCHNOP_close;
- op.u.close.dom = DOMID_SELF;
- op.u.close.port = evtchn;
- if ( HYPERVISOR_event_channel_op(&op) != 0 )
- panic("Failed to unbind physical IRQ %d\n", irq);
-
- evtchn_to_irq[evtchn] = -1;
- irq_to_evtchn[irq] = -1;
-}
-
-static void enable_pirq(unsigned int irq)
-{
- int evtchn = irq_to_evtchn[irq];
- if ( !VALID_EVTCHN(evtchn) )
- return;
- unmask_evtchn(evtchn);
- pirq_unmask_notify(irq_to_pirq(irq));
-}
-
-static void disable_pirq(unsigned int irq)
-{
- int evtchn = irq_to_evtchn[irq];
- if ( !VALID_EVTCHN(evtchn) )
- return;
- mask_evtchn(evtchn);
-}
-
-static void ack_pirq(unsigned int irq)
-{
- int evtchn = irq_to_evtchn[irq];
- if ( !VALID_EVTCHN(evtchn) )
- return;
- mask_evtchn(evtchn);
- clear_evtchn(evtchn);
-}
-
-static void end_pirq(unsigned int irq)
-{
- int evtchn = irq_to_evtchn[irq];
- if ( !VALID_EVTCHN(evtchn) )
- return;
- if ( !(irq_desc[irq].status & IRQ_DISABLED) )
- {
- unmask_evtchn(evtchn);
- pirq_unmask_notify(irq_to_pirq(irq));
- }
-}
-
-static struct hw_interrupt_type pirq_type = {
- "Phys-irq",
- startup_pirq,
- shutdown_pirq,
- enable_pirq,
- disable_pirq,
- ack_pirq,
- end_pirq,
- NULL
-};
-#endif
-
-#if 0
-static void
-misdirect_interrupt(void *sc)
-{
-}
-#endif
-void irq_suspend(void)
-{
- int virq, irq, evtchn;
-
- /* Unbind VIRQs from event channels. */
- for ( virq = 0; virq < NR_VIRQS; virq++ )
- {
- if ( (irq = PCPU_GET(virq_to_irq)[virq]) == -1 )
- continue;
- evtchn = irq_to_evtchn[irq];
-
- /* Mark the event channel as unused in our table. */
- evtchn_to_irq[evtchn] = -1;
- irq_to_evtchn[irq] = -1;
- }
-
- /*
- * We should now be unbound from all event channels. Stale bindings to
- * PIRQs and/or inter-domain event channels will cause us to barf here.
- */
- for ( evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++ )
- if ( evtchn_to_irq[evtchn] != -1 )
- panic("Suspend attempted while bound to evtchn %d.\n", evtchn);
-}
-
-
-void irq_resume(void)
-{
- evtchn_op_t op;
- int virq, irq, evtchn;
-
- for ( evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++ )
- mask_evtchn(evtchn); /* New event-channel space is not 'live' yet. */
-
- for ( virq = 0; virq < NR_VIRQS; virq++ )
- {
- if ( (irq = PCPU_GET(virq_to_irq)[virq]) == -1 )
- continue;
-
- /* Get a new binding from Xen. */
- op.cmd = EVTCHNOP_bind_virq;
- op.u.bind_virq.virq = virq;
- if ( HYPERVISOR_event_channel_op(&op) != 0 )
- panic("Failed to bind virtual IRQ %d\n", virq);
- evtchn = op.u.bind_virq.port;
-
- /* Record the new mapping. */
- evtchn_to_irq[evtchn] = irq;
- irq_to_evtchn[irq] = evtchn;
-
- /* Ready for use. */
- unmask_evtchn(evtchn);
- }
-}
-
-void
-ap_evtchn_init(int cpu)
-{
- int i;
-
- /* XXX -- expedience hack */
- PCPU_SET(virq_to_irq, (int *)&virq_to_irq[cpu]);
- PCPU_SET(ipi_to_evtchn, (int *)&ipi_to_evtchn[cpu]);
-
- /* No VIRQ -> IRQ mappings. */
- for ( i = 0; i < NR_VIRQS; i++ )
- PCPU_GET(virq_to_irq)[i] = -1;
-}
-
-
-static void
-evtchn_init(void *dummy __unused)
-{
- int i;
- struct xenpic *xp;
- struct xenpic_intsrc *pin;
-
-
- /* XXX -- expedience hack */
- PCPU_SET(virq_to_irq, (int *)&virq_to_irq[0]);
- PCPU_SET(ipi_to_evtchn, (int *)&ipi_to_evtchn[0]);
-
- /* No VIRQ -> IRQ mappings. */
- for ( i = 0; i < NR_VIRQS; i++ )
- PCPU_GET(virq_to_irq)[i] = -1;
-
- /* No event-channel -> IRQ mappings. */
- for ( i = 0; i < NR_EVENT_CHANNELS; i++ )
- {
- evtchn_to_irq[i] = -1;
- mask_evtchn(i); /* No event channels are 'live' right now. */
- }
-
- /* No IRQ -> event-channel mappings. */
- for ( i = 0; i < NR_IRQS; i++ )
- irq_to_evtchn[i] = -1;
-
- xp = malloc(sizeof(struct xenpic) + NR_DYNIRQS*sizeof(struct
xenpic_intsrc), M_DEVBUF, M_WAITOK);
- xp->xp_pic = xenpic_template;
- xp->xp_numintr = NR_DYNIRQS;
- bzero(xp->xp_pins, sizeof(struct xenpic_intsrc) * NR_DYNIRQS);
-
- for ( i = 0, pin = xp->xp_pins; i < NR_DYNIRQS; i++, pin++ )
- {
- /* Dynamic IRQ space is currently unbound. Zero the refcnts. */
- irq_bindcount[dynirq_to_irq(i)] = 0;
-
- pin->xp_intsrc.is_pic = (struct pic *)xp;
- pin->xp_vector = i;
- intr_register_source(&pin->xp_intsrc);
- }
- /* We don't currently have any support for physical devices in XenoFreeBSD
- * so leaving this out for the moment for the sake of expediency.
- */
-#ifdef notyet
- for ( i = 0; i < NR_PIRQS; i++ )
- {
- /* Phys IRQ space is statically bound (1:1 mapping). Nail refcnts. */
- irq_bindcount[pirq_to_irq(i)] = 1;
-
- irq_desc[pirq_to_irq(i)].status = IRQ_DISABLED;
- irq_desc[pirq_to_irq(i)].action = 0;
- irq_desc[pirq_to_irq(i)].depth = 1;
- irq_desc[pirq_to_irq(i)].handler = &pirq_type;
- }
-
-#endif
-#if 0
- (void) intr_add_handler("xb_mis", bind_virq_to_irq(VIRQ_MISDIRECT),
- (driver_intr_t *)misdirect_interrupt,
- NULL, INTR_TYPE_MISC, NULL);
-
-#endif
-}
-
-SYSINIT(evtchn_init, SI_SUB_INTR, SI_ORDER_ANY, evtchn_init, NULL);
- /*
- * xenpic_lock: in order to allow an interrupt to occur in a critical
- * section, to set pcpu->ipending (etc...) properly, we
- * must be able to get the icu lock, so it can't be
- * under witness.
- */
-
-MTX_SYSINIT(irq_mapping_update_lock, &irq_mapping_update_lock, "xp",
MTX_DEF|MTX_NOWITNESS);
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/i386-xen/exception.s
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/exception.s Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,438 +0,0 @@
-/*-
- * Copyright (c) 1989, 1990 William F. Jolitz.
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/i386/i386/exception.s,v 1.106 2003/11/03 22:08:52 jhb Exp
$
- */
-
-#include "opt_npx.h"
-
-#include <machine/asmacros.h>
-#include <machine/psl.h>
-#include <machine/trap.h>
-
-#include "assym.s"
-
-#define SEL_RPL_MASK 0x0002
-/* Offsets into shared_info_t. */
-#define evtchn_upcall_pending /* 0 */
-#define evtchn_upcall_mask 1
-#define XEN_BLOCK_EVENTS(reg) movb $1,evtchn_upcall_mask(reg)
-#define XEN_UNBLOCK_EVENTS(reg) movb $0,evtchn_upcall_mask(reg)
-#define XEN_TEST_PENDING(reg) testb $0x1,evtchn_upcall_pending(reg)
-
-
-#define POPA \
- popl %edi; \
- popl %esi; \
- popl %ebp; \
- popl %ebx; \
- popl %ebx; \
- popl %edx; \
- popl %ecx; \
- popl %eax;
-
- .text
-
-/*****************************************************************************/
-/* Trap handling */
-/*****************************************************************************/
-/*
- * Trap and fault vector routines.
- *
- * Most traps are 'trap gates', SDT_SYS386TGT. A trap gate pushes state on
- * the stack that mostly looks like an interrupt, but does not disable
- * interrupts. A few of the traps we are use are interrupt gates,
- * SDT_SYS386IGT, which are nearly the same thing except interrupts are
- * disabled on entry.
- *
- * The cpu will push a certain amount of state onto the kernel stack for
- * the current process. The amount of state depends on the type of trap
- * and whether the trap crossed rings or not. See i386/include/frame.h.
- * At the very least the current EFLAGS (status register, which includes
- * the interrupt disable state prior to the trap), the code segment register,
- * and the return instruction pointer are pushed by the cpu. The cpu
- * will also push an 'error' code for certain traps. We push a dummy
- * error code for those traps where the cpu doesn't in order to maintain
- * a consistent frame. We also push a contrived 'trap number'.
- *
- * The cpu does not push the general registers, we must do that, and we
- * must restore them prior to calling 'iret'. The cpu adjusts the %cs and
- * %ss segment registers, but does not mess with %ds, %es, or %fs. Thus we
- * must load them with appropriate values for supervisor mode operation.
- */
-
-MCOUNT_LABEL(user)
-MCOUNT_LABEL(btrap)
-
-IDTVEC(div)
- pushl $0; TRAP(T_DIVIDE)
-IDTVEC(dbg)
- pushl $0; TRAP(T_TRCTRAP)
-IDTVEC(nmi)
- pushl $0; TRAP(T_NMI)
-IDTVEC(bpt)
- pushl $0; TRAP(T_BPTFLT)
-IDTVEC(ofl)
- pushl $0; TRAP(T_OFLOW)
-IDTVEC(bnd)
- pushl $0; TRAP(T_BOUND)
-IDTVEC(ill)
- pushl $0; TRAP(T_PRIVINFLT)
-IDTVEC(dna)
- pushl $0; TRAP(T_DNA)
-IDTVEC(fpusegm)
- pushl $0; TRAP(T_FPOPFLT)
-IDTVEC(tss)
- TRAP(T_TSSFLT)
-IDTVEC(missing)
- TRAP(T_SEGNPFLT)
-IDTVEC(stk)
- TRAP(T_STKFLT)
-IDTVEC(prot)
- TRAP(T_PROTFLT)
-IDTVEC(page)
- pushl %eax
- movl 4(%esp),%eax
- movl %eax,-44(%esp) # move cr2 after trap frame
- popl %eax
- addl $4,%esp
- TRAP(T_PAGEFLT)
-IDTVEC(mchk)
- pushl $0; TRAP(T_MCHK)
-IDTVEC(rsvd)
- pushl $0; TRAP(T_RESERVED)
-IDTVEC(fpu)
- pushl $0; TRAP(T_ARITHTRAP)
-IDTVEC(align)
- TRAP(T_ALIGNFLT)
-
-IDTVEC(xmm)
- pushl $0; TRAP(T_XMMFLT)
-
-IDTVEC(hypervisor_callback)
- pushl %eax; TRAP(T_HYPCALLBACK)
-
-hypervisor_callback_pending:
- movl $T_HYPCALLBACK,TF_TRAPNO(%esp)
- movl $T_HYPCALLBACK,TF_ERR(%esp)
- jmp 11f
-
- /*
- * alltraps entry point. Interrupts are enabled if this was a trap
- * gate (TGT), else disabled if this was an interrupt gate (IGT).
- * Note that int0x80_syscall is a trap gate. Only page faults
- * use an interrupt gate.
- */
-
- SUPERALIGN_TEXT
- .globl alltraps
- .type alltraps,@function
-alltraps:
- cld
- pushal
- pushl %ds
- pushl %es
- pushl %fs
-alltraps_with_regs_pushed:
- movl $KDSEL,%eax
- movl %eax,%ds
- movl %eax,%es
- movl $KPSEL,%eax
- movl %eax,%fs
- FAKE_MCOUNT(TF_EIP(%esp))
-save_cr2:
- movl TF_TRAPNO(%esp),%eax
- cmpl $T_PAGEFLT,%eax
- jne calltrap
- movl -4(%esp),%eax
- movl %eax,PCPU(CR2)
-calltrap:
- movl TF_EIP(%esp),%eax
- cmpl $scrit,%eax
- jb 11f
- cmpl $ecrit,%eax
- jb critical_region_fixup
-11: call trap
-
- /*
- * Return via doreti to handle ASTs.
- */
- MEXITCOUNT
- jmp doreti
-
-/*
- * SYSCALL CALL GATE (old entry point for a.out binaries)
- *
- * The intersegment call has been set up to specify one dummy parameter.
- *
- * This leaves a place to put eflags so that the call frame can be
- * converted to a trap frame. Note that the eflags is (semi-)bogusly
- * pushed into (what will be) tf_err and then copied later into the
- * final spot. It has to be done this way because esp can't be just
- * temporarily altered for the pushfl - an interrupt might come in
- * and clobber the saved cs/eip.
- */
- SUPERALIGN_TEXT
-IDTVEC(lcall_syscall)
- pushfl /* save eflags */
- popl 8(%esp) /* shuffle into tf_eflags */
- pushl $7 /* sizeof "lcall 7,0" */
- subl $4,%esp /* skip over tf_trapno */
- pushal
- pushl %ds
- pushl %es
- pushl %fs
- movl $KDSEL,%eax /* switch to kernel segments */
- movl %eax,%ds
- movl %eax,%es
- movl $KPSEL,%eax
- movl %eax,%fs
- FAKE_MCOUNT(TF_EIP(%esp))
- call syscall
- MEXITCOUNT
- jmp doreti
-
-/*
- * Call gate entry for FreeBSD ELF and Linux/NetBSD syscall (int 0x80)
- *
- * Even though the name says 'int0x80', this is actually a TGT (trap gate)
- * rather then an IGT (interrupt gate). Thus interrupts are enabled on
- * entry just as they are for a normal syscall.
- */
- SUPERALIGN_TEXT
-IDTVEC(int0x80_syscall)
- pushl $2 /* sizeof "int 0x80" */
- pushl $0xBEEF
- pushal
- pushl %ds
- pushl %es
- pushl %fs
- movl $KDSEL,%eax /* switch to kernel segments */
- movl %eax,%ds
- movl %eax,%es
- movl $KPSEL,%eax
- movl %eax,%fs
- FAKE_MCOUNT(TF_EIP(%esp))
- call syscall
- MEXITCOUNT
- jmp doreti
-
-ENTRY(fork_trampoline)
- pushl %esp /* trapframe pointer */
- pushl %ebx /* arg1 */
- pushl %esi /* function */
- call fork_exit
- addl $12,%esp
- /* cut from syscall */
-
- /*
- * Return via doreti to handle ASTs.
- */
- MEXITCOUNT
- jmp doreti
-
-
-/*
-# A note on the "critical region" in our callback handler.
-# We want to avoid stacking callback handlers due to events occurring
-# during handling of the last event. To do this, we keep events disabled
-# until weve done all processing. HOWEVER, we must enable events before
-# popping the stack frame (cant be done atomically) and so it would still
-# be possible to get enough handler activations to overflow the stack.
-# Although unlikely, bugs of that kind are hard to track down, so wed
-# like to avoid the possibility.
-# So, on entry to the handler we detect whether we interrupted an
-# existing activation in its critical region -- if so, we pop the current
-# activation and restart the handler using the previous one.
-*/
-
-
-/*
- * void doreti(struct trapframe)
- *
- * Handle return from interrupts, traps and syscalls.
- */
- .text
- SUPERALIGN_TEXT
- .globl doreti
- .type doreti,@function
-doreti:
- FAKE_MCOUNT(bintr) /* init "from" bintr -> doreti */
-doreti_next:
- testb $SEL_RPL_MASK,TF_CS(%esp) /* are we returning to user mode? */
- jz doreti_exit /* #can't handle ASTs now if not */
-
-doreti_ast:
- /*
- * Check for ASTs atomically with returning. Disabling CPU
- * interrupts provides sufficient locking even in the SMP case,
- * since we will be informed of any new ASTs by an IPI.
- */
-
- movl HYPERVISOR_shared_info,%esi
- XEN_BLOCK_EVENTS(%esi)
- movl PCPU(CURTHREAD),%eax
- testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%eax)
- je doreti_exit
- XEN_UNBLOCK_EVENTS(%esi)
- pushl %esp /* pass a pointer to the trapframe */
- call ast
- add $4,%esp
- jmp doreti_ast
-
-doreti_exit:
- /*
- * doreti_exit: pop registers, iret.
- *
- * The segment register pop is a special case, since it may
- * fault if (for example) a sigreturn specifies bad segment
- * registers. The fault is handled in trap.c.
- */
-
- movl HYPERVISOR_shared_info,%esi
- XEN_UNBLOCK_EVENTS(%esi) # reenable event callbacks (sti)
-
- .globl scrit
-scrit:
- XEN_TEST_PENDING(%esi)
- jnz hypervisor_callback_pending /* More to go */
- MEXITCOUNT
-
- .globl doreti_popl_fs
-doreti_popl_fs:
- popl %fs
- .globl doreti_popl_es
-doreti_popl_es:
- popl %es
- .globl doreti_popl_ds
-doreti_popl_ds:
- popl %ds
- POPA
- addl $8,%esp
- .globl doreti_iret
-doreti_iret:
- iret
- .globl ecrit
-ecrit:
-
- /*
- * doreti_iret_fault and friends. Alternative return code for
- * the case where we get a fault in the doreti_exit code
- * above. trap() (i386/i386/trap.c) catches this specific
- * case, sends the process a signal and continues in the
- * corresponding place in the code below.
- */
- ALIGN_TEXT
- .globl doreti_iret_fault
-doreti_iret_fault:
- subl $8,%esp
- pushal
- pushl %ds
- .globl doreti_popl_ds_fault
-doreti_popl_ds_fault:
- pushl %es
- .globl doreti_popl_es_fault
-doreti_popl_es_fault:
- pushl %fs
- .globl doreti_popl_fs_fault
-doreti_popl_fs_fault:
- movl $0,TF_ERR(%esp) /* XXX should be the error code */
- movl $T_PROTFLT,TF_TRAPNO(%esp)
- jmp alltraps_with_regs_pushed
-
-
-
-
-/*
-# [How we do the fixup]. We want to merge the current stack frame with the
-# just-interrupted frame. How we do this depends on where in the critical
-# region the interrupted handler was executing, and so how many saved
-# registers are in each frame. We do this quickly using the lookup table
-# 'critical_fixup_table'. For each byte offset in the critical region, it
-# provides the number of bytes which have already been popped from the
-# interrupted stack frame.
-*/
-
-.globl critical_region_fixup
-critical_region_fixup:
- addl $critical_fixup_table-scrit,%eax
- movzbl (%eax),%eax # %eax contains num bytes popped
- movl %esp,%esi
- add %eax,%esi # %esi points at end of src region
- movl %esp,%edi
- add $0x40,%edi # %edi points at end of dst region
- movl %eax,%ecx
- shr $2,%ecx # convert bytes to words
- je 16f # skip loop if nothing to copy
-15: subl $4,%esi # pre-decrementing copy loop
- subl $4,%edi
- movl (%esi),%eax
- movl %eax,(%edi)
- loop 15b
-16: movl %edi,%esp # final %edi is top of merged stack
- jmp hypervisor_callback_pending
-
-
-critical_fixup_table:
-.byte 0x0,0x0,0x0 #testb $0x1,(%esi)
-.byte 0x0,0x0,0x0,0x0,0x0,0x0 #jne ea
-.byte 0x0,0x0 #pop %fs
-.byte 0x04 #pop %es
-.byte 0x08 #pop %ds
-.byte 0x0c #pop %edi
-.byte 0x10 #pop %esi
-.byte 0x14 #pop %ebp
-.byte 0x18 #pop %ebx
-.byte 0x1c #pop %ebx
-.byte 0x20 #pop %edx
-.byte 0x24 #pop %ecx
-.byte 0x28 #pop %eax
-.byte 0x2c,0x2c,0x2c #add $0x8,%esp
-.byte 0x34 #iret
-
-
-/* # Hypervisor uses this for application faults while it executes.*/
-ENTRY(failsafe_callback)
- pushal
- call xen_failsafe_handler
-/*# call install_safe_pf_handler */
- movl 28(%esp),%ebx
-1: movl %ebx,%ds
- movl 32(%esp),%ebx
-2: movl %ebx,%es
- movl 36(%esp),%ebx
-3: movl %ebx,%fs
- movl 40(%esp),%ebx
-4: movl %ebx,%gs
-/*# call install_normal_pf_handler */
- popal
- addl $12,%esp
- iret
-
-
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/i386-xen/genassym.c
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/genassym.c Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,233 +0,0 @@
-/*-
- * Copyright (c) 1982, 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * William Jolitz.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * from: @(#)genassym.c 5.11 (Berkeley) 5/10/91
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/genassym.c,v 1.146 2003/11/12 18:14:34
jhb Exp $");
-
-#include "opt_apic.h"
-#include "opt_compat.h"
-#include "opt_kstack_pages.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/assym.h>
-#include <sys/bio.h>
-#include <sys/buf.h>
-#include <sys/proc.h>
-#include <sys/errno.h>
-#include <sys/mount.h>
-#include <sys/mutex.h>
-#include <sys/socket.h>
-#include <sys/resourcevar.h>
-#include <sys/ucontext.h>
-#include <sys/user.h>
-#include <machine/bootinfo.h>
-#include <machine/tss.h>
-#include <sys/vmmeter.h>
-#include <vm/vm.h>
-#include <vm/vm_param.h>
-#include <vm/pmap.h>
-#include <vm/vm_map.h>
-#include <sys/user.h>
-#include <sys/proc.h>
-#include <net/if.h>
-#include <netinet/in.h>
-#include <nfs/nfsproto.h>
-#include <nfs/rpcv2.h>
-#include <nfsclient/nfs.h>
-#include <nfsclient/nfsdiskless.h>
-#ifdef DEV_APIC
-#include <machine/apicreg.h>
-#endif
-#include <machine/cpu.h>
-#include <machine/sigframe.h>
-#include <machine/proc.h>
-
-ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace));
-ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap));
-ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active));
-ASSYM(P_SFLAG, offsetof(struct proc, p_sflag));
-ASSYM(P_UAREA, offsetof(struct proc, p_uarea));
-
-ASSYM(TD_FLAGS, offsetof(struct thread, td_flags));
-ASSYM(TD_PCB, offsetof(struct thread, td_pcb));
-ASSYM(TD_PROC, offsetof(struct thread, td_proc));
-ASSYM(TD_MD, offsetof(struct thread, td_md));
-
-ASSYM(P_MD, offsetof(struct proc, p_md));
-ASSYM(MD_LDT, offsetof(struct mdproc, md_ldt));
-
-ASSYM(TDF_ASTPENDING, TDF_ASTPENDING);
-ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED);
-
-ASSYM(V_TRAP, offsetof(struct vmmeter, v_trap));
-ASSYM(V_SYSCALL, offsetof(struct vmmeter, v_syscall));
-ASSYM(V_INTR, offsetof(struct vmmeter, v_intr));
-/* ASSYM(UPAGES, UPAGES);*/
-ASSYM(UAREA_PAGES, UAREA_PAGES);
-ASSYM(KSTACK_PAGES, KSTACK_PAGES);
-ASSYM(PAGE_SIZE, PAGE_SIZE);
-ASSYM(NPTEPG, NPTEPG);
-ASSYM(NPDEPG, NPDEPG);
-ASSYM(NPDEPTD, NPDEPTD);
-ASSYM(NPGPTD, NPGPTD);
-ASSYM(PDESIZE, sizeof(pd_entry_t));
-ASSYM(PTESIZE, sizeof(pt_entry_t));
-ASSYM(PDESHIFT, PDESHIFT);
-ASSYM(PTESHIFT, PTESHIFT);
-ASSYM(PAGE_SHIFT, PAGE_SHIFT);
-ASSYM(PAGE_MASK, PAGE_MASK);
-ASSYM(PDRSHIFT, PDRSHIFT);
-ASSYM(PDRMASK, PDRMASK);
-ASSYM(USRSTACK, USRSTACK);
-ASSYM(VM_MAXUSER_ADDRESS, VM_MAXUSER_ADDRESS);
-ASSYM(KERNBASE, KERNBASE);
-ASSYM(KERNLOAD, KERNLOAD);
-ASSYM(MCLBYTES, MCLBYTES);
-ASSYM(PCB_CR3, offsetof(struct pcb, pcb_cr3));
-ASSYM(PCB_EDI, offsetof(struct pcb, pcb_edi));
-ASSYM(PCB_ESI, offsetof(struct pcb, pcb_esi));
-ASSYM(PCB_EBP, offsetof(struct pcb, pcb_ebp));
-ASSYM(PCB_ESP, offsetof(struct pcb, pcb_esp));
-ASSYM(PCB_EBX, offsetof(struct pcb, pcb_ebx));
-ASSYM(PCB_EIP, offsetof(struct pcb, pcb_eip));
-ASSYM(TSS_ESP0, offsetof(struct i386tss, tss_esp0));
-
-ASSYM(PCB_GS, offsetof(struct pcb, pcb_gs));
-ASSYM(PCB_DR0, offsetof(struct pcb, pcb_dr0));
-ASSYM(PCB_DR1, offsetof(struct pcb, pcb_dr1));
-ASSYM(PCB_DR2, offsetof(struct pcb, pcb_dr2));
-ASSYM(PCB_DR3, offsetof(struct pcb, pcb_dr3));
-ASSYM(PCB_DR6, offsetof(struct pcb, pcb_dr6));
-ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7));
-ASSYM(PCB_PSL, offsetof(struct pcb, pcb_psl));
-ASSYM(PCB_DBREGS, PCB_DBREGS);
-ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext));
-
-ASSYM(PCB_SPARE, offsetof(struct pcb, __pcb_spare));
-ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
-ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save));
-ASSYM(PCB_SAVEFPU_SIZE, sizeof(union savefpu));
-ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault));
-ASSYM(PCB_SWITCHOUT, offsetof(struct pcb, pcb_switchout));
-
-ASSYM(PCB_SIZE, sizeof(struct pcb));
-
-ASSYM(TF_TRAPNO, offsetof(struct trapframe, tf_trapno));
-ASSYM(TF_ERR, offsetof(struct trapframe, tf_err));
-ASSYM(TF_CS, offsetof(struct trapframe, tf_cs));
-ASSYM(TF_EFLAGS, offsetof(struct trapframe, tf_eflags));
-ASSYM(TF_EIP, offsetof(struct trapframe, tf_eip));
-ASSYM(SIGF_HANDLER, offsetof(struct sigframe, sf_ahu.sf_handler));
-#ifdef COMPAT_43
-ASSYM(SIGF_SC, offsetof(struct osigframe, sf_siginfo.si_sc));
-#endif
-ASSYM(SIGF_UC, offsetof(struct sigframe, sf_uc));
-#ifdef COMPAT_FREEBSD4
-ASSYM(SIGF_UC4, offsetof(struct sigframe4, sf_uc));
-#endif
-#ifdef COMPAT_43
-ASSYM(SC_PS, offsetof(struct osigcontext, sc_ps));
-ASSYM(SC_FS, offsetof(struct osigcontext, sc_fs));
-ASSYM(SC_GS, offsetof(struct osigcontext, sc_gs));
-ASSYM(SC_TRAPNO, offsetof(struct osigcontext, sc_trapno));
-#endif
-#ifdef COMPAT_FREEBSD4
-ASSYM(UC4_EFLAGS, offsetof(struct ucontext4, uc_mcontext.mc_eflags));
-ASSYM(UC4_GS, offsetof(struct ucontext4, uc_mcontext.mc_gs));
-#endif
-ASSYM(UC_EFLAGS, offsetof(ucontext_t, uc_mcontext.mc_eflags));
-ASSYM(UC_GS, offsetof(ucontext_t, uc_mcontext.mc_gs));
-ASSYM(ENOENT, ENOENT);
-ASSYM(EFAULT, EFAULT);
-ASSYM(ENAMETOOLONG, ENAMETOOLONG);
-ASSYM(MAXCOMLEN, MAXCOMLEN);
-ASSYM(MAXPATHLEN, MAXPATHLEN);
-ASSYM(BOOTINFO_SIZE, sizeof(struct bootinfo));
-ASSYM(BI_VERSION, offsetof(struct bootinfo, bi_version));
-ASSYM(BI_KERNELNAME, offsetof(struct bootinfo, bi_kernelname));
-ASSYM(BI_NFS_DISKLESS, offsetof(struct bootinfo, bi_nfs_diskless));
-ASSYM(BI_ENDCOMMON, offsetof(struct bootinfo, bi_endcommon));
-ASSYM(NFSDISKLESS_SIZE, sizeof(struct nfs_diskless));
-ASSYM(BI_SIZE, offsetof(struct bootinfo, bi_size));
-ASSYM(BI_SYMTAB, offsetof(struct bootinfo, bi_symtab));
-ASSYM(BI_ESYMTAB, offsetof(struct bootinfo, bi_esymtab));
-ASSYM(BI_KERNEND, offsetof(struct bootinfo, bi_kernend));
-ASSYM(PC_SIZEOF, sizeof(struct pcpu));
-ASSYM(PC_PRVSPACE, offsetof(struct pcpu, pc_prvspace));
-ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread));
-ASSYM(PC_FPCURTHREAD, offsetof(struct pcpu, pc_fpcurthread));
-ASSYM(PC_IDLETHREAD, offsetof(struct pcpu, pc_idlethread));
-ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb));
-ASSYM(PC_COMMON_TSS, offsetof(struct pcpu, pc_common_tss));
-ASSYM(PC_COMMON_TSSD, offsetof(struct pcpu, pc_common_tssd));
-ASSYM(PC_TSS_GDT, offsetof(struct pcpu, pc_tss_gdt));
-ASSYM(PC_CURRENTLDT, offsetof(struct pcpu, pc_currentldt));
-ASSYM(PC_CPUID, offsetof(struct pcpu, pc_cpuid));
-ASSYM(PC_CURPMAP, offsetof(struct pcpu, pc_curpmap));
-ASSYM(PC_CR2, offsetof(struct pcpu, pc_cr2));
-ASSYM(PC_CR3, offsetof(struct pcpu, pc_pdir));
-
-#ifdef DEV_APIC
-ASSYM(LA_VER, offsetof(struct LAPIC, version));
-ASSYM(LA_TPR, offsetof(struct LAPIC, tpr));
-ASSYM(LA_EOI, offsetof(struct LAPIC, eoi));
-ASSYM(LA_SVR, offsetof(struct LAPIC, svr));
-ASSYM(LA_ICR_LO, offsetof(struct LAPIC, icr_lo));
-ASSYM(LA_ICR_HI, offsetof(struct LAPIC, icr_hi));
-ASSYM(LA_ISR, offsetof(struct LAPIC, isr0));
-#endif
-
-ASSYM(KCSEL, GSEL(GCODE_SEL, SEL_KPL));
-ASSYM(KDSEL, GSEL(GDATA_SEL, SEL_KPL));
-ASSYM(KPSEL, GSEL(GPRIV_SEL, SEL_KPL));
-
-ASSYM(BC32SEL, GSEL(GBIOSCODE32_SEL, SEL_KPL));
-ASSYM(GPROC0_SEL, GPROC0_SEL);
-
-ASSYM(MTX_LOCK, offsetof(struct mtx, mtx_lock));
-ASSYM(MTX_RECURSECNT, offsetof(struct mtx, mtx_recurse));
-
-#ifdef PC98
-#include <machine/bus.h>
-
-ASSYM(BUS_SPACE_HANDLE_BASE, offsetof(struct bus_space_handle, bsh_base));
-ASSYM(BUS_SPACE_HANDLE_IAT, offsetof(struct bus_space_handle, bsh_iat));
-#endif
-
-ASSYM(HYPERVISOR_STACK_SWITCH, __HYPERVISOR_stack_switch);
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/i386-xen/gnttab.c
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/gnttab.c Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,367 +0,0 @@
-/******************************************************************************
- * gnttab.c
- *
- * Two sets of functionality:
- * 1. Granting foreign access to our memory reservation.
- * 2. Accessing others' memory reservations via grant references.
- * (i.e., mechanisms for both sender and recipient of grant references)
- *
- * Copyright (c) 2005, Christopher Clark
- * Copyright (c) 2004, K A Fraser
- */
-
-#include "opt_pmap.h"
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/bus.h>
-#include <sys/conf.h>
-#include <sys/module.h>
-#include <sys/kernel.h>
-#include <sys/lock.h>
-#include <sys/malloc.h>
-#include <sys/mman.h>
-#include <vm/vm.h>
-#include <vm/vm_extern.h>
-#include <vm/pmap.h>
-#include <vm/vm_kern.h>
-
-#include <machine/gnttab.h>
-#include <machine/pmap.h>
-
-#include <machine/hypervisor-ifs.h>
-
-#define cmpxchg(a, b, c) atomic_cmpset_int((volatile u_int *)(a),(b),(c))
-
-
-/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
-static inline void rep_nop(void)
-{
- __asm__ __volatile__ ( "rep;nop" : : : "memory" );
-}
-#define cpu_relax() rep_nop()
-
-#if 1
-#define ASSERT(_p) \
- if ( !(_p) ) { printk("Assertion '%s': line %d, file %s\n", \
- #_p , __LINE__, __FILE__); *(int*)0=0; }
-#else
-#define ASSERT(_p) ((void)0)
-#endif
-
-#define WPRINTK(fmt, args...) \
- printk("xen_grant: " fmt, ##args)
-
-static grant_ref_t gnttab_free_list[NR_GRANT_ENTRIES];
-static grant_ref_t gnttab_free_head;
-
-static grant_entry_t *shared;
-#if 0
-/* /proc/xen/grant */
-static struct proc_dir_entry *grant_pde;
-#endif
-
-/*
- * Lock-free grant-entry allocator
- */
-
-static inline int
-get_free_entry(void)
-{
- grant_ref_t fh, nfh = gnttab_free_head;
- do { if ( unlikely((fh = nfh) == NR_GRANT_ENTRIES) ) return -1; }
- while ( unlikely((nfh = cmpxchg(&gnttab_free_head, fh,
- gnttab_free_list[fh])) != fh) );
- return fh;
-}
-
-static inline void
-put_free_entry(grant_ref_t ref)
-{
- grant_ref_t fh, nfh = gnttab_free_head;
- do { gnttab_free_list[ref] = fh = nfh; wmb(); }
- while ( unlikely((nfh = cmpxchg(&gnttab_free_head, fh, ref)) != fh) );
-}
-
-/*
- * Public grant-issuing interface functions
- */
-
-int
-gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly)
-{
- int ref;
-
- if ( unlikely((ref = get_free_entry()) == -1) )
- return -ENOSPC;
-
- shared[ref].frame = frame;
- shared[ref].domid = domid;
- wmb();
- shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
-
- return ref;
-}
-
-void
-gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
- unsigned long frame, int readonly)
-{
- shared[ref].frame = frame;
- shared[ref].domid = domid;
- wmb();
- shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
-}
-
-
-int
-gnttab_query_foreign_access(grant_ref_t ref)
-{
- uint16_t nflags;
-
- nflags = shared[ref].flags;
-
- return (nflags & (GTF_reading|GTF_writing));
-}
-
-void
-gnttab_end_foreign_access(grant_ref_t ref, int readonly)
-{
- uint16_t flags, nflags;
-
- nflags = shared[ref].flags;
- do {
- if ( (flags = nflags) & (GTF_reading|GTF_writing) )
- printk("WARNING: g.e. still in use!\n");
- }
- while ( (nflags = cmpxchg(&shared[ref].flags, flags, 0)) != flags );
-
- put_free_entry(ref);
-}
-
-int
-gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn)
-{
- int ref;
-
- if ( unlikely((ref = get_free_entry()) == -1) )
- return -ENOSPC;
-
- shared[ref].frame = pfn;
- shared[ref].domid = domid;
- wmb();
- shared[ref].flags = GTF_accept_transfer;
-
- return ref;
-}
-
-void
-gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
- unsigned long pfn)
-{
- shared[ref].frame = pfn;
- shared[ref].domid = domid;
- wmb();
- shared[ref].flags = GTF_accept_transfer;
-}
-
-unsigned long
-gnttab_end_foreign_transfer(grant_ref_t ref)
-{
- unsigned long frame = 0;
- uint16_t flags;
-
- flags = shared[ref].flags;
- ASSERT(flags == (GTF_accept_transfer | GTF_transfer_committed));
-
- /*
- * If a transfer is committed then wait for the frame address to appear.
- * Otherwise invalidate the grant entry against future use.
- */
- if ( likely(flags != GTF_accept_transfer) ||
- (cmpxchg(&shared[ref].flags, flags, 0) != GTF_accept_transfer) )
- while ( unlikely((frame = shared[ref].frame) == 0) )
- cpu_relax();
-
- put_free_entry(ref);
-
- return frame;
-}
-
-void
-gnttab_free_grant_references(uint16_t count, grant_ref_t head)
-{
- /* TODO: O(N)...? */
- grant_ref_t to_die = 0, next = head;
- int i;
-
- for ( i = 0; i < count; i++ )
- to_die = next;
- next = gnttab_free_list[next];
- put_free_entry( to_die );
-}
-
-int
-gnttab_alloc_grant_references(uint16_t count, grant_ref_t *head,
- grant_ref_t *terminal)
-{
- int i;
- grant_ref_t h = gnttab_free_head;
-
- for ( i = 0; i < count; i++ )
- if ( unlikely(get_free_entry() == -1) )
- goto not_enough_refs;
-
- *head = h;
- *terminal = gnttab_free_head;
-
- return 0;
-
-not_enough_refs:
- gnttab_free_head = h;
- return -ENOSPC;
-}
-
-int
-gnttab_claim_grant_reference(grant_ref_t *private_head, grant_ref_t terminal )
-{
- grant_ref_t g;
- if ( unlikely((g = *private_head) == terminal) )
- return -ENOSPC;
- *private_head = gnttab_free_list[g];
- return g;
-}
-
-void
-gnttab_release_grant_reference( grant_ref_t *private_head,
- grant_ref_t release )
-{
- gnttab_free_list[release] = *private_head;
- *private_head = release;
-}
-#ifdef notyet
-static int
-grant_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
- int flag, struct thread *td)
-{
-
- int ret;
- privcmd_hypercall_t hypercall;
-
- /* XXX Need safety checks here if using for anything other
- * than debugging */
- return -ENOSYS;
-
- if ( cmd != IOCTL_PRIVCMD_HYPERCALL )
- return -ENOSYS;
-
- if ( copy_from_user(&hypercall, (void *)data, sizeof(hypercall)) )
- return -EFAULT;
-
- if ( hypercall.op != __HYPERVISOR_grant_table_op )
- return -ENOSYS;
-
- /* hypercall-invoking asm taken from privcmd.c */
- __asm__ __volatile__ (
- "pushl %%ebx; pushl %%ecx; pushl %%edx; pushl %%esi; pushl %%edi; "
- "movl 4(%%eax),%%ebx ;"
- "movl 8(%%eax),%%ecx ;"
- "movl 12(%%eax),%%edx ;"
- "movl 16(%%eax),%%esi ;"
- "movl 20(%%eax),%%edi ;"
- "movl (%%eax),%%eax ;"
- TRAP_INSTR "; "
- "popl %%edi; popl %%esi; popl %%edx; popl %%ecx; popl %%ebx"
- : "=a" (ret) : "0" (&hypercall) : "memory" );
-
- return ret;
-
-}
-
-static struct cdevsw gnttab_cdevsw = {
- d_ioctl: grant_ioctl,
-};
-
-static int
-grant_read(char *page, char **start, off_t off,
- int count, int *eof, void *data)
-{
- int len;
- unsigned int i;
- grant_entry_t *gt;
-
- gt = (grant_entry_t *)shared;
- len = 0;
-
- for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
- /* TODO: safety catch here until this can handle >PAGE_SIZE output */
- if (len > (PAGE_SIZE - 200))
- {
- len += sprintf( page + len, "Truncated.\n");
- break;
- }
-
- if ( gt[i].flags )
- len += sprintf( page + len,
- "Grant: ref (0x%x) flags (0x%hx) dom (0x%hx) frame
(0x%x)\n",
- i,
- gt[i].flags,
- gt[i].domid,
- gt[i].frame );
-
- *eof = 1;
- return len;
-}
-
-static int
-grant_write(struct file *file, const char __user *buffer,
- unsigned long count, void *data)
-{
- /* TODO: implement this */
- return -ENOSYS;
-}
-#endif
-static int
-gnttab_init(void *unused)
-{
- gnttab_setup_table_t setup;
- unsigned long frames[NR_GRANT_FRAMES];
- int i;
-
- setup.dom = DOMID_SELF;
- setup.nr_frames = NR_GRANT_FRAMES;
- setup.frame_list = frames;
-
- if (HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1) != 0)
- panic("grant table setup failed\n");
- if (setup.status != 0)
- panic("non-zero status in grant table setup\n");
- shared = (grant_entry_t *)kmem_alloc_nofault(kernel_map, NR_GRANT_FRAMES);
-
- for (i = 0; i < NR_GRANT_FRAMES; i++)
- pmap_kenter_ma((vm_offset_t)(shared + (i*PAGE_SIZE)), frames[i] <<
PAGE_SHIFT);
-
- for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
- gnttab_free_list[i] = i + 1;
-#if 0
- /*
- * /proc/xen/grant : used by libxc to access grant tables
- */
- if ( (grant_pde = create_xen_proc_entry("grant", 0600)) == NULL )
- {
- WPRINTK("Unable to create grant xen proc entry\n");
- return -1;
- }
-
- grant_file_ops.read = grant_pde->proc_fops->read;
- grant_file_ops.write = grant_pde->proc_fops->write;
-
- grant_pde->proc_fops = &grant_file_ops;
-
- grant_pde->read_proc = &grant_read;
- grant_pde->write_proc = &grant_write;
-#endif
- printk("Grant table initialized\n");
- return 0;
-}
-
-SYSINIT(gnttab, SI_SUB_PSEUDO, SI_ORDER_FIRST, gnttab_init, NULL);
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/i386-xen/hypervisor.c
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/hypervisor.c Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,107 +0,0 @@
-/******************************************************************************
- * hypervisor.c
- *
- * Communication to/from hypervisor.
- *
- * Copyright (c) 2002-2003, K A Fraser
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIEAS OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include <machine/xen-os.h>
-#include <machine/hypervisor.h>
-#include <machine/xenvar.h>
-#include <machine/multicall.h>
-
-/* XXX need to verify what the caller save registers are on x86 KMM */
-#define CALLER_SAVE __asm__("pushal; ")
-#define CALLER_RESTORE __asm__("popal;")
-
-
-/* ni == non-inline - these are only intended for use from assembler
- * no reason to have them in a header -
- *
- */
-void ni_queue_multicall0(unsigned long op);
-void ni_queue_multicall1(unsigned long op, unsigned long arg1);
-void ni_queue_multicall2(unsigned long op, unsigned long arg1,
- unsigned long arg2);
-void ni_queue_multicall3(unsigned long op, unsigned long arg1,
- unsigned long arg2, unsigned long arg3);
-void ni_queue_multicall4(unsigned long op, unsigned long arg1,
- unsigned long arg2, unsigned long arg4,
- unsigned long arg5);
-
-void ni_execute_multicall_list(void);
-
-multicall_entry_t multicall_list[MAX_MULTICALL_ENTS];
-int nr_multicall_ents = 0;
-
-
-void
-ni_queue_multicall0(unsigned long op)
-{
- CALLER_SAVE;
- queue_multicall0(op);
- CALLER_RESTORE;
-}
-
-void
-ni_queue_multicall1(unsigned long op, unsigned long arg1)
-{
- CALLER_SAVE;
- queue_multicall1(op, arg1);
- CALLER_RESTORE;
-}
-
-void
-ni_queue_multicall2(unsigned long op, unsigned long arg1,
- unsigned long arg2)
-{
- CALLER_SAVE;
- queue_multicall2(op, arg1, arg2);
- CALLER_RESTORE;
-}
-
-void
-ni_queue_multicall3(unsigned long op, unsigned long arg1,
- unsigned long arg2, unsigned long arg3)
-{
- CALLER_SAVE;
- queue_multicall3(op, arg1, arg2, arg3);
- CALLER_RESTORE;
-}
-
-void
-ni_queue_multicall4(unsigned long op, unsigned long arg1,
- unsigned long arg2, unsigned long arg3,
- unsigned long arg4)
-{
- CALLER_SAVE;
- queue_multicall4(op, arg1, arg2, arg3, arg4);
- CALLER_RESTORE;
-}
-
-void
-ni_execute_multicall_list(void)
-{
- CALLER_SAVE;
- execute_multicall_list();
- CALLER_RESTORE;
-}
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/i386-xen/i686_mem.c
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/i686_mem.c Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,626 +0,0 @@
-/*-
- * Copyright (c) 1999 Michael Smith <msmith@xxxxxxxxxxx>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/i686_mem.c,v 1.23 2003/10/21 18:28:34
silby Exp $");
-
-#include <sys/param.h>
-#include <sys/kernel.h>
-#include <sys/systm.h>
-#include <sys/malloc.h>
-#include <sys/memrange.h>
-#include <sys/smp.h>
-#include <sys/sysctl.h>
-
-#include <machine/md_var.h>
-#include <machine/specialreg.h>
-
-/*
- * i686 memory range operations
- *
- * This code will probably be impenetrable without reference to the
- * Intel Pentium Pro documentation.
- */
-
-static char *mem_owner_bios = "BIOS";
-
-#define MR686_FIXMTRR (1<<0)
-
-#define mrwithin(mr, a) \
- (((a) >= (mr)->mr_base) && ((a) < ((mr)->mr_base + (mr)->mr_len)))
-#define mroverlap(mra, mrb) \
- (mrwithin(mra, mrb->mr_base) || mrwithin(mrb, mra->mr_base))
-
-#define mrvalid(base, len) \
- ((!(base & ((1 << 12) - 1))) && /* base is multiple of 4k */ \
- ((len) >= (1 << 12)) && /* length is >= 4k */ \
- powerof2((len)) && /* ... and power of two */ \
- !((base) & ((len) - 1))) /* range is not discontiuous */
-
-#define mrcopyflags(curr, new) (((curr) & ~MDF_ATTRMASK) | ((new) &
MDF_ATTRMASK))
-
-static int mtrrs_disabled;
-TUNABLE_INT("machdep.disable_mtrrs", &mtrrs_disabled);
-SYSCTL_INT(_machdep, OID_AUTO, disable_mtrrs, CTLFLAG_RDTUN,
- &mtrrs_disabled, 0, "Disable i686 MTRRs.");
-
-static void i686_mrinit(struct mem_range_softc *sc);
-static int i686_mrset(struct mem_range_softc *sc,
- struct mem_range_desc *mrd,
- int *arg);
-static void i686_mrAPinit(struct mem_range_softc *sc);
-
-static struct mem_range_ops i686_mrops = {
- i686_mrinit,
- i686_mrset,
- i686_mrAPinit
-};
-
-/* XXX for AP startup hook */
-static u_int64_t mtrrcap, mtrrdef;
-
-static struct mem_range_desc *mem_range_match(struct mem_range_softc *sc,
- struct mem_range_desc *mrd);
-static void i686_mrfetch(struct mem_range_softc *sc);
-static int i686_mtrrtype(int flags);
-#if 0
-static int i686_mrt2mtrr(int flags, int oldval);
-#endif
-static int i686_mtrrconflict(int flag1, int flag2);
-static void i686_mrstore(struct mem_range_softc *sc);
-static void i686_mrstoreone(void *arg);
-static struct mem_range_desc *i686_mtrrfixsearch(struct mem_range_softc *sc,
- u_int64_t addr);
-static int i686_mrsetlow(struct mem_range_softc *sc,
- struct mem_range_desc *mrd,
- int *arg);
-static int i686_mrsetvariable(struct mem_range_softc *sc,
- struct mem_range_desc *mrd,
- int *arg);
-
-/* i686 MTRR type to memory range type conversion */
-static int i686_mtrrtomrt[] = {
- MDF_UNCACHEABLE,
- MDF_WRITECOMBINE,
- MDF_UNKNOWN,
- MDF_UNKNOWN,
- MDF_WRITETHROUGH,
- MDF_WRITEPROTECT,
- MDF_WRITEBACK
-};
-
-#define MTRRTOMRTLEN (sizeof(i686_mtrrtomrt) / sizeof(i686_mtrrtomrt[0]))
-
-static int
-i686_mtrr2mrt(int val) {
- if (val < 0 || val >= MTRRTOMRTLEN)
- return MDF_UNKNOWN;
- return i686_mtrrtomrt[val];
-}
-
-/*
- * i686 MTRR conflicts. Writeback and uncachable may overlap.
- */
-static int
-i686_mtrrconflict(int flag1, int flag2) {
- flag1 &= MDF_ATTRMASK;
- flag2 &= MDF_ATTRMASK;
- if (flag1 == flag2 ||
- (flag1 == MDF_WRITEBACK && flag2 == MDF_UNCACHEABLE) ||
- (flag2 == MDF_WRITEBACK && flag1 == MDF_UNCACHEABLE))
- return 0;
- return 1;
-}
-
-/*
- * Look for an exactly-matching range.
- */
-static struct mem_range_desc *
-mem_range_match(struct mem_range_softc *sc, struct mem_range_desc *mrd)
-{
- struct mem_range_desc *cand;
- int i;
-
- for (i = 0, cand = sc->mr_desc; i < sc->mr_ndesc; i++, cand++)
- if ((cand->mr_base == mrd->mr_base) &&
- (cand->mr_len == mrd->mr_len))
- return(cand);
- return(NULL);
-}
-
-/*
- * Fetch the current mtrr settings from the current CPU (assumed to all
- * be in sync in the SMP case). Note that if we are here, we assume
- * that MTRRs are enabled, and we may or may not have fixed MTRRs.
- */
-static void
-i686_mrfetch(struct mem_range_softc *sc)
-{
- struct mem_range_desc *mrd;
- u_int64_t msrv;
- int i, j, msr;
-
- mrd = sc->mr_desc;
-
- /* Get fixed-range MTRRs */
- if (sc->mr_cap & MR686_FIXMTRR) {
- msr = MSR_MTRR64kBase;
- for (i = 0; i < (MTRR_N64K / 8); i++, msr++) {
- msrv = rdmsr(msr);
- for (j = 0; j < 8; j++, mrd++) {
- mrd->mr_flags = (mrd->mr_flags & ~MDF_ATTRMASK) |
- i686_mtrr2mrt(msrv & 0xff) |
- MDF_ACTIVE;
- if (mrd->mr_owner[0] == 0)
- strcpy(mrd->mr_owner, mem_owner_bios);
- msrv = msrv >> 8;
- }
- }
- msr = MSR_MTRR16kBase;
- for (i = 0; i < (MTRR_N16K / 8); i++, msr++) {
- msrv = rdmsr(msr);
- for (j = 0; j < 8; j++, mrd++) {
- mrd->mr_flags = (mrd->mr_flags & ~MDF_ATTRMASK) |
- i686_mtrr2mrt(msrv & 0xff) |
- MDF_ACTIVE;
- if (mrd->mr_owner[0] == 0)
- strcpy(mrd->mr_owner, mem_owner_bios);
- msrv = msrv >> 8;
- }
- }
- msr = MSR_MTRR4kBase;
- for (i = 0; i < (MTRR_N4K / 8); i++, msr++) {
- msrv = rdmsr(msr);
- for (j = 0; j < 8; j++, mrd++) {
- mrd->mr_flags = (mrd->mr_flags & ~MDF_ATTRMASK) |
- i686_mtrr2mrt(msrv & 0xff) |
- MDF_ACTIVE;
- if (mrd->mr_owner[0] == 0)
- strcpy(mrd->mr_owner, mem_owner_bios);
- msrv = msrv >> 8;
- }
- }
- }
-
- /* Get remainder which must be variable MTRRs */
- msr = MSR_MTRRVarBase;
- for (; (mrd - sc->mr_desc) < sc->mr_ndesc; msr += 2, mrd++) {
- msrv = rdmsr(msr);
- mrd->mr_flags = (mrd->mr_flags & ~MDF_ATTRMASK) |
- i686_mtrr2mrt(msrv & 0xff);
- mrd->mr_base = msrv & 0x0000000ffffff000LL;
- msrv = rdmsr(msr + 1);
- mrd->mr_flags = (msrv & 0x800) ?
- (mrd->mr_flags | MDF_ACTIVE) :
- (mrd->mr_flags & ~MDF_ACTIVE);
- /* Compute the range from the mask. Ick. */
- mrd->mr_len = (~(msrv & 0x0000000ffffff000LL) & 0x0000000fffffffffLL) +
1;
- if (!mrvalid(mrd->mr_base, mrd->mr_len))
- mrd->mr_flags |= MDF_BOGUS;
- /* If unclaimed and active, must be the BIOS */
- if ((mrd->mr_flags & MDF_ACTIVE) && (mrd->mr_owner[0] == 0))
- strcpy(mrd->mr_owner, mem_owner_bios);
- }
-}
-
-/*
- * Return the MTRR memory type matching a region's flags
- */
-static int
-i686_mtrrtype(int flags)
-{
- int i;
-
- flags &= MDF_ATTRMASK;
-
- for (i = 0; i < MTRRTOMRTLEN; i++) {
- if (i686_mtrrtomrt[i] == MDF_UNKNOWN)
- continue;
- if (flags == i686_mtrrtomrt[i])
- return(i);
- }
- return(-1);
-}
-#if 0
-static int
-i686_mrt2mtrr(int flags, int oldval)
-{
- int val;
-
- if ((val = i686_mtrrtype(flags)) == -1)
- return oldval & 0xff;
- return val & 0xff;
-}
-#endif
-/*
- * Update running CPU(s) MTRRs to match the ranges in the descriptor
- * list.
- *
- * XXX Must be called with interrupts enabled.
- */
-static void
-i686_mrstore(struct mem_range_softc *sc)
-{
-#ifdef SMP
- /*
- * We should use ipi_all_but_self() to call other CPUs into a
- * locking gate, then call a target function to do this work.
- * The "proper" solution involves a generalised locking gate
- * implementation, not ready yet.
- */
- smp_rendezvous(NULL, i686_mrstoreone, NULL, (void *)sc);
-#else
- disable_intr(); /* disable interrupts */
- i686_mrstoreone((void *)sc);
- enable_intr();
-#endif
-}
-
-/*
- * Update the current CPU's MTRRs with those represented in the
- * descriptor list. Note that we do this wholesale rather than
- * just stuffing one entry; this is simpler (but slower, of course).
- */
-static void
-i686_mrstoreone(void *arg)
-{
-#if 0
- struct mem_range_softc *sc = (struct mem_range_softc *)arg;
- struct mem_range_desc *mrd;
- u_int64_t omsrv, msrv;
- int i, j, msr;
- u_int cr4save;
-
- mrd = sc->mr_desc;
-
- cr4save = rcr4(); /* save cr4 */
- if (cr4save & CR4_PGE)
- load_cr4(cr4save & ~CR4_PGE);
- load_cr0((rcr0() & ~CR0_NW) | CR0_CD); /* disable caches (CD = 1, NW =
0) */
- wbinvd(); /* flush caches, TLBs */
- wrmsr(MSR_MTRRdefType, rdmsr(MSR_MTRRdefType) & ~0x800); /* disable
MTRRs (E = 0) */
-
- /* Set fixed-range MTRRs */
- if (sc->mr_cap & MR686_FIXMTRR) {
- msr = MSR_MTRR64kBase;
- for (i = 0; i < (MTRR_N64K / 8); i++, msr++) {
- msrv = 0;
- omsrv = rdmsr(msr);
- for (j = 7; j >= 0; j--) {
- msrv = msrv << 8;
- msrv |= i686_mrt2mtrr((mrd + j)->mr_flags, omsrv >> (j*8));
- }
- wrmsr(msr, msrv);
- mrd += 8;
- }
- msr = MSR_MTRR16kBase;
- for (i = 0; i < (MTRR_N16K / 8); i++, msr++) {
- msrv = 0;
- omsrv = rdmsr(msr);
- for (j = 7; j >= 0; j--) {
- msrv = msrv << 8;
- msrv |= i686_mrt2mtrr((mrd + j)->mr_flags, omsrv >> (j*8));
- }
- wrmsr(msr, msrv);
- mrd += 8;
- }
- msr = MSR_MTRR4kBase;
- for (i = 0; i < (MTRR_N4K / 8); i++, msr++) {
- msrv = 0;
- omsrv = rdmsr(msr);
- for (j = 7; j >= 0; j--) {
- msrv = msrv << 8;
- msrv |= i686_mrt2mtrr((mrd + j)->mr_flags, omsrv >> (j*8));
- }
- wrmsr(msr, msrv);
- mrd += 8;
- }
- }
-
- /* Set remainder which must be variable MTRRs */
- msr = MSR_MTRRVarBase;
- for (; (mrd - sc->mr_desc) < sc->mr_ndesc; msr += 2, mrd++) {
- /* base/type register */
- omsrv = rdmsr(msr);
- if (mrd->mr_flags & MDF_ACTIVE) {
- msrv = mrd->mr_base & 0x0000000ffffff000LL;
- msrv |= i686_mrt2mtrr(mrd->mr_flags, omsrv);
- } else {
- msrv = 0;
- }
- wrmsr(msr, msrv);
-
- /* mask/active register */
- if (mrd->mr_flags & MDF_ACTIVE) {
- msrv = 0x800 | (~(mrd->mr_len - 1) & 0x0000000ffffff000LL);
- } else {
- msrv = 0;
- }
- wrmsr(msr + 1, msrv);
- }
- wbinvd(); /* flush
caches, TLBs */
- wrmsr(MSR_MTRRdefType, rdmsr(MSR_MTRRdefType) | 0x800); /* restore MTRR
state */
- load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* enable
caches CD = 0 and NW = 0 */
- load_cr4(cr4save); /* restore cr4
*/
-#endif
-}
-
-/*
- * Hunt for the fixed MTRR referencing (addr)
- */
-static struct mem_range_desc *
-i686_mtrrfixsearch(struct mem_range_softc *sc, u_int64_t addr)
-{
- struct mem_range_desc *mrd;
- int i;
-
- for (i = 0, mrd = sc->mr_desc; i < (MTRR_N64K + MTRR_N16K + MTRR_N4K);
i++, mrd++)
- if ((addr >= mrd->mr_base) && (addr < (mrd->mr_base + mrd->mr_len)))
- return(mrd);
- return(NULL);
-}
-
-/*
- * Try to satisfy the given range request by manipulating the fixed MTRRs that
- * cover low memory.
- *
- * Note that we try to be generous here; we'll bloat the range out to the
- * next higher/lower boundary to avoid the consumer having to know too much
- * about the mechanisms here.
- *
- * XXX note that this will have to be updated when we start supporting "busy"
ranges.
- */
-static int
-i686_mrsetlow(struct mem_range_softc *sc, struct mem_range_desc *mrd, int *arg)
-{
- struct mem_range_desc *first_md, *last_md, *curr_md;
-
- /* range check */
- if (((first_md = i686_mtrrfixsearch(sc, mrd->mr_base)) == NULL) ||
- ((last_md = i686_mtrrfixsearch(sc, mrd->mr_base + mrd->mr_len - 1)) ==
NULL))
- return(EINVAL);
-
- /* check we aren't doing something risky */
- if (!(mrd->mr_flags & MDF_FORCE))
- for (curr_md = first_md; curr_md <= last_md; curr_md++) {
- if ((curr_md->mr_flags & MDF_ATTRMASK) == MDF_UNKNOWN)
- return (EACCES);
- }
-
- /* set flags, clear set-by-firmware flag */
- for (curr_md = first_md; curr_md <= last_md; curr_md++) {
- curr_md->mr_flags = mrcopyflags(curr_md->mr_flags & ~MDF_FIRMWARE,
mrd->mr_flags);
- bcopy(mrd->mr_owner, curr_md->mr_owner, sizeof(mrd->mr_owner));
- }
-
- return(0);
-}
-
-
-/*
- * Modify/add a variable MTRR to satisfy the request.
- *
- * XXX needs to be updated to properly support "busy" ranges.
- */
-static int
-i686_mrsetvariable(struct mem_range_softc *sc, struct mem_range_desc *mrd, int
*arg)
-{
- struct mem_range_desc *curr_md, *free_md;
- int i;
-
- /*
- * Scan the currently active variable descriptors, look for
- * one we exactly match (straight takeover) and for possible
- * accidental overlaps.
- * Keep track of the first empty variable descriptor in case we
- * can't perform a takeover.
- */
- i = (sc->mr_cap & MR686_FIXMTRR) ? MTRR_N64K + MTRR_N16K + MTRR_N4K : 0;
- curr_md = sc->mr_desc + i;
- free_md = NULL;
- for (; i < sc->mr_ndesc; i++, curr_md++) {
- if (curr_md->mr_flags & MDF_ACTIVE) {
- /* exact match? */
- if ((curr_md->mr_base == mrd->mr_base) &&
- (curr_md->mr_len == mrd->mr_len)) {
- /* whoops, owned by someone */
- if (curr_md->mr_flags & MDF_BUSY)
- return(EBUSY);
- /* check we aren't doing something risky */
- if (!(mrd->mr_flags & MDF_FORCE) &&
- ((curr_md->mr_flags & MDF_ATTRMASK) == MDF_UNKNOWN))
- return (EACCES);
- /* Ok, just hijack this entry */
- free_md = curr_md;
- break;
- }
- /* non-exact overlap ? */
- if (mroverlap(curr_md, mrd)) {
- /* between conflicting region types? */
- if (i686_mtrrconflict(curr_md->mr_flags, mrd->mr_flags))
- return(EINVAL);
- }
- } else if (free_md == NULL) {
- free_md = curr_md;
- }
- }
- /* got somewhere to put it? */
- if (free_md == NULL)
- return(ENOSPC);
-
- /* Set up new descriptor */
- free_md->mr_base = mrd->mr_base;
- free_md->mr_len = mrd->mr_len;
- free_md->mr_flags = mrcopyflags(MDF_ACTIVE, mrd->mr_flags);
- bcopy(mrd->mr_owner, free_md->mr_owner, sizeof(mrd->mr_owner));
- return(0);
-}
-
-/*
- * Handle requests to set memory range attributes by manipulating MTRRs.
- *
- */
-static int
-i686_mrset(struct mem_range_softc *sc, struct mem_range_desc *mrd, int *arg)
-{
- struct mem_range_desc *targ;
- int error = 0;
-
- switch(*arg) {
- case MEMRANGE_SET_UPDATE:
- /* make sure that what's being asked for is even possible at all */
- if (!mrvalid(mrd->mr_base, mrd->mr_len) ||
- i686_mtrrtype(mrd->mr_flags) == -1)
- return(EINVAL);
-
-#define FIXTOP ((MTRR_N64K * 0x10000) + (MTRR_N16K * 0x4000) + (MTRR_N4K *
0x1000))
-
- /* are the "low memory" conditions applicable? */
- if ((sc->mr_cap & MR686_FIXMTRR) &&
- ((mrd->mr_base + mrd->mr_len) <= FIXTOP)) {
- if ((error = i686_mrsetlow(sc, mrd, arg)) != 0)
- return(error);
- } else {
- /* it's time to play with variable MTRRs */
- if ((error = i686_mrsetvariable(sc, mrd, arg)) != 0)
- return(error);
- }
- break;
-
- case MEMRANGE_SET_REMOVE:
- if ((targ = mem_range_match(sc, mrd)) == NULL)
- return(ENOENT);
- if (targ->mr_flags & MDF_FIXACTIVE)
- return(EPERM);
- if (targ->mr_flags & MDF_BUSY)
- return(EBUSY);
- targ->mr_flags &= ~MDF_ACTIVE;
- targ->mr_owner[0] = 0;
- break;
-
- default:
- return(EOPNOTSUPP);
- }
-
- /* update the hardware */
- i686_mrstore(sc);
- i686_mrfetch(sc); /* refetch to see where we're at */
- return(0);
-}
-
-/*
- * Work out how many ranges we support, initialise storage for them,
- * fetch the initial settings.
- */
-static void
-i686_mrinit(struct mem_range_softc *sc)
-{
- struct mem_range_desc *mrd;
- int nmdesc = 0;
- int i;
-
- /* XXX */
- return;
-
- mtrrcap = rdmsr(MSR_MTRRcap);
- mtrrdef = rdmsr(MSR_MTRRdefType);
-
- /* For now, bail out if MTRRs are not enabled */
- if (!(mtrrdef & 0x800)) {
- if (bootverbose)
- printf("CPU supports MTRRs but not enabled\n");
- return;
- }
- nmdesc = mtrrcap & 0xff;
- printf("Pentium Pro MTRR support enabled\n");
-
- /* If fixed MTRRs supported and enabled */
- if ((mtrrcap & 0x100) && (mtrrdef & 0x400)) {
- sc->mr_cap = MR686_FIXMTRR;
- nmdesc += MTRR_N64K + MTRR_N16K + MTRR_N4K;
- }
-
- sc->mr_desc =
- (struct mem_range_desc *)malloc(nmdesc * sizeof(struct mem_range_desc),
- M_MEMDESC, M_WAITOK | M_ZERO);
- sc->mr_ndesc = nmdesc;
-
- mrd = sc->mr_desc;
-
- /* Populate the fixed MTRR entries' base/length */
- if (sc->mr_cap & MR686_FIXMTRR) {
- for (i = 0; i < MTRR_N64K; i++, mrd++) {
- mrd->mr_base = i * 0x10000;
- mrd->mr_len = 0x10000;
- mrd->mr_flags = MDF_FIXBASE | MDF_FIXLEN | MDF_FIXACTIVE;
- }
- for (i = 0; i < MTRR_N16K; i++, mrd++) {
- mrd->mr_base = i * 0x4000 + 0x80000;
- mrd->mr_len = 0x4000;
- mrd->mr_flags = MDF_FIXBASE | MDF_FIXLEN | MDF_FIXACTIVE;
- }
- for (i = 0; i < MTRR_N4K; i++, mrd++) {
- mrd->mr_base = i * 0x1000 + 0xc0000;
- mrd->mr_len = 0x1000;
- mrd->mr_flags = MDF_FIXBASE | MDF_FIXLEN | MDF_FIXACTIVE;
- }
- }
-
- /*
- * Get current settings, anything set now is considered to have
- * been set by the firmware. (XXX has something already played here?)
- */
- i686_mrfetch(sc);
- mrd = sc->mr_desc;
- for (i = 0; i < sc->mr_ndesc; i++, mrd++) {
- if (mrd->mr_flags & MDF_ACTIVE)
- mrd->mr_flags |= MDF_FIRMWARE;
- }
-}
-
-/*
- * Initialise MTRRs on an AP after the BSP has run the init code.
- */
-static void
-i686_mrAPinit(struct mem_range_softc *sc)
-{
- i686_mrstoreone((void *)sc); /* set MTRRs to match BSP */
- wrmsr(MSR_MTRRdefType, mtrrdef); /* set MTRR behaviour to match BSP */
-}
-
-static void
-i686_mem_drvinit(void *unused)
-{
- /* Try for i686 MTRRs */
- if (!mtrrs_disabled && (cpu_feature & CPUID_MTRR) &&
- ((cpu_id & 0xf00) == 0x600 || (cpu_id & 0xf00) == 0xf00) &&
- ((strcmp(cpu_vendor, "GenuineIntel") == 0) ||
- (strcmp(cpu_vendor, "AuthenticAMD") == 0))) {
- mem_range_softc.mr_op = &i686_mrops;
- }
-}
-
-SYSINIT(i686memdev,SI_SUB_DRIVERS,SI_ORDER_FIRST,i686_mem_drvinit,NULL)
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/i386-xen/initcpu.c
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/initcpu.c Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,889 +0,0 @@
-/*-
- * Copyright (c) KATO Takenori, 1997, 1998.
- *
- * All rights reserved. Unpublished rights reserved under the copyright
- * laws of Japan.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer as
- * the first lines of this file unmodified.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/initcpu.c,v 1.49 2003/11/10 15:48:30 jhb
Exp $");
-
-#include "opt_cpu.h"
-
-#include <sys/param.h>
-#include <sys/kernel.h>
-#include <sys/systm.h>
-#include <sys/sysctl.h>
-
-#include <machine/cputypes.h>
-#include <machine/md_var.h>
-#include <machine/specialreg.h>
-
-#if !defined(CPU_ENABLE_SSE) && defined(I686_CPU)
-#define CPU_ENABLE_SSE
-#endif
-#if defined(CPU_DISABLE_SSE)
-#undef CPU_ENABLE_SSE
-#endif
-
-void initializecpu(void);
-#if defined(I586_CPU) && defined(CPU_WT_ALLOC)
-void enable_K5_wt_alloc(void);
-void enable_K6_wt_alloc(void);
-void enable_K6_2_wt_alloc(void);
-#endif
-
-#ifdef I486_CPU
-static void init_5x86(void);
-static void init_bluelightning(void);
-static void init_486dlc(void);
-static void init_cy486dx(void);
-#ifdef CPU_I486_ON_386
-static void init_i486_on_386(void);
-#endif
-static void init_6x86(void);
-#endif /* I486_CPU */
-
-#ifdef I686_CPU
-static void init_6x86MX(void);
-static void init_ppro(void);
-static void init_mendocino(void);
-#endif
-
-static int hw_instruction_sse;
-SYSCTL_INT(_hw, OID_AUTO, instruction_sse, CTLFLAG_RD,
- &hw_instruction_sse, 0, "SIMD/MMX2 instructions available in CPU");
-
-/* Must *NOT* be BSS or locore will bzero these after setting them */
-int cpu = 0; /* Are we 386, 386sx, 486, etc? */
-u_int cpu_feature = 0; /* Feature flags */
-u_int cpu_high = 0; /* Highest arg to CPUID */
-u_int cpu_id = 0; /* Stepping ID */
-u_int cpu_procinfo = 0; /* HyperThreading Info / Brand Index / CLFUSH */
-char cpu_vendor[20] = ""; /* CPU Origin code */
-
-#ifdef CPU_ENABLE_SSE
-u_int cpu_fxsr; /* SSE enabled */
-#endif
-
-#ifdef I486_CPU
-/*
- * IBM Blue Lightning
- */
-static void
-init_bluelightning(void)
-{
-#if 0
- u_long eflags;
-
-#if defined(PC98) && !defined(CPU_UPGRADE_HW_CACHE)
- need_post_dma_flush = 1;
-#endif
-
- eflags = read_eflags();
- disable_intr();
-
- load_cr0(rcr0() | CR0_CD | CR0_NW);
- invd();
-
-#ifdef CPU_BLUELIGHTNING_FPU_OP_CACHE
- wrmsr(0x1000, 0x9c92LL); /* FP operand can be cacheable on Cyrix
FPU */
-#else
- wrmsr(0x1000, 0x1c92LL); /* Intel FPU */
-#endif
- /* Enables 13MB and 0-640KB cache. */
- wrmsr(0x1001, (0xd0LL << 32) | 0x3ff);
-#ifdef CPU_BLUELIGHTNING_3X
- wrmsr(0x1002, 0x04000000LL); /* Enables triple-clock mode. */
-#else
- wrmsr(0x1002, 0x03000000LL); /* Enables double-clock mode. */
-#endif
-
- /* Enable caching in CR0. */
- load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */
- invd();
- write_eflags(eflags);
-#endif
-}
-
-/*
- * Cyrix 486SLC/DLC/SR/DR series
- */
-static void
-init_486dlc(void)
-{
- u_long eflags;
- u_char ccr0;
-
- eflags = read_eflags();
- disable_intr();
- invd();
-
- ccr0 = read_cyrix_reg(CCR0);
-#ifndef CYRIX_CACHE_WORKS
- ccr0 |= CCR0_NC1 | CCR0_BARB;
- write_cyrix_reg(CCR0, ccr0);
- invd();
-#else
- ccr0 &= ~CCR0_NC0;
-#ifndef CYRIX_CACHE_REALLY_WORKS
- ccr0 |= CCR0_NC1 | CCR0_BARB;
-#else
- ccr0 |= CCR0_NC1;
-#endif
-#ifdef CPU_DIRECT_MAPPED_CACHE
- ccr0 |= CCR0_CO; /* Direct mapped mode. */
-#endif
- write_cyrix_reg(CCR0, ccr0);
-
- /* Clear non-cacheable region. */
- write_cyrix_reg(NCR1+2, NCR_SIZE_0K);
- write_cyrix_reg(NCR2+2, NCR_SIZE_0K);
- write_cyrix_reg(NCR3+2, NCR_SIZE_0K);
- write_cyrix_reg(NCR4+2, NCR_SIZE_0K);
-
- write_cyrix_reg(0, 0); /* dummy write */
-
- /* Enable caching in CR0. */
- load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */
- invd();
-#endif /* !CYRIX_CACHE_WORKS */
- write_eflags(eflags);
-}
-
-
-/*
- * Cyrix 486S/DX series
- */
-static void
-init_cy486dx(void)
-{
- u_long eflags;
- u_char ccr2;
-
- eflags = read_eflags();
- disable_intr();
- invd();
-
- ccr2 = read_cyrix_reg(CCR2);
-#ifdef CPU_SUSP_HLT
- ccr2 |= CCR2_SUSP_HLT;
-#endif
-
-#ifdef PC98
- /* Enables WB cache interface pin and Lock NW bit in CR0. */
- ccr2 |= CCR2_WB | CCR2_LOCK_NW;
- /* Unlock NW bit in CR0. */
- write_cyrix_reg(CCR2, ccr2 & ~CCR2_LOCK_NW);
- load_cr0((rcr0() & ~CR0_CD) | CR0_NW); /* CD = 0, NW = 1 */
-#endif
-
- write_cyrix_reg(CCR2, ccr2);
- write_eflags(eflags);
-}
-
-
-/*
- * Cyrix 5x86
- */
-static void
-init_5x86(void)
-{
- u_long eflags;
- u_char ccr2, ccr3, ccr4, pcr0;
-
- eflags = read_eflags();
- disable_intr();
-
- load_cr0(rcr0() | CR0_CD | CR0_NW);
- wbinvd();
-
- (void)read_cyrix_reg(CCR3); /* dummy */
-
- /* Initialize CCR2. */
- ccr2 = read_cyrix_reg(CCR2);
- ccr2 |= CCR2_WB;
-#ifdef CPU_SUSP_HLT
- ccr2 |= CCR2_SUSP_HLT;
-#else
- ccr2 &= ~CCR2_SUSP_HLT;
-#endif
- ccr2 |= CCR2_WT1;
- write_cyrix_reg(CCR2, ccr2);
-
- /* Initialize CCR4. */
- ccr3 = read_cyrix_reg(CCR3);
- write_cyrix_reg(CCR3, CCR3_MAPEN0);
-
- ccr4 = read_cyrix_reg(CCR4);
- ccr4 |= CCR4_DTE;
- ccr4 |= CCR4_MEM;
-#ifdef CPU_FASTER_5X86_FPU
- ccr4 |= CCR4_FASTFPE;
-#else
- ccr4 &= ~CCR4_FASTFPE;
-#endif
- ccr4 &= ~CCR4_IOMASK;
- /********************************************************************
- * WARNING: The "BIOS Writers Guide" mentions that I/O recovery time
- * should be 0 for errata fix.
- ********************************************************************/
-#ifdef CPU_IORT
- ccr4 |= CPU_IORT & CCR4_IOMASK;
-#endif
- write_cyrix_reg(CCR4, ccr4);
-
- /* Initialize PCR0. */
- /****************************************************************
- * WARNING: RSTK_EN and LOOP_EN could make your system unstable.
- * BTB_EN might make your system unstable.
- ****************************************************************/
- pcr0 = read_cyrix_reg(PCR0);
-#ifdef CPU_RSTK_EN
- pcr0 |= PCR0_RSTK;
-#else
- pcr0 &= ~PCR0_RSTK;
-#endif
-#ifdef CPU_BTB_EN
- pcr0 |= PCR0_BTB;
-#else
- pcr0 &= ~PCR0_BTB;
-#endif
-#ifdef CPU_LOOP_EN
- pcr0 |= PCR0_LOOP;
-#else
- pcr0 &= ~PCR0_LOOP;
-#endif
-
- /****************************************************************
- * WARNING: if you use a memory mapped I/O device, don't use
- * DISABLE_5X86_LSSER option, which may reorder memory mapped
- * I/O access.
- * IF YOUR MOTHERBOARD HAS PCI BUS, DON'T DISABLE LSSER.
- ****************************************************************/
-#ifdef CPU_DISABLE_5X86_LSSER
- pcr0 &= ~PCR0_LSSER;
-#else
- pcr0 |= PCR0_LSSER;
-#endif
- write_cyrix_reg(PCR0, pcr0);
-
- /* Restore CCR3. */
- write_cyrix_reg(CCR3, ccr3);
-
- (void)read_cyrix_reg(0x80); /* dummy */
-
- /* Unlock NW bit in CR0. */
- write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) & ~CCR2_LOCK_NW);
- load_cr0((rcr0() & ~CR0_CD) | CR0_NW); /* CD = 0, NW = 1 */
- /* Lock NW bit in CR0. */
- write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_LOCK_NW);
-
- write_eflags(eflags);
-}
-
-#ifdef CPU_I486_ON_386
-/*
- * There are i486 based upgrade products for i386 machines.
- * In this case, BIOS doesn't enables CPU cache.
- */
-static void
-init_i486_on_386(void)
-{
- u_long eflags;
-
-#if defined(PC98) && !defined(CPU_UPGRADE_HW_CACHE)
- need_post_dma_flush = 1;
-#endif
-
- eflags = read_eflags();
- disable_intr();
-
- load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0, NW = 0 */
-
- write_eflags(eflags);
-}
-#endif
-
-/*
- * Cyrix 6x86
- *
- * XXX - What should I do here? Please let me know.
- */
-static void
-init_6x86(void)
-{
- u_long eflags;
- u_char ccr3, ccr4;
-
- eflags = read_eflags();
- disable_intr();
-
- load_cr0(rcr0() | CR0_CD | CR0_NW);
- wbinvd();
-
- /* Initialize CCR0. */
- write_cyrix_reg(CCR0, read_cyrix_reg(CCR0) | CCR0_NC1);
-
- /* Initialize CCR1. */
-#ifdef CPU_CYRIX_NO_LOCK
- write_cyrix_reg(CCR1, read_cyrix_reg(CCR1) | CCR1_NO_LOCK);
-#else
- write_cyrix_reg(CCR1, read_cyrix_reg(CCR1) & ~CCR1_NO_LOCK);
-#endif
-
- /* Initialize CCR2. */
-#ifdef CPU_SUSP_HLT
- write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_SUSP_HLT);
-#else
- write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) & ~CCR2_SUSP_HLT);
-#endif
-
- ccr3 = read_cyrix_reg(CCR3);
- write_cyrix_reg(CCR3, CCR3_MAPEN0);
-
- /* Initialize CCR4. */
- ccr4 = read_cyrix_reg(CCR4);
- ccr4 |= CCR4_DTE;
- ccr4 &= ~CCR4_IOMASK;
-#ifdef CPU_IORT
- write_cyrix_reg(CCR4, ccr4 | (CPU_IORT & CCR4_IOMASK));
-#else
- write_cyrix_reg(CCR4, ccr4 | 7);
-#endif
-
- /* Initialize CCR5. */
-#ifdef CPU_WT_ALLOC
- write_cyrix_reg(CCR5, read_cyrix_reg(CCR5) | CCR5_WT_ALLOC);
-#endif
-
- /* Restore CCR3. */
- write_cyrix_reg(CCR3, ccr3);
-
- /* Unlock NW bit in CR0. */
- write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) & ~CCR2_LOCK_NW);
-
- /*
- * Earlier revision of the 6x86 CPU could crash the system if
- * L1 cache is in write-back mode.
- */
- if ((cyrix_did & 0xff00) > 0x1600)
- load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */
- else {
- /* Revision 2.6 and lower. */
-#ifdef CYRIX_CACHE_REALLY_WORKS
- load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */
-#else
- load_cr0((rcr0() & ~CR0_CD) | CR0_NW); /* CD = 0 and NW = 1 */
-#endif
- }
-
- /* Lock NW bit in CR0. */
- write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_LOCK_NW);
-
- write_eflags(eflags);
-}
-#endif /* I486_CPU */
-
-#ifdef I686_CPU
-/*
- * Cyrix 6x86MX (code-named M2)
- *
- * XXX - What should I do here? Please let me know.
- */
-static void
-init_6x86MX(void)
-{
-#if 0
- u_long eflags;
- u_char ccr3, ccr4;
-
- eflags = read_eflags();
- disable_intr();
-
- load_cr0(rcr0() | CR0_CD | CR0_NW);
- wbinvd();
-
- /* Initialize CCR0. */
- write_cyrix_reg(CCR0, read_cyrix_reg(CCR0) | CCR0_NC1);
-
- /* Initialize CCR1. */
-#ifdef CPU_CYRIX_NO_LOCK
- write_cyrix_reg(CCR1, read_cyrix_reg(CCR1) | CCR1_NO_LOCK);
-#else
- write_cyrix_reg(CCR1, read_cyrix_reg(CCR1) & ~CCR1_NO_LOCK);
-#endif
-
- /* Initialize CCR2. */
-#ifdef CPU_SUSP_HLT
- write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_SUSP_HLT);
-#else
- write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) & ~CCR2_SUSP_HLT);
-#endif
-
- ccr3 = read_cyrix_reg(CCR3);
- write_cyrix_reg(CCR3, CCR3_MAPEN0);
-
- /* Initialize CCR4. */
- ccr4 = read_cyrix_reg(CCR4);
- ccr4 &= ~CCR4_IOMASK;
-#ifdef CPU_IORT
- write_cyrix_reg(CCR4, ccr4 | (CPU_IORT & CCR4_IOMASK));
-#else
- write_cyrix_reg(CCR4, ccr4 | 7);
-#endif
-
- /* Initialize CCR5. */
-#ifdef CPU_WT_ALLOC
- write_cyrix_reg(CCR5, read_cyrix_reg(CCR5) | CCR5_WT_ALLOC);
-#endif
-
- /* Restore CCR3. */
- write_cyrix_reg(CCR3, ccr3);
-
- /* Unlock NW bit in CR0. */
- write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) & ~CCR2_LOCK_NW);
-
- load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */
-
- /* Lock NW bit in CR0. */
- write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_LOCK_NW);
-
- write_eflags(eflags);
-#endif
-}
-
-static void
-init_ppro(void)
-{
- u_int64_t apicbase;
-
- /*
- * Local APIC should be disabled if it is not going to be used.
- */
- apicbase = rdmsr(MSR_APICBASE);
- apicbase &= ~APICBASE_ENABLED;
- wrmsr(MSR_APICBASE, apicbase);
-}
-
-/*
- * Initialize BBL_CR_CTL3 (Control register 3: used to configure the
- * L2 cache).
- */
-static void
-init_mendocino(void)
-{
-#ifdef CPU_PPRO2CELERON
- u_long eflags;
- u_int64_t bbl_cr_ctl3;
-
- eflags = read_eflags();
- disable_intr();
-
- load_cr0(rcr0() | CR0_CD | CR0_NW);
- wbinvd();
-
- bbl_cr_ctl3 = rdmsr(MSR_BBL_CR_CTL3);
-
- /* If the L2 cache is configured, do nothing. */
- if (!(bbl_cr_ctl3 & 1)) {
- bbl_cr_ctl3 = 0x134052bLL;
-
- /* Set L2 Cache Latency (Default: 5). */
-#ifdef CPU_CELERON_L2_LATENCY
-#if CPU_L2_LATENCY > 15
-#error invalid CPU_L2_LATENCY.
-#endif
- bbl_cr_ctl3 |= CPU_L2_LATENCY << 1;
-#else
- bbl_cr_ctl3 |= 5 << 1;
-#endif
- wrmsr(MSR_BBL_CR_CTL3, bbl_cr_ctl3);
- }
-
- load_cr0(rcr0() & ~(CR0_CD | CR0_NW));
- write_eflags(eflags);
-#endif /* CPU_PPRO2CELERON */
-}
-
-#endif /* I686_CPU */
-
-/*
- * Initialize CR4 (Control register 4) to enable SSE instructions.
- */
-void
-enable_sse(void)
-{
-#ifdef XEN
- return;
-#endif
-#if defined(CPU_ENABLE_SSE)
- if ((cpu_feature & CPUID_XMM) && (cpu_feature & CPUID_FXSR)) {
- load_cr4(rcr4() | CR4_FXSR | CR4_XMM);
- cpu_fxsr = hw_instruction_sse = 1;
- }
-#endif
-}
-
-void
-initializecpu(void)
-{
-
- switch (cpu) {
-#ifdef I486_CPU
- case CPU_BLUE:
- init_bluelightning();
- break;
- case CPU_486DLC:
- init_486dlc();
- break;
- case CPU_CY486DX:
- init_cy486dx();
- break;
- case CPU_M1SC:
- init_5x86();
- break;
-#ifdef CPU_I486_ON_386
- case CPU_486:
- init_i486_on_386();
- break;
-#endif
- case CPU_M1:
- init_6x86();
- break;
-#endif /* I486_CPU */
-#ifdef I686_CPU
- case CPU_M2:
- init_6x86MX();
- break;
- case CPU_686:
- if (strcmp(cpu_vendor, "GenuineIntel") == 0) {
- switch (cpu_id & 0xff0) {
- case 0x610:
- init_ppro();
- break;
- case 0x660:
- init_mendocino();
- break;
- }
- } else if (strcmp(cpu_vendor, "AuthenticAMD") == 0) {
-#if defined(I686_CPU) && defined(CPU_ATHLON_SSE_HACK)
- /*
- * Sometimes the BIOS doesn't enable SSE instructions.
- * According to AMD document 20734, the mobile
- * Duron, the (mobile) Athlon 4 and the Athlon MP
- * support SSE. These correspond to cpu_id 0x66X
- * or 0x67X.
- */
- if ((cpu_feature & CPUID_XMM) == 0 &&
- ((cpu_id & ~0xf) == 0x660 ||
- (cpu_id & ~0xf) == 0x670 ||
- (cpu_id & ~0xf) == 0x680)) {
- u_int regs[4];
- wrmsr(0xC0010015, rdmsr(0xC0010015) & ~0x08000);
- do_cpuid(1, regs);
- cpu_feature = regs[3];
- }
-#endif
- }
- break;
-#endif
- default:
- break;
- }
- enable_sse();
-
-#if defined(PC98) && !defined(CPU_UPGRADE_HW_CACHE)
- /*
- * OS should flush L1 cache by itself because no PC-98 supports
- * non-Intel CPUs. Use wbinvd instruction before DMA transfer
- * when need_pre_dma_flush = 1, use invd instruction after DMA
- * transfer when need_post_dma_flush = 1. If your CPU upgrade
- * product supports hardware cache control, you can add the
- * CPU_UPGRADE_HW_CACHE option in your kernel configuration file.
- * This option eliminates unneeded cache flush instruction(s).
- */
- if (strcmp(cpu_vendor, "CyrixInstead") == 0) {
- switch (cpu) {
-#ifdef I486_CPU
- case CPU_486DLC:
- need_post_dma_flush = 1;
- break;
- case CPU_M1SC:
- need_pre_dma_flush = 1;
- break;
- case CPU_CY486DX:
- need_pre_dma_flush = 1;
-#ifdef CPU_I486_ON_386
- need_post_dma_flush = 1;
-#endif
- break;
-#endif
- default:
- break;
- }
- } else if (strcmp(cpu_vendor, "AuthenticAMD") == 0) {
- switch (cpu_id & 0xFF0) {
- case 0x470: /* Enhanced Am486DX2 WB */
- case 0x490: /* Enhanced Am486DX4 WB */
- case 0x4F0: /* Am5x86 WB */
- need_pre_dma_flush = 1;
- break;
- }
- } else if (strcmp(cpu_vendor, "IBM") == 0) {
- need_post_dma_flush = 1;
- } else {
-#ifdef CPU_I486_ON_386
- need_pre_dma_flush = 1;
-#endif
- }
-#endif /* PC98 && !CPU_UPGRADE_HW_CACHE */
-}
-
-#if defined(I586_CPU) && defined(CPU_WT_ALLOC)
-/*
- * Enable write allocate feature of AMD processors.
- * Following two functions require the Maxmem variable being set.
- */
-void
-enable_K5_wt_alloc(void)
-{
- u_int64_t msr;
- register_t savecrit;
-
- /*
- * Write allocate is supported only on models 1, 2, and 3, with
- * a stepping of 4 or greater.
- */
- if (((cpu_id & 0xf0) > 0) && ((cpu_id & 0x0f) > 3)) {
- savecrit = intr_disable();
- msr = rdmsr(0x83); /* HWCR */
- wrmsr(0x83, msr & !(0x10));
-
- /*
- * We have to tell the chip where the top of memory is,
- * since video cards could have frame bufferes there,
- * memory-mapped I/O could be there, etc.
- */
- if(Maxmem > 0)
- msr = Maxmem / 16;
- else
- msr = 0;
- msr |= AMD_WT_ALLOC_TME | AMD_WT_ALLOC_FRE;
-#ifdef PC98
- if (!(inb(0x43b) & 4)) {
- wrmsr(0x86, 0x0ff00f0);
- msr |= AMD_WT_ALLOC_PRE;
- }
-#else
- /*
- * There is no way to know wheter 15-16M hole exists or not.
- * Therefore, we disable write allocate for this range.
- */
- wrmsr(0x86, 0x0ff00f0);
- msr |= AMD_WT_ALLOC_PRE;
-#endif
- wrmsr(0x85, msr);
-
- msr=rdmsr(0x83);
- wrmsr(0x83, msr|0x10); /* enable write allocate */
- intr_restore(savecrit);
- }
-}
-
-void
-enable_K6_wt_alloc(void)
-{
- quad_t size;
- u_int64_t whcr;
- u_long eflags;
-
- eflags = read_eflags();
- disable_intr();
- wbinvd();
-
-#ifdef CPU_DISABLE_CACHE
- /*
- * Certain K6-2 box becomes unstable when write allocation is
- * enabled.
- */
- /*
- * The AMD-K6 processer provides the 64-bit Test Register 12(TR12),
- * but only the Cache Inhibit(CI) (bit 3 of TR12) is suppported.
- * All other bits in TR12 have no effect on the processer's operation.
- * The I/O Trap Restart function (bit 9 of TR12) is always enabled
- * on the AMD-K6.
- */
- wrmsr(0x0000000e, (u_int64_t)0x0008);
-#endif
- /* Don't assume that memory size is aligned with 4M. */
- if (Maxmem > 0)
- size = ((Maxmem >> 8) + 3) >> 2;
- else
- size = 0;
-
- /* Limit is 508M bytes. */
- if (size > 0x7f)
- size = 0x7f;
- whcr = (rdmsr(0xc0000082) & ~(0x7fLL << 1)) | (size << 1);
-
-#if defined(PC98) || defined(NO_MEMORY_HOLE)
- if (whcr & (0x7fLL << 1)) {
-#ifdef PC98
- /*
- * If bit 2 of port 0x43b is 0, disable wrte allocate for the
- * 15-16M range.
- */
- if (!(inb(0x43b) & 4))
- whcr &= ~0x0001LL;
- else
-#endif
- whcr |= 0x0001LL;
- }
-#else
- /*
- * There is no way to know wheter 15-16M hole exists or not.
- * Therefore, we disable write allocate for this range.
- */
- whcr &= ~0x0001LL;
-#endif
- wrmsr(0x0c0000082, whcr);
-
- write_eflags(eflags);
-}
-
-void
-enable_K6_2_wt_alloc(void)
-{
- quad_t size;
- u_int64_t whcr;
- u_long eflags;
-
- eflags = read_eflags();
- disable_intr();
- wbinvd();
-
-#ifdef CPU_DISABLE_CACHE
- /*
- * Certain K6-2 box becomes unstable when write allocation is
- * enabled.
- */
- /*
- * The AMD-K6 processer provides the 64-bit Test Register 12(TR12),
- * but only the Cache Inhibit(CI) (bit 3 of TR12) is suppported.
- * All other bits in TR12 have no effect on the processer's operation.
- * The I/O Trap Restart function (bit 9 of TR12) is always enabled
- * on the AMD-K6.
- */
- wrmsr(0x0000000e, (u_int64_t)0x0008);
-#endif
- /* Don't assume that memory size is aligned with 4M. */
- if (Maxmem > 0)
- size = ((Maxmem >> 8) + 3) >> 2;
- else
- size = 0;
-
- /* Limit is 4092M bytes. */
- if (size > 0x3fff)
- size = 0x3ff;
- whcr = (rdmsr(0xc0000082) & ~(0x3ffLL << 22)) | (size << 22);
-
-#if defined(PC98) || defined(NO_MEMORY_HOLE)
- if (whcr & (0x3ffLL << 22)) {
-#ifdef PC98
- /*
- * If bit 2 of port 0x43b is 0, disable wrte allocate for the
- * 15-16M range.
- */
- if (!(inb(0x43b) & 4))
- whcr &= ~(1LL << 16);
- else
-#endif
- whcr |= 1LL << 16;
- }
-#else
- /*
- * There is no way to know wheter 15-16M hole exists or not.
- * Therefore, we disable write allocate for this range.
- */
- whcr &= ~(1LL << 16);
-#endif
- wrmsr(0x0c0000082, whcr);
-
- write_eflags(eflags);
-}
-#endif /* I585_CPU && CPU_WT_ALLOC */
-
-#include "opt_ddb.h"
-#ifdef DDB
-#include <ddb/ddb.h>
-#if 0
-DB_SHOW_COMMAND(cyrixreg, cyrixreg)
-{
- u_long eflags;
- u_int cr0;
- u_char ccr1, ccr2, ccr3;
- u_char ccr0 = 0, ccr4 = 0, ccr5 = 0, pcr0 = 0;
-
- cr0 = rcr0();
- if (strcmp(cpu_vendor,"CyrixInstead") == 0) {
- eflags = read_eflags();
- disable_intr();
-
-
- if ((cpu != CPU_M1SC) && (cpu != CPU_CY486DX)) {
- ccr0 = read_cyrix_reg(CCR0);
- }
- ccr1 = read_cyrix_reg(CCR1);
- ccr2 = read_cyrix_reg(CCR2);
- ccr3 = read_cyrix_reg(CCR3);
- if ((cpu == CPU_M1SC) || (cpu == CPU_M1) || (cpu == CPU_M2)) {
- write_cyrix_reg(CCR3, CCR3_MAPEN0);
- ccr4 = read_cyrix_reg(CCR4);
- if ((cpu == CPU_M1) || (cpu == CPU_M2))
- ccr5 = read_cyrix_reg(CCR5);
- else
- pcr0 = read_cyrix_reg(PCR0);
- write_cyrix_reg(CCR3, ccr3); /* Restore
CCR3. */
- }
- write_eflags(eflags);
-
- if ((cpu != CPU_M1SC) && (cpu != CPU_CY486DX))
- printf("CCR0=%x, ", (u_int)ccr0);
-
- printf("CCR1=%x, CCR2=%x, CCR3=%x",
- (u_int)ccr1, (u_int)ccr2, (u_int)ccr3);
- if ((cpu == CPU_M1SC) || (cpu == CPU_M1) || (cpu == CPU_M2)) {
- printf(", CCR4=%x, ", (u_int)ccr4);
- if (cpu == CPU_M1SC)
- printf("PCR0=%x\n", pcr0);
- else
- printf("CCR5=%x\n", ccr5);
- }
- }
- printf("CR0=%x\n", cr0);
-}
-#endif
-#endif /* DDB */
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/i386-xen/intr_machdep.c
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/intr_machdep.c Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,326 +0,0 @@
-/*-
- * Copyright (c) 2003 John Baldwin <jhb@xxxxxxxxxxx>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the author nor the names of any co-contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/i386/i386/intr_machdep.c,v 1.4 2003/11/17 06:10:14 peter
Exp $
- */
-
-/*
- * Machine dependent interrupt code for i386. For the i386, we have to
- * deal with different PICs. Thus, we use the passed in vector to lookup
- * an interrupt source associated with that vector. The interrupt source
- * describes which PIC the source belongs to and includes methods to handle
- * that source.
- */
-
-#include "opt_ddb.h"
-
-#include <sys/param.h>
-#include <sys/bus.h>
-#include <sys/interrupt.h>
-#include <sys/lock.h>
-#include <sys/ktr.h>
-#include <sys/kernel.h>
-#include <sys/mutex.h>
-#include <sys/proc.h>
-#include <sys/syslog.h>
-#include <sys/systm.h>
-#include <machine/clock.h>
-#include <machine/intr_machdep.h>
-#ifdef DDB
-#include <ddb/ddb.h>
-#endif
-
-#define MAX_STRAY_LOG 5
-
-typedef void (*mask_fn)(uintptr_t vector);
-
-static int intrcnt_index;
-static struct intsrc *interrupt_sources[NUM_IO_INTS];
-static struct mtx intr_table_lock;
-
-static void intr_init(void *__dummy);
-static void intrcnt_setname(const char *name, int index);
-static void intrcnt_updatename(struct intsrc *is);
-static void intrcnt_register(struct intsrc *is);
-
-/*
- * Register a new interrupt source with the global interrupt system.
- * The global interrupts need to be disabled when this function is
- * called.
- */
-int
-intr_register_source(struct intsrc *isrc)
-{
- int error, vector;
-
- vector = isrc->is_pic->pic_vector(isrc);
- if (interrupt_sources[vector] != NULL)
- return (EEXIST);
- error = ithread_create(&isrc->is_ithread, (uintptr_t)isrc, 0,
- (mask_fn)isrc->is_pic->pic_disable_source,
- (mask_fn)isrc->is_pic->pic_enable_source, "irq%d:", vector);
- if (error)
- return (error);
- mtx_lock_spin(&intr_table_lock);
- if (interrupt_sources[vector] != NULL) {
- mtx_unlock_spin(&intr_table_lock);
- ithread_destroy(isrc->is_ithread);
- return (EEXIST);
- }
- intrcnt_register(isrc);
- interrupt_sources[vector] = isrc;
- mtx_unlock_spin(&intr_table_lock);
- return (0);
-}
-
-struct intsrc *
-intr_lookup_source(int vector)
-{
-
- return (interrupt_sources[vector]);
-}
-
-int
-intr_add_handler(const char *name, int vector, driver_intr_t handler,
- void *arg, enum intr_type flags, void **cookiep)
-{
- struct intsrc *isrc;
- int error;
-
- isrc = intr_lookup_source(vector);
- if (isrc == NULL)
- return (EINVAL);
-
- error = ithread_add_handler(isrc->is_ithread, name, handler, arg,
- ithread_priority(flags), flags, cookiep);
- if (error == 0) {
- intrcnt_updatename(isrc);
- isrc->is_pic->pic_enable_intr(isrc);
- isrc->is_pic->pic_enable_source(isrc);
- }
- return (error);
-}
-
-int
-intr_remove_handler(void *cookie)
-{
- int error;
-
- error = ithread_remove_handler(cookie);
-#ifdef XXX
- if (error == 0)
- intrcnt_updatename(/* XXX */);
-#endif
- return (error);
-}
-
-int
-intr_config_intr(int vector, enum intr_trigger trig, enum intr_polarity pol)
-{
- struct intsrc *isrc;
-
- isrc = intr_lookup_source(vector);
- if (isrc == NULL)
- return (EINVAL);
- return (isrc->is_pic->pic_config_intr(isrc, trig, pol));
-}
-
-void
-intr_execute_handlers(struct intsrc *isrc, struct intrframe *iframe)
-{
- struct thread *td;
- struct ithd *it;
- struct intrhand *ih;
- int error, vector;
-
- td = curthread;
- td->td_intr_nesting_level++;
-
- /*
- * We count software interrupts when we process them. The
- * code here follows previous practice, but there's an
- * argument for counting hardware interrupts when they're
- * processed too.
- */
- atomic_add_long(isrc->is_count, 1);
- atomic_add_int(&cnt.v_intr, 1);
-
- it = isrc->is_ithread;
- if (it == NULL)
- ih = NULL;
- else
- ih = TAILQ_FIRST(&it->it_handlers);
-
- /*
- * XXX: We assume that IRQ 0 is only used for the ISA timer
- * device (clk).
- */
- vector = isrc->is_pic->pic_vector(isrc);
- if (vector == 0)
- clkintr_pending = 1;
-
-
- if (ih != NULL && ih->ih_flags & IH_FAST) {
- /*
- * Execute fast interrupt handlers directly.
- * To support clock handlers, if a handler registers
- * with a NULL argument, then we pass it a pointer to
- * a trapframe as its argument.
- */
- critical_enter();
- TAILQ_FOREACH(ih, &it->it_handlers, ih_next) {
- MPASS(ih->ih_flags & IH_FAST);
- CTR3(KTR_INTR, "%s: executing handler %p(%p)",
- __func__, ih->ih_handler,
- ih->ih_argument == NULL ? iframe :
- ih->ih_argument);
- if (ih->ih_argument == NULL)
- ih->ih_handler(iframe);
- else
- ih->ih_handler(ih->ih_argument);
- }
- isrc->is_pic->pic_eoi_source(isrc);
- error = 0;
- /* XXX */
- td->td_pflags &= ~TDP_OWEPREEMPT;
- critical_exit();
- } else {
- /*
- * For stray and threaded interrupts, we mask and EOI the
- * source.
- */
- isrc->is_pic->pic_disable_source(isrc, PIC_EOI);
- if (ih == NULL)
- error = EINVAL;
- else
- error = ithread_schedule(it);
- isrc->is_pic->pic_eoi_source(isrc);
- }
-
- if (error == EINVAL) {
- atomic_add_long(isrc->is_straycount, 1);
- if (*isrc->is_straycount < MAX_STRAY_LOG)
- log(LOG_ERR, "stray irq%d\n", vector);
- else if (*isrc->is_straycount == MAX_STRAY_LOG)
- log(LOG_CRIT,
- "too many stray irq %d's: not logging anymore\n",
- vector);
- }
- td->td_intr_nesting_level--;
-
-}
-
-void
-intr_resume(void)
-{
- struct intsrc **isrc;
- int i;
-
- mtx_lock_spin(&intr_table_lock);
- for (i = 0, isrc = interrupt_sources; i < NUM_IO_INTS; i++, isrc++)
- if (*isrc != NULL && (*isrc)->is_pic->pic_resume != NULL)
- (*isrc)->is_pic->pic_resume(*isrc);
- mtx_unlock_spin(&intr_table_lock);
-}
-
-void
-intr_suspend(void)
-{
- struct intsrc **isrc;
- int i;
-
- mtx_lock_spin(&intr_table_lock);
- for (i = 0, isrc = interrupt_sources; i < NUM_IO_INTS; i++, isrc++)
- if (*isrc != NULL && (*isrc)->is_pic->pic_suspend != NULL)
- (*isrc)->is_pic->pic_suspend(*isrc);
- mtx_unlock_spin(&intr_table_lock);
-}
-
-static void
-intrcnt_setname(const char *name, int index)
-{
-
- snprintf(intrnames + (MAXCOMLEN + 1) * index, MAXCOMLEN + 1, "%-*s",
- MAXCOMLEN, name);
-}
-
-static void
-intrcnt_updatename(struct intsrc *is)
-{
-
- intrcnt_setname(is->is_ithread->it_td->td_proc->p_comm, is->is_index);
-}
-
-static void
-intrcnt_register(struct intsrc *is)
-{
- char straystr[MAXCOMLEN + 1];
-
- /* mtx_assert(&intr_table_lock, MA_OWNED); */
- KASSERT(is->is_ithread != NULL, ("%s: isrc with no ithread", __func__));
- is->is_index = intrcnt_index;
- intrcnt_index += 2;
- snprintf(straystr, MAXCOMLEN + 1, "stray irq%d",
- is->is_pic->pic_vector(is));
- intrcnt_updatename(is);
- is->is_count = &intrcnt[is->is_index];
- intrcnt_setname(straystr, is->is_index + 1);
- is->is_straycount = &intrcnt[is->is_index + 1];
-}
-
-static void
-intr_init(void *dummy __unused)
-{
-
- intrcnt_setname("???", 0);
- intrcnt_index = 1;
- mtx_init(&intr_table_lock, "intr table", NULL, MTX_SPIN);
-}
-SYSINIT(intr_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_init, NULL)
-
-#ifdef DDB
-/*
- * Dump data about interrupt handlers
- */
-DB_SHOW_COMMAND(irqs, db_show_irqs)
-{
- struct intsrc **isrc;
- int i, quit, verbose;
-
- quit = 0;
- if (strcmp(modif, "v") == 0)
- verbose = 1;
- else
- verbose = 0;
- isrc = interrupt_sources;
- db_setup_paging(db_simple_pager, &quit, DB_LINES_PER_PAGE);
- for (i = 0; i < NUM_IO_INTS && !quit; i++, isrc++)
- if (*isrc != NULL)
- db_dump_ithread((*isrc)->is_ithread, verbose);
-}
-#endif
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/i386-xen/io_apic.c
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/io_apic.c Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,850 +0,0 @@
-/*-
- * Copyright (c) 2003 John Baldwin <jhb@xxxxxxxxxxx>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the author nor the names of any co-contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/io_apic.c,v 1.14 2004/08/02 15:31:10
scottl Exp $");
-
-#include "opt_isa.h"
-#include "opt_no_mixed_mode.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/bus.h>
-#include <sys/kernel.h>
-#include <sys/malloc.h>
-#include <sys/lock.h>
-#include <sys/mutex.h>
-
-#include <vm/vm.h>
-#include <vm/pmap.h>
-
-#include <machine/apicreg.h>
-#include <machine/frame.h>
-#include <machine/intr_machdep.h>
-#include <machine/apicvar.h>
-#include <machine/segments.h>
-
-#define IOAPIC_ISA_INTS 16
-#define IOAPIC_MEM_REGION 32
-#define IOAPIC_REDTBL_LO(i) (IOAPIC_REDTBL + (i) * 2)
-#define IOAPIC_REDTBL_HI(i) (IOAPIC_REDTBL_LO(i) + 1)
-
-#define VECTOR_EXTINT 252
-#define VECTOR_NMI 253
-#define VECTOR_SMI 254
-#define VECTOR_DISABLED 255
-
-#define DEST_NONE -1
-#define DEST_EXTINT -2
-
-#define TODO printf("%s: not implemented!\n", __func__)
-
-MALLOC_DEFINE(M_IOAPIC, "I/O APIC", "I/O APIC structures");
-
-/*
- * New interrupt support code..
- *
- * XXX: we really should have the interrupt cookie passed up from new-bus
- * just be a int pin, and not map 1:1 to interrupt vector number but should
- * use INTR_TYPE_FOO to set priority bands for device classes and do all the
- * magic remapping of intpin to vector in here. For now we just cheat as on
- * ia64 and map intpin X to vector NRSVIDT + X. Note that we assume that the
- * first IO APIC has ISA interrupts on pins 1-15. Not sure how you are
- * really supposed to figure out which IO APIC in a system with multiple IO
- * APIC's actually has the ISA interrupts routed to it. As far as interrupt
- * pin numbers, we use the ACPI System Interrupt number model where each
- * IO APIC has a contiguous chunk of the System Interrupt address space.
- */
-
-/*
- * Direct the ExtINT pin on the first I/O APIC to a logical cluster of
- * CPUs rather than a physical destination of just the BSP.
- *
- * Note: This is disabled by default as test systems seem to croak with it
- * enabled.
-#define ENABLE_EXTINT_LOGICAL_DESTINATION
- */
-
-struct ioapic_intsrc {
- struct intsrc io_intsrc;
- u_int io_intpin:8;
- u_int io_vector:8;
- u_int io_activehi:1;
- u_int io_edgetrigger:1;
- u_int io_masked:1;
- int io_dest:5;
- int io_bus:4;
-};
-
-struct ioapic {
- struct pic io_pic;
- u_int io_id:8; /* logical ID */
- u_int io_apic_id:4;
- u_int io_intbase:8; /* System Interrupt base */
- u_int io_numintr:8;
- volatile ioapic_t *io_addr; /* XXX: should use bus_space */
- STAILQ_ENTRY(ioapic) io_next;
- struct ioapic_intsrc io_pins[0];
-};
-
-static u_int ioapic_read(volatile ioapic_t *apic, int reg);
-static void ioapic_write(volatile ioapic_t *apic, int reg, u_int val);
-static const char *ioapic_bus_string(int bus_type);
-static void ioapic_print_vector(struct ioapic_intsrc *intpin);
-static void ioapic_enable_source(struct intsrc *isrc);
-static void ioapic_disable_source(struct intsrc *isrc, int eoi);
-static void ioapic_eoi_source(struct intsrc *isrc);
-static void ioapic_enable_intr(struct intsrc *isrc);
-static int ioapic_vector(struct intsrc *isrc);
-static int ioapic_source_pending(struct intsrc *isrc);
-static int ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig,
- enum intr_polarity pol);
-static void ioapic_suspend(struct intsrc *isrc);
-static void ioapic_resume(struct intsrc *isrc);
-static void ioapic_program_destination(struct ioapic_intsrc *intpin);
-static void ioapic_program_intpin(struct ioapic_intsrc *intpin);
-static void ioapic_setup_mixed_mode(struct ioapic_intsrc *intpin);
-
-static STAILQ_HEAD(,ioapic) ioapic_list = STAILQ_HEAD_INITIALIZER(ioapic_list);
-struct pic ioapic_template = { ioapic_enable_source, ioapic_disable_source,
- ioapic_eoi_source, ioapic_enable_intr,
- ioapic_vector, ioapic_source_pending,
- ioapic_suspend, ioapic_resume,
- ioapic_config_intr };
-
-static int bsp_id, current_cluster, logical_clusters, next_ioapic_base;
-static u_int mixed_mode_enabled, next_id, program_logical_dest;
-#ifdef NO_MIXED_MODE
-static int mixed_mode_active = 0;
-#else
-static int mixed_mode_active = 1;
-#endif
-TUNABLE_INT("hw.apic.mixed_mode", &mixed_mode_active);
-
-static __inline void
-_ioapic_eoi_source(struct intsrc *isrc)
-{
- lapic_eoi();
-}
-
-static u_int
-ioapic_read(volatile ioapic_t *apic, int reg)
-{
-
- mtx_assert(&icu_lock, MA_OWNED);
- apic->ioregsel = reg;
- return (apic->iowin);
-}
-
-static void
-ioapic_write(volatile ioapic_t *apic, int reg, u_int val)
-{
-
- mtx_assert(&icu_lock, MA_OWNED);
- apic->ioregsel = reg;
- apic->iowin = val;
-}
-
-static const char *
-ioapic_bus_string(int bus_type)
-{
-
- switch (bus_type) {
- case APIC_BUS_ISA:
- return ("ISA");
- case APIC_BUS_EISA:
- return ("EISA");
- case APIC_BUS_PCI:
- return ("PCI");
- default:
- return ("unknown");
- }
-}
-
-static void
-ioapic_print_vector(struct ioapic_intsrc *intpin)
-{
-
- switch (intpin->io_vector) {
- case VECTOR_DISABLED:
- printf("disabled");
- break;
- case VECTOR_EXTINT:
- printf("ExtINT");
- break;
- case VECTOR_NMI:
- printf("NMI");
- break;
- case VECTOR_SMI:
- printf("SMI");
- break;
- default:
- printf("%s IRQ %u", ioapic_bus_string(intpin->io_bus),
- intpin->io_vector);
- }
-}
-
-static void
-ioapic_enable_source(struct intsrc *isrc)
-{
- struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc;
- struct ioapic *io = (struct ioapic *)isrc->is_pic;
- uint32_t flags;
-
- mtx_lock_spin(&icu_lock);
- if (intpin->io_masked) {
- flags = ioapic_read(io->io_addr,
- IOAPIC_REDTBL_LO(intpin->io_intpin));
- flags &= ~(IOART_INTMASK);
- ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin),
- flags);
- intpin->io_masked = 0;
- }
- mtx_unlock_spin(&icu_lock);
-}
-
-static void
-ioapic_disable_source(struct intsrc *isrc, int eoi)
-{
- struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc;
- struct ioapic *io = (struct ioapic *)isrc->is_pic;
- uint32_t flags;
-
- mtx_lock_spin(&icu_lock);
- if (!intpin->io_masked && !intpin->io_edgetrigger) {
- flags = ioapic_read(io->io_addr,
- IOAPIC_REDTBL_LO(intpin->io_intpin));
- flags |= IOART_INTMSET;
- ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin),
- flags);
- intpin->io_masked = 1;
- }
-
- if (eoi == PIC_EOI)
- _ioapic_eoi_source(isrc);
-
- mtx_unlock_spin(&icu_lock);
-}
-
-static void
-ioapic_eoi_source(struct intsrc *isrc)
-{
-
- _ioapic_eoi_source(isrc);
-}
-
-/*
- * Completely program an intpin based on the data in its interrupt source
- * structure.
- */
-static void
-ioapic_program_intpin(struct ioapic_intsrc *intpin)
-{
- struct ioapic *io = (struct ioapic *)intpin->io_intsrc.is_pic;
- uint32_t low, high, value;
-
- /*
- * For pins routed via mixed mode or disabled, just ensure that
- * they are masked.
- */
- if (intpin->io_dest == DEST_EXTINT ||
- intpin->io_vector == VECTOR_DISABLED) {
- low = ioapic_read(io->io_addr,
- IOAPIC_REDTBL_LO(intpin->io_intpin));
- if ((low & IOART_INTMASK) == IOART_INTMCLR)
- ioapic_write(io->io_addr,
- IOAPIC_REDTBL_LO(intpin->io_intpin),
- low | IOART_INTMSET);
- return;
- }
-
- /* Set the destination. */
- if (intpin->io_dest == DEST_NONE) {
- low = IOART_DESTPHY;
- high = bsp_id << APIC_ID_SHIFT;
- } else {
- low = IOART_DESTLOG;
- high = (intpin->io_dest << APIC_ID_CLUSTER_SHIFT |
- APIC_ID_CLUSTER_ID) << APIC_ID_SHIFT;
- }
-
- /* Program the rest of the low word. */
- if (intpin->io_edgetrigger)
- low |= IOART_TRGREDG;
- else
- low |= IOART_TRGRLVL;
- if (intpin->io_activehi)
- low |= IOART_INTAHI;
- else
- low |= IOART_INTALO;
- if (intpin->io_masked)
- low |= IOART_INTMSET;
- switch (intpin->io_vector) {
- case VECTOR_EXTINT:
- KASSERT(intpin->io_edgetrigger,
- ("EXTINT not edge triggered"));
- low |= IOART_DELEXINT;
- break;
- case VECTOR_NMI:
- KASSERT(intpin->io_edgetrigger,
- ("NMI not edge triggered"));
- low |= IOART_DELNMI;
- break;
- case VECTOR_SMI:
- KASSERT(intpin->io_edgetrigger,
- ("SMI not edge triggered"));
- low |= IOART_DELSMI;
- break;
- default:
- low |= IOART_DELLOPRI | apic_irq_to_idt(intpin->io_vector);
- }
-
- /* Write the values to the APIC. */
- mtx_lock_spin(&icu_lock);
- ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin), low);
- value = ioapic_read(io->io_addr, IOAPIC_REDTBL_HI(intpin->io_intpin));
- value &= ~IOART_DEST;
- value |= high;
- ioapic_write(io->io_addr, IOAPIC_REDTBL_HI(intpin->io_intpin), value);
- mtx_unlock_spin(&icu_lock);
-}
-
-/*
- * Program an individual intpin's logical destination.
- */
-static void
-ioapic_program_destination(struct ioapic_intsrc *intpin)
-{
- struct ioapic *io = (struct ioapic *)intpin->io_intsrc.is_pic;
-
- KASSERT(intpin->io_dest != DEST_NONE,
- ("intpin not assigned to a cluster"));
- KASSERT(intpin->io_dest != DEST_EXTINT,
- ("intpin routed via ExtINT"));
- if (bootverbose) {
- printf("ioapic%u: routing intpin %u (", io->io_id,
- intpin->io_intpin);
- ioapic_print_vector(intpin);
- printf(") to cluster %u\n", intpin->io_dest);
- }
- ioapic_program_intpin(intpin);
-}
-
-static void
-ioapic_assign_cluster(struct ioapic_intsrc *intpin)
-{
-
- /*
- * Assign this intpin to a logical APIC cluster in a
- * round-robin fashion. We don't actually use the logical
- * destination for this intpin until after all the CPU's
- * have been started so that we don't end up with interrupts
- * that don't go anywhere. Another alternative might be to
- * start up the CPU's earlier so that they can handle interrupts
- * sooner.
- */
- intpin->io_dest = current_cluster;
- current_cluster++;
- if (current_cluster >= logical_clusters)
- current_cluster = 0;
- if (program_logical_dest)
- ioapic_program_destination(intpin);
-}
-
-static void
-ioapic_enable_intr(struct intsrc *isrc)
-{
- struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc;
-
- KASSERT(intpin->io_dest != DEST_EXTINT,
- ("ExtINT pin trying to use ioapic enable_intr method"));
- if (intpin->io_dest == DEST_NONE) {
- ioapic_assign_cluster(intpin);
- lapic_enable_intr(intpin->io_vector);
- }
-}
-
-static int
-ioapic_vector(struct intsrc *isrc)
-{
- struct ioapic_intsrc *pin;
-
- pin = (struct ioapic_intsrc *)isrc;
- return (pin->io_vector);
-}
-
-static int
-ioapic_source_pending(struct intsrc *isrc)
-{
- struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc;
-
- return (lapic_intr_pending(intpin->io_vector));
-}
-
-static int
-ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig,
- enum intr_polarity pol)
-{
- struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc;
- struct ioapic *io = (struct ioapic *)isrc->is_pic;
- int changed;
-
- KASSERT(!(trig == INTR_TRIGGER_CONFORM || pol == INTR_POLARITY_CONFORM),
- ("%s: Conforming trigger or polarity\n", __func__));
-
- /*
- * EISA interrupts always use active high polarity, so don't allow
- * them to be set to active low.
- *
- * XXX: Should we write to the ELCR if the trigger mode changes for
- * an EISA IRQ?
- */
- if (intpin->io_bus == APIC_BUS_EISA)
- pol = INTR_POLARITY_HIGH;
- changed = 0;
- if (intpin->io_edgetrigger != (trig == INTR_TRIGGER_EDGE)) {
- if (bootverbose)
- printf("ioapic%u: Changing trigger for pin %u to %s\n",
- io->io_id, intpin->io_intpin,
- trig == INTR_TRIGGER_EDGE ? "edge" : "level");
- intpin->io_edgetrigger = (trig == INTR_TRIGGER_EDGE);
- changed++;
- }
- if (intpin->io_activehi != (pol == INTR_POLARITY_HIGH)) {
- if (bootverbose)
- printf("ioapic%u: Changing polarity for pin %u to %s\n",
- io->io_id, intpin->io_intpin,
- pol == INTR_POLARITY_HIGH ? "high" : "low");
- intpin->io_activehi = (pol == INTR_POLARITY_HIGH);
- changed++;
- }
- if (changed)
- ioapic_program_intpin(intpin);
- return (0);
-}
-
-static void
-ioapic_suspend(struct intsrc *isrc)
-{
-
- TODO;
-}
-
-static void
-ioapic_resume(struct intsrc *isrc)
-{
-
- ioapic_program_intpin((struct ioapic_intsrc *)isrc);
-}
-
-/*
- * APIC enumerators call this function to indicate that the 8259A AT PICs
- * are available and that mixed mode can be used.
- */
-void
-ioapic_enable_mixed_mode(void)
-{
-
- mixed_mode_enabled = 1;
-}
-
-/*
- * Allocate and return a logical cluster ID. Note that the first time
- * this is called, it returns cluster 0. ioapic_enable_intr() treats
- * the two cases of logical_clusters == 0 and logical_clusters == 1 the
- * same: one cluster of ID 0 exists. The logical_clusters == 0 case is
- * for UP kernels, which should never call this function.
- */
-int
-ioapic_next_logical_cluster(void)
-{
-
- if (logical_clusters >= APIC_MAX_CLUSTER)
- panic("WARNING: Local APIC cluster IDs exhausted!");
- return (logical_clusters++);
-}
-
-/*
- * Create a plain I/O APIC object.
- */
-void *
-ioapic_create(uintptr_t addr, int32_t apic_id, int intbase)
-{
- struct ioapic *io;
- struct ioapic_intsrc *intpin;
- volatile ioapic_t *apic;
- u_int numintr, i;
- uint32_t value;
-
- apic = (ioapic_t *)pmap_mapdev(addr, IOAPIC_MEM_REGION);
- mtx_lock_spin(&icu_lock);
- numintr = ((ioapic_read(apic, IOAPIC_VER) & IOART_VER_MAXREDIR) >>
- MAXREDIRSHIFT) + 1;
- mtx_unlock_spin(&icu_lock);
- io = malloc(sizeof(struct ioapic) +
- numintr * sizeof(struct ioapic_intsrc), M_IOAPIC, M_WAITOK);
- io->io_pic = ioapic_template;
- mtx_lock_spin(&icu_lock);
- io->io_id = next_id++;
- io->io_apic_id = ioapic_read(apic, IOAPIC_ID) >> APIC_ID_SHIFT;
- if (apic_id != -1 && io->io_apic_id != apic_id) {
- ioapic_write(apic, IOAPIC_ID, apic_id << APIC_ID_SHIFT);
- mtx_unlock_spin(&icu_lock);
- io->io_apic_id = apic_id;
- printf("ioapic%u: Changing APIC ID to %d\n", io->io_id,
- apic_id);
- } else
- mtx_unlock_spin(&icu_lock);
- if (intbase == -1) {
- intbase = next_ioapic_base;
- printf("ioapic%u: Assuming intbase of %d\n", io->io_id,
- intbase);
- } else if (intbase != next_ioapic_base)
- printf("ioapic%u: WARNING: intbase %d != expected base %d\n",
- io->io_id, intbase, next_ioapic_base);
- io->io_intbase = intbase;
- next_ioapic_base = intbase + numintr;
- io->io_numintr = numintr;
- io->io_addr = apic;
-
- /*
- * Initialize pins. Start off with interrupts disabled. Default
- * to active-hi and edge-triggered for ISA interrupts and active-lo
- * and level-triggered for all others.
- */
- bzero(io->io_pins, sizeof(struct ioapic_intsrc) * numintr);
- mtx_lock_spin(&icu_lock);
- for (i = 0, intpin = io->io_pins; i < numintr; i++, intpin++) {
- intpin->io_intsrc.is_pic = (struct pic *)io;
- intpin->io_intpin = i;
- intpin->io_vector = intbase + i;
-
- /*
- * Assume that pin 0 on the first I/O APIC is an ExtINT pin
- * and that pins 1-15 are ISA interrupts. Assume that all
- * other pins are PCI interrupts.
- */
- if (intpin->io_vector == 0)
- ioapic_set_extint(io, i);
- else if (intpin->io_vector < IOAPIC_ISA_INTS) {
- intpin->io_bus = APIC_BUS_ISA;
- intpin->io_activehi = 1;
- intpin->io_edgetrigger = 1;
- intpin->io_masked = 1;
- } else {
- intpin->io_bus = APIC_BUS_PCI;
- intpin->io_activehi = 0;
- intpin->io_edgetrigger = 0;
- intpin->io_masked = 1;
- }
-
- /*
- * Route interrupts to the BSP by default using physical
- * addressing. Vectored interrupts get readdressed using
- * logical IDs to CPU clusters when they are enabled.
- */
- intpin->io_dest = DEST_NONE;
- if (bootverbose && intpin->io_vector != VECTOR_DISABLED) {
- printf("ioapic%u: intpin %d -> ", io->io_id, i);
- ioapic_print_vector(intpin);
- printf(" (%s, %s)\n", intpin->io_edgetrigger ?
- "edge" : "level", intpin->io_activehi ? "high" :
- "low");
- }
- value = ioapic_read(apic, IOAPIC_REDTBL_LO(i));
- ioapic_write(apic, IOAPIC_REDTBL_LO(i), value | IOART_INTMSET);
- }
- mtx_unlock_spin(&icu_lock);
-
- return (io);
-}
-
-int
-ioapic_get_vector(void *cookie, u_int pin)
-{
- struct ioapic *io;
-
- io = (struct ioapic *)cookie;
- if (pin >= io->io_numintr)
- return (-1);
- return (io->io_pins[pin].io_vector);
-}
-
-int
-ioapic_disable_pin(void *cookie, u_int pin)
-{
- struct ioapic *io;
-
- io = (struct ioapic *)cookie;
- if (pin >= io->io_numintr)
- return (EINVAL);
- if (io->io_pins[pin].io_vector == VECTOR_DISABLED)
- return (EINVAL);
- io->io_pins[pin].io_vector = VECTOR_DISABLED;
- if (bootverbose)
- printf("ioapic%u: intpin %d disabled\n", io->io_id, pin);
- return (0);
-}
-
-int
-ioapic_remap_vector(void *cookie, u_int pin, int vector)
-{
- struct ioapic *io;
-
- io = (struct ioapic *)cookie;
- if (pin >= io->io_numintr || vector < 0)
- return (EINVAL);
- if (io->io_pins[pin].io_vector >= NUM_IO_INTS)
- return (EINVAL);
- io->io_pins[pin].io_vector = vector;
- if (bootverbose)
- printf("ioapic%u: Routing IRQ %d -> intpin %d\n", io->io_id,
- vector, pin);
- return (0);
-}
-
-int
-ioapic_set_bus(void *cookie, u_int pin, int bus_type)
-{
- struct ioapic *io;
-
- if (bus_type < 0 || bus_type > APIC_BUS_MAX)
- return (EINVAL);
- io = (struct ioapic *)cookie;
- if (pin >= io->io_numintr)
- return (EINVAL);
- if (io->io_pins[pin].io_vector >= NUM_IO_INTS)
- return (EINVAL);
- io->io_pins[pin].io_bus = bus_type;
- if (bootverbose)
- printf("ioapic%u: intpin %d bus %s\n", io->io_id, pin,
- ioapic_bus_string(bus_type));
- return (0);
-}
-
-int
-ioapic_set_nmi(void *cookie, u_int pin)
-{
- struct ioapic *io;
-
- io = (struct ioapic *)cookie;
- if (pin >= io->io_numintr)
- return (EINVAL);
- if (io->io_pins[pin].io_vector == VECTOR_NMI)
- return (0);
- if (io->io_pins[pin].io_vector >= NUM_IO_INTS)
- return (EINVAL);
- io->io_pins[pin].io_bus = APIC_BUS_UNKNOWN;
- io->io_pins[pin].io_vector = VECTOR_NMI;
- io->io_pins[pin].io_masked = 0;
- io->io_pins[pin].io_edgetrigger = 1;
- io->io_pins[pin].io_activehi = 1;
- if (bootverbose)
- printf("ioapic%u: Routing NMI -> intpin %d\n",
- io->io_id, pin);
- return (0);
-}
-
-int
-ioapic_set_smi(void *cookie, u_int pin)
-{
- struct ioapic *io;
-
- io = (struct ioapic *)cookie;
- if (pin >= io->io_numintr)
- return (EINVAL);
- if (io->io_pins[pin].io_vector == VECTOR_SMI)
- return (0);
- if (io->io_pins[pin].io_vector >= NUM_IO_INTS)
- return (EINVAL);
- io->io_pins[pin].io_bus = APIC_BUS_UNKNOWN;
- io->io_pins[pin].io_vector = VECTOR_SMI;
- io->io_pins[pin].io_masked = 0;
- io->io_pins[pin].io_edgetrigger = 1;
- io->io_pins[pin].io_activehi = 1;
- if (bootverbose)
- printf("ioapic%u: Routing SMI -> intpin %d\n",
- io->io_id, pin);
- return (0);
-}
-
-int
-ioapic_set_extint(void *cookie, u_int pin)
-{
- struct ioapic *io;
-
- io = (struct ioapic *)cookie;
- if (pin >= io->io_numintr)
- return (EINVAL);
- if (io->io_pins[pin].io_vector == VECTOR_EXTINT)
- return (0);
- if (io->io_pins[pin].io_vector >= NUM_IO_INTS)
- return (EINVAL);
- io->io_pins[pin].io_bus = APIC_BUS_UNKNOWN;
- io->io_pins[pin].io_vector = VECTOR_EXTINT;
-
- /* Enable this pin if mixed mode is available and active. */
- if (mixed_mode_enabled && mixed_mode_active)
- io->io_pins[pin].io_masked = 0;
- else
- io->io_pins[pin].io_masked = 1;
- io->io_pins[pin].io_edgetrigger = 1;
- io->io_pins[pin].io_activehi = 1;
- if (bootverbose)
- printf("ioapic%u: Routing external 8259A's -> intpin %d\n",
- io->io_id, pin);
- return (0);
-}
-
-int
-ioapic_set_polarity(void *cookie, u_int pin, enum intr_polarity pol)
-{
- struct ioapic *io;
-
- io = (struct ioapic *)cookie;
- if (pin >= io->io_numintr || pol == INTR_POLARITY_CONFORM)
- return (EINVAL);
- if (io->io_pins[pin].io_vector >= NUM_IO_INTS)
- return (EINVAL);
- io->io_pins[pin].io_activehi = (pol == INTR_POLARITY_HIGH);
- if (bootverbose)
- printf("ioapic%u: intpin %d polarity: %s\n", io->io_id, pin,
- pol == INTR_POLARITY_HIGH ? "high" : "low");
- return (0);
-}
-
-int
-ioapic_set_triggermode(void *cookie, u_int pin, enum intr_trigger trigger)
-{
- struct ioapic *io;
-
- io = (struct ioapic *)cookie;
- if (pin >= io->io_numintr || trigger == INTR_TRIGGER_CONFORM)
- return (EINVAL);
- if (io->io_pins[pin].io_vector >= NUM_IO_INTS)
- return (EINVAL);
- io->io_pins[pin].io_edgetrigger = (trigger == INTR_TRIGGER_EDGE);
- if (bootverbose)
- printf("ioapic%u: intpin %d trigger: %s\n", io->io_id, pin,
- trigger == INTR_TRIGGER_EDGE ? "edge" : "level");
- return (0);
-}
-
-/*
- * Register a complete I/O APIC object with the interrupt subsystem.
- */
-void
-ioapic_register(void *cookie)
-{
- struct ioapic_intsrc *pin;
- struct ioapic *io;
- volatile ioapic_t *apic;
- uint32_t flags;
- int i;
-
- io = (struct ioapic *)cookie;
- apic = io->io_addr;
- mtx_lock_spin(&icu_lock);
- flags = ioapic_read(apic, IOAPIC_VER) & IOART_VER_VERSION;
- STAILQ_INSERT_TAIL(&ioapic_list, io, io_next);
- mtx_unlock_spin(&icu_lock);
- printf("ioapic%u <Version %u.%u> irqs %u-%u on motherboard\n",
- io->io_id, flags >> 4, flags & 0xf, io->io_intbase,
- io->io_intbase + io->io_numintr - 1);
- bsp_id = PCPU_GET(apic_id);
- for (i = 0, pin = io->io_pins; i < io->io_numintr; i++, pin++) {
- /*
- * Finish initializing the pins by programming the vectors
- * and delivery mode.
- */
- if (pin->io_vector == VECTOR_DISABLED)
- continue;
- ioapic_program_intpin(pin);
- if (pin->io_vector >= NUM_IO_INTS)
- continue;
- /*
- * Route IRQ0 via the 8259A using mixed mode if mixed mode
- * is available and turned on.
- */
- if (pin->io_vector == 0 && mixed_mode_active &&
- mixed_mode_enabled)
- ioapic_setup_mixed_mode(pin);
- else
- intr_register_source(&pin->io_intsrc);
- }
-}
-
-/*
- * Program all the intpins to use logical destinations once the AP's
- * have been launched.
- */
-static void
-ioapic_set_logical_destinations(void *arg __unused)
-{
- struct ioapic *io;
- int i;
-
- program_logical_dest = 1;
- STAILQ_FOREACH(io, &ioapic_list, io_next)
- for (i = 0; i < io->io_numintr; i++)
- if (io->io_pins[i].io_dest != DEST_NONE &&
- io->io_pins[i].io_dest != DEST_EXTINT)
- ioapic_program_destination(&io->io_pins[i]);
-}
-SYSINIT(ioapic_destinations, SI_SUB_SMP, SI_ORDER_SECOND,
- ioapic_set_logical_destinations, NULL)
-
-/*
- * Support for mixed-mode interrupt sources. These sources route an ISA
- * IRQ through the 8259A's via the ExtINT on pin 0 of the I/O APIC that
- * routes the ISA interrupts. We just ignore the intpins that use this
- * mode and allow the atpic driver to register its interrupt source for
- * that IRQ instead.
- */
-
-static void
-ioapic_setup_mixed_mode(struct ioapic_intsrc *intpin)
-{
- struct ioapic_intsrc *extint;
- struct ioapic *io;
-
- /*
- * Mark the associated I/O APIC intpin as being delivered via
- * ExtINT and enable the ExtINT pin on the I/O APIC if needed.
- */
- intpin->io_dest = DEST_EXTINT;
- io = (struct ioapic *)intpin->io_intsrc.is_pic;
- extint = &io->io_pins[0];
- if (extint->io_vector != VECTOR_EXTINT)
- panic("Can't find ExtINT pin to route through!");
-#ifdef ENABLE_EXTINT_LOGICAL_DESTINATION
- if (extint->io_dest == DEST_NONE)
- ioapic_assign_cluster(extint);
-#endif
-}
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/i386-xen/local_apic.c
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/local_apic.c Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,763 +0,0 @@
-/*-
- * Copyright (c) 2003 John Baldwin <jhb@xxxxxxxxxxx>
- * Copyright (c) 1996, by Steve Passe
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. The name of the developer may NOT be used to endorse or promote products
- * derived from this software without specific prior written permission.
- * 3. Neither the name of the author nor the names of any co-contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/*
- * Local APIC support on Pentium and later processors.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/local_apic.c,v 1.9 2004/07/14 18:12:15
jhb Exp $");
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/bus.h>
-#include <sys/kernel.h>
-#include <sys/pcpu.h>
-
-#include <vm/vm.h>
-#include <vm/pmap.h>
-
-#include <machine/apicreg.h>
-#include <machine/cputypes.h>
-#include <machine/frame.h>
-#include <machine/intr_machdep.h>
-#include <machine/apicvar.h>
-#include <machine/md_var.h>
-#include <machine/smp.h>
-#include <machine/specialreg.h>
-
-/*
- * We can handle up to 60 APICs via our logical cluster IDs, but currently
- * the physical IDs on Intel processors up to the Pentium 4 are limited to
- * 16.
- */
-#define MAX_APICID 16
-
-/* Sanity checks on IDT vectors. */
-CTASSERT(APIC_IO_INTS + APIC_NUM_IOINTS <= APIC_LOCAL_INTS);
-CTASSERT(IPI_STOP < APIC_SPURIOUS_INT);
-
-/*
- * Support for local APICs. Local APICs manage interrupts on each
- * individual processor as opposed to I/O APICs which receive interrupts
- * from I/O devices and then forward them on to the local APICs.
- *
- * Local APICs can also send interrupts to each other thus providing the
- * mechanism for IPIs.
- */
-
-struct lvt {
- u_int lvt_edgetrigger:1;
- u_int lvt_activehi:1;
- u_int lvt_masked:1;
- u_int lvt_active:1;
- u_int lvt_mode:16;
- u_int lvt_vector:8;
-};
-
-struct lapic {
- struct lvt la_lvts[LVT_MAX + 1];
- u_int la_id:8;
- u_int la_cluster:4;
- u_int la_cluster_id:2;
- u_int la_present:1;
-} static lapics[MAX_APICID];
-
-/* XXX: should thermal be an NMI? */
-
-/* Global defaults for local APIC LVT entries. */
-static struct lvt lvts[LVT_MAX + 1] = {
- { 1, 1, 1, 1, APIC_LVT_DM_EXTINT, 0 }, /* LINT0: masked ExtINT */
- { 1, 1, 0, 1, APIC_LVT_DM_NMI, 0 }, /* LINT1: NMI */
- { 1, 1, 1, 1, APIC_LVT_DM_FIXED, 0 }, /* Timer: needs a vector */
- { 1, 1, 1, 1, APIC_LVT_DM_FIXED, 0 }, /* Error: needs a vector */
- { 1, 1, 1, 1, APIC_LVT_DM_FIXED, 0 }, /* PMC */
- { 1, 1, 1, 1, APIC_LVT_DM_FIXED, 0 }, /* Thermal: needs a vector */
-};
-
-static inthand_t *ioint_handlers[] = {
- NULL, /* 0 - 31 */
- IDTVEC(apic_isr1), /* 32 - 63 */
- IDTVEC(apic_isr2), /* 64 - 95 */
- IDTVEC(apic_isr3), /* 96 - 127 */
- IDTVEC(apic_isr4), /* 128 - 159 */
- IDTVEC(apic_isr5), /* 160 - 191 */
- IDTVEC(apic_isr6), /* 192 - 223 */
- IDTVEC(apic_isr7), /* 224 - 255 */
-};
-
-volatile lapic_t *lapic;
-
-static uint32_t
-lvt_mode(struct lapic *la, u_int pin, uint32_t value)
-{
- struct lvt *lvt;
-
- KASSERT(pin <= LVT_MAX, ("%s: pin %u out of range", __func__, pin));
- if (la->la_lvts[pin].lvt_active)
- lvt = &la->la_lvts[pin];
- else
- lvt = &lvts[pin];
-
- value &= ~(APIC_LVT_M | APIC_LVT_TM | APIC_LVT_IIPP | APIC_LVT_DM |
- APIC_LVT_VECTOR);
- if (lvt->lvt_edgetrigger == 0)
- value |= APIC_LVT_TM;
- if (lvt->lvt_activehi == 0)
- value |= APIC_LVT_IIPP_INTALO;
- if (lvt->lvt_masked)
- value |= APIC_LVT_M;
- value |= lvt->lvt_mode;
- switch (lvt->lvt_mode) {
- case APIC_LVT_DM_NMI:
- case APIC_LVT_DM_SMI:
- case APIC_LVT_DM_INIT:
- case APIC_LVT_DM_EXTINT:
- if (!lvt->lvt_edgetrigger) {
- printf("lapic%u: Forcing LINT%u to edge trigger\n",
- la->la_id, pin);
- value |= APIC_LVT_TM;
- }
- /* Use a vector of 0. */
- break;
- case APIC_LVT_DM_FIXED:
-#if 0
- value |= lvt->lvt_vector;
-#else
- panic("Fixed LINT pins not supported");
-#endif
- break;
- default:
- panic("bad APIC LVT delivery mode: %#x\n", value);
- }
- return (value);
-}
-
-/*
- * Map the local APIC and setup necessary interrupt vectors.
- */
-void
-lapic_init(uintptr_t addr)
-{
- u_int32_t value;
-
- /* Map the local APIC and setup the spurious interrupt handler. */
- KASSERT(trunc_page(addr) == addr,
- ("local APIC not aligned on a page boundary"));
- lapic = (lapic_t *)pmap_mapdev(addr, sizeof(lapic_t));
- setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
-
- /* Perform basic initialization of the BSP's local APIC. */
- value = lapic->svr;
- value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS);
- value |= (APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT);
- lapic->svr = value;
-
- /* Set BSP's per-CPU local APIC ID. */
- PCPU_SET(apic_id, lapic_id());
-
- /* XXX: timer/error/thermal interrupts */
-}
-
-/*
- * Create a local APIC instance.
- */
-void
-lapic_create(u_int apic_id, int boot_cpu)
-{
- int i;
-
- if (apic_id >= MAX_APICID) {
- printf("APIC: Ignoring local APIC with ID %d\n", apic_id);
- if (boot_cpu)
- panic("Can't ignore BSP");
- return;
- }
- KASSERT(!lapics[apic_id].la_present, ("duplicate local APIC %u",
- apic_id));
-
- /*
- * Assume no local LVT overrides and a cluster of 0 and
- * intra-cluster ID of 0.
- */
- lapics[apic_id].la_present = 1;
- lapics[apic_id].la_id = apic_id;
- for (i = 0; i < LVT_MAX; i++) {
- lapics[apic_id].la_lvts[i] = lvts[i];
- lapics[apic_id].la_lvts[i].lvt_active = 0;
- }
-
-#ifdef SMP
- cpu_add(apic_id, boot_cpu);
-#endif
-}
-
-/*
- * Dump contents of local APIC registers
- */
-void
-lapic_dump(const char* str)
-{
-
- printf("cpu%d %s:\n", PCPU_GET(cpuid), str);
- printf(" ID: 0x%08x VER: 0x%08x LDR: 0x%08x DFR: 0x%08x\n",
- lapic->id, lapic->version, lapic->ldr, lapic->dfr);
- printf(" lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n",
- lapic->lvt_lint0, lapic->lvt_lint1, lapic->tpr, lapic->svr);
-}
-
-void
-lapic_enable_intr(u_int irq)
-{
- u_int vector;
-
- vector = apic_irq_to_idt(irq);
- KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry"));
- KASSERT(ioint_handlers[vector / 32] != NULL,
- ("No ISR handler for IRQ %u", irq));
- setidt(vector, ioint_handlers[vector / 32], SDT_SYS386IGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
-}
-
-void
-lapic_setup(void)
-{
- struct lapic *la;
- u_int32_t value, maxlvt;
- register_t eflags;
-
- la = &lapics[lapic_id()];
- KASSERT(la->la_present, ("missing APIC structure"));
- eflags = intr_disable();
- maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
-
- /* Program LINT[01] LVT entries. */
- lapic->lvt_lint0 = lvt_mode(la, LVT_LINT0, lapic->lvt_lint0);
- lapic->lvt_lint1 = lvt_mode(la, LVT_LINT1, lapic->lvt_lint1);
-
- /* XXX: more LVT entries */
-
- /* Clear the TPR. */
- value = lapic->tpr;
- value &= ~APIC_TPR_PRIO;
- lapic->tpr = value;
-
- /* Use the cluster model for logical IDs. */
- value = lapic->dfr;
- value &= ~APIC_DFR_MODEL_MASK;
- value |= APIC_DFR_MODEL_CLUSTER;
- lapic->dfr = value;
-
- /* Set this APIC's logical ID. */
- value = lapic->ldr;
- value &= ~APIC_ID_MASK;
- value |= (la->la_cluster << APIC_ID_CLUSTER_SHIFT |
- 1 << la->la_cluster_id) << APIC_ID_SHIFT;
- lapic->ldr = value;
-
- /* Setup spurious vector and enable the local APIC. */
- value = lapic->svr;
- value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS);
- value |= (APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT);
- lapic->svr = value;
- intr_restore(eflags);
-}
-
-void
-lapic_disable(void)
-{
- uint32_t value;
-
- /* Software disable the local APIC. */
- value = lapic->svr;
- value &= ~APIC_SVR_SWEN;
- lapic->svr = value;
-}
-
-int
-lapic_id(void)
-{
-
- KASSERT(lapic != NULL, ("local APIC is not mapped"));
- return (lapic->id >> APIC_ID_SHIFT);
-}
-
-int
-lapic_intr_pending(u_int vector)
-{
- volatile u_int32_t *irr;
-
- /*
- * The IRR registers are an array of 128-bit registers each of
- * which only describes 32 interrupts in the low 32 bits.. Thus,
- * we divide the vector by 32 to get the 128-bit index. We then
- * multiply that index by 4 to get the equivalent index from
- * treating the IRR as an array of 32-bit registers. Finally, we
- * modulus the vector by 32 to determine the individual bit to
- * test.
- */
- irr = &lapic->irr0;
- return (irr[(vector / 32) * 4] & 1 << (vector % 32));
-}
-
-void
-lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id)
-{
- struct lapic *la;
-
- KASSERT(lapics[apic_id].la_present, ("%s: APIC %u doesn't exist",
- __func__, apic_id));
- KASSERT(cluster <= APIC_MAX_CLUSTER, ("%s: cluster %u too big",
- __func__, cluster));
- KASSERT(cluster_id <= APIC_MAX_INTRACLUSTER_ID,
- ("%s: intra cluster id %u too big", __func__, cluster_id));
- la = &lapics[apic_id];
- la->la_cluster = cluster;
- la->la_cluster_id = cluster_id;
-}
-
-int
-lapic_set_lvt_mask(u_int apic_id, u_int pin, u_char masked)
-{
-
- if (pin > LVT_MAX)
- return (EINVAL);
- if (apic_id == APIC_ID_ALL) {
- lvts[pin].lvt_masked = masked;
- if (bootverbose)
- printf("lapic:");
- } else {
- KASSERT(lapics[apic_id].la_present,
- ("%s: missing APIC %u", __func__, apic_id));
- lapics[apic_id].la_lvts[pin].lvt_masked = masked;
- lapics[apic_id].la_lvts[pin].lvt_active = 1;
- if (bootverbose)
- printf("lapic%u:", apic_id);
- }
- if (bootverbose)
- printf(" LINT%u %s\n", pin, masked ? "masked" : "unmasked");
- return (0);
-}
-
-int
-lapic_set_lvt_mode(u_int apic_id, u_int pin, u_int32_t mode)
-{
- struct lvt *lvt;
-
- if (pin > LVT_MAX)
- return (EINVAL);
- if (apic_id == APIC_ID_ALL) {
- lvt = &lvts[pin];
- if (bootverbose)
- printf("lapic:");
- } else {
- KASSERT(lapics[apic_id].la_present,
- ("%s: missing APIC %u", __func__, apic_id));
- lvt = &lapics[apic_id].la_lvts[pin];
- lvt->lvt_active = 1;
- if (bootverbose)
- printf("lapic%u:", apic_id);
- }
- lvt->lvt_mode = mode;
- switch (mode) {
- case APIC_LVT_DM_NMI:
- case APIC_LVT_DM_SMI:
- case APIC_LVT_DM_INIT:
- case APIC_LVT_DM_EXTINT:
- lvt->lvt_edgetrigger = 1;
- lvt->lvt_activehi = 1;
- if (mode == APIC_LVT_DM_EXTINT)
- lvt->lvt_masked = 1;
- else
- lvt->lvt_masked = 0;
- break;
- default:
- panic("Unsupported delivery mode: 0x%x\n", mode);
- }
- if (bootverbose) {
- printf(" Routing ");
- switch (mode) {
- case APIC_LVT_DM_NMI:
- printf("NMI");
- break;
- case APIC_LVT_DM_SMI:
- printf("SMI");
- break;
- case APIC_LVT_DM_INIT:
- printf("INIT");
- break;
- case APIC_LVT_DM_EXTINT:
- printf("ExtINT");
- break;
- }
- printf(" -> LINT%u\n", pin);
- }
- return (0);
-}
-
-int
-lapic_set_lvt_polarity(u_int apic_id, u_int pin, enum intr_polarity pol)
-{
-
- if (pin > LVT_MAX || pol == INTR_POLARITY_CONFORM)
- return (EINVAL);
- if (apic_id == APIC_ID_ALL) {
- lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH);
- if (bootverbose)
- printf("lapic:");
- } else {
- KASSERT(lapics[apic_id].la_present,
- ("%s: missing APIC %u", __func__, apic_id));
- lapics[apic_id].la_lvts[pin].lvt_active = 1;
- lapics[apic_id].la_lvts[pin].lvt_activehi =
- (pol == INTR_POLARITY_HIGH);
- if (bootverbose)
- printf("lapic%u:", apic_id);
- }
- if (bootverbose)
- printf(" LINT%u polarity: active-%s\n", pin,
- pol == INTR_POLARITY_HIGH ? "high" : "low");
- return (0);
-}
-
-int
-lapic_set_lvt_triggermode(u_int apic_id, u_int pin, enum intr_trigger trigger)
-{
-
- if (pin > LVT_MAX || trigger == INTR_TRIGGER_CONFORM)
- return (EINVAL);
- if (apic_id == APIC_ID_ALL) {
- lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE);
- if (bootverbose)
- printf("lapic:");
- } else {
- KASSERT(lapics[apic_id].la_present,
- ("%s: missing APIC %u", __func__, apic_id));
- lapics[apic_id].la_lvts[pin].lvt_edgetrigger =
- (trigger == INTR_TRIGGER_EDGE);
- lapics[apic_id].la_lvts[pin].lvt_active = 1;
- if (bootverbose)
- printf("lapic%u:", apic_id);
- }
- if (bootverbose)
- printf(" LINT%u trigger: %s\n", pin,
- trigger == INTR_TRIGGER_EDGE ? "edge" : "level");
- return (0);
-}
-
-void
-lapic_eoi(void)
-{
-
- lapic->eoi = 0;
-}
-
-void
-lapic_handle_intr(struct intrframe frame)
-{
- struct intsrc *isrc;
-
- if (frame.if_vec == -1)
- panic("Couldn't get vector from ISR!");
- isrc = intr_lookup_source(apic_idt_to_irq(frame.if_vec));
- intr_execute_handlers(isrc, &frame);
-}
-
-/* Translate between IDT vectors and IRQ vectors. */
-u_int
-apic_irq_to_idt(u_int irq)
-{
- u_int vector;
-
- KASSERT(irq < NUM_IO_INTS, ("Invalid IRQ %u", irq));
- vector = irq + APIC_IO_INTS;
- if (vector >= IDT_SYSCALL)
- vector++;
- return (vector);
-}
-
-u_int
-apic_idt_to_irq(u_int vector)
-{
-
- KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL &&
- vector <= APIC_IO_INTS + NUM_IO_INTS,
- ("Vector %u does not map to an IRQ line", vector));
- if (vector > IDT_SYSCALL)
- vector--;
- return (vector - APIC_IO_INTS);
-}
-
-/*
- * APIC probing support code. This includes code to manage enumerators.
- */
-
-static SLIST_HEAD(, apic_enumerator) enumerators =
- SLIST_HEAD_INITIALIZER(enumerators);
-static struct apic_enumerator *best_enum;
-
-void
-apic_register_enumerator(struct apic_enumerator *enumerator)
-{
-#ifdef INVARIANTS
- struct apic_enumerator *apic_enum;
-
- SLIST_FOREACH(apic_enum, &enumerators, apic_next) {
- if (apic_enum == enumerator)
- panic("%s: Duplicate register of %s", __func__,
- enumerator->apic_name);
- }
-#endif
- SLIST_INSERT_HEAD(&enumerators, enumerator, apic_next);
-}
-
-/*
- * Probe the APIC enumerators, enumerate CPUs, and initialize the
- * local APIC.
- */
-static void
-apic_init(void *dummy __unused)
-{
- uint64_t apic_base;
- int retval, best;
-
- /* We only support built in local APICs. */
- if (!(cpu_feature & CPUID_APIC))
- return;
-
- /* Don't probe if APIC mode is disabled. */
- if (resource_disabled("apic", 0))
- return;
-
- /* First, probe all the enumerators to find the best match. */
- best_enum = NULL;
- best = 0;
-#ifndef XEN
- SLIST_FOREACH(enumerator, &enumerators, apic_next) {
- retval = enumerator->apic_probe();
- if (retval > 0)
- continue;
- if (best_enum == NULL || best < retval) {
- best_enum = enumerator;
- best = retval;
- }
- }
-#endif
- if (best_enum == NULL) {
- if (bootverbose)
- printf("APIC: Could not find any APICs.\n");
- return;
- }
-
- if (bootverbose)
- printf("APIC: Using the %s enumerator.\n",
- best_enum->apic_name);
-
- /*
- * To work around an errata, we disable the local APIC on some
- * CPUs during early startup. We need to turn the local APIC back
- * on on such CPUs now.
- */
- if (cpu == CPU_686 && strcmp(cpu_vendor, "GenuineIntel") == 0 &&
- (cpu_id & 0xff0) == 0x610) {
- apic_base = rdmsr(MSR_APICBASE);
- apic_base |= APICBASE_ENABLED;
- wrmsr(MSR_APICBASE, apic_base);
- }
-
- /* Second, probe the CPU's in the system. */
- retval = best_enum->apic_probe_cpus();
- if (retval != 0)
- printf("%s: Failed to probe CPUs: returned %d\n",
- best_enum->apic_name, retval);
-
- /* Third, initialize the local APIC. */
- retval = best_enum->apic_setup_local();
- if (retval != 0)
- printf("%s: Failed to setup the local APIC: returned %d\n",
- best_enum->apic_name, retval);
-#ifdef SMP
- /* Last, setup the cpu topology now that we have probed CPUs */
- mp_topology();
-#endif
-}
-SYSINIT(apic_init, SI_SUB_CPU, SI_ORDER_FIRST, apic_init, NULL)
-
-/*
- * Setup the I/O APICs.
- */
-static void
-apic_setup_io(void *dummy __unused)
-{
- int retval;
-
- if (best_enum == NULL)
- return;
- retval = best_enum->apic_setup_io();
- if (retval != 0)
- printf("%s: Failed to setup I/O APICs: returned %d\n",
- best_enum->apic_name, retval);
-
- /*
- * Finish setting up the local APIC on the BSP once we know how to
- * properly program the LINT pins.
- */
- lapic_setup();
- if (bootverbose)
- lapic_dump("BSP");
-}
-SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_SECOND, apic_setup_io, NULL)
-
-#ifdef SMP
-/*
- * Inter Processor Interrupt functions. The lapic_ipi_*() functions are
- * private the sys/i386 code. The public interface for the rest of the
- * kernel is defined in mp_machdep.c.
- */
-
-int
-lapic_ipi_wait(int delay)
-{
- int x, incr;
-
- /*
- * Wait delay loops for IPI to be sent. This is highly bogus
- * since this is sensitive to CPU clock speed. If delay is
- * -1, we wait forever.
- */
- if (delay == -1) {
- incr = 0;
- delay = 1;
- } else
- incr = 1;
- for (x = 0; x < delay; x += incr) {
- if ((lapic->icr_lo & APIC_DELSTAT_MASK) == APIC_DELSTAT_IDLE)
- return (1);
- ia32_pause();
- }
- return (0);
-}
-
-void
-lapic_ipi_raw(register_t icrlo, u_int dest)
-{
- register_t value, eflags;
-
- /* XXX: Need more sanity checking of icrlo? */
- KASSERT(lapic != NULL, ("%s called too early", __func__));
- KASSERT((dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0,
- ("%s: invalid dest field", __func__));
- KASSERT((icrlo & APIC_ICRLO_RESV_MASK) == 0,
- ("%s: reserved bits set in ICR LO register", __func__));
-
- /* Set destination in ICR HI register if it is being used. */
- eflags = intr_disable();
- if ((icrlo & APIC_DEST_MASK) == APIC_DEST_DESTFLD) {
- value = lapic->icr_hi;
- value &= ~APIC_ID_MASK;
- value |= dest << APIC_ID_SHIFT;
- lapic->icr_hi = value;
- }
-
- /* Program the contents of the IPI and dispatch it. */
- value = lapic->icr_lo;
- value &= APIC_ICRLO_RESV_MASK;
- value |= icrlo;
- lapic->icr_lo = value;
- intr_restore(eflags);
-}
-
-#define BEFORE_SPIN 1000000
-#ifdef DETECT_DEADLOCK
-#define AFTER_SPIN 1000
-#endif
-
-void
-lapic_ipi_vectored(u_int vector, int dest)
-{
- register_t icrlo, destfield;
-
- KASSERT((vector & ~APIC_VECTOR_MASK) == 0,
- ("%s: invalid vector %d", __func__, vector));
-
- icrlo = vector | APIC_DELMODE_FIXED | APIC_DESTMODE_PHY |
- APIC_LEVEL_DEASSERT | APIC_TRIGMOD_EDGE;
- destfield = 0;
- switch (dest) {
- case APIC_IPI_DEST_SELF:
- icrlo |= APIC_DEST_SELF;
- break;
- case APIC_IPI_DEST_ALL:
- icrlo |= APIC_DEST_ALLISELF;
- break;
- case APIC_IPI_DEST_OTHERS:
- icrlo |= APIC_DEST_ALLESELF;
- break;
- default:
- KASSERT((dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0,
- ("%s: invalid destination 0x%x", __func__, dest));
- destfield = dest;
- }
-
- /* Wait for an earlier IPI to finish. */
- if (!lapic_ipi_wait(BEFORE_SPIN))
- panic("APIC: Previous IPI is stuck");
-
- lapic_ipi_raw(icrlo, destfield);
-
-#ifdef DETECT_DEADLOCK
- /* Wait for IPI to be delivered. */
- if (!lapic_ipi_wait(AFTER_SPIN)) {
-#ifdef needsattention
- /*
- * XXX FIXME:
- *
- * The above function waits for the message to actually be
- * delivered. It breaks out after an arbitrary timeout
- * since the message should eventually be delivered (at
- * least in theory) and that if it wasn't we would catch
- * the failure with the check above when the next IPI is
- * sent.
- *
- * We could skiip this wait entirely, EXCEPT it probably
- * protects us from other routines that assume that the
- * message was delivered and acted upon when this function
- * returns.
- */
- printf("APIC: IPI might be stuck\n");
-#else /* !needsattention */
- /* Wait until mesage is sent without a timeout. */
- while (lapic->icr_lo & APIC_DELSTAT_PEND)
- ia32_pause();
-#endif /* needsattention */
- }
-#endif /* DETECT_DEADLOCK */
-}
-#endif /* SMP */
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/i386-xen/locore.s
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/locore.s Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,949 +0,0 @@
-/*-
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * William Jolitz.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * from: @(#)locore.s 7.3 (Berkeley) 5/13/91
- * $FreeBSD: src/sys/i386/i386/locore.s,v 1.181 2003/11/03 21:53:37 jhb Exp $
- *
- * originally from: locore.s, by William F. Jolitz
- *
- * Substantially rewritten by David Greenman, Rod Grimes,
- * Bruce Evans, Wolfgang Solfrank, Poul-Henning Kamp
- * and many others.
- */
-
-#include "opt_bootp.h"
-#include "opt_compat.h"
-#include "opt_nfsroot.h"
-#include "opt_pmap.h"
-
-#include <sys/syscall.h>
-#include <sys/reboot.h>
-
-#include <machine/asmacros.h>
-#include <machine/cputypes.h>
-#include <machine/psl.h>
-#include <machine/pmap.h>
-#include <machine/specialreg.h>
-
-#include "assym.s"
-
-.section __xen_guest
- .asciz "LOADER=generic,GUEST_VER=5.3,XEN_VER=3.0,BSD_SYMTAB"
-
-
-/*
- * XXX
- *
- * Note: This version greatly munged to avoid various assembler errors
- * that may be fixed in newer versions of gas. Perhaps newer versions
- * will have more pleasant appearance.
- */
-
-/*
- * PTmap is recursive pagemap at top of virtual address space.
- * Within PTmap, the page directory can be found (third indirection).
- */
- .globl PTmap,PTD,PTDpde
- .set PTmap,(PTDPTDI << PDRSHIFT)
- .set PTD,PTmap + (PTDPTDI * PAGE_SIZE)
- .set PTDpde,PTD + (PTDPTDI * PDESIZE)
-
-#ifdef SMP
-/*
- * Define layout of per-cpu address space.
- * This is "constructed" in locore.s on the BSP and in mp_machdep.c
- * for each AP. DO NOT REORDER THESE WITHOUT UPDATING THE REST!
- */
- .globl SMP_prvspace
- .set SMP_prvspace,(MPPTDI << PDRSHIFT)
-#endif /* SMP */
-
-/*
- * Compiled KERNBASE location and the kernel load address
- */
- .globl kernbase
- .set kernbase,KERNBASE
- .globl kernload
- .set kernload,KERNLOAD
-
-/*
- * Globals
- */
- .data
- ALIGN_DATA /* just to be sure */
-
- .space 0x2000 /* space for tmpstk - temporary stack */
-tmpstk:
-
- .globl bootinfo
-bootinfo: .space BOOTINFO_SIZE /* bootinfo that we can handle */
-
- .globl KERNend
-KERNend: .long 0 /* phys addr end of kernel (just after
bss) */
-physfree: .long 0 /* phys addr of next free page */
-
-#ifdef SMP
- .globl cpu0prvpage
-cpu0pp: .long 0 /* phys addr cpu0 private pg */
-cpu0prvpage: .long 0 /* relocated version */
-
- .globl SMPpt
-SMPptpa: .long 0 /* phys addr SMP page table */
-SMPpt: .long 0 /* relocated version */
-#endif /* SMP */
-
- .globl IdlePTD
-IdlePTD: .long 0 /* phys addr of kernel PTD */
-
-
- .globl KPTphys
-KPTphys: .long 0 /* phys addr of kernel page tables */
-
- .globl proc0uarea, proc0kstack
-proc0uarea: .long 0 /* address of proc 0 uarea space */
-proc0kstack: .long 0 /* address of proc 0 kstack space */
-p0upa: .long 0 /* phys addr of proc0's UAREA */
-p0kpa: .long 0 /* phys addr of proc0's STACK */
-
-#ifdef PC98
- .globl pc98_system_parameter
-pc98_system_parameter:
- .space 0x240
-#endif
-
-/**********************************************************************
- *
- * Some handy macros
- *
- */
-
-#define R(foo) ((foo))
-
-#define ALLOCPAGES(foo) \
- movl R(physfree), %esi ; \
- movl $((foo)*PAGE_SIZE), %eax ; \
- addl %esi, %eax ; \
- movl %eax, R(physfree) ; \
- movl %esi, %edi ; \
- movl $((foo)*PAGE_SIZE),%ecx ; \
- xorl %eax,%eax ; \
- cld ; \
- rep ; \
- stosb
-
-/*
- * fillkpt
- * eax = page frame address
- * ebx = index into page table
- * ecx = how many pages to map
- * base = base address of page dir/table
- * prot = protection bits
- */
-#define fillkpt(base, prot) \
- shll $PTESHIFT,%ebx ; \
- addl base,%ebx ; \
- orl $PG_V,%eax ; \
- orl prot,%eax ; \
-1: movl %eax,(%ebx) ; \
- addl $PAGE_SIZE,%eax ; /* increment physical address */ \
- addl $PTESIZE,%ebx ; /* next pte */ \
- loop 1b
-
-/*
- * fillkptphys(prot)
- * eax = physical address
- * ecx = how many pages to map
- * prot = protection bits
- */
-#define fillkptphys(prot) \
- movl %eax, %ebx ; \
- shrl $PAGE_SHIFT, %ebx ; \
- fillkpt(R(KPTphys), prot)
-
- .text
-/**********************************************************************
- *
- * This is where the bootblocks start us, set the ball rolling...
- *
- */
-NON_GPROF_ENTRY(btext)
- pushl %esi
- call initvalues
- popl %esi
- call identify_cpu
- movl proc0kstack,%eax
- leal (KSTACK_PAGES*PAGE_SIZE-PCB_SIZE)(%eax),%esp
- xorl %ebp,%ebp /* mark end of frames */
- movl IdlePTD,%esi
- movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax)
- call init386
- call mi_startup
- int $3
-
-
-#ifdef PC98
- /* save SYSTEM PARAMETER for resume (NS/T or other) */
- movl $0xa1400,%esi
- movl $R(pc98_system_parameter),%edi
- movl $0x0240,%ecx
- cld
- rep
- movsb
-#else /* IBM-PC */
-/* Tell the bios to warmboot next time */
- movw $0x1234,0x472
-#endif /* PC98 */
-
-/* Set up a real frame in case the double return in newboot is executed. */
- pushl %ebp
- movl %esp, %ebp
-
-/* Don't trust what the BIOS gives for eflags. */
- pushl $PSL_KERNEL
- popfl
-
-/*
- * Don't trust what the BIOS gives for %fs and %gs. Trust the bootstrap
- * to set %cs, %ds, %es and %ss.
- */
- mov %ds, %ax
- mov %ax, %fs
- mov %ax, %gs
-
-/*
- * Clear the bss. Not all boot programs do it, and it is our job anyway.
- *
- * XXX we don't check that there is memory for our bss and page tables
- * before using it.
- *
- * Note: we must be careful to not overwrite an active gdt or idt. They
- * inactive from now until we switch to new ones, since we don't load any
- * more segment registers or permit interrupts until after the switch.
- */
- movl $R(end),%ecx
- movl $R(edata),%edi
- subl %edi,%ecx
- xorl %eax,%eax
- cld
- rep
- stosb
-
- call recover_bootinfo
-
-/* Get onto a stack that we can trust. */
-/*
- * XXX this step is delayed in case recover_bootinfo needs to return via
- * the old stack, but it need not be, since recover_bootinfo actually
- * returns via the old frame.
- */
- movl $R(tmpstk),%esp
-
-#ifdef PC98
- /* pc98_machine_type & M_EPSON_PC98 */
- testb $0x02,R(pc98_system_parameter)+220
- jz 3f
- /* epson_machine_id <= 0x0b */
- cmpb $0x0b,R(pc98_system_parameter)+224
- ja 3f
-
- /* count up memory */
- movl $0x100000,%eax /* next, talley remaining memory */
- movl $0xFFF-0x100,%ecx
-1: movl 0(%eax),%ebx /* save location to check */
- movl $0xa55a5aa5,0(%eax) /* write test pattern */
- cmpl $0xa55a5aa5,0(%eax) /* does not check yet for rollover */
- jne 2f
- movl %ebx,0(%eax) /* restore memory */
- addl $PAGE_SIZE,%eax
- loop 1b
-2: subl $0x100000,%eax
- shrl $17,%eax
- movb %al,R(pc98_system_parameter)+1
-3:
-
- movw R(pc98_system_parameter+0x86),%ax
- movw %ax,R(cpu_id)
-#endif
-
- call identify_cpu
- call create_pagetables
-
-/*
- * If the CPU has support for VME, turn it on.
- */
- testl $CPUID_VME, R(cpu_feature)
- jz 1f
- movl %cr4, %eax
- orl $CR4_VME, %eax
- movl %eax, %cr4
-1:
-
-/* Now enable paging */
- movl R(IdlePTD), %eax
- movl %eax,%cr3 /* load ptd addr into mmu */
- movl %cr0,%eax /* get control word */
- orl $CR0_PE|CR0_PG,%eax /* enable paging */
- movl %eax,%cr0 /* and let's page NOW! */
-
- pushl $begin /* jump to high virtualized address */
- ret
-
-/* now running relocated at KERNBASE where the system is linked to run */
-begin:
- /* set up bootstrap stack */
- movl proc0kstack,%eax /* location of in-kernel stack */
- /* bootstrap stack end location */
- leal (KSTACK_PAGES*PAGE_SIZE-PCB_SIZE)(%eax),%esp
-
- xorl %ebp,%ebp /* mark end of frames */
-
-#ifdef PAE
- movl IdlePDPT,%esi
-#else
- movl IdlePTD,%esi
-#endif
- movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax)
-
- pushl physfree /* value of first for init386(first) */
- call init386 /* wire 386 chip for unix operation */
-
- /*
- * Clean up the stack in a way that db_numargs() understands, so
- * that backtraces in ddb don't underrun the stack. Traps for
- * inaccessible memory are more fatal than usual this early.
- */
- addl $4,%esp
-
- call mi_startup /* autoconfiguration, mountroot etc */
- /* NOTREACHED */
- addl $0,%esp /* for db_numargs() again */
-
-/*
- * Signal trampoline, copied to top of user stack
- */
-NON_GPROF_ENTRY(sigcode)
- calll *SIGF_HANDLER(%esp)
- leal SIGF_UC(%esp),%eax /* get ucontext */
- pushl %eax
- testl $PSL_VM,UC_EFLAGS(%eax)
- jne 1f
- movl UC_GS(%eax),%gs /* restore %gs */
-1:
- movl $SYS_sigreturn,%eax
- pushl %eax /* junk to fake return addr. */
- int $0x80 /* enter kernel with args */
- /* on stack */
-1:
- jmp 1b
-
-#ifdef COMPAT_FREEBSD4
- ALIGN_TEXT
-freebsd4_sigcode:
- calll *SIGF_HANDLER(%esp)
- leal SIGF_UC4(%esp),%eax /* get ucontext */
- pushl %eax
- testl $PSL_VM,UC4_EFLAGS(%eax)
- jne 1f
- movl UC4_GS(%eax),%gs /* restore %gs */
-1:
- movl $344,%eax /* 4.x SYS_sigreturn */
- pushl %eax /* junk to fake return addr. */
- int $0x80 /* enter kernel with args */
- /* on stack */
-1:
- jmp 1b
-#endif
-
-#ifdef COMPAT_43
- ALIGN_TEXT
-osigcode:
- call *SIGF_HANDLER(%esp) /* call signal handler */
- lea SIGF_SC(%esp),%eax /* get sigcontext */
- pushl %eax
- testl $PSL_VM,SC_PS(%eax)
- jne 9f
- movl SC_GS(%eax),%gs /* restore %gs */
-9:
- movl $103,%eax /* 3.x SYS_sigreturn */
- pushl %eax /* junk to fake return addr. */
- int $0x80 /* enter kernel with args */
-0: jmp 0b
-#endif /* COMPAT_43 */
-
- ALIGN_TEXT
-esigcode:
-
- .data
- .globl szsigcode
-szsigcode:
- .long esigcode-sigcode
-#ifdef COMPAT_FREEBSD4
- .globl szfreebsd4_sigcode
-szfreebsd4_sigcode:
- .long esigcode-freebsd4_sigcode
-#endif
-#ifdef COMPAT_43
- .globl szosigcode
-szosigcode:
- .long esigcode-osigcode
-#endif
- .text
-
-/**********************************************************************
- *
- * Recover the bootinfo passed to us from the boot program
- *
- */
-recover_bootinfo:
- /*
- * This code is called in different ways depending on what loaded
- * and started the kernel. This is used to detect how we get the
- * arguments from the other code and what we do with them.
- *
- * Old disk boot blocks:
- * (*btext)(howto, bootdev, cyloffset, esym);
- * [return address == 0, and can NOT be returned to]
- * [cyloffset was not supported by the FreeBSD boot code
- * and always passed in as 0]
- * [esym is also known as total in the boot code, and
- * was never properly supported by the FreeBSD boot code]
- *
- * Old diskless netboot code:
- * (*btext)(0,0,0,0,&nfsdiskless,0,0,0);
- * [return address != 0, and can NOT be returned to]
- * If we are being booted by this code it will NOT work,
- * so we are just going to halt if we find this case.
- *
- * New uniform boot code:
- * (*btext)(howto, bootdev, 0, 0, 0, &bootinfo)
- * [return address != 0, and can be returned to]
- *
- * There may seem to be a lot of wasted arguments in here, but
- * that is so the newer boot code can still load very old kernels
- * and old boot code can load new kernels.
- */
-
- /*
- * The old style disk boot blocks fake a frame on the stack and
- * did an lret to get here. The frame on the stack has a return
- * address of 0.
- */
- cmpl $0,4(%ebp)
- je olddiskboot
-
- /*
- * We have some form of return address, so this is either the
- * old diskless netboot code, or the new uniform code. That can
- * be detected by looking at the 5th argument, if it is 0
- * we are being booted by the new uniform boot code.
- */
- cmpl $0,24(%ebp)
- je newboot
-
- /*
- * Seems we have been loaded by the old diskless boot code, we
- * don't stand a chance of running as the diskless structure
- * changed considerably between the two, so just halt.
- */
- hlt
-
- /*
- * We have been loaded by the new uniform boot code.
- * Let's check the bootinfo version, and if we do not understand
- * it we return to the loader with a status of 1 to indicate this error
- */
-newboot:
- movl 28(%ebp),%ebx /* &bootinfo.version */
- movl BI_VERSION(%ebx),%eax
- cmpl $1,%eax /* We only understand version 1 */
- je 1f
- movl $1,%eax /* Return status */
- leave
- /*
- * XXX this returns to our caller's caller (as is required) since
- * we didn't set up a frame and our caller did.
- */
- ret
-
-1:
- /*
- * If we have a kernelname copy it in
- */
- movl BI_KERNELNAME(%ebx),%esi
- cmpl $0,%esi
- je 2f /* No kernelname */
- movl $MAXPATHLEN,%ecx /* Brute force!!! */
- movl $R(kernelname),%edi
- cmpb $'/',(%esi) /* Make sure it starts with a slash */
- je 1f
- movb $'/',(%edi)
- incl %edi
- decl %ecx
-1:
- cld
- rep
- movsb
-
-2:
- /*
- * Determine the size of the boot loader's copy of the bootinfo
- * struct. This is impossible to do properly because old versions
- * of the struct don't contain a size field and there are 2 old
- * versions with the same version number.
- */
- movl $BI_ENDCOMMON,%ecx /* prepare for sizeless version */
- testl $RB_BOOTINFO,8(%ebp) /* bi_size (and bootinfo) valid? */
- je got_bi_size /* no, sizeless version */
- movl BI_SIZE(%ebx),%ecx
-got_bi_size:
-
- /*
- * Copy the common part of the bootinfo struct
- */
- movl %ebx,%esi
- movl $R(bootinfo),%edi
- cmpl $BOOTINFO_SIZE,%ecx
- jbe got_common_bi_size
- movl $BOOTINFO_SIZE,%ecx
-got_common_bi_size:
- cld
- rep
- movsb
-
-#ifdef NFS_ROOT
-#ifndef BOOTP_NFSV3
- /*
- * If we have a nfs_diskless structure copy it in
- */
- movl BI_NFS_DISKLESS(%ebx),%esi
- cmpl $0,%esi
- je olddiskboot
- movl $R(nfs_diskless),%edi
- movl $NFSDISKLESS_SIZE,%ecx
- cld
- rep
- movsb
- movl $R(nfs_diskless_valid),%edi
- movl $1,(%edi)
-#endif
-#endif
-
- /*
- * The old style disk boot.
- * (*btext)(howto, bootdev, cyloffset, esym);
- * Note that the newer boot code just falls into here to pick
- * up howto and bootdev, cyloffset and esym are no longer used
- */
-olddiskboot:
- movl 8(%ebp),%eax
- movl %eax,R(boothowto)
- movl 12(%ebp),%eax
- movl %eax,R(bootdev)
-
- ret
-
-
-/**********************************************************************
- *
- * Identify the CPU and initialize anything special about it
- *
- */
-identify_cpu:
-
- /* Try to toggle alignment check flag ; does not exist on 386.
*/
- pushfl
- popl %eax
- movl %eax,%ecx
- orl $PSL_AC,%eax
- pushl %eax
- popfl
- pushfl
- popl %eax
- xorl %ecx,%eax
- andl $PSL_AC,%eax
- pushl %ecx
- popfl
-
- testl %eax,%eax
- jnz try486
-
- /* NexGen CPU does not have aligment check flag. */
- pushfl
- movl $0x5555, %eax
- xorl %edx, %edx
- movl $2, %ecx
- clc
- divl %ecx
- jz trynexgen
- popfl
- movl $CPU_386,R(cpu)
- jmp 3f
-
-trynexgen:
- popfl
- movl $CPU_NX586,R(cpu)
- movl $0x4778654e,R(cpu_vendor) # store vendor string
- movl $0x72446e65,R(cpu_vendor+4)
- movl $0x6e657669,R(cpu_vendor+8)
- movl $0,R(cpu_vendor+12)
- jmp 3f
-
-try486: /* Try to toggle identification flag ; does not exist on
early 486s. */
- pushfl
- popl %eax
- movl %eax,%ecx
- xorl $PSL_ID,%eax
- pushl %eax
- popfl
- pushfl
- popl %eax
- xorl %ecx,%eax
- andl $PSL_ID,%eax
- pushl %ecx
- popfl
-
- testl %eax,%eax
- jnz trycpuid
- movl $CPU_486,R(cpu)
-
- /*
- * Check Cyrix CPU
- * Cyrix CPUs do not change the undefined flags following
- * execution of the divide instruction which divides 5 by 2.
- *
- * Note: CPUID is enabled on M2, so it passes another
way.
- */
- pushfl
- movl $0x5555, %eax
- xorl %edx, %edx
- movl $2, %ecx
- clc
- divl %ecx
- jnc trycyrix
- popfl
- jmp 3f /* You may use Intel CPU. */
-
-trycyrix:
- popfl
- /*
- * IBM Bluelighting CPU also doesn't change the undefined flags.
- * Because IBM doesn't disclose the information for Bluelighting
- * CPU, we couldn't distinguish it from Cyrix's (including IBM
- * brand of Cyrix CPUs).
- */
- movl $0x69727943,R(cpu_vendor) # store vendor string
- movl $0x736e4978,R(cpu_vendor+4)
- movl $0x64616574,R(cpu_vendor+8)
- jmp 3f
-
-trycpuid: /* Use the `cpuid' instruction. */
- xorl %eax,%eax
- cpuid # cpuid 0
- movl %eax,R(cpu_high) # highest capability
- movl %ebx,R(cpu_vendor) # store vendor string
- movl %edx,R(cpu_vendor+4)
- movl %ecx,R(cpu_vendor+8)
- movb $0,R(cpu_vendor+12)
-
- movl $1,%eax
- cpuid # cpuid 1
- movl %eax,R(cpu_id) # store cpu_id
- movl %ebx,R(cpu_procinfo) # store cpu_procinfo
- movl %edx,R(cpu_feature) # store cpu_feature
- rorl $8,%eax # extract family type
- andl $15,%eax
- cmpl $5,%eax
- jae 1f
-
- /* less than Pentium ; must be 486 */
- movl $CPU_486,R(cpu)
- jmp 3f
-1:
- /* a Pentium? */
- cmpl $5,%eax
- jne 2f
- movl $CPU_586,R(cpu)
- jmp 3f
-2:
- /* Greater than Pentium...call it a Pentium Pro */
- movl $CPU_686,R(cpu)
-3:
- ret
-
-/**********************************************************************
- *
- * Create the first page directory and its page tables.
- *
- */
-
-create_pagetables:
-
-/* Find end of kernel image (rounded up to a page boundary). */
- movl $R(_end),%esi
-
-/* Include symbols, if any. */
- movl R(bootinfo+BI_ESYMTAB),%edi
- testl %edi,%edi
- je over_symalloc
- movl %edi,%esi
- movl $KERNBASE,%edi
- addl %edi,R(bootinfo+BI_SYMTAB)
- addl %edi,R(bootinfo+BI_ESYMTAB)
-over_symalloc:
-
-/* If we are told where the end of the kernel space is, believe it. */
- movl R(bootinfo+BI_KERNEND),%edi
- testl %edi,%edi
- je no_kernend
- movl %edi,%esi
-no_kernend:
-
- addl $PDRMASK,%esi /* Play conservative for now, and */
- andl $~PDRMASK,%esi /* ... wrap to next 4M. */
- movl %esi,R(KERNend) /* save end of kernel */
- movl %esi,R(physfree) /* next free page is at end of kernel */
-
-/* Allocate Kernel Page Tables */
- ALLOCPAGES(NKPT)
- movl %esi,R(KPTphys)
-
-/* Allocate Page Table Directory */
-#ifdef PAE
- /* XXX only need 32 bytes (easier for now) */
- ALLOCPAGES(1)
- movl %esi,R(IdlePDPT)
-#endif
- ALLOCPAGES(NPGPTD)
- movl %esi,R(IdlePTD)
-
-/* Allocate UPAGES */
- ALLOCPAGES(UAREA_PAGES)
- movl %esi,R(p0upa)
- addl $KERNBASE, %esi
- movl %esi, R(proc0uarea)
-
- ALLOCPAGES(KSTACK_PAGES)
- movl %esi,R(p0kpa)
- addl $KERNBASE, %esi
- movl %esi, R(proc0kstack)
-#if 0
- ALLOCPAGES(1) /* vm86/bios stack */
- movl %esi,R(vm86phystk)
-
- ALLOCPAGES(3) /* pgtable + ext + IOPAGES */
- movl %esi,R(vm86pa)
- addl $KERNBASE, %esi
- movl %esi, R(vm86paddr)
-#endif
-#ifdef SMP
-/* Allocate cpu0's private data page */
- ALLOCPAGES(1)
- movl %esi,R(cpu0pp)
- addl $KERNBASE, %esi
- movl %esi, R(cpu0prvpage) /* relocated to KVM space */
-
-/* Allocate SMP page table page */
- ALLOCPAGES(1)
- movl %esi,R(SMPptpa)
- addl $KERNBASE, %esi
- movl %esi, R(SMPpt) /* relocated to KVM space */
-#endif /* SMP */
-
-/* Map page zero read-write so bios32 calls can use it */
- xorl %eax, %eax
- movl $PG_RW,%edx
- movl $1,%ecx
- fillkptphys(%edx)
-
-/* Map read-only from page 1 to the beginning of the kernel text section */
- movl $PAGE_SIZE, %eax
- xorl %edx,%edx
- movl $R(btext),%ecx
- addl $PAGE_MASK,%ecx
- subl %eax,%ecx
- shrl $PAGE_SHIFT,%ecx
- fillkptphys(%edx)
-
-/*
- * Enable PSE and PGE.
- */
-#ifndef DISABLE_PSE
- testl $CPUID_PSE, R(cpu_feature)
- jz 1f
- movl $PG_PS, R(pseflag)
- movl %cr4, %eax
- orl $CR4_PSE, %eax
- movl %eax, %cr4
-1:
-#endif
-#ifndef DISABLE_PG_G
- testl $CPUID_PGE, R(cpu_feature)
- jz 2f
- movl $PG_G, R(pgeflag)
- movl %cr4, %eax
- orl $CR4_PGE, %eax
- movl %eax, %cr4
-2:
-#endif
-
-/*
- * Write page tables for the kernel starting at btext and
- * until the end. Make sure to map read+write. We do this even
- * if we've enabled PSE above, we'll just switch the corresponding kernel
- * PDEs before we turn on paging.
- *
- * XXX: We waste some pages here in the PSE case! DON'T BLINDLY REMOVE
- * THIS! SMP needs the page table to be there to map the kernel P==V.
- */
- movl $R(btext),%eax
- addl $PAGE_MASK, %eax
- andl $~PAGE_MASK, %eax
- movl $PG_RW,%edx
- movl R(KERNend),%ecx
- subl %eax,%ecx
- shrl $PAGE_SHIFT,%ecx
- fillkptphys(%edx)
-
-/* Map page directory. */
- movl R(IdlePTD), %eax
- movl $NPGPTD, %ecx
- fillkptphys($PG_RW)
-
-/* Map proc0's UPAGES in the physical way ... */
- movl R(p0upa), %eax
- movl $(UAREA_PAGES), %ecx
- fillkptphys($PG_RW)
-
-/* Map proc0's KSTACK in the physical way ... */
- movl R(p0kpa), %eax
- movl $(KSTACK_PAGES), %ecx
- fillkptphys($PG_RW)
-
-/* Map ISA hole */
- movl $ISA_HOLE_START, %eax
- movl $ISA_HOLE_LENGTH>>PAGE_SHIFT, %ecx
- fillkptphys($PG_RW)
-#if 0
-/* Map space for the vm86 region */
- movl R(vm86phystk), %eax
- movl $4, %ecx
- fillkptphys($PG_RW)
-
-/* Map page 0 into the vm86 page table */
- movl $0, %eax
- movl $0, %ebx
- movl $1, %ecx
- fillkpt(R(vm86pa), $PG_RW|PG_U)
-
-/* ...likewise for the ISA hole */
- movl $ISA_HOLE_START, %eax
- movl $ISA_HOLE_START>>PAGE_SHIFT, %ebx
- movl $ISA_HOLE_LENGTH>>PAGE_SHIFT, %ecx
- fillkpt(R(vm86pa), $PG_RW|PG_U)
-#endif
-#ifdef SMP
-/* Map cpu0's private page into global kmem (4K @ cpu0prvpage) */
- movl R(cpu0pp), %eax
- movl $1, %ecx
- fillkptphys($PG_RW)
-
-/* Map SMP page table page into global kmem FWIW */
- movl R(SMPptpa), %eax
- movl $1, %ecx
- fillkptphys($PG_RW)
-
-/* Map the private page into the SMP page table */
- movl R(cpu0pp), %eax
- movl $0, %ebx /* pte offset = 0 */
- movl $1, %ecx /* one private page coming right up */
- fillkpt(R(SMPptpa), $PG_RW)
-
-/* ... and put the page table table in the pde. */
- movl R(SMPptpa), %eax
- movl $MPPTDI, %ebx
- movl $1, %ecx
- fillkpt(R(IdlePTD), $PG_RW)
-
-/* Fakeup VA for the local apic to allow early traps. */
- ALLOCPAGES(1)
- movl %esi, %eax
- movl $(NPTEPG-1), %ebx /* pte offset = NTEPG-1 */
- movl $1, %ecx /* one private pt coming right up */
- fillkpt(R(SMPptpa), $PG_RW)
-#endif /* SMP */
-
-/* install a pde for temporary double map of bottom of VA */
- movl R(KPTphys), %eax
- xorl %ebx, %ebx
- movl $NKPT, %ecx
- fillkpt(R(IdlePTD), $PG_RW)
-
-/*
- * For the non-PSE case, install PDEs for PTs covering the kernel.
- * For the PSE case, do the same, but clobber the ones corresponding
- * to the kernel (from btext to KERNend) with 4M ('PS') PDEs immediately
- * after.
- */
- movl R(KPTphys), %eax
- movl $KPTDI, %ebx
- movl $NKPT, %ecx
- fillkpt(R(IdlePTD), $PG_RW)
- cmpl $0,R(pseflag)
- je done_pde
-
- movl R(KERNend), %ecx
- movl $KERNLOAD, %eax
- subl %eax, %ecx
- shrl $PDRSHIFT, %ecx
- movl $(KPTDI+(KERNLOAD/(1 << PDRSHIFT))), %ebx
- shll $PDESHIFT, %ebx
- addl R(IdlePTD), %ebx
- orl $(PG_V|PG_RW|PG_PS), %eax
-1: movl %eax, (%ebx)
- addl $(1 << PDRSHIFT), %eax
- addl $PDESIZE, %ebx
- loop 1b
-
-done_pde:
-/* install a pde recursively mapping page directory as a page table */
- movl R(IdlePTD), %eax
- movl $PTDPTDI, %ebx
- movl $NPGPTD,%ecx
- fillkpt(R(IdlePTD), $PG_RW)
-
-#ifdef PAE
- movl R(IdlePTD), %eax
- xorl %ebx, %ebx
- movl $NPGPTD, %ecx
- fillkpt(R(IdlePDPT), $0x0)
-#endif
-
- ret
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,2466 +0,0 @@
-/*-
- * Copyright (c) 1992 Terrence R. Lambert.
- * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * William Jolitz.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/machdep.c,v 1.584 2003/12/03 21:12:09
jhb Exp $");
-
-#include "opt_apic.h"
-#include "opt_atalk.h"
-#include "opt_compat.h"
-#include "opt_cpu.h"
-#include "opt_ddb.h"
-#include "opt_inet.h"
-#include "opt_ipx.h"
-#include "opt_isa.h"
-#include "opt_kstack_pages.h"
-#include "opt_maxmem.h"
-#include "opt_msgbuf.h"
-#include "opt_npx.h"
-#include "opt_perfmon.h"
-#include "opt_xen.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/sysproto.h>
-#include <sys/signalvar.h>
-#include <sys/imgact.h>
-#include <sys/kdb.h>
-#include <sys/kernel.h>
-#include <sys/ktr.h>
-#include <sys/linker.h>
-#include <sys/lock.h>
-#include <sys/malloc.h>
-#include <sys/memrange.h>
-#include <sys/mutex.h>
-#include <sys/pcpu.h>
-#include <sys/proc.h>
-#include <sys/bio.h>
-#include <sys/buf.h>
-#include <sys/reboot.h>
-#include <sys/callout.h>
-#include <sys/msgbuf.h>
-#include <sys/sched.h>
-#include <sys/sysent.h>
-#include <sys/sysctl.h>
-#include <sys/smp.h>
-#include <sys/ucontext.h>
-#include <sys/vmmeter.h>
-#include <sys/bus.h>
-#include <sys/eventhandler.h>
-
-#include <vm/vm.h>
-#include <vm/vm_param.h>
-#include <vm/vm_kern.h>
-#include <vm/vm_object.h>
-#include <vm/vm_page.h>
-#include <vm/vm_map.h>
-#include <vm/vm_pager.h>
-#include <vm/vm_extern.h>
-
-#include <sys/user.h>
-#include <sys/exec.h>
-#include <sys/cons.h>
-
-#ifdef DDB
-#ifndef KDB
-#error KDB must be enabled in order for DDB to work!
-#endif
-#include <ddb/ddb.h>
-#include <ddb/db_sym.h>
-#endif
-
-#include <net/netisr.h>
-
-#include <machine/cpu.h>
-#include <machine/cputypes.h>
-#include <machine/reg.h>
-#include <machine/clock.h>
-#include <machine/specialreg.h>
-#include <machine/bootinfo.h>
-#include <machine/intr_machdep.h>
-#include <machine/md_var.h>
-#include <machine/pc/bios.h>
-#include <machine/pcb_ext.h> /* pcb.h included via sys/user.h */
-#include <machine/proc.h>
-#ifdef PERFMON
-#include <machine/perfmon.h>
-#endif
-#ifdef SMP
-#include <machine/privatespace.h>
-#include <machine/smp.h>
-#endif
-
-#ifdef DEV_ISA
-#include <i386/isa/icu.h>
-#endif
-
-#include <isa/rtc.h>
-#include <sys/ptrace.h>
-#include <machine/sigframe.h>
-
-
-/* XEN includes */
-#include <machine/hypervisor-ifs.h>
-#include <machine/xen-os.h>
-#include <machine/hypervisor.h>
-#include <machine/xenfunc.h>
-#include <machine/xenvar.h>
-#include <machine/xen_intr.h>
-
-void Xhypervisor_callback(void);
-void failsafe_callback(void);
-
-/***************/
-
-
-/* Sanity check for __curthread() */
-CTASSERT(offsetof(struct pcpu, pc_curthread) == 0);
-
-extern void init386(void);
-extern void dblfault_handler(void);
-
-extern void printcpuinfo(void); /* XXX header file */
-extern void finishidentcpu(void);
-extern void panicifcpuunsupported(void);
-extern void initializecpu(void);
-void initvalues(start_info_t *startinfo);
-
-#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
-#define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) ==
0)
-
-#if !defined(CPU_ENABLE_SSE) && defined(I686_CPU)
-#define CPU_ENABLE_SSE
-#endif
-#if defined(CPU_DISABLE_SSE)
-#undef CPU_ENABLE_SSE
-#endif
-
-static void cpu_startup(void *);
-static void fpstate_drop(struct thread *td);
-static void get_fpcontext(struct thread *td, mcontext_t *mcp);
-static int set_fpcontext(struct thread *td, const mcontext_t *mcp);
-#ifdef CPU_ENABLE_SSE
-static void set_fpregs_xmm(struct save87 *, struct savexmm *);
-static void fill_fpregs_xmm(struct savexmm *, struct save87 *);
-#endif /* CPU_ENABLE_SSE */
-SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
-
-#ifdef DDB
-extern vm_offset_t ksym_start, ksym_end;
-#endif
-
-int _udatasel, _ucodesel;
-u_int basemem;
-
-start_info_t *xen_start_info;
-unsigned long *xen_phys_machine;
-int xendebug_flags;
-int init_first = 0;
-int cold = 1;
-
-#ifdef COMPAT_43
-static void osendsig(sig_t catcher, int sig, sigset_t *mask, u_long code);
-#endif
-#ifdef COMPAT_FREEBSD4
-static void freebsd4_sendsig(sig_t catcher, int sig, sigset_t *mask,
- u_long code);
-#endif
-
-long Maxmem = 0;
-
-vm_paddr_t phys_avail[10];
-
-/* must be 2 less so 0 0 can signal end of chunks */
-#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2)
-
-struct kva_md_info kmi;
-
-static struct trapframe proc0_tf;
-#ifndef SMP
-static struct pcpu __pcpu;
-#endif
-struct mtx icu_lock;
-
-struct mem_range_softc mem_range_softc;
-
-static void
-cpu_startup(void *dummy)
-{
- /*
- * Good {morning,afternoon,evening,night}.
- */
- startrtclock();
-
- printcpuinfo();
- panicifcpuunsupported();
-#ifdef PERFMON
- perfmon_init();
-#endif
- printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)Maxmem),
- ptoa((uintmax_t)Maxmem) / 1048576);
- /*
- * Display any holes after the first chunk of extended memory.
- */
- if (bootverbose) {
- int indx;
-
- printf("Physical memory chunk(s):\n");
- for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
- vm_paddr_t size;
-
- size = phys_avail[indx + 1] - phys_avail[indx];
- printf(
- "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n",
- (uintmax_t)phys_avail[indx],
- (uintmax_t)phys_avail[indx + 1] - 1,
- (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
- }
- }
-
- vm_ksubmap_init(&kmi);
-
- printf("avail memory = %ju (%ju MB)\n",
- ptoa((uintmax_t)cnt.v_free_count),
- ptoa((uintmax_t)cnt.v_free_count) / 1048576);
-
- /*
- * Set up buffers, so they can be used to read disk labels.
- */
- bufinit();
- vm_pager_bufferinit();
-
- cpu_setregs();
-
-}
-
-/*
- * Send an interrupt to process.
- *
- * Stack is set up to allow sigcode stored
- * at top to call routine, followed by kcall
- * to sigreturn routine below. After sigreturn
- * resets the signal mask, the stack, and the
- * frame pointer, it returns to the user
- * specified pc, psl.
- */
-#ifdef COMPAT_43
-static void
-osendsig(catcher, sig, mask, code)
- sig_t catcher;
- int sig;
- sigset_t *mask;
- u_long code;
-{
- struct osigframe sf, *fp;
- struct proc *p;
- struct thread *td;
- struct sigacts *psp;
- struct trapframe *regs;
- int oonstack;
-
- td = curthread;
- p = td->td_proc;
- PROC_LOCK_ASSERT(p, MA_OWNED);
- psp = p->p_sigacts;
- mtx_assert(&psp->ps_mtx, MA_OWNED);
- regs = td->td_frame;
- oonstack = sigonstack(regs->tf_esp);
-
- /* Allocate space for the signal handler context. */
- if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
- SIGISMEMBER(psp->ps_sigonstack, sig)) {
- fp = (struct osigframe *)(td->td_sigstk.ss_sp +
- td->td_sigstk.ss_size - sizeof(struct osigframe));
-#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
- td->td_sigstk.ss_flags |= SS_ONSTACK;
-#endif
- } else
- fp = (struct osigframe *)regs->tf_esp - 1;
-
- /* Translate the signal if appropriate. */
- if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
- sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
-
- /* Build the argument list for the signal handler. */
- sf.sf_signum = sig;
- sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc;
- if (SIGISMEMBER(psp->ps_siginfo, sig)) {
- /* Signal handler installed with SA_SIGINFO. */
- sf.sf_arg2 = (register_t)&fp->sf_siginfo;
- sf.sf_siginfo.si_signo = sig;
- sf.sf_siginfo.si_code = code;
- sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher;
- } else {
- /* Old FreeBSD-style arguments. */
- sf.sf_arg2 = code;
- sf.sf_addr = regs->tf_err;
- sf.sf_ahu.sf_handler = catcher;
- }
- mtx_unlock(&psp->ps_mtx);
- PROC_UNLOCK(p);
-
- /* Save most if not all of trap frame. */
- sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax;
- sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx;
- sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx;
- sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx;
- sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi;
- sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi;
- sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs;
- sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds;
- sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss;
- sf.sf_siginfo.si_sc.sc_es = regs->tf_es;
- sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs;
- sf.sf_siginfo.si_sc.sc_gs = rgs();
- sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp;
-
- /* Build the signal context to be used by osigreturn(). */
- sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0;
- SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask);
- sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp;
- sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp;
- sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip;
- sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags;
- sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno;
- sf.sf_siginfo.si_sc.sc_err = regs->tf_err;
-
- /*
- * Copy the sigframe out to the user's stack.
- */
- if (copyout(&sf, fp, sizeof(*fp)) != 0) {
-#ifdef DEBUG
- printf("process %ld has trashed its stack\n", (long)p->p_pid);
-#endif
- PROC_LOCK(p);
- sigexit(td, SIGILL);
- }
-
- regs->tf_esp = (int)fp;
- regs->tf_eip = PS_STRINGS - szosigcode;
- regs->tf_eflags &= ~PSL_T;
- regs->tf_cs = _ucodesel;
- regs->tf_ds = _udatasel;
- regs->tf_es = _udatasel;
- regs->tf_fs = _udatasel;
- load_gs(_udatasel);
- regs->tf_ss = _udatasel;
- PROC_LOCK(p);
- mtx_lock(&psp->ps_mtx);
-}
-#endif /* COMPAT_43 */
-
-#ifdef COMPAT_FREEBSD4
-static void
-freebsd4_sendsig(catcher, sig, mask, code)
- sig_t catcher;
- int sig;
- sigset_t *mask;
- u_long code;
-{
- struct sigframe4 sf, *sfp;
- struct proc *p;
- struct thread *td;
- struct sigacts *psp;
- struct trapframe *regs;
- int oonstack;
-
- td = curthread;
- p = td->td_proc;
- PROC_LOCK_ASSERT(p, MA_OWNED);
- psp = p->p_sigacts;
- mtx_assert(&psp->ps_mtx, MA_OWNED);
- regs = td->td_frame;
- oonstack = sigonstack(regs->tf_esp);
-
- /* Save user context. */
- bzero(&sf, sizeof(sf));
- sf.sf_uc.uc_sigmask = *mask;
- sf.sf_uc.uc_stack = td->td_sigstk;
- sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
- ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
- sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
- sf.sf_uc.uc_mcontext.mc_gs = rgs();
- bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));
-
- /* Allocate space for the signal handler context. */
- if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
- SIGISMEMBER(psp->ps_sigonstack, sig)) {
- sfp = (struct sigframe4 *)(td->td_sigstk.ss_sp +
- td->td_sigstk.ss_size - sizeof(struct sigframe4));
-#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
- td->td_sigstk.ss_flags |= SS_ONSTACK;
-#endif
- } else
- sfp = (struct sigframe4 *)regs->tf_esp - 1;
-
- /* Translate the signal if appropriate. */
- if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
- sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
-
- /* Build the argument list for the signal handler. */
- sf.sf_signum = sig;
- sf.sf_ucontext = (register_t)&sfp->sf_uc;
- if (SIGISMEMBER(psp->ps_siginfo, sig)) {
- /* Signal handler installed with SA_SIGINFO. */
- sf.sf_siginfo = (register_t)&sfp->sf_si;
- sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;
-
- /* Fill in POSIX parts */
- sf.sf_si.si_signo = sig;
- sf.sf_si.si_code = code;
- sf.sf_si.si_addr = (void *)regs->tf_err;
- } else {
- /* Old FreeBSD-style arguments. */
- sf.sf_siginfo = code;
- sf.sf_addr = regs->tf_err;
- sf.sf_ahu.sf_handler = catcher;
- }
- mtx_unlock(&psp->ps_mtx);
- PROC_UNLOCK(p);
-
- /*
- * Copy the sigframe out to the user's stack.
- */
- if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
-#ifdef DEBUG
- printf("process %ld has trashed its stack\n", (long)p->p_pid);
-#endif
- PROC_LOCK(p);
- sigexit(td, SIGILL);
- }
-
- regs->tf_esp = (int)sfp;
- regs->tf_eip = PS_STRINGS - szfreebsd4_sigcode;
- regs->tf_eflags &= ~PSL_T;
- regs->tf_cs = _ucodesel;
- regs->tf_ds = _udatasel;
- regs->tf_es = _udatasel;
- regs->tf_fs = _udatasel;
- regs->tf_ss = _udatasel;
- PROC_LOCK(p);
- mtx_lock(&psp->ps_mtx);
-}
-#endif /* COMPAT_FREEBSD4 */
-
-void
-sendsig(catcher, sig, mask, code)
- sig_t catcher;
- int sig;
- sigset_t *mask;
- u_long code;
-{
- struct sigframe sf, *sfp;
- struct proc *p;
- struct thread *td;
- struct sigacts *psp;
- char *sp;
- struct trapframe *regs;
- int oonstack;
-
- td = curthread;
- p = td->td_proc;
- PROC_LOCK_ASSERT(p, MA_OWNED);
- psp = p->p_sigacts;
- mtx_assert(&psp->ps_mtx, MA_OWNED);
-#ifdef COMPAT_FREEBSD4
- if (SIGISMEMBER(psp->ps_freebsd4, sig)) {
- freebsd4_sendsig(catcher, sig, mask, code);
- return;
- }
-#endif
-#ifdef COMPAT_43
- if (SIGISMEMBER(psp->ps_osigset, sig)) {
- osendsig(catcher, sig, mask, code);
- return;
- }
-#endif
- regs = td->td_frame;
- oonstack = sigonstack(regs->tf_esp);
-
- /* Save user context. */
- bzero(&sf, sizeof(sf));
- sf.sf_uc.uc_sigmask = *mask;
- sf.sf_uc.uc_stack = td->td_sigstk;
- sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
- ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
- sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
- sf.sf_uc.uc_mcontext.mc_gs = rgs();
- bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));
- sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
- get_fpcontext(td, &sf.sf_uc.uc_mcontext);
- fpstate_drop(td);
-
- /* Allocate space for the signal handler context. */
- if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
- SIGISMEMBER(psp->ps_sigonstack, sig)) {
- sp = td->td_sigstk.ss_sp +
- td->td_sigstk.ss_size - sizeof(struct sigframe);
-#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
- td->td_sigstk.ss_flags |= SS_ONSTACK;
-#endif
- } else
- sp = (char *)regs->tf_esp - sizeof(struct sigframe);
- /* Align to 16 bytes. */
- sfp = (struct sigframe *)((unsigned int)sp & ~0xF);
-
- /* Translate the signal if appropriate. */
- if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
- sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
-
- /* Build the argument list for the signal handler. */
- sf.sf_signum = sig;
- sf.sf_ucontext = (register_t)&sfp->sf_uc;
- if (SIGISMEMBER(psp->ps_siginfo, sig)) {
- /* Signal handler installed with SA_SIGINFO. */
- sf.sf_siginfo = (register_t)&sfp->sf_si;
- sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;
-
- /* Fill in POSIX parts */
- sf.sf_si.si_signo = sig;
- sf.sf_si.si_code = code;
- sf.sf_si.si_addr = (void *)regs->tf_err;
- } else {
- /* Old FreeBSD-style arguments. */
- sf.sf_siginfo = code;
- sf.sf_addr = regs->tf_err;
- sf.sf_ahu.sf_handler = catcher;
- }
- mtx_unlock(&psp->ps_mtx);
- PROC_UNLOCK(p);
- /*
- * Copy the sigframe out to the user's stack.
- */
- if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
-#ifdef DEBUG
- printf("process %ld has trashed its stack\n", (long)p->p_pid);
-#endif
- PROC_LOCK(p);
- sigexit(td, SIGILL);
- }
-
- regs->tf_esp = (int)sfp;
- regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
- regs->tf_eflags &= ~PSL_T;
- regs->tf_cs = _ucodesel;
- regs->tf_ds = _udatasel;
- regs->tf_es = _udatasel;
- regs->tf_fs = _udatasel;
- regs->tf_ss = _udatasel;
- PROC_LOCK(p);
- mtx_lock(&psp->ps_mtx);
-}
-
-/*
- * Build siginfo_t for SA thread
- */
-void
-cpu_thread_siginfo(int sig, u_long code, siginfo_t *si)
-{
- struct proc *p;
- struct thread *td;
-
- td = curthread;
- p = td->td_proc;
- PROC_LOCK_ASSERT(p, MA_OWNED);
-
- bzero(si, sizeof(*si));
- si->si_signo = sig;
- si->si_code = code;
- si->si_addr = (void *)td->td_frame->tf_err;
- /* XXXKSE fill other fields */
-}
-
-/*
- * System call to cleanup state after a signal
- * has been taken. Reset signal mask and
- * stack state from context left by sendsig (above).
- * Return to previous pc and psl as specified by
- * context left by sendsig. Check carefully to
- * make sure that the user has not modified the
- * state to gain improper privileges.
- *
- * MPSAFE
- */
-#ifdef COMPAT_43
-int
-osigreturn(td, uap)
- struct thread *td;
- struct osigreturn_args /* {
- struct osigcontext *sigcntxp;
- } */ *uap;
-{
- struct osigcontext sc;
- struct trapframe *regs;
- struct osigcontext *scp;
- struct proc *p = td->td_proc;
- int eflags, error;
-
- regs = td->td_frame;
- error = copyin(uap->sigcntxp, &sc, sizeof(sc));
- if (error != 0)
- return (error);
- scp = ≻
- eflags = scp->sc_ps;
- /*
- * Don't allow users to change privileged or reserved flags.
- */
- /*
- * XXX do allow users to change the privileged flag PSL_RF.
- * The cpu sets PSL_RF in tf_eflags for faults. Debuggers
- * should sometimes set it there too. tf_eflags is kept in
- * the signal context during signal handling and there is no
- * other place to remember it, so the PSL_RF bit may be
- * corrupted by the signal handler without us knowing.
- * Corruption of the PSL_RF bit at worst causes one more or
- * one less debugger trap, so allowing it is fairly harmless.
- */
- if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
- return (EINVAL);
- }
-
- /*
- * Don't allow users to load a valid privileged %cs. Let the
- * hardware check for invalid selectors, excess privilege in
- * other selectors, invalid %eip's and invalid %esp's.
- */
- if (!CS_SECURE(scp->sc_cs)) {
- trapsignal(td, SIGBUS, T_PROTFLT);
- return (EINVAL);
- }
- regs->tf_ds = scp->sc_ds;
- regs->tf_es = scp->sc_es;
- regs->tf_fs = scp->sc_fs;
-
- /* Restore remaining registers. */
- regs->tf_eax = scp->sc_eax;
- regs->tf_ebx = scp->sc_ebx;
- regs->tf_ecx = scp->sc_ecx;
- regs->tf_edx = scp->sc_edx;
- regs->tf_esi = scp->sc_esi;
- regs->tf_edi = scp->sc_edi;
- regs->tf_cs = scp->sc_cs;
- regs->tf_ss = scp->sc_ss;
- regs->tf_isp = scp->sc_isp;
- regs->tf_ebp = scp->sc_fp;
- regs->tf_esp = scp->sc_sp;
- regs->tf_eip = scp->sc_pc;
- regs->tf_eflags = eflags;
-
- PROC_LOCK(p);
-#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
- if (scp->sc_onstack & 1)
- td->td_sigstk.ss_flags |= SS_ONSTACK;
- else
- td->td_sigstk.ss_flags &= ~SS_ONSTACK;
-#endif
- SIGSETOLD(td->td_sigmask, scp->sc_mask);
- SIG_CANTMASK(td->td_sigmask);
- signotify(td);
- PROC_UNLOCK(p);
- return (EJUSTRETURN);
-}
-#endif /* COMPAT_43 */
-
-#ifdef COMPAT_FREEBSD4
-/*
- * MPSAFE
- */
-int
-freebsd4_sigreturn(td, uap)
- struct thread *td;
- struct freebsd4_sigreturn_args /* {
- const ucontext4 *sigcntxp;
- } */ *uap;
-{
- struct ucontext4 uc;
- struct proc *p = td->td_proc;
- struct trapframe *regs;
- const struct ucontext4 *ucp;
- int cs, eflags, error;
-
- error = copyin(uap->sigcntxp, &uc, sizeof(uc));
- if (error != 0)
- return (error);
- ucp = &uc;
- regs = td->td_frame;
- eflags = ucp->uc_mcontext.mc_eflags;
- /*
- * Don't allow users to change privileged or reserved flags.
- */
- /*
- * XXX do allow users to change the privileged flag PSL_RF.
- * The cpu sets PSL_RF in tf_eflags for faults. Debuggers
- * should sometimes set it there too. tf_eflags is kept in
- * the signal context during signal handling and there is no
- * other place to remember it, so the PSL_RF bit may be
- * corrupted by the signal handler without us knowing.
- * Corruption of the PSL_RF bit at worst causes one more or
- * one less debugger trap, so allowing it is fairly harmless.
- */
- if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
- printf("freebsd4_sigreturn: eflags = 0x%x\n", eflags);
- return (EINVAL);
- }
-
- /*
- * Don't allow users to load a valid privileged %cs. Let the
- * hardware check for invalid selectors, excess privilege in
- * other selectors, invalid %eip's and invalid %esp's.
- */
- cs = ucp->uc_mcontext.mc_cs;
- if (!CS_SECURE(cs)) {
- printf("freebsd4_sigreturn: cs = 0x%x\n", cs);
- trapsignal(td, SIGBUS, T_PROTFLT);
- return (EINVAL);
- }
-
- bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
-
- PROC_LOCK(p);
-#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
- if (ucp->uc_mcontext.mc_onstack & 1)
- td->td_sigstk.ss_flags |= SS_ONSTACK;
- else
- td->td_sigstk.ss_flags &= ~SS_ONSTACK;
-#endif
-
- td->td_sigmask = ucp->uc_sigmask;
- SIG_CANTMASK(td->td_sigmask);
- signotify(td);
- PROC_UNLOCK(p);
- return (EJUSTRETURN);
-}
-#endif /* COMPAT_FREEBSD4 */
-
-/*
- * MPSAFE
- */
-int
-sigreturn(td, uap)
- struct thread *td;
- struct sigreturn_args /* {
- const __ucontext *sigcntxp;
- } */ *uap;
-{
- ucontext_t uc;
- struct proc *p = td->td_proc;
- struct trapframe *regs;
- const ucontext_t *ucp;
- int cs, eflags, error, ret;
-
- error = copyin(uap->sigcntxp, &uc, sizeof(uc));
- if (error != 0)
- return (error);
- ucp = &uc;
- regs = td->td_frame;
- eflags = ucp->uc_mcontext.mc_eflags;
- /*
- * Don't allow users to change privileged or reserved flags.
- */
- /*
- * XXX do allow users to change the privileged flag PSL_RF.
- * The cpu sets PSL_RF in tf_eflags for faults. Debuggers
- * should sometimes set it there too. tf_eflags is kept in
- * the signal context during signal handling and there is no
- * other place to remember it, so the PSL_RF bit may be
- * corrupted by the signal handler without us knowing.
- * Corruption of the PSL_RF bit at worst causes one more or
- * one less debugger trap, so allowing it is fairly harmless.
- */
-#if 0
- if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
- __asm__("int $0x3");
- printf("sigreturn: eflags = 0x%x\n", eflags);
- return (EINVAL);
- }
-#endif
- /*
- * Don't allow users to load a valid privileged %cs. Let the
- * hardware check for invalid selectors, excess privilege in
- * other selectors, invalid %eip's and invalid %esp's.
- */
- cs = ucp->uc_mcontext.mc_cs;
- if (!CS_SECURE(cs)) {
- __asm__("int $0x3");
- printf("sigreturn: cs = 0x%x\n", cs);
- trapsignal(td, SIGBUS, T_PROTFLT);
- return (EINVAL);
- }
-
- ret = set_fpcontext(td, &ucp->uc_mcontext);
- if (ret != 0)
- return (ret);
- bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
- PROC_LOCK(p);
-#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
- if (ucp->uc_mcontext.mc_onstack & 1)
- td->td_sigstk.ss_flags |= SS_ONSTACK;
- else
- td->td_sigstk.ss_flags &= ~SS_ONSTACK;
-#endif
-
- td->td_sigmask = ucp->uc_sigmask;
- SIG_CANTMASK(td->td_sigmask);
- signotify(td);
- PROC_UNLOCK(p);
- return (EJUSTRETURN);
-}
-
-/*
- * Machine dependent boot() routine
- *
- * I haven't seen anything to put here yet
- * Possibly some stuff might be grafted back here from boot()
- */
-void
-cpu_boot(int howto)
-{
-}
-
-/*
- * Shutdown the CPU as much as possible
- */
-void
-cpu_halt(void)
-{
- HYPERVISOR_shutdown();
-}
-
-/*
- * Hook to idle the CPU when possible. In the SMP case we default to
- * off because a halted cpu will not currently pick up a new thread in the
- * run queue until the next timer tick. If turned on this will result in
- * approximately a 4.2% loss in real time performance in buildworld tests
- * (but improves user and sys times oddly enough), and saves approximately
- * 5% in power consumption on an idle machine (tests w/2xCPU 1.1GHz P3).
- *
- * XXX we need to have a cpu mask of idle cpus and generate an IPI or
- * otherwise generate some sort of interrupt to wake up cpus sitting in HLT.
- * Then we can have our cake and eat it too.
- *
- * XXX I'm turning it on for SMP as well by default for now. It seems to
- * help lock contention somewhat, and this is critical for HTT. -Peter
- */
-static int cpu_idle_hlt = 1;
-SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW,
- &cpu_idle_hlt, 0, "Idle loop HLT enable");
-
-static void
-cpu_idle_default(void)
-{
- idle_block();
- enable_intr();
-}
-
-/*
- * Note that we have to be careful here to avoid a race between checking
- * sched_runnable() and actually halting. If we don't do this, we may waste
- * the time between calling hlt and the next interrupt even though there
- * is a runnable process.
- */
-void
-cpu_idle(void)
-{
-
-#ifdef SMP
- if (mp_grab_cpu_hlt())
- return;
-#endif
-
- if (cpu_idle_hlt) {
- disable_intr();
- if (sched_runnable())
- enable_intr();
- else
- (*cpu_idle_hook)();
- }
-}
-
-/* Other subsystems (e.g., ACPI) can hook this later. */
-void (*cpu_idle_hook)(void) = cpu_idle_default;
-
-/*
- * Clear registers on exec
- */
-void
-exec_setregs(td, entry, stack, ps_strings)
- struct thread *td;
- u_long entry;
- u_long stack;
- u_long ps_strings;
-{
- struct trapframe *regs = td->td_frame;
- struct pcb *pcb = td->td_pcb;
-
- /* Reset pc->pcb_gs and %gs before possibly invalidating it. */
- pcb->pcb_gs = _udatasel;
- load_gs(_udatasel);
-
- if (td->td_proc->p_md.md_ldt)
- user_ldt_free(td);
-
- bzero((char *)regs, sizeof(struct trapframe));
- regs->tf_eip = entry;
- regs->tf_esp = stack;
- regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T);
- regs->tf_ss = _udatasel;
- regs->tf_ds = _udatasel;
- regs->tf_es = _udatasel;
- regs->tf_fs = _udatasel;
- regs->tf_cs = _ucodesel;
-
- /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */
- regs->tf_ebx = ps_strings;
-
- /*
- * Reset the hardware debug registers if they were in use.
- * They won't have any meaning for the newly exec'd process.
- */
- if (pcb->pcb_flags & PCB_DBREGS) {
- pcb->pcb_dr0 = 0;
- pcb->pcb_dr1 = 0;
- pcb->pcb_dr2 = 0;
- pcb->pcb_dr3 = 0;
- pcb->pcb_dr6 = 0;
- pcb->pcb_dr7 = 0;
- if (pcb == PCPU_GET(curpcb)) {
- /*
- * Clear the debug registers on the running
- * CPU, otherwise they will end up affecting
- * the next process we switch to.
- */
- reset_dbregs();
- }
- pcb->pcb_flags &= ~PCB_DBREGS;
- }
-
- /*
- * Initialize the math emulator (if any) for the current process.
- * Actually, just clear the bit that says that the emulator has
- * been initialized. Initialization is delayed until the process
- * traps to the emulator (if it is done at all) mainly because
- * emulators don't provide an entry point for initialization.
- */
- td->td_pcb->pcb_flags &= ~FP_SOFTFP;
-
- /* Initialize the npx (if any) for the current process. */
- /*
- * XXX the above load_cr0() also initializes it and is a layering
- * violation if NPX is configured. It drops the npx partially
- * and this would be fatal if we were interrupted now, and decided
- * to force the state to the pcb, and checked the invariant
- * (CR0_TS clear) if and only if PCPU_GET(fpcurthread) != NULL).
- * ALL of this can happen except the check. The check used to
- * happen and be fatal later when we didn't complete the drop
- * before returning to user mode. This should be fixed properly
- * soon.
- */
- fpstate_drop(td);
-
- /*
- * XXX - Linux emulator
- * Make sure sure edx is 0x0 on entry. Linux binaries depend
- * on it.
- */
- td->td_retval[1] = 0;
-}
-
-void
-cpu_setregs(void)
-{
- /* nothing for Xen to do */
-}
-
-static int
-sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS)
-{
- int error;
- error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
- req);
- if (!error && req->newptr)
- resettodr();
- return (error);
-}
-
-SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
- &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");
-
-SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
- CTLFLAG_RW, &disable_rtc_set, 0, "");
-
-SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo,
- CTLFLAG_RD, &bootinfo, bootinfo, "");
-
-u_long bootdev; /* not a dev_t - encoding is different */
-SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev,
- CTLFLAG_RD, &bootdev, 0, "Maybe the Boot device (not in struct cdev
*format)");
-
-/*
- * Initialize 386 and configure to run kernel
- */
-
-/*
- * Initialize segments & interrupt table
- */
-
-int _default_ldt;
-union descriptor *gdt; /* global descriptor table */
-static struct gate_descriptor idt0[NIDT];
-struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */
-union descriptor *ldt; /* local descriptor table */
-struct region_descriptor r_idt; /* table descriptors */
-
-int private_tss; /* flag indicating private tss */
-
-#if defined(I586_CPU) && !defined(NO_F00F_HACK)
-extern int has_f00f_bug;
-#endif
-
-static struct i386tss dblfault_tss;
-static char dblfault_stack[PAGE_SIZE];
-
-extern struct user *proc0uarea;
-extern vm_offset_t proc0kstack;
-
-
-/* software prototypes -- in more palatable form */
-struct soft_segment_descriptor gdt_segs[] = {
-/* GNULL_SEL 0 Null Descriptor */
-{ 0x0, /* segment base address */
- 0x0, /* length */
- 0, /* segment type */
- SEL_KPL, /* segment descriptor priority level */
- 0, /* segment descriptor present */
- 0, 0,
- 0, /* default 32 vs 16 bit size */
- 0 /* limit granularity (byte/page units)*/ },
-/* GCODE_SEL 1 Code Descriptor for kernel */
-{ 0x0, /* segment base address */
- 0x0, /* length - all address space */
- 0, /* segment type */
- 0, /* segment descriptor priority level */
- 0, /* segment descriptor present */
- 0, 0,
- 0, /* default 32 vs 16 bit size */
- 0 /* limit granularity (byte/page units)*/ },
-
-/* GDATA_SEL 2 Data Descriptor for kernel */
-{ 0x0, /* segment base address */
- 0x0, /* length - all address space */
- 0, /* segment type */
- 0, /* segment descriptor priority level */
- 0, /* segment descriptor present */
- 0, 0,
- 0, /* default 32 vs 16 bit size */
- 0 /* limit granularity (byte/page units)*/ },
-
-/* GPRIV_SEL 3 SMP Per-Processor Private Data Descriptor */
-{ 0x0, /* segment base address */
- 0xfffff, /* length - all address space */
- SDT_MEMRWA, /* segment type */
- SEL_KPL, /* segment descriptor priority level */
- 1, /* segment descriptor present */
- 0, 0,
- 1, /* default 32 vs 16 bit size */
- 1 /* limit granularity (byte/page units)*/ },
-#if 0
-/* GPROC0_SEL 4 Proc 0 Tss Descriptor */
-{
- 0x0, /* segment base address */
- sizeof(struct i386tss)-1,/* length */
- SDT_SYS386TSS, /* segment type */
- 0, /* segment descriptor priority level */
- 1, /* segment descriptor present */
- 0, 0,
- 0, /* unused - default 32 vs 16 bit size */
- 0 /* limit granularity (byte/page units)*/ },
-/* GLDT_SEL 5 LDT Descriptor */
-{ (int) ldt, /* segment base address */
- sizeof(ldt)-1, /* length - all address space */
- SDT_SYSLDT, /* segment type */
- SEL_UPL, /* segment descriptor priority level */
- 1, /* segment descriptor present */
- 0, 0,
- 0, /* unused - default 32 vs 16 bit size */
- 0 /* limit granularity (byte/page units)*/ },
-/* GUSERLDT_SEL 6 User LDT Descriptor per process */
-{ (int) ldt, /* segment base address */
- (512 * sizeof(union descriptor)-1), /* length */
- SDT_SYSLDT, /* segment type */
- 0, /* segment descriptor priority level */
- 1, /* segment descriptor present */
- 0, 0,
- 0, /* unused - default 32 vs 16 bit size */
- 0 /* limit granularity (byte/page units)*/ },
-/* GTGATE_SEL 7 Null Descriptor - Placeholder */
-{ 0x0, /* segment base address */
- 0x0, /* length - all address space */
- 0, /* segment type */
- 0, /* segment descriptor priority level */
- 0, /* segment descriptor present */
- 0, 0,
- 0, /* default 32 vs 16 bit size */
- 0 /* limit granularity (byte/page units)*/ },
-/* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */
-{ 0x400, /* segment base address */
- 0xfffff, /* length */
- SDT_MEMRWA, /* segment type */
- 0, /* segment descriptor priority level */
- 1, /* segment descriptor present */
- 0, 0,
- 1, /* default 32 vs 16 bit size */
- 1 /* limit granularity (byte/page units)*/ },
-/* GPANIC_SEL 9 Panic Tss Descriptor */
-{ (int) &dblfault_tss, /* segment base address */
- sizeof(struct i386tss)-1,/* length - all address space */
- SDT_SYS386TSS, /* segment type */
- 0, /* segment descriptor priority level */
- 1, /* segment descriptor present */
- 0, 0,
- 0, /* unused - default 32 vs 16 bit size */
- 0 /* limit granularity (byte/page units)*/ },
-/* GBIOSCODE32_SEL 10 BIOS 32-bit interface (32bit Code) */
-{ 0, /* segment base address (overwritten) */
- 0xfffff, /* length */
- SDT_MEMERA, /* segment type */
- 0, /* segment descriptor priority level */
- 1, /* segment descriptor present */
- 0, 0,
- 0, /* default 32 vs 16 bit size */
- 1 /* limit granularity (byte/page units)*/ },
-/* GBIOSCODE16_SEL 11 BIOS 32-bit interface (16bit Code) */
-{ 0, /* segment base address (overwritten) */
- 0xfffff, /* length */
- SDT_MEMERA, /* segment type */
- 0, /* segment descriptor priority level */
- 1, /* segment descriptor present */
- 0, 0,
- 0, /* default 32 vs 16 bit size */
- 1 /* limit granularity (byte/page units)*/ },
-/* GBIOSDATA_SEL 12 BIOS 32-bit interface (Data) */
-{ 0, /* segment base address (overwritten) */
- 0xfffff, /* length */
- SDT_MEMRWA, /* segment type */
- 0, /* segment descriptor priority level */
- 1, /* segment descriptor present */
- 0, 0,
- 1, /* default 32 vs 16 bit size */
- 1 /* limit granularity (byte/page units)*/ },
-/* GBIOSUTIL_SEL 13 BIOS 16-bit interface (Utility) */
-{ 0, /* segment base address (overwritten) */
- 0xfffff, /* length */
- SDT_MEMRWA, /* segment type */
- 0, /* segment descriptor priority level */
- 1, /* segment descriptor present */
- 0, 0,
- 0, /* default 32 vs 16 bit size */
- 1 /* limit granularity (byte/page units)*/ },
-/* GBIOSARGS_SEL 14 BIOS 16-bit interface (Arguments) */
-{ 0, /* segment base address (overwritten) */
- 0xfffff, /* length */
- SDT_MEMRWA, /* segment type */
- 0, /* segment descriptor priority level */
- 1, /* segment descriptor present */
- 0, 0,
- 0, /* default 32 vs 16 bit size */
- 1 /* limit granularity (byte/page units)*/ },
-#endif
-};
-
-static struct soft_segment_descriptor ldt_segs[] = {
- /* Null Descriptor - overwritten by call gate */
-{ 0x0, /* segment base address */
- 0x0, /* length - all address space */
- 0, /* segment type */
- 0, /* segment descriptor priority level */
- 0, /* segment descriptor present */
- 0, 0,
- 0, /* default 32 vs 16 bit size */
- 0 /* limit granularity (byte/page units)*/ },
- /* Null Descriptor - overwritten by call gate */
-{ 0x0, /* segment base address */
- 0x0, /* length - all address space */
- 0, /* segment type */
- 0, /* segment descriptor priority level */
- 0, /* segment descriptor present */
- 0, 0,
- 0, /* default 32 vs 16 bit size */
- 0 /* limit granularity (byte/page units)*/ },
- /* Null Descriptor - overwritten by call gate */
-{ 0x0, /* segment base address */
- 0x0, /* length - all address space */
- 0, /* segment type */
- 0, /* segment descriptor priority level */
- 0, /* segment descriptor present */
- 0, 0,
- 0, /* default 32 vs 16 bit size */
- 0 /* limit granularity (byte/page units)*/ },
- /* Code Descriptor for user */
-{ 0x0, /* segment base address */
- 0xfffff, /* length - all address space */
- SDT_MEMERA, /* segment type */
- SEL_UPL, /* segment descriptor priority level */
- 1, /* segment descriptor present */
- 0, 0,
- 1, /* default 32 vs 16 bit size */
- 1 /* limit granularity (byte/page units)*/ },
- /* Null Descriptor - overwritten by call gate */
-{ 0x0, /* segment base address */
- 0x0, /* length - all address space */
- 0, /* segment type */
- 0, /* segment descriptor priority level */
- 0, /* segment descriptor present */
- 0, 0,
- 0, /* default 32 vs 16 bit size */
- 0 /* limit granularity (byte/page units)*/ },
- /* Data Descriptor for user */
-{ 0x0, /* segment base address */
- 0xfffff, /* length - all address space */
- SDT_MEMRWA, /* segment type */
- SEL_UPL, /* segment descriptor priority level */
- 1, /* segment descriptor present */
- 0, 0,
- 1, /* default 32 vs 16 bit size */
- 1 /* limit granularity (byte/page units)*/ },
-};
-
-struct proc_ldt default_proc_ldt;
-
-void
-setidt(idx, func, typ, dpl, selec)
- int idx;
- inthand_t *func;
- int typ;
- int dpl;
- int selec;
-{
- struct gate_descriptor *ip;
-
- ip = idt + idx;
- ip->gd_looffset = (int)func;
- ip->gd_selector = selec;
- ip->gd_stkcpy = 0;
- ip->gd_xx = 0;
- ip->gd_type = typ;
- ip->gd_dpl = dpl;
- ip->gd_p = 1;
- ip->gd_hioffset = ((int)func)>>16 ;
-}
-
-#define IDTVEC(name) __CONCAT(X,name)
-
-extern inthand_t
- IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
- IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
- IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
- IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
- IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);
-
-#ifdef DDB
-/*
- * Display the index and function name of any IDT entries that don't use
- * the default 'rsvd' entry point.
- */
-DB_SHOW_COMMAND(idt, db_show_idt)
-{
- struct gate_descriptor *ip;
- int idx, quit;
- uintptr_t func;
-
- ip = idt;
- db_setup_paging(db_simple_pager, &quit, DB_LINES_PER_PAGE);
- for (idx = 0, quit = 0; idx < NIDT; idx++) {
- func = (ip->gd_hioffset << 16 | ip->gd_looffset);
- if (func != (uintptr_t)&IDTVEC(rsvd)) {
- db_printf("%3d\t", idx);
- db_printsym(func, DB_STGY_PROC);
- db_printf("\n");
- }
- ip++;
- }
-}
-#endif
-
-void
-sdtossd(sd, ssd)
- struct segment_descriptor *sd;
- struct soft_segment_descriptor *ssd;
-{
- ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase;
- ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
- ssd->ssd_type = sd->sd_type;
- ssd->ssd_dpl = sd->sd_dpl;
- ssd->ssd_p = sd->sd_p;
- ssd->ssd_def32 = sd->sd_def32;
- ssd->ssd_gran = sd->sd_gran;
-}
-
-#define PHYSMAP_SIZE (2 * 8)
-
-/*
- * Populate the (physmap) array with base/bound pairs describing the
- * available physical memory in the system, then test this memory and
- * build the phys_avail array describing the actually-available memory.
- *
- * If we cannot accurately determine the physical memory map, then use
- * value from the 0xE801 call, and failing that, the RTC.
- *
- * Total memory size may be set by the kernel environment variable
- * hw.physmem or the compile-time define MAXMEM.
- *
- * XXX first should be vm_paddr_t.
- */
-static void
-getmemsize(void)
-{
- int i;
- printf("start_info %p\n", xen_start_info);
- printf("start_info->nr_pages %ld\n", xen_start_info->nr_pages);
- Maxmem = xen_start_info->nr_pages - init_first;
- /* call pmap initialization to make new kernel address space */
- pmap_bootstrap((init_first)<< PAGE_SHIFT, 0);
- for (i = 0; i < 10; i++)
- phys_avail[i] = 0;
- physmem = Maxmem;
- avail_end = ptoa(Maxmem) - round_page(MSGBUF_SIZE);
- phys_avail[0] = init_first << PAGE_SHIFT;
- phys_avail[1] = avail_end;
-}
-
-extern unsigned long cpu0prvpage;
-extern unsigned long *SMPpt;
-pteinfo_t *pteinfo_list;
-unsigned long *xen_machine_phys = ((unsigned long *)VADDR(1008, 0));
-int preemptable;
-int gdt_set;
-static int ncpus;
-
-/* Linux infection */
-#define PAGE_OFFSET KERNBASE
-#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET)
-#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
-void
-initvalues(start_info_t *startinfo)
-{
- int i;
- vm_paddr_t pdir_shadow_ma, KPTphys;
- vm_offset_t *pdir_shadow;
-#ifdef SMP
- int j;
-#endif
-
-#ifdef WRITABLE_PAGETABLES
- printk("using writable pagetables\n");
- HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
-#endif
-
- xen_start_info = startinfo;
- xen_phys_machine = (unsigned long *)startinfo->mfn_list;
- unsigned long tmpindex = ((__pa(xen_start_info->pt_base) >> PAGE_SHIFT) +
xen_start_info->nr_pt_frames) + 3 /* number of pages allocated after the pts +
1*/;
- xendebug_flags = 0xffffffff;
- /* pre-zero unused mapped pages */
- bzero((char *)(KERNBASE + (tmpindex << PAGE_SHIFT)), (1024 -
tmpindex)*PAGE_SIZE);
- IdlePTD = (pd_entry_t *)xpmap_ptom(__pa(startinfo->pt_base));
- KPTphys = xpmap_ptom(__pa(startinfo->pt_base + PAGE_SIZE));
- XENPRINTF("IdlePTD %p\n", IdlePTD);
- XENPRINTF("nr_pages: %ld shared_info: 0x%lx flags: 0x%lx pt_base: 0x%lx "
- "mod_start: 0x%lx mod_len: 0x%lx\n",
- xen_start_info->nr_pages, xen_start_info->shared_info,
- xen_start_info->flags, xen_start_info->pt_base,
- xen_start_info->mod_start, xen_start_info->mod_len);
-
-
-
-
- /* Map proc0's UPAGES */
- proc0uarea = (struct user *)(KERNBASE + (tmpindex << PAGE_SHIFT));
- tmpindex += UAREA_PAGES;
-
- /* Map proc0's KSTACK */
- proc0kstack = KERNBASE + (tmpindex << PAGE_SHIFT);
- tmpindex += KSTACK_PAGES;
-
- /* allocate page for gdt */
- gdt = (union descriptor *)(KERNBASE + (tmpindex << PAGE_SHIFT));
- tmpindex++;
-
- /* allocate page for ldt */
- ldt = (union descriptor *)(KERNBASE + (tmpindex << PAGE_SHIFT));
- tmpindex++;
-
- /* initialize page directory shadow page */
- pdir_shadow = (vm_offset_t *)(KERNBASE + (tmpindex << PAGE_SHIFT));
- i686_pagezero(pdir_shadow);
- pdir_shadow_ma = xpmap_ptom(tmpindex << PAGE_SHIFT);
- PT_SET_MA(pdir_shadow, pdir_shadow_ma | PG_V | PG_A);
- tmpindex++;
-
- /* setup shadow mapping first so vtomach will work */
- xen_pt_pin((vm_paddr_t)pdir_shadow_ma);
- xen_queue_pt_update((vm_paddr_t)(IdlePTD + PTDPTDI),
- pdir_shadow_ma | PG_V | PG_A | PG_RW | PG_M);
- xen_queue_pt_update(pdir_shadow_ma + PTDPTDI*sizeof(vm_paddr_t),
- ((vm_paddr_t)IdlePTD) | PG_V | PG_A);
- xen_queue_pt_update(pdir_shadow_ma + KPTDI*sizeof(vm_paddr_t),
- KPTphys | PG_V | PG_A);
-
- xen_flush_queue();
- /* allocate remainder of NKPT pages */
-
-
-#ifdef SMP
-#if 0
- /* allocate cpu0 private page */
- cpu0prvpage = (KERNBASE + (tmpindex << PAGE_SHIFT));
- tmpindex++;
-#endif
- /* allocate SMP page table */
- SMPpt = (unsigned long *)(KERNBASE + (tmpindex << PAGE_SHIFT));
-#if 0
- /* Map the private page into the SMP page table */
- SMPpt[0] = vtomach(cpu0prvpage) | PG_RW | PG_M | PG_V | PG_A;
-#endif
- /* map SMP page table RO */
- PT_SET_MA(SMPpt, *vtopte((vm_offset_t)SMPpt) & ~PG_RW);
-
- /* put the page table into the page directory */
- xen_queue_pt_update((vm_paddr_t)(IdlePTD + MPPTDI),
- xpmap_ptom((tmpindex << PAGE_SHIFT))| PG_M | PG_RW |
PG_V | PG_A);
- xen_queue_pt_update(pdir_shadow_ma + MPPTDI*sizeof(vm_paddr_t),
- xpmap_ptom((tmpindex << PAGE_SHIFT))| PG_V | PG_A);
- tmpindex++;
-#endif
-
-#ifdef PMAP_DEBUG
- pteinfo_list = (pteinfo_t *)(KERNBASE + (tmpindex << PAGE_SHIFT));
- tmpindex += ((xen_start_info->nr_pages >> 10) + 1)*(1 +
XPQ_CALL_DEPTH*XPQ_CALL_COUNT);
-
- if (tmpindex > 980)
- __asm__("int3");
-#endif
- /* unmap remaining pages from initial 4MB chunk */
- for (i = tmpindex; i%1024 != 0; i++)
- xen_queue_pt_update(KPTphys + i*sizeof(vm_paddr_t), 0);
- xen_flush_queue();
-
- /* allocate remainder of NKPT pages */
- for (i = 0; i < NKPT-1; i++, tmpindex++) {
- xen_queue_pt_update((vm_paddr_t)(IdlePTD + KPTDI + i + 1),
- xpmap_ptom((tmpindex << PAGE_SHIFT)| PG_M | PG_RW |
PG_V | PG_A));
- xen_queue_pt_update(pdir_shadow_ma + (KPTDI + i +
1)*sizeof(vm_paddr_t),
- xpmap_ptom((tmpindex << PAGE_SHIFT)| PG_V | PG_A));
- }
- tmpindex += NKPT-1;
- PT_UPDATES_FLUSH();
-
- HYPERVISOR_shared_info = (shared_info_t *)(KERNBASE + (tmpindex <<
PAGE_SHIFT));
- PT_SET_MA(HYPERVISOR_shared_info,
- xen_start_info->shared_info | PG_A | PG_V | PG_RW | PG_M);
- tmpindex++;
-
- HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list = (unsigned
long)xen_phys_machine;
- ncpus = HYPERVISOR_shared_info->n_vcpu;
-#ifdef SMP
- for (i = 0; i < ncpus; i++) {
- int npages = (sizeof(struct privatespace) + 1)/PAGE_SIZE;
- for (j = 0; j < npages; j++) {
- vm_paddr_t ma = xpmap_ptom(tmpindex << PAGE_SHIFT);
- tmpindex++;
- PT_SET_VA_MA(SMPpt + i*npages + j, ma | PG_A | PG_V | PG_RW
| PG_M, FALSE);
- }
- }
- xen_flush_queue();
-#endif
-
- init_first = tmpindex;
-
-}
-
-
-trap_info_t trap_table[] = {
- { 0, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)},
- { 1, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)},
- { 3, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)},
- { 4, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)},
- /* This is UPL on Linux and KPL on BSD */
- { 5, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)},
- { 6, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)},
- { 7, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)},
- /*
- * { 8, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)},
- * no handler for double fault
- */
- { 9, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)},
- {10, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)},
- {11, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)},
- {12, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)},
- {13, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)},
- {14, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)},
- {15, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)},
- {16, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)},
- {17, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)},
- {18, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)},
- {19, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)},
- {0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long)
&IDTVEC(int0x80_syscall)},
- { 0, 0, 0, 0 }
-};
-
-void
-init386(void)
-{
- int gsel_tss, metadata_missing, off, x, error;
- struct pcpu *pc;
- unsigned long gdtmachpfn;
-#ifdef SMP
- int i;
-#endif
- proc0.p_uarea = proc0uarea;
- thread0.td_kstack = proc0kstack;
- thread0.td_pcb = (struct pcb *)
- (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1;
-
- /*
- * This may be done better later if it gets more high level
- * components in it. If so just link td->td_proc here.
- */
- proc_linkup(&proc0, &ksegrp0, &thread0);
-
- metadata_missing = 0;
- if (xen_start_info->mod_start)
- preload_metadata = (caddr_t)xen_start_info->mod_start;
- else
- metadata_missing = 1;
-
- /* XXX - temporary hack */
- preload_metadata = (caddr_t)0;
- /* XXX */
-
- if (envmode == 1)
- kern_envp = static_env;
- else if ((caddr_t)xen_start_info->cmd_line)
- kern_envp = xen_setbootenv((caddr_t)xen_start_info->cmd_line);
-
- boothowto |= xen_boothowto(kern_envp);
-
- if (boothowto & RB_GDB_PAUSE)
- __asm__("int $0x3;");
-
- /* Init basic tunables, hz etc */
- init_param1();
- /*
- * make gdt memory segments, the code segment goes up to end of the
- * page with etext in it, the data segment goes to the end of
- * the address space
- */
-#if 0
- /*
- * XEN occupies the upper 64MB of virtual address space
- * At its base it manages an array mapping machine page frames
- * to physical page frames - hence we need to be able to
- * access 4GB - (64MB - 4MB + 64k)
- */
- gdt_segs[GCODE_SEL].ssd_limit = atop(0 - ((1 << 26) - (1 << 22) + (1 <<
16)));
- gdt_segs[GDATA_SEL].ssd_limit = atop(0 - ((1 << 26) - (1 << 22) + (1 <<
16)));
-#endif
-#ifdef SMP
- /* XXX this will blow up if there are more than 512/NGDT vcpus */
- pc = &SMP_prvspace[0].pcpu;
- for (i = 0; i < ncpus; i++) {
- cpu_add(i, (i == 0));
-
- gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[i];
- gdt_segs[GPRIV_SEL].ssd_limit =
- atop(sizeof(struct privatespace) - 1);
- gdt_segs[GPROC0_SEL].ssd_base =
- (int) &SMP_prvspace[i].pcpu.pc_common_tss;
- SMP_prvspace[i].pcpu.pc_prvspace =
- &SMP_prvspace[i].pcpu;
-
- for (x = 0; x < NGDT; x++) {
- ssdtosd(&gdt_segs[x], &gdt[i * NGDT + x].sd);
- }
- }
-#else
- pc = &__pcpu;
- gdt_segs[GPRIV_SEL].ssd_limit =
- atop(sizeof(struct pcpu) - 1);
- gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
- gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;
- for (x = 0; x < NGDT; x++)
- ssdtosd(&gdt_segs[x], &gdt[x].sd);
-#endif
-
-
- PT_SET_MA(gdt, *vtopte((unsigned long)gdt) & ~PG_RW);
- gdtmachpfn = vtomach(gdt) >> PAGE_SHIFT;
- PANIC_IF(HYPERVISOR_set_gdt(&gdtmachpfn, 512) != 0);
-
-
- lgdt_finish();
- gdt_set = 1;
-
- if ((error = HYPERVISOR_set_trap_table(trap_table)) != 0) {
- panic("set_trap_table failed - error %d\n", error);
- }
- HYPERVISOR_set_callbacks(GSEL(GCODE_SEL, SEL_KPL), (unsigned
long)Xhypervisor_callback,
- GSEL(GCODE_SEL, SEL_KPL), (unsigned
long)failsafe_callback);
-
-
-
- pcpu_init(pc, 0, sizeof(struct pcpu));
- PCPU_SET(prvspace, pc);
- PCPU_SET(curthread, &thread0);
- PCPU_SET(curpcb, thread0.td_pcb);
- PCPU_SET(pdir, (unsigned long)IdlePTD);
- /*
- * Initialize mutexes.
- *
- */
- mutex_init();
-
- mtx_init(&clock_lock, "clk", NULL, MTX_SPIN);
- mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS);
-
-
-
- /* make ldt memory segments */
- /*
- * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. And it
- * should be spelled ...MAX_USER...
- */
- ldt_segs[LUCODE_SEL].ssd_limit = atop(VM_MAXUSER_ADDRESS - 1);
- ldt_segs[LUDATA_SEL].ssd_limit = atop(VM_MAXUSER_ADDRESS - 1);
- for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++)
- ssdtosd(&ldt_segs[x], &ldt[x].sd);
- default_proc_ldt.ldt_base = (caddr_t)ldt;
- default_proc_ldt.ldt_len = 6;
- _default_ldt = (int)&default_proc_ldt;
- PCPU_SET(currentldt, _default_ldt)
- PT_SET_MA(ldt, *vtopte((unsigned long)ldt) & ~PG_RW);
- xen_set_ldt((unsigned long) ldt, (sizeof ldt_segs / sizeof
ldt_segs[0]));
-
-
- /*
- * Initialize the console before we print anything out.
- */
- cninit();
- if (metadata_missing)
- printf("WARNING: loader(8) metadata is missing!\n");
-
-#ifdef DDB
- ksym_start = bootinfo.bi_symtab;
- ksym_end = bootinfo.bi_esymtab;
-#endif
- kdb_init();
-#ifdef KDB
- if (boothowto & RB_KDB)
- kdb_enter("Boot flags requested debugger");
-#endif
-
- finishidentcpu(); /* Final stage of CPU initialization */
- setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
- setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
- initializecpu(); /* Initialize CPU registers */
-
- /* make an initial tss so cpu can get interrupt stack on syscall! */
- /* Note: -16 is so we can grow the trapframe if we came from vm86 */
- PCPU_SET(common_tss.tss_esp0, thread0.td_kstack +
- KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb) - 16);
- PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
- gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
-#if 0
- private_tss = 0;
- PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd);
- PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
- PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
-#endif
- HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL),
PCPU_GET(common_tss.tss_esp0));
-
-
- dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
- dblfault_tss.tss_esp2 =
(int)&dblfault_stack[sizeof(dblfault_stack)];
- dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
- dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
-
- dblfault_tss.tss_cr3 = (int)IdlePTD;
- dblfault_tss.tss_eip = (int)dblfault_handler;
- dblfault_tss.tss_eflags = PSL_KERNEL;
- dblfault_tss.tss_ds = dblfault_tss.tss_es =
- dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL);
- dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL);
- dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
- dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
-
- getmemsize();
- init_param2(physmem);
- /* now running on new page tables, configured,and u/iom is accessible */
- /* Map the message buffer. */
- for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE)
- pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off);
- PT_UPDATES_FLUSH();
-
- /* safe to enable xen page queue locking */
-
- msgbufinit(msgbufp, MSGBUF_SIZE);
- /* XXX KMM I don't think we need call gates */
-#if 0
- printf("modify ldt\n");
- /* make a call gate to reenter kernel with */
- gdp = &ldt[LSYS5CALLS_SEL].gd;
-
- x = (int) &IDTVEC(lcall_syscall);
- gdp->gd_looffset = x;
- gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
- gdp->gd_stkcpy = 1;
- gdp->gd_type = SDT_SYS386CGT;
- gdp->gd_dpl = SEL_UPL;
- gdp->gd_p = 1;
- gdp->gd_hioffset = x >> 16;
-
- /* XXX does this work? */
- ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL];
- ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL];
-#endif
- /* transfer to user mode */
-
- _ucodesel = LSEL(LUCODE_SEL, SEL_UPL);
- _udatasel = LSEL(LUDATA_SEL, SEL_UPL);
-
- /* setup proc 0's pcb */
- thread0.td_pcb->pcb_flags = 0; /* XXXKSE */
- thread0.td_pcb->pcb_cr3 = (int)IdlePTD;
- thread0.td_pcb->pcb_ext = 0;
- thread0.td_frame = &proc0_tf;
-}
-
-void
-cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
-{
-
- pcpu->pc_acpi_id = 0xffffffff;
-}
-
-/*
- * Construct a PCB from a trapframe. This is called from kdb_trap() where
- * we want to start a backtrace from the function that caused us to enter
- * the debugger. We have the context in the trapframe, but base the trace
- * on the PCB. The PCB doesn't have to be perfect, as long as it contains
- * enough for a backtrace.
- */
-void
-makectx(struct trapframe *tf, struct pcb *pcb)
-{
-
- pcb->pcb_edi = tf->tf_edi;
- pcb->pcb_esi = tf->tf_esi;
- pcb->pcb_ebp = tf->tf_ebp;
- pcb->pcb_ebx = tf->tf_ebx;
- pcb->pcb_eip = tf->tf_eip;
- pcb->pcb_esp = (ISPL(tf->tf_cs)) ? tf->tf_esp : (int)(tf + 1) - 8;
-}
-
-int
-ptrace_set_pc(struct thread *td, u_long addr)
-{
-
- td->td_frame->tf_eip = addr;
- return (0);
-}
-
-int
-ptrace_single_step(struct thread *td)
-{
- td->td_frame->tf_eflags |= PSL_T;
- return (0);
-}
-
-int
-ptrace_clear_single_step(struct thread *td)
-{
- td->td_frame->tf_eflags &= ~PSL_T;
- return (0);
-}
-
-int
-fill_regs(struct thread *td, struct reg *regs)
-{
- struct pcb *pcb;
- struct trapframe *tp;
-
- tp = td->td_frame;
- regs->r_fs = tp->tf_fs;
- regs->r_es = tp->tf_es;
- regs->r_ds = tp->tf_ds;
- regs->r_edi = tp->tf_edi;
- regs->r_esi = tp->tf_esi;
- regs->r_ebp = tp->tf_ebp;
- regs->r_ebx = tp->tf_ebx;
- regs->r_edx = tp->tf_edx;
- regs->r_ecx = tp->tf_ecx;
- regs->r_eax = tp->tf_eax;
- regs->r_eip = tp->tf_eip;
- regs->r_cs = tp->tf_cs;
- regs->r_eflags = tp->tf_eflags;
- regs->r_esp = tp->tf_esp;
- regs->r_ss = tp->tf_ss;
- pcb = td->td_pcb;
- regs->r_gs = pcb->pcb_gs;
- return (0);
-}
-
-int
-set_regs(struct thread *td, struct reg *regs)
-{
- struct pcb *pcb;
- struct trapframe *tp;
-
- tp = td->td_frame;
- if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) ||
- !CS_SECURE(regs->r_cs))
- return (EINVAL);
- tp->tf_fs = regs->r_fs;
- tp->tf_es = regs->r_es;
- tp->tf_ds = regs->r_ds;
- tp->tf_edi = regs->r_edi;
- tp->tf_esi = regs->r_esi;
- tp->tf_ebp = regs->r_ebp;
- tp->tf_ebx = regs->r_ebx;
- tp->tf_edx = regs->r_edx;
- tp->tf_ecx = regs->r_ecx;
- tp->tf_eax = regs->r_eax;
- tp->tf_eip = regs->r_eip;
- tp->tf_cs = regs->r_cs;
- tp->tf_eflags = regs->r_eflags;
- tp->tf_esp = regs->r_esp;
- tp->tf_ss = regs->r_ss;
- pcb = td->td_pcb;
- pcb->pcb_gs = regs->r_gs;
- return (0);
-}
-
-#ifdef CPU_ENABLE_SSE
-static void
-fill_fpregs_xmm(sv_xmm, sv_87)
- struct savexmm *sv_xmm;
- struct save87 *sv_87;
-{
- register struct env87 *penv_87 = &sv_87->sv_env;
- register struct envxmm *penv_xmm = &sv_xmm->sv_env;
- int i;
-
- bzero(sv_87, sizeof(*sv_87));
-
- /* FPU control/status */
- penv_87->en_cw = penv_xmm->en_cw;
- penv_87->en_sw = penv_xmm->en_sw;
- penv_87->en_tw = penv_xmm->en_tw;
- penv_87->en_fip = penv_xmm->en_fip;
- penv_87->en_fcs = penv_xmm->en_fcs;
- penv_87->en_opcode = penv_xmm->en_opcode;
- penv_87->en_foo = penv_xmm->en_foo;
- penv_87->en_fos = penv_xmm->en_fos;
-
- /* FPU registers */
- for (i = 0; i < 8; ++i)
- sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc;
-}
-
-static void
-set_fpregs_xmm(sv_87, sv_xmm)
- struct save87 *sv_87;
- struct savexmm *sv_xmm;
-{
- register struct env87 *penv_87 = &sv_87->sv_env;
- register struct envxmm *penv_xmm = &sv_xmm->sv_env;
- int i;
-
- /* FPU control/status */
- penv_xmm->en_cw = penv_87->en_cw;
- penv_xmm->en_sw = penv_87->en_sw;
- penv_xmm->en_tw = penv_87->en_tw;
- penv_xmm->en_fip = penv_87->en_fip;
- penv_xmm->en_fcs = penv_87->en_fcs;
- penv_xmm->en_opcode = penv_87->en_opcode;
- penv_xmm->en_foo = penv_87->en_foo;
- penv_xmm->en_fos = penv_87->en_fos;
-
- /* FPU registers */
- for (i = 0; i < 8; ++i)
- sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i];
-}
-#endif /* CPU_ENABLE_SSE */
-
-int
-fill_fpregs(struct thread *td, struct fpreg *fpregs)
-{
-#ifdef CPU_ENABLE_SSE
- if (cpu_fxsr) {
- fill_fpregs_xmm(&td->td_pcb->pcb_save.sv_xmm,
- (struct save87 *)fpregs);
- return (0);
- }
-#endif /* CPU_ENABLE_SSE */
- bcopy(&td->td_pcb->pcb_save.sv_87, fpregs, sizeof *fpregs);
- return (0);
-}
-
-int
-set_fpregs(struct thread *td, struct fpreg *fpregs)
-{
-#ifdef CPU_ENABLE_SSE
- if (cpu_fxsr) {
- set_fpregs_xmm((struct save87 *)fpregs,
- &td->td_pcb->pcb_save.sv_xmm);
- return (0);
- }
-#endif /* CPU_ENABLE_SSE */
- bcopy(fpregs, &td->td_pcb->pcb_save.sv_87, sizeof *fpregs);
- return (0);
-}
-
-/*
- * Get machine context.
- */
-int
-get_mcontext(struct thread *td, mcontext_t *mcp, int flags)
-{
- struct trapframe *tp;
-
- tp = td->td_frame;
-
- PROC_LOCK(curthread->td_proc);
- mcp->mc_onstack = sigonstack(tp->tf_esp);
- PROC_UNLOCK(curthread->td_proc);
- mcp->mc_gs = td->td_pcb->pcb_gs;
- mcp->mc_fs = tp->tf_fs;
- mcp->mc_es = tp->tf_es;
- mcp->mc_ds = tp->tf_ds;
- mcp->mc_edi = tp->tf_edi;
- mcp->mc_esi = tp->tf_esi;
- mcp->mc_ebp = tp->tf_ebp;
- mcp->mc_isp = tp->tf_isp;
- if (flags & GET_MC_CLEAR_RET) {
- mcp->mc_eax = 0;
- mcp->mc_edx = 0;
- } else {
- mcp->mc_eax = tp->tf_eax;
- mcp->mc_edx = tp->tf_edx;
- }
- mcp->mc_ebx = tp->tf_ebx;
- mcp->mc_ecx = tp->tf_ecx;
- mcp->mc_eip = tp->tf_eip;
- mcp->mc_cs = tp->tf_cs;
- mcp->mc_eflags = tp->tf_eflags;
- mcp->mc_esp = tp->tf_esp;
- mcp->mc_ss = tp->tf_ss;
- mcp->mc_len = sizeof(*mcp);
- get_fpcontext(td, mcp);
- return (0);
-}
-
-/*
- * Set machine context.
- *
- * However, we don't set any but the user modifiable flags, and we won't
- * touch the cs selector.
- */
-int
-set_mcontext(struct thread *td, const mcontext_t *mcp)
-{
- struct trapframe *tp;
- int eflags, ret;
-
- tp = td->td_frame;
- if (mcp->mc_len != sizeof(*mcp))
- return (EINVAL);
- eflags = (mcp->mc_eflags & PSL_USERCHANGE) |
- (tp->tf_eflags & ~PSL_USERCHANGE);
- if ((ret = set_fpcontext(td, mcp)) == 0) {
- tp->tf_fs = mcp->mc_fs;
- tp->tf_es = mcp->mc_es;
- tp->tf_ds = mcp->mc_ds;
- tp->tf_edi = mcp->mc_edi;
- tp->tf_esi = mcp->mc_esi;
- tp->tf_ebp = mcp->mc_ebp;
- tp->tf_ebx = mcp->mc_ebx;
- tp->tf_edx = mcp->mc_edx;
- tp->tf_ecx = mcp->mc_ecx;
- tp->tf_eax = mcp->mc_eax;
- tp->tf_eip = mcp->mc_eip;
- tp->tf_eflags = eflags;
- tp->tf_esp = mcp->mc_esp;
- tp->tf_ss = mcp->mc_ss;
- td->td_pcb->pcb_gs = mcp->mc_gs;
- ret = 0;
- }
- return (ret);
-}
-
-static void
-get_fpcontext(struct thread *td, mcontext_t *mcp)
-{
-#ifndef DEV_NPX
- mcp->mc_fpformat = _MC_FPFMT_NODEV;
- mcp->mc_ownedfp = _MC_FPOWNED_NONE;
-#else
- union savefpu *addr;
-
- /*
- * XXX mc_fpstate might be misaligned, since its declaration is not
- * unportabilized using __attribute__((aligned(16))) like the
- * declaration of struct savemm, and anyway, alignment doesn't work
- * for auto variables since we don't use gcc's pessimal stack
- * alignment. Work around this by abusing the spare fields after
- * mcp->mc_fpstate.
- *
- * XXX unpessimize most cases by only aligning when fxsave might be
- * called, although this requires knowing too much about
- * npxgetregs()'s internals.
- */
- addr = (union savefpu *)&mcp->mc_fpstate;
- if (td == PCPU_GET(fpcurthread) &&
-#ifdef CPU_ENABLE_SSE
- cpu_fxsr &&
-#endif
- ((uintptr_t)(void *)addr & 0xF)) {
- do
- addr = (void *)((char *)addr + 4);
- while ((uintptr_t)(void *)addr & 0xF);
- }
- mcp->mc_ownedfp = npxgetregs(td, addr);
- if (addr != (union savefpu *)&mcp->mc_fpstate) {
- bcopy(addr, &mcp->mc_fpstate, sizeof(mcp->mc_fpstate));
- bzero(&mcp->mc_spare2, sizeof(mcp->mc_spare2));
- }
- mcp->mc_fpformat = npxformat();
-#endif
-}
-
-static int
-set_fpcontext(struct thread *td, const mcontext_t *mcp)
-{
- union savefpu *addr;
-
- if (mcp->mc_fpformat == _MC_FPFMT_NODEV)
- return (0);
- else if (mcp->mc_fpformat != _MC_FPFMT_387 &&
- mcp->mc_fpformat != _MC_FPFMT_XMM)
- return (EINVAL);
- else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE)
- /* We don't care what state is left in the FPU or PCB. */
- fpstate_drop(td);
- else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU ||
- mcp->mc_ownedfp == _MC_FPOWNED_PCB) {
- /* XXX align as above. */
- addr = (union savefpu *)&mcp->mc_fpstate;
- if (td == PCPU_GET(fpcurthread) &&
-#ifdef CPU_ENABLE_SSE
- cpu_fxsr &&
-#endif
- ((uintptr_t)(void *)addr & 0xF)) {
- do
- addr = (void *)((char *)addr + 4);
- while ((uintptr_t)(void *)addr & 0xF);
- bcopy(&mcp->mc_fpstate, addr, sizeof(mcp->mc_fpstate));
- }
-#ifdef DEV_NPX
- /*
- * XXX we violate the dubious requirement that npxsetregs()
- * be called with interrupts disabled.
- */
- npxsetregs(td, addr);
-#endif
- /*
- * Don't bother putting things back where they were in the
- * misaligned case, since we know that the caller won't use
- * them again.
- */
- } else
- return (EINVAL);
- return (0);
-}
-
-static void
-fpstate_drop(struct thread *td)
-{
- register_t s;
-
- s = intr_disable();
-#ifdef DEV_NPX
- if (PCPU_GET(fpcurthread) == td)
- npxdrop();
-#endif
- /*
- * XXX force a full drop of the npx. The above only drops it if we
- * owned it. npxgetregs() has the same bug in the !cpu_fxsr case.
- *
- * XXX I don't much like npxgetregs()'s semantics of doing a full
- * drop. Dropping only to the pcb matches fnsave's behaviour.
- * We only need to drop to !PCB_INITDONE in sendsig(). But
- * sendsig() is the only caller of npxgetregs()... perhaps we just
- * have too many layers.
- */
- curthread->td_pcb->pcb_flags &= ~PCB_NPXINITDONE;
- intr_restore(s);
-}
-
-int
-fill_dbregs(struct thread *td, struct dbreg *dbregs)
-{
- struct pcb *pcb;
-
- if (td == NULL) {
- dbregs->dr[0] = rdr0();
- dbregs->dr[1] = rdr1();
- dbregs->dr[2] = rdr2();
- dbregs->dr[3] = rdr3();
- dbregs->dr[4] = rdr4();
- dbregs->dr[5] = rdr5();
- dbregs->dr[6] = rdr6();
- dbregs->dr[7] = rdr7();
- } else {
- pcb = td->td_pcb;
- dbregs->dr[0] = pcb->pcb_dr0;
- dbregs->dr[1] = pcb->pcb_dr1;
- dbregs->dr[2] = pcb->pcb_dr2;
- dbregs->dr[3] = pcb->pcb_dr3;
- dbregs->dr[4] = 0;
- dbregs->dr[5] = 0;
- dbregs->dr[6] = pcb->pcb_dr6;
- dbregs->dr[7] = pcb->pcb_dr7;
- }
- return (0);
-}
-
-int
-set_dbregs(struct thread *td, struct dbreg *dbregs)
-{
- struct pcb *pcb;
- int i;
- u_int32_t mask1, mask2;
-
- if (td == NULL) {
- load_dr0(dbregs->dr[0]);
- load_dr1(dbregs->dr[1]);
- load_dr2(dbregs->dr[2]);
- load_dr3(dbregs->dr[3]);
- load_dr4(dbregs->dr[4]);
- load_dr5(dbregs->dr[5]);
- load_dr6(dbregs->dr[6]);
- load_dr7(dbregs->dr[7]);
- } else {
- /*
- * Don't let an illegal value for dr7 get set. Specifically,
- * check for undefined settings. Setting these bit patterns
- * result in undefined behaviour and can lead to an unexpected
- * TRCTRAP.
- */
- for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 8;
- i++, mask1 <<= 2, mask2 <<= 2)
- if ((dbregs->dr[7] & mask1) == mask2)
- return (EINVAL);
-
- pcb = td->td_pcb;
-
- /*
- * Don't let a process set a breakpoint that is not within the
- * process's address space. If a process could do this, it
- * could halt the system by setting a breakpoint in the kernel
- * (if ddb was enabled). Thus, we need to check to make sure
- * that no breakpoints are being enabled for addresses outside
- * process's address space, unless, perhaps, we were called by
- * uid 0.
- *
- * XXX - what about when the watched area of the user's
- * address space is written into from within the kernel
- * ... wouldn't that still cause a breakpoint to be generated
- * from within kernel mode?
- */
-
- if (suser(td) != 0) {
- if (dbregs->dr[7] & 0x3) {
- /* dr0 is enabled */
- if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS)
- return (EINVAL);
- }
-
- if (dbregs->dr[7] & (0x3<<2)) {
- /* dr1 is enabled */
- if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS)
- return (EINVAL);
- }
-
- if (dbregs->dr[7] & (0x3<<4)) {
- /* dr2 is enabled */
- if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS)
- return (EINVAL);
- }
-
- if (dbregs->dr[7] & (0x3<<6)) {
- /* dr3 is enabled */
- if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS)
- return (EINVAL);
- }
- }
-
- pcb->pcb_dr0 = dbregs->dr[0];
- pcb->pcb_dr1 = dbregs->dr[1];
- pcb->pcb_dr2 = dbregs->dr[2];
- pcb->pcb_dr3 = dbregs->dr[3];
- pcb->pcb_dr6 = dbregs->dr[6];
- pcb->pcb_dr7 = dbregs->dr[7];
-
- pcb->pcb_flags |= PCB_DBREGS;
- }
-
- return (0);
-}
-
-/*
- * Return > 0 if a hardware breakpoint has been hit, and the
- * breakpoint was in user space. Return 0, otherwise.
- */
-int
-user_dbreg_trap(void)
-{
- u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */
- u_int32_t bp; /* breakpoint bits extracted from dr6 */
- int nbp; /* number of breakpoints that triggered */
- caddr_t addr[4]; /* breakpoint addresses */
- int i;
-
- dr7 = rdr7();
- if ((dr7 & 0x000000ff) == 0) {
- /*
- * all GE and LE bits in the dr7 register are zero,
- * thus the trap couldn't have been caused by the
- * hardware debug registers
- */
- return 0;
- }
-
- nbp = 0;
- dr6 = rdr6();
- bp = dr6 & 0x0000000f;
-
- if (!bp) {
- /*
- * None of the breakpoint bits are set meaning this
- * trap was not caused by any of the debug registers
- */
- return 0;
- }
-
- /*
- * at least one of the breakpoints were hit, check to see
- * which ones and if any of them are user space addresses
- */
-
- if (bp & 0x01) {
- addr[nbp++] = (caddr_t)rdr0();
- }
- if (bp & 0x02) {
- addr[nbp++] = (caddr_t)rdr1();
- }
- if (bp & 0x04) {
- addr[nbp++] = (caddr_t)rdr2();
- }
- if (bp & 0x08) {
- addr[nbp++] = (caddr_t)rdr3();
- }
-
- for (i=0; i<nbp; i++) {
- if (addr[i] <
- (caddr_t)VM_MAXUSER_ADDRESS) {
- /*
- * addr[i] is in user space
- */
- return nbp;
- }
- }
-
- /*
- * None of the breakpoints are in user space.
- */
- return 0;
-}
-
-#ifndef DEV_APIC
-#include <machine/apicvar.h>
-
-/*
- * Provide stub functions so that the MADT APIC enumerator in the acpi
- * kernel module will link against a kernel without 'device apic'.
- *
- * XXX - This is a gross hack.
- */
-void
-apic_register_enumerator(struct apic_enumerator *enumerator)
-{
-}
-
-void *
-ioapic_create(uintptr_t addr, int32_t id, int intbase)
-{
- return (NULL);
-}
-
-int
-ioapic_disable_pin(void *cookie, u_int pin)
-{
- return (ENXIO);
-}
-
-int
-ioapic_get_vector(void *cookie, u_int pin)
-{
- return (-1);
-}
-
-void
-ioapic_register(void *cookie)
-{
-}
-
-int
-ioapic_remap_vector(void *cookie, u_int pin, int vector)
-{
- return (ENXIO);
-}
-
-int
-ioapic_set_extint(void *cookie, u_int pin)
-{
- return (ENXIO);
-}
-
-int
-ioapic_set_nmi(void *cookie, u_int pin)
-{
- return (ENXIO);
-}
-
-int
-ioapic_set_polarity(void *cookie, u_int pin,enum intr_polarity pol )
-{
- return (ENXIO);
-}
-
-int
-ioapic_set_triggermode(void *cookie, u_int pin, enum intr_trigger trigger )
-{
- return (ENXIO);
-}
-
-void
-lapic_create(u_int apic_id, int boot_cpu)
-{
-}
-
-void
-lapic_init(uintptr_t addr)
-{
-}
-
-int
-lapic_set_lvt_mode(u_int apic_id, u_int lvt, u_int32_t mode)
-{
- return (ENXIO);
-}
-
-int
-lapic_set_lvt_polarity(u_int apic_id, u_int lvt, enum intr_polarity pol)
-{
- return (ENXIO);
-}
-
-int
-lapic_set_lvt_triggermode(u_int apic_id, u_int lvt, enum intr_trigger trigger)
-{
- return (ENXIO);
-}
-#endif
-
-#ifdef KDB
-
-/*
- * Provide inb() and outb() as functions. They are normally only
- * available as macros calling inlined functions, thus cannot be
- * called from the debugger.
- *
- * The actual code is stolen from <machine/cpufunc.h>, and de-inlined.
- */
-
-#undef inb
-#undef outb
-
-/* silence compiler warnings */
-u_char inb(u_int);
-void outb(u_int, u_char);
-
-u_char
-inb(u_int port)
-{
- u_char data;
- /*
- * We use %%dx and not %1 here because i/o is done at %dx and not at
- * %edx, while gcc generates inferior code (movw instead of movl)
- * if we tell it to load (u_short) port.
- */
- __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
- return (data);
-}
-
-void
-outb(u_int port, u_char data)
-{
- u_char al;
- /*
- * Use an unnecessary assignment to help gcc's register allocator.
- * This make a large difference for gcc-1.40 and a tiny difference
- * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for
- * best results. gcc-2.6.0 can't handle this.
- */
- al = data;
- __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
-}
-
-#endif /* KDB */
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_clock.c
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_clock.c Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,150 +0,0 @@
-/*-
- * ----------------------------------------------------------------------------
- * "THE BEER-WARE LICENSE" (Revision 42):
- * <phk@xxxxxxxxxxx> wrote this file. As long as you retain this notice you
- * can do whatever you want with this stuff. If we meet some day, and you think
- * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
- * ----------------------------------------------------------------------------
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/mp_clock.c,v 1.19 2004/05/30 20:34:57
phk Exp $");
-
-/*-
- * Just when we thought life were beautiful, reality pops its grim face over
- * the edge again:
- *
- * ] 20. ACPI Timer Errata
- * ]
- * ] Problem: The power management timer may return improper result when
- * ] read. Although the timer value settles properly after incrementing,
- * ] while incrementing there is a 3nS window every 69.8nS where the
- * ] timer value is indeterminate (a 4.2% chance that the data will be
- * ] incorrect when read). As a result, the ACPI free running count up
- * ] timer specification is violated due to erroneous reads. Implication:
- * ] System hangs due to the "inaccuracy" of the timer when used by
- * ] software for time critical events and delays.
- * ]
- * ] Workaround: Read the register twice and compare.
- * ] Status: This will not be fixed in the PIIX4 or PIIX4E.
- *
- * The counter is in other words not latched to the PCI bus clock when
- * read. Notice the workaround isn't: We need to read until we have
- * three monotonic samples and then use the middle one, otherwise we are
- * not protected against the fact that the bits can be wrong in two
- * directions. If we only cared about monosity two reads would be enough.
- */
-
-/* #include "opt_bus.h" */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/timetc.h>
-#include <sys/kernel.h>
-#include <sys/module.h>
-#include <sys/sysctl.h>
-#include <sys/bus.h>
-
-#include <dev/pci/pcireg.h>
-#include <dev/pci/pcivar.h>
-
-static unsigned piix_get_timecount(struct timecounter *tc);
-
-static u_int32_t piix_timecounter_address;
-static u_int piix_freq = 14318182/4;
-
-static struct timecounter piix_timecounter = {
- piix_get_timecount, /* get_timecount */
- 0, /* no poll_pps */
- 0xffffff, /* counter_mask */
- 0, /* frequency */
- "PIIX" /* name */
-};
-
-
-static int
-sysctl_machdep_piix_freq(SYSCTL_HANDLER_ARGS)
-{
- int error;
- u_int freq;
-
- if (piix_timecounter.tc_frequency == 0)
- return (EOPNOTSUPP);
- freq = piix_freq;
- error = sysctl_handle_int(oidp, &freq, sizeof(freq), req);
- if (error == 0 && req->newptr != NULL) {
- piix_freq = freq;
- piix_timecounter.tc_frequency = piix_freq;
- }
- return (error);
-}
-
-SYSCTL_PROC(_machdep, OID_AUTO, piix_freq, CTLTYPE_INT | CTLFLAG_RW,
- 0, sizeof(u_int), sysctl_machdep_piix_freq, "I", "");
-
-static unsigned
-piix_get_timecount(struct timecounter *tc)
-{
- unsigned u1, u2, u3;
-
- u2 = inl(piix_timecounter_address);
- u3 = inl(piix_timecounter_address);
- do {
- u1 = u2;
- u2 = u3;
- u3 = inl(piix_timecounter_address);
- } while (u1 > u2 || u2 > u3);
- return (u2);
-}
-
-static int
-piix_probe(device_t dev)
-{
- u_int32_t d;
-
- if (devclass_get_device(devclass_find("acpi"), 0) != NULL)
- return (ENXIO);
- switch (pci_get_devid(dev)) {
- case 0x71138086:
- device_set_desc(dev, "PIIX Timecounter");
- break;
- default:
- return (ENXIO);
- }
-
- d = pci_read_config(dev, PCIR_COMMAND, 2);
- if (!(d & PCIM_CMD_PORTEN)) {
- device_printf(dev, "PIIX I/O space not mapped\n");
- return (ENXIO);
- }
- return (0);
-}
-
-static int
-piix_attach(device_t dev)
-{
- u_int32_t d;
-
- d = pci_read_config(dev, 0x40, 4);
- piix_timecounter_address = (d & 0xffc0) + 8;
- piix_timecounter.tc_frequency = piix_freq;
- tc_init(&piix_timecounter);
- return (0);
-}
-
-static device_method_t piix_methods[] = {
- /* Device interface */
- DEVMETHOD(device_probe, piix_probe),
- DEVMETHOD(device_attach, piix_attach),
- { 0, 0 }
-};
-
-static driver_t piix_driver = {
- "piix",
- piix_methods,
- 1,
-};
-
-static devclass_t piix_devclass;
-
-DRIVER_MODULE(piix, pci, piix_driver, piix_devclass, 0, 0);
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,1487 +0,0 @@
-/*-
- * Copyright (c) 1996, by Steve Passe
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. The name of the developer may NOT be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/mp_machdep.c,v 1.235.2.3 2004/09/24
15:02:33 rik Exp $");
-
-#include "opt_apic.h"
-#include "opt_cpu.h"
-#include "opt_kstack_pages.h"
-#include "opt_mp_watchdog.h"
-
-#if !defined(lint)
-#if !defined(SMP)
-#error How did you get here?
-#endif
-
-#if defined(I386_CPU) && !defined(COMPILING_LINT)
-#error SMP not supported with I386_CPU
-#endif
-#if 0
-#ifndef DEV_APIC
-#error The apic device is required for SMP, add "device apic" to your config
file.
-#endif
-#endif
-#if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT)
-#error SMP not supported with CPU_DISABLE_CMPXCHG
-#endif
-#endif /* not lint */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/bus.h>
-#include <sys/cons.h> /* cngetc() */
-#ifdef GPROF
-#include <sys/gmon.h>
-#endif
-#include <sys/kernel.h>
-#include <sys/ktr.h>
-#include <sys/lock.h>
-#include <sys/malloc.h>
-#include <sys/memrange.h>
-#include <sys/mutex.h>
-#include <sys/pcpu.h>
-#include <sys/proc.h>
-#include <sys/smp.h>
-#include <sys/sysctl.h>
-
-#include <vm/vm.h>
-#include <vm/vm_param.h>
-#include <vm/pmap.h>
-#include <vm/vm_kern.h>
-#include <vm/vm_extern.h>
-
-#include <machine/apicreg.h>
-#include <machine/clock.h>
-#include <machine/md_var.h>
-#include <machine/mp_watchdog.h>
-#include <machine/pcb.h>
-#include <machine/smp.h>
-#include <machine/smptests.h> /** COUNT_XINVLTLB_HITS */
-#include <machine/specialreg.h>
-#include <machine/privatespace.h>
-
-
-/* XEN includes */
-#include <machine/xenfunc.h>
-#include <machine/xen_intr.h>
-
-void Xhypervisor_callback(void);
-void failsafe_callback(void);
-
-/***************/
-
-
-#define WARMBOOT_TARGET 0
-#define WARMBOOT_OFF (KERNBASE + 0x0467)
-#define WARMBOOT_SEG (KERNBASE + 0x0469)
-
-#define CMOS_REG (0x70)
-#define CMOS_DATA (0x71)
-#define BIOS_RESET (0x0f)
-#define BIOS_WARM (0x0a)
-
-
-#undef POSTCODE
-#define POSTCODE(x)
-
-/*
- * this code MUST be enabled here and in mpboot.s.
- * it follows the very early stages of AP boot by placing values in CMOS ram.
- * it NORMALLY will never be needed and thus the primitive method for enabling.
- *
-#define CHECK_POINTS
- */
-
-#if defined(CHECK_POINTS) && !defined(PC98)
-#define CHECK_READ(A) (outb(CMOS_REG, (A)), inb(CMOS_DATA))
-#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))
-
-#define CHECK_INIT(D); \
- CHECK_WRITE(0x34, (D)); \
- CHECK_WRITE(0x35, (D)); \
- CHECK_WRITE(0x36, (D)); \
- CHECK_WRITE(0x37, (D)); \
- CHECK_WRITE(0x38, (D)); \
- CHECK_WRITE(0x39, (D));
-
-#define CHECK_PRINT(S); \
- printf("%s: %d, %d, %d, %d, %d, %d\n", \
- (S), \
- CHECK_READ(0x34), \
- CHECK_READ(0x35), \
- CHECK_READ(0x36), \
- CHECK_READ(0x37), \
- CHECK_READ(0x38), \
- CHECK_READ(0x39));
-
-#else /* CHECK_POINTS */
-
-#define CHECK_INIT(D)
-#define CHECK_PRINT(S)
-#define CHECK_WRITE(A, D)
-
-#endif /* CHECK_POINTS */
-
-/*
- * Values to send to the POST hardware.
- */
-#define MP_BOOTADDRESS_POST 0x10
-#define MP_PROBE_POST 0x11
-#define MPTABLE_PASS1_POST 0x12
-
-#define MP_START_POST 0x13
-#define MP_ENABLE_POST 0x14
-#define MPTABLE_PASS2_POST 0x15
-
-#define START_ALL_APS_POST 0x16
-#define INSTALL_AP_TRAMP_POST 0x17
-#define START_AP_POST 0x18
-
-#define MP_ANNOUNCE_POST 0x19
-
-/* lock region used by kernel profiling */
-int mcount_lock;
-
-/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
-int current_postcode;
-
-int mp_naps; /* # of Applications processors */
-int boot_cpu_id = -1; /* designated BSP */
-extern int nkpt;
-
-/*
- * CPU topology map datastructures for HTT.
- */
-static struct cpu_group mp_groups[MAXCPU];
-static struct cpu_top mp_top;
-
-/* AP uses this during bootstrap. Do not staticize. */
-char *bootSTK;
-static int bootAP;
-
-/* Hotwire a 0->4MB V==P mapping */
-extern pt_entry_t *KPTphys;
-
-/* SMP page table page */
-extern pt_entry_t *SMPpt;
-
-extern trap_info_t trap_table[];
-
-struct pcb stoppcbs[MAXCPU];
-
-/* Variables needed for SMP tlb shootdown. */
-vm_offset_t smp_tlb_addr1;
-vm_offset_t smp_tlb_addr2;
-volatile int smp_tlb_wait;
-
-/*
- * Local data and functions.
- */
-
-static u_int logical_cpus;
-
-/* used to hold the AP's until we are ready to release them */
-static struct mtx ap_boot_mtx;
-
-/* Set to 1 once we're ready to let the APs out of the pen. */
-static volatile int aps_ready = 0;
-
-/*
- * Store data from cpu_add() until later in the boot when we actually setup
- * the APs.
- */
-struct cpu_info {
- int cpu_present:1;
- int cpu_bsp:1;
-} static cpu_info[MAXCPU];
-static int cpu_apic_ids[MAXCPU];
-
-static u_int boot_address;
-
-static void set_logical_apic_ids(void);
-static int start_all_aps(void);
-#if 0
-static void install_ap_tramp(void);
-#endif
-static int start_ap(int apic_id);
-static void release_aps(void *dummy);
-
-static int hlt_logical_cpus;
-static struct sysctl_ctx_list logical_cpu_clist;
-
-static void
-mem_range_AP_init(void)
-{
- if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP)
- mem_range_softc.mr_op->initAP(&mem_range_softc);
-}
-
-void
-mp_topology(void)
-{
- struct cpu_group *group;
- int logical_cpus;
- int apic_id;
- int groups;
- int cpu;
-
- /* Build the smp_topology map. */
- /* Nothing to do if there is no HTT support. */
- if ((cpu_feature & CPUID_HTT) == 0)
- return;
- logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
- if (logical_cpus <= 1)
- return;
- group = &mp_groups[0];
- groups = 1;
- for (cpu = 0, apic_id = 0; apic_id < MAXCPU; apic_id++) {
- if (!cpu_info[apic_id].cpu_present)
- continue;
- /*
- * If the current group has members and we're not a logical
- * cpu, create a new group.
- */
- if (group->cg_count != 0 && (apic_id % logical_cpus) == 0) {
- group++;
- groups++;
- }
- group->cg_count++;
- group->cg_mask |= 1 << cpu;
- cpu++;
- }
-
- mp_top.ct_count = groups;
- mp_top.ct_group = mp_groups;
- smp_topology = &mp_top;
-}
-
-
-/*
- * Calculate usable address in base memory for AP trampoline code.
- */
-u_int
-mp_bootaddress(u_int basemem)
-{
- POSTCODE(MP_BOOTADDRESS_POST);
-
- boot_address = trunc_page(basemem); /* round down to 4k boundary */
- if ((basemem - boot_address) < bootMP_size)
- boot_address -= PAGE_SIZE; /* not enough, lower by 4k */
-
- return boot_address;
-}
-
-void
-cpu_add(u_int apic_id, char boot_cpu)
-{
-
- if (apic_id >= MAXCPU) {
- printf("SMP: CPU %d exceeds maximum CPU %d, ignoring\n",
- apic_id, MAXCPU - 1);
- return;
- }
- KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice",
- apic_id));
- cpu_info[apic_id].cpu_present = 1;
- if (boot_cpu) {
- KASSERT(boot_cpu_id == -1,
- ("CPU %d claims to be BSP, but CPU %d already is", apic_id,
- boot_cpu_id));
- boot_cpu_id = apic_id;
- cpu_info[apic_id].cpu_bsp = 1;
- }
- mp_ncpus++;
- if (bootverbose)
- printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" :
- "AP");
-
-}
-
-void
-cpu_mp_setmaxid(void)
-{
-
- mp_maxid = MAXCPU - 1;
-}
-
-int
-cpu_mp_probe(void)
-{
-
- mp_ncpus = HYPERVISOR_shared_info->n_vcpu;
- /*
- * Always record BSP in CPU map so that the mbuf init code works
- * correctly.
- */
- all_cpus = 1;
- if (mp_ncpus == 0) {
- /*
- * No CPUs were found, so this must be a UP system. Setup
- * the variables to represent a system with a single CPU
- * with an id of 0.
- */
- mp_ncpus = 1;
- return (0);
- }
-
- /* At least one CPU was found. */
- if (mp_ncpus == 1) {
- /*
- * One CPU was found, so this must be a UP system with
- * an I/O APIC.
- */
- return (0);
- }
-
- /* At least two CPUs were found. */
- return (1);
-}
-
-static void
-cpu_mp_ipi_init(void)
-{
- int irq;
- int cpu = smp_processor_id();
- /*
- * these are not needed by XenFreeBSD - from Keir:
- * For TLB-flush related IPIs, Xen has hypercalls
- * you should use instead. You can pass a pointer
- * to a vcpu bitmap to update_va_mapping(), and to
- * MMUEXT_flush_tlb_multi and MMEXT_invlpg_multi.
- * Xen will then make sure that those vcpus get
- * flushed appropriately before returning to the
- * caller.
- * There is also no indication that we need to forward
- * clock interrupts.
- */
-#if 0
- /* Install an inter-CPU IPI for TLB invalidation */
- setidt(IPI_INVLTLB, IDTVEC(invltlb),
- SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
- setidt(IPI_INVLPG, IDTVEC(invlpg),
- SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
- setidt(IPI_INVLRNG, IDTVEC(invlrng),
- SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
-
- /* Install an inter-CPU IPI for forwarding hardclock() */
- setidt(IPI_HARDCLOCK, IDTVEC(hardclock),
- SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
-
- /* Install an inter-CPU IPI for forwarding statclock() */
- setidt(IPI_STATCLOCK, IDTVEC(statclock),
- SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
-#endif
-
- /*
- * These can all be consolidated. For now leaving
- * as individual IPIs.
- *
- */
-#if 0
- /* Install an inter-CPU IPI for lazy pmap release */
- setidt(IPI_LAZYPMAP, IDTVEC(lazypmap),
- SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
-#else
- irq = bind_ipi_on_cpu_to_irq(cpu, IPI_LAZYPMAP);
- PCPU_SET(lazypmap, irq);
- PANIC_IF(intr_add_handler("pmap_lazyfix", irq,
- (driver_intr_t *)pmap_lazyfix_action,
- NULL, INTR_TYPE_CLK | INTR_FAST, NULL));
-#endif
-
-#if 0
- /* Install an inter-CPU IPI for all-CPU rendezvous */
- setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous),
- SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
-#else
- irq = bind_ipi_on_cpu_to_irq(cpu, IPI_RENDEZVOUS);
- PCPU_SET(rendezvous, irq);
- PANIC_IF(intr_add_handler("smp_rendezvous", irq,
- (driver_intr_t *)smp_rendezvous_action,
- NULL, INTR_TYPE_CLK | INTR_FAST, NULL));
-#endif
-
-#if 0
- /* Install an inter-CPU IPI for forcing an additional software trap */
- setidt(IPI_AST, IDTVEC(cpuast),
- SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
-#else
- irq = bind_ipi_on_cpu_to_irq(cpu, IPI_AST);
- PCPU_SET(cpuast, irq);
-#endif
- /* XXX ignore for now */
-#if 0
- /* Install an inter-CPU IPI for CPU stop/restart */
- setidt(IPI_STOP, IDTVEC(cpustop),
- SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
-#endif
-
-}
-
-SYSINIT(ipi_setup, SI_SUB_INTR, SI_ORDER_ANY, cpu_mp_ipi_init, NULL);
-
-/*
- * Initialize the IPI handlers and start up the AP's.
- */
-void
-cpu_mp_start(void) /* --- Start here --- */
-{
- int i;
-
- POSTCODE(MP_START_POST);
-
- /* Initialize the logical ID to APIC ID table. */
- for (i = 0; i < MAXCPU; i++)
- cpu_apic_ids[i] = -1;
-
-
- /* Set boot_cpu_id if needed. */
- if (boot_cpu_id == -1) {
- boot_cpu_id = PCPU_GET(apic_id);
- cpu_info[boot_cpu_id].cpu_bsp = 1;
- } else
- KASSERT(boot_cpu_id == PCPU_GET(apic_id),
- ("BSP's APIC ID doesn't match boot_cpu_id"));
- cpu_apic_ids[0] = boot_cpu_id;
-
- /* Start each Application Processor */
- start_all_aps();
-
- /* Setup the initial logical CPUs info. */
- logical_cpus = logical_cpus_mask = 0;
- if (cpu_feature & CPUID_HTT)
- logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
-
- set_logical_apic_ids();
-}
-
-
-/*
- * Print various information about the SMP system hardware and setup.
- */
-void
-cpu_mp_announce(void)
-{
- int i, x;
-
- POSTCODE(MP_ANNOUNCE_POST);
-
- /* List CPUs */
- printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id);
- for (i = 1, x = 0; x < MAXCPU; x++) {
- if (cpu_info[x].cpu_present && !cpu_info[x].cpu_bsp) {
- KASSERT(i < mp_ncpus,
- ("mp_ncpus and actual cpus are out of whack"));
- printf(" cpu%d (AP): APIC ID: %2d\n", i++, x);
- }
- }
-}
-
-/*
- * AP CPU's call this to initialize themselves.
- */
-void
-init_secondary(void)
-{
- int myid;
- unsigned long gdtmachpfn;
- printk("MADE IT!!");
-
-#if 0
- u_int cr0;
-#endif
- /* Steps to booting SMP on xen as gleaned from XenLinux:
- * - cpu_init() - processor specific initialization
- * - smp_callin()
- * - wait 2s for BP to finish its startup sequence
- * - map_cpu_to_logical_apicid()
- * - save cpuid info
- * - set bit in callin map to let master (BP?) continue
- * - local setup timer() - per cpu timer initialization
- * - ldebug_setup() - bind debug IRQ to local CPU.
- * - smp_intr_init() - IPI setup that we do in cpu_mp_start
- * - local_irq_enable() - enable interrupts locally
- * - cpu_set(id, map) - announce that we're up
- * - cpu_idle() - make us schedulable
- */
-
-
- /* bootAP is set in start_ap() to our ID. */
- myid = bootAP;
-
- gdtmachpfn = vtomach(gdt) >> PAGE_SHIFT;
- PANIC_IF(HYPERVISOR_set_gdt(&gdtmachpfn, 512) != 0);
-
-
- lgdt_finish();
-
- PCPU_SET(cpuid, myid);
-
-
- set_user_ldt((struct mdproc *)_default_ldt);
- PCPU_SET(currentldt, _default_ldt);
-
- PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
- PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
- PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
- PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd);
- PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
-#if 0
- ltr(gsel_tss);
-
- /*
- * Set to a known state:
- * Set by mpboot.s: CR0_PG, CR0_PE
- * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
- */
- cr0 = rcr0();
- cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);
- load_cr0(cr0);
-#endif
- CHECK_WRITE(0x38, 5);
-
- /* Disable local APIC just to be sure. */
- lapic_disable();
-
- /* signal our startup to the BSP. */
- mp_naps++;
- CHECK_WRITE(0x39, 6);
-
- /* Spin until the BSP releases the AP's. */
- while (!aps_ready)
- ia32_pause();
-
- /* BSP may have changed PTD while we were waiting */
- invltlb();
- pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1);
-
-#if defined(I586_CPU) && !defined(NO_F00F_HACK)
- lidt(&r_idt);
-#endif
-
- /* set up CPU registers and state */
- cpu_setregs();
-
- /* set up FPU state on the AP */
- npxinit(__INITIAL_NPXCW__);
-
- /* set up SSE registers */
- enable_sse();
-
- /* A quick check from sanity claus */
- if (PCPU_GET(apic_id) != lapic_id()) {
- printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
- printf("SMP: actual apic_id = %d\n", lapic_id());
- printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id));
- printf("PTD[MPPTDI] = %#jx\n", (uintmax_t)PTD[MPPTDI]);
- panic("cpuid mismatch! boom!!");
- }
-
- mtx_lock_spin(&ap_boot_mtx);
-
- /* Init local apic for irq's */
- lapic_setup();
-
- /* Set memory range attributes for this CPU to match the BSP */
- mem_range_AP_init();
-
- smp_cpus++;
-
- CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid));
- printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
-
- /* Determine if we are a logical CPU. */
- if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0)
- logical_cpus_mask |= PCPU_GET(cpumask);
-
- /* Build our map of 'other' CPUs. */
- PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
-
- if (bootverbose)
- lapic_dump("AP");
-
- if (smp_cpus == mp_ncpus) {
- /* enable IPI's, tlb shootdown, freezes etc */
- atomic_store_rel_int(&smp_started, 1);
- smp_active = 1; /* historic */
- }
-
- mtx_unlock_spin(&ap_boot_mtx);
-
- /* wait until all the AP's are up */
- while (smp_started == 0)
- ia32_pause();
-
- /* need to wait until now to setup the IPIs as SI_SUB_CPU is
- * much earlier than SI_SUB_INTR
- */
- ap_evtchn_init(myid);
- ap_cpu_initclocks();
- cpu_mp_ipi_init();
-
- /* ok, now grab sched_lock and enter the scheduler */
- mtx_lock_spin(&sched_lock);
-
- binuptime(PCPU_PTR(switchtime));
- PCPU_SET(switchticks, ticks);
-
- cpu_throw(NULL, choosethread()); /* doesn't return */
-
- panic("scheduler returned us to %s", __func__);
- /* NOTREACHED */
-}
-
-/*******************************************************************
- * local functions and data
- */
-
-/*
- * Set the APIC logical IDs.
- *
- * We want to cluster logical CPU's within the same APIC ID cluster.
- * Since logical CPU's are aligned simply filling in the clusters in
- * APIC ID order works fine. Note that this does not try to balance
- * the number of CPU's in each cluster. (XXX?)
- */
-static void
-set_logical_apic_ids(void)
-{
- u_int apic_id, cluster, cluster_id;
-
- /* Force us to allocate cluster 0 at the start. */
- cluster = -1;
- cluster_id = APIC_MAX_INTRACLUSTER_ID;
- for (apic_id = 0; apic_id < MAXCPU; apic_id++) {
- if (!cpu_info[apic_id].cpu_present)
- continue;
- if (cluster_id == APIC_MAX_INTRACLUSTER_ID) {
- cluster = ioapic_next_logical_cluster();
- cluster_id = 0;
- } else
- cluster_id++;
- if (bootverbose)
- printf("APIC ID: physical %u, logical %u:%u\n",
- apic_id, cluster, cluster_id);
- lapic_set_logical_id(apic_id, cluster, cluster_id);
- }
-}
-
-/*
- * start each AP in our list
- */
-static int
-start_all_aps(void)
-{
- struct pcpu *pc;
- char *stack;
- int i, apic_id, cpu;
-
- /*
- * This function corresponds most closely to
- * smp_boot_cpus in XenLinux - the sequence there
- * is:
- * - check if SMP config is found - if not:
- * - clear the I/O APIC IRQs
- * - map cpu to logical apicid
- * - exit
- * - smp_intr_init - IPI initialization
- * - map cpu to logical apicid
- * - boot each of the vcpus
- * - clear and then construct the cpu sibling [logical CPUs] map.
- *
- */
-
- POSTCODE(START_ALL_APS_POST);
-
- mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
-#if 0
- /* install the AP 1st level boot code */
- install_ap_tramp();
-
- /* save the current value of the warm-start vector */
- mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
-
-
- /* set up temporary P==V mapping for AP boot */
- /* XXX this is a hack, we should boot the AP on its own stack/PTD */
- kptbase = (uintptr_t)(void *)KPTphys;
- for (i = 0; i < NKPT; i++)
- PTD[i] = (pd_entry_t)(PG_V | PG_RW |
- ((kptbase + i * PAGE_SIZE) & PG_FRAME));
- invltlb();
-#endif
- /* start each AP */
- for (cpu = 0, apic_id = 0; apic_id < MAXCPU; apic_id++) {
- if (!cpu_info[apic_id].cpu_present ||
- cpu_info[apic_id].cpu_bsp)
- continue;
- cpu++;
-
- /* save APIC ID for this logical ID */
- cpu_apic_ids[cpu] = apic_id;
-#if 0
- /* first page of AP's private space */
- pg = cpu * i386_btop(sizeof(struct privatespace));
-
- /* allocate a new private data page */
- pc = (struct pcpu *)kmem_alloc(kernel_map, PAGE_SIZE);
-
- /* wire it into the private page table page */
- SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(pc));
-
- /* allocate and set up an idle stack data page */
- stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES *
PAGE_SIZE); /* XXXKSE */
- for (i = 0; i < KSTACK_PAGES; i++)
- SMPpt[pg + 1 + i] = (pt_entry_t)
- (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
-#endif
- pc = &SMP_prvspace[cpu].pcpu;
-
- /* prime data page for it to use */
- pcpu_init(pc, cpu, sizeof(struct pcpu));
- pc->pc_apic_id = apic_id;
-
-#if 0
- /* setup a vector to our boot code */
- *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
- *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
-#ifndef PC98
- outb(CMOS_REG, BIOS_RESET);
- outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */
-#endif
-#endif
- bootSTK = &SMP_prvspace[cpu].idlekstack[KSTACK_PAGES *
- PAGE_SIZE];
- bootAP = cpu;
-
- /* attempt to start the Application Processor */
- CHECK_INIT(99); /* setup checkpoints */
- if (!start_ap(apic_id)) {
- printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id);
- CHECK_PRINT("trace"); /* show checkpoints */
- /* better panic as the AP may be running loose */
- printf("panic y/n? [y] ");
- if (cngetc() != 'n')
- panic("bye-bye");
- }
- CHECK_PRINT("trace"); /* show checkpoints */
-
- all_cpus |= (1 << cpu); /* record AP in CPU map */
- }
-
- /* build our map of 'other' CPUs */
- PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
-
-#if 0
- /* restore the warmstart vector */
- *(u_long *) WARMBOOT_OFF = mpbioswarmvec;
-#endif
- /*
- * Set up the idle context for the BSP. Similar to above except
- * that some was done by locore, some by pmap.c and some is implicit
- * because the BSP is cpu#0 and the page is initially zero and also
- * because we can refer to variables by name on the BSP..
- */
-
- /* Allocate and setup BSP idle stack */
- stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE);
- for (i = 0; i < KSTACK_PAGES; i++)
- SMPpt[1 + i] = (pt_entry_t)
- (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
-
- for (i = 0; i < NKPT; i++)
- PTD[i] = 0;
- pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1);
-
- /* number of APs actually started */
- return mp_naps;
-}
-
-/*
- * load the 1st level AP boot code into base memory.
- */
-
-/* targets for relocation */
-extern void bigJump(void);
-extern void bootCodeSeg(void);
-extern void bootDataSeg(void);
-extern void MPentry(void);
-extern u_int MP_GDT;
-extern u_int mp_gdtbase;
-#if 0
-static void
-install_ap_tramp(void)
-{
- int x;
- int size = *(int *) ((u_long) & bootMP_size);
- vm_offset_t va = boot_address + KERNBASE;
- u_char *src = (u_char *) ((u_long) bootMP);
- u_char *dst = (u_char *) va;
- u_int boot_base = (u_int) bootMP;
- u_int8_t *dst8;
- u_int16_t *dst16;
- u_int32_t *dst32;
-
- POSTCODE(INSTALL_AP_TRAMP_POST);
-
- KASSERT (size <= PAGE_SIZE,
- ("'size' do not fit into PAGE_SIZE, as expected."));
- pmap_kenter(va, boot_address);
- pmap_invalidate_page (kernel_pmap, va);
- for (x = 0; x < size; ++x)
- *dst++ = *src++;
-
- /*
- * modify addresses in code we just moved to basemem. unfortunately we
- * need fairly detailed info about mpboot.s for this to work. changes
- * to mpboot.s might require changes here.
- */
-
- /* boot code is located in KERNEL space */
- dst = (u_char *) va;
-
- /* modify the lgdt arg */
- dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
- *dst32 = boot_address + ((u_int) & MP_GDT - boot_base);
-
- /* modify the ljmp target for MPentry() */
- dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
- *dst32 = ((u_int) MPentry - KERNBASE);
-
- /* modify the target for boot code segment */
- dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
- dst8 = (u_int8_t *) (dst16 + 1);
- *dst16 = (u_int) boot_address & 0xffff;
- *dst8 = ((u_int) boot_address >> 16) & 0xff;
-
- /* modify the target for boot data segment */
- dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
- dst8 = (u_int8_t *) (dst16 + 1);
- *dst16 = (u_int) boot_address & 0xffff;
- *dst8 = ((u_int) boot_address >> 16) & 0xff;
-}
-#endif
-
-static void
-cpu_mp_trap_init(trap_info_t *trap_ctxt)
-{
- trap_info_t *t = trap_table;
-
- for (t = trap_table; t->address; t++) {
- trap_ctxt[t->vector].flags = t->flags;
- trap_ctxt[t->vector].cs = t->cs;
- trap_ctxt[t->vector].address = t->address;
- }
-}
-
-/*
- * This function starts the AP (application processor) identified
- * by the APIC ID 'physicalCpu'. It does quite a "song and dance"
- * to accomplish this. This is necessary because of the nuances
- * of the different hardware we might encounter. It isn't pretty,
- * but it seems to work.
- */
-static int
-start_ap(int apic_id)
-{
- int vector, ms, i;
- int cpus, boot_error;
- vcpu_guest_context_t ctxt;
-
- /*
- * This is the FreeBSD equivalent to do_boot_cpu(apicid) in
- * smpboot.c.
- * its initialization sequence consists of:
- * - fork_idle(cpu) to create separate idle context
- * - initialization of idle's context to start_secondary
- * - initialization of cpu ctxt to start in startup_32_smp
- * - then we call HYPERVISOR_boot_vcpu with the cpu index and
- * a pointer to the context.
- * - on boot success we:
- * - set ourselves in the callout_map
- * - wait up to 5 seconds for us to be set in the callin map
- * - set x86_cpu_to_apicid[cpu] = apicid;
- *
- */
-
- POSTCODE(START_AP_POST);
-
- /* calculate the vector */
- vector = (boot_address >> 12) & 0xff;
-
- /* used as a watchpoint to signal AP startup */
- cpus = mp_naps;
-
- memset(&ctxt, 0, sizeof(ctxt));
-
- ctxt.user_regs.ds = GSEL(GDATA_SEL, SEL_KPL);
- ctxt.user_regs.es = GSEL(GDATA_SEL, SEL_KPL);
- ctxt.user_regs.fs = 0;
- ctxt.user_regs.gs = 0;
- ctxt.user_regs.ss = __KERNEL_DS;
- ctxt.user_regs.cs = __KERNEL_CS;
- ctxt.user_regs.eip = (unsigned long)init_secondary;
- ctxt.user_regs.esp = (unsigned long)bootSTK;
-#ifdef notyet
- ctxt.user_regs.eflags = (1<<9) | (1<<2) | (idle->thread.io_pl<<12);
-#else
- ctxt.user_regs.eflags = (1<<9) | (1<<2);
-#endif
- /* FPU is set up to default initial state. */
- memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
-
- /* Virtual IDT is empty at start-of-day. */
- for ( i = 0; i < 256; i++ )
- {
- ctxt.trap_ctxt[i].vector = i;
- ctxt.trap_ctxt[i].cs = FLAT_KERNEL_CS;
- }
- cpu_mp_trap_init(ctxt.trap_ctxt);
-
- /* No LDT. */
- ctxt.ldt_ents = 0;
-
- /* Ring 1 stack is the initial stack. */
- ctxt.kernel_ss = __KERNEL_DS;
- ctxt.kernel_sp = (unsigned long)bootSTK;
-
- /* Callback handlers. */
- ctxt.event_callback_cs = __KERNEL_CS;
- ctxt.event_callback_eip = (unsigned long)Xhypervisor_callback;
- ctxt.failsafe_callback_cs = __KERNEL_CS;
- ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
-
- ctxt.ctrlreg[3] = (vm_paddr_t)IdlePTD;
-
- boot_error = HYPERVISOR_boot_vcpu(bootAP, &ctxt);
-
-
- if (boot_error)
- printk("Houston we have a problem\n");
- else
- printk("boot_vcpu succeeded\n");
-#if 0
- /*
- * first we do an INIT/RESET IPI this INIT IPI might be run, reseting
- * and running the target CPU. OR this INIT IPI might be latched (P5
- * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
- * ignored.
- */
-
- /* do an INIT IPI: assert RESET */
- lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
- APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);
-
- /* wait for pending status end */
- lapic_ipi_wait(-1);
-
- /* do an INIT IPI: deassert RESET */
- lapic_ipi_raw(APIC_DEST_ALLESELF | APIC_TRIGMOD_LEVEL |
- APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, 0);
-
- /* wait for pending status end */
- DELAY(10000); /* wait ~10mS */
- lapic_ipi_wait(-1);
-
- /*
- * next we do a STARTUP IPI: the previous INIT IPI might still be
- * latched, (P5 bug) this 1st STARTUP would then terminate
- * immediately, and the previously started INIT IPI would continue. OR
- * the previous INIT IPI has already run. and this STARTUP IPI will
- * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
- * will run.
- */
-
- /* do a STARTUP IPI */
- lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
- APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
- vector, apic_id);
- lapic_ipi_wait(-1);
- DELAY(200); /* wait ~200uS */
-
- /*
- * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
- * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
- * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
- * recognized after hardware RESET or INIT IPI.
- */
-
- lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
- APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
- vector, apic_id);
- lapic_ipi_wait(-1);
-#endif
- DELAY(200); /* wait ~200uS */
-
- /* Wait up to 5 seconds for it to start. */
- for (ms = 0; ms < 5000; ms++) {
- if (mp_naps > cpus)
- return 1; /* return SUCCESS */
- DELAY(1000);
- }
- return 0; /* return FAILURE */
-}
-
-#ifdef COUNT_XINVLTLB_HITS
-u_int xhits_gbl[MAXCPU];
-u_int xhits_pg[MAXCPU];
-u_int xhits_rng[MAXCPU];
-SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
-SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
- sizeof(xhits_gbl), "IU", "");
-SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
- sizeof(xhits_pg), "IU", "");
-SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
- sizeof(xhits_rng), "IU", "");
-
-u_int ipi_global;
-u_int ipi_page;
-u_int ipi_range;
-u_int ipi_range_size;
-SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
-SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
-SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
-SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
- 0, "");
-
-u_int ipi_masked_global;
-u_int ipi_masked_page;
-u_int ipi_masked_range;
-u_int ipi_masked_range_size;
-SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
- &ipi_masked_global, 0, "");
-SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
- &ipi_masked_page, 0, "");
-SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
- &ipi_masked_range, 0, "");
-SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
- &ipi_masked_range_size, 0, "");
-#endif /* COUNT_XINVLTLB_HITS */
-
-/*
- * Flush the TLB on all other CPU's
- */
-static void
-smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
-{
- u_int ncpu;
-
- ncpu = mp_ncpus - 1; /* does not shootdown self */
- if (ncpu < 1)
- return; /* no other cpus */
- mtx_assert(&smp_rv_mtx, MA_OWNED);
- smp_tlb_addr1 = addr1;
- smp_tlb_addr2 = addr2;
- atomic_store_rel_int(&smp_tlb_wait, 0);
- ipi_all_but_self(vector);
- while (smp_tlb_wait < ncpu)
- ia32_pause();
-}
-
-/*
- * This is about as magic as it gets. fortune(1) has got similar code
- * for reversing bits in a word. Who thinks up this stuff??
- *
- * Yes, it does appear to be consistently faster than:
- * while (i = ffs(m)) {
- * m >>= i;
- * bits++;
- * }
- * and
- * while (lsb = (m & -m)) { // This is magic too
- * m &= ~lsb; // or: m ^= lsb
- * bits++;
- * }
- * Both of these latter forms do some very strange things on gcc-3.1 with
- * -mcpu=pentiumpro and/or -march=pentiumpro and/or -O or -O2.
- * There is probably an SSE or MMX popcnt instruction.
- *
- * I wonder if this should be in libkern?
- *
- * XXX Stop the presses! Another one:
- * static __inline u_int32_t
- * popcnt1(u_int32_t v)
- * {
- * v -= ((v >> 1) & 0x55555555);
- * v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
- * v = (v + (v >> 4)) & 0x0F0F0F0F;
- * return (v * 0x01010101) >> 24;
- * }
- * The downside is that it has a multiply. With a pentium3 with
- * -mcpu=pentiumpro and -march=pentiumpro then gcc-3.1 will use
- * an imull, and in that case it is faster. In most other cases
- * it appears slightly slower.
- *
- * Another variant (also from fortune):
- * #define BITCOUNT(x) (((BX_(x)+(BX_(x)>>4)) & 0x0F0F0F0F) % 255)
- * #define BX_(x) ((x) - (((x)>>1)&0x77777777) \
- * - (((x)>>2)&0x33333333) \
- * - (((x)>>3)&0x11111111))
- */
-static __inline u_int32_t
-popcnt(u_int32_t m)
-{
-
- m = (m & 0x55555555) + ((m & 0xaaaaaaaa) >> 1);
- m = (m & 0x33333333) + ((m & 0xcccccccc) >> 2);
- m = (m & 0x0f0f0f0f) + ((m & 0xf0f0f0f0) >> 4);
- m = (m & 0x00ff00ff) + ((m & 0xff00ff00) >> 8);
- m = (m & 0x0000ffff) + ((m & 0xffff0000) >> 16);
- return m;
-}
-
-static void
-smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1,
vm_offset_t addr2)
-{
- int ncpu, othercpus;
-
- othercpus = mp_ncpus - 1;
- if (mask == (u_int)-1) {
- ncpu = othercpus;
- if (ncpu < 1)
- return;
- } else {
- mask &= ~PCPU_GET(cpumask);
- if (mask == 0)
- return;
- ncpu = popcnt(mask);
- if (ncpu > othercpus) {
- /* XXX this should be a panic offence */
- printf("SMP: tlb shootdown to %d other cpus (only have
%d)\n",
- ncpu, othercpus);
- ncpu = othercpus;
- }
- /* XXX should be a panic, implied by mask == 0 above */
- if (ncpu < 1)
- return;
- }
- mtx_assert(&smp_rv_mtx, MA_OWNED);
- smp_tlb_addr1 = addr1;
- smp_tlb_addr2 = addr2;
- atomic_store_rel_int(&smp_tlb_wait, 0);
- if (mask == (u_int)-1)
- ipi_all_but_self(vector);
- else
- ipi_selected(mask, vector);
- while (smp_tlb_wait < ncpu)
- ia32_pause();
-}
-
-void
-smp_invltlb(void)
-{
- if (smp_started) {
- smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
-#ifdef COUNT_XINVLTLB_HITS
- ipi_global++;
-#endif
- }
-}
-
-void
-smp_invlpg(vm_offset_t addr)
-{
- if (smp_started) {
- smp_tlb_shootdown(IPI_INVLPG, addr, 0);
-#ifdef COUNT_XINVLTLB_HITS
- ipi_page++;
-#endif
- }
-}
-
-void
-smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
-{
- if (smp_started) {
- smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
-#ifdef COUNT_XINVLTLB_HITS
- ipi_range++;
- ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
-#endif
- }
-}
-
-void
-smp_masked_invltlb(u_int mask)
-{
- if (smp_started) {
- smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
-#ifdef COUNT_XINVLTLB_HITS
- ipi_masked_global++;
-#endif
- }
-}
-
-void
-smp_masked_invlpg(u_int mask, vm_offset_t addr)
-{
- if (smp_started) {
- smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
-#ifdef COUNT_XINVLTLB_HITS
- ipi_masked_page++;
-#endif
- }
-}
-
-void
-smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2)
-{
- if (smp_started) {
- smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
-#ifdef COUNT_XINVLTLB_HITS
- ipi_masked_range++;
- ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
-#endif
- }
-}
-
-
-/*
- * For statclock, we send an IPI to all CPU's to have them call this
- * function.
- */
-void
-forwarded_statclock(struct clockframe frame)
-{
- struct thread *td;
-
- CTR0(KTR_SMP, "forwarded_statclock");
- td = curthread;
- td->td_intr_nesting_level++;
- if (profprocs != 0)
- profclock(&frame);
- if (pscnt == psdiv)
- statclock(&frame);
- td->td_intr_nesting_level--;
-}
-
-void
-forward_statclock(void)
-{
- int map;
-
- CTR0(KTR_SMP, "forward_statclock");
-
- if (!smp_started || cold || panicstr)
- return;
-
- map = PCPU_GET(other_cpus) & ~(stopped_cpus|hlt_cpus_mask);
- if (map != 0)
- ipi_selected(map, IPI_STATCLOCK);
-}
-
-/*
- * For each hardclock(), we send an IPI to all other CPU's to have them
- * execute this function. It would be nice to reduce contention on
- * sched_lock if we could simply peek at the CPU to determine the user/kernel
- * state and call hardclock_process() on the CPU receiving the clock interrupt
- * and then just use a simple IPI to handle any ast's if needed.
- */
-void
-forwarded_hardclock(struct clockframe frame)
-{
- struct thread *td;
-
- CTR0(KTR_SMP, "forwarded_hardclock");
- td = curthread;
- td->td_intr_nesting_level++;
- hardclock_process(&frame);
- td->td_intr_nesting_level--;
-}
-
-void
-forward_hardclock(void)
-{
- u_int map;
-
- CTR0(KTR_SMP, "forward_hardclock");
-
- if (!smp_started || cold || panicstr)
- return;
-
- map = PCPU_GET(other_cpus) & ~(stopped_cpus|hlt_cpus_mask);
- if (map != 0)
- ipi_selected(map, IPI_HARDCLOCK);
-}
-
-/*
- * send an IPI to a set of cpus.
- */
-void
-ipi_selected(u_int32_t cpus, u_int ipi)
-{
- int cpu;
-
- CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
- while ((cpu = ffs(cpus)) != 0) {
- cpu--;
- KASSERT(cpu_apic_ids[cpu] != -1,
- ("IPI to non-existent CPU %d", cpu));
- lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
- cpus &= ~(1 << cpu);
- }
-}
-
-/*
- * send an IPI INTerrupt containing 'vector' to all CPUs, including myself
- */
-void
-ipi_all(u_int ipi)
-{
-
- CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
- lapic_ipi_vectored(ipi, APIC_IPI_DEST_ALL);
-}
-
-/*
- * send an IPI to all CPUs EXCEPT myself
- */
-void
-ipi_all_but_self(u_int ipi)
-{
-
- CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
- lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
-}
-
-/*
- * send an IPI to myself
- */
-void
-ipi_self(u_int ipi)
-{
-
- CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
- lapic_ipi_vectored(ipi, APIC_IPI_DEST_SELF);
-}
-
-/*
- * This is called once the rest of the system is up and running and we're
- * ready to let the AP's out of the pen.
- */
-static void
-release_aps(void *dummy __unused)
-{
-
- if (mp_ncpus == 1)
- return;
- mtx_lock_spin(&sched_lock);
- atomic_store_rel_int(&aps_ready, 1);
- while (smp_started == 0)
- ia32_pause();
- mtx_unlock_spin(&sched_lock);
-}
-SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
-
-static int
-sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS)
-{
- u_int mask;
- int error;
-
- mask = hlt_cpus_mask;
- error = sysctl_handle_int(oidp, &mask, 0, req);
- if (error || !req->newptr)
- return (error);
-
- if (logical_cpus_mask != 0 &&
- (mask & logical_cpus_mask) == logical_cpus_mask)
- hlt_logical_cpus = 1;
- else
- hlt_logical_cpus = 0;
-
- if ((mask & all_cpus) == all_cpus)
- mask &= ~(1<<0);
- hlt_cpus_mask = mask;
- return (error);
-}
-SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW,
- 0, 0, sysctl_hlt_cpus, "IU",
- "Bitmap of CPUs to halt. 101 (binary) will halt CPUs 0 and 2.");
-
-static int
-sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS)
-{
- int disable, error;
-
- disable = hlt_logical_cpus;
- error = sysctl_handle_int(oidp, &disable, 0, req);
- if (error || !req->newptr)
- return (error);
-
- if (disable)
- hlt_cpus_mask |= logical_cpus_mask;
- else
- hlt_cpus_mask &= ~logical_cpus_mask;
-
- if ((hlt_cpus_mask & all_cpus) == all_cpus)
- hlt_cpus_mask &= ~(1<<0);
-
- hlt_logical_cpus = disable;
- return (error);
-}
-
-static void
-cpu_hlt_setup(void *dummy __unused)
-{
-
- if (logical_cpus_mask != 0) {
- TUNABLE_INT_FETCH("machdep.hlt_logical_cpus",
- &hlt_logical_cpus);
- sysctl_ctx_init(&logical_cpu_clist);
- SYSCTL_ADD_PROC(&logical_cpu_clist,
- SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
- "hlt_logical_cpus", CTLTYPE_INT|CTLFLAG_RW, 0, 0,
- sysctl_hlt_logical_cpus, "IU", "");
- SYSCTL_ADD_UINT(&logical_cpu_clist,
- SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
- "logical_cpus_mask", CTLTYPE_INT|CTLFLAG_RD,
- &logical_cpus_mask, 0, "");
-
- if (hlt_logical_cpus)
- hlt_cpus_mask |= logical_cpus_mask;
- }
-}
-SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL);
-
-int
-mp_grab_cpu_hlt(void)
-{
- u_int mask = PCPU_GET(cpumask);
-#ifdef MP_WATCHDOG
- u_int cpuid = PCPU_GET(cpuid);
-#endif
- int retval;
-
-#ifdef MP_WATCHDOG
- ap_watchdog(cpuid);
-#endif
-
- retval = mask & hlt_cpus_mask;
- while (mask & hlt_cpus_mask)
- __asm __volatile("sti; hlt" : : : "memory");
- return (retval);
-}
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/i386-xen/mptable.c
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mptable.c Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,974 +0,0 @@
-/*-
- * Copyright (c) 2003 John Baldwin <jhb@xxxxxxxxxxx>
- * Copyright (c) 1996, by Steve Passe
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. The name of the developer may NOT be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/mptable.c,v 1.235.2.1 2004/09/28
16:24:09 jhb Exp $");
-
-#include "opt_mptable_force_htt.h"
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/bus.h>
-#include <sys/kernel.h>
-#include <sys/malloc.h>
-
-#include <vm/vm.h>
-#include <vm/vm_param.h>
-#include <vm/pmap.h>
-
-#include <machine/apicreg.h>
-#include <machine/frame.h>
-#include <machine/intr_machdep.h>
-#include <machine/apicvar.h>
-#include <machine/md_var.h>
-#include <machine/mptable.h>
-#include <machine/specialreg.h>
-
-#include <dev/pci/pcivar.h>
-
-/* string defined by the Intel MP Spec as identifying the MP table */
-#define MP_SIG 0x5f504d5f /* _MP_ */
-
-#define NAPICID 32 /* Max number of APIC's */
-
-#ifdef PC98
-#define BIOS_BASE (0xe8000)
-#define BIOS_SIZE (0x18000)
-#else
-#define BIOS_BASE (0xf0000)
-#define BIOS_SIZE (0x10000)
-#endif
-#define BIOS_COUNT (BIOS_SIZE/4)
-
-typedef void mptable_entry_handler(u_char *entry, void *arg);
-
-static basetable_entry basetable_entry_types[] =
-{
- {0, 20, "Processor"},
- {1, 8, "Bus"},
- {2, 8, "I/O APIC"},
- {3, 8, "I/O INT"},
- {4, 8, "Local INT"}
-};
-
-typedef struct BUSDATA {
- u_char bus_id;
- enum busTypes bus_type;
-} bus_datum;
-
-typedef struct INTDATA {
- u_char int_type;
- u_short int_flags;
- u_char src_bus_id;
- u_char src_bus_irq;
- u_char dst_apic_id;
- u_char dst_apic_int;
- u_char int_vector;
-} io_int, local_int;
-
-typedef struct BUSTYPENAME {
- u_char type;
- char name[7];
-} bus_type_name;
-
-/* From MP spec v1.4, table 4-8. */
-static bus_type_name bus_type_table[] =
-{
- {UNKNOWN_BUSTYPE, "CBUS "},
- {UNKNOWN_BUSTYPE, "CBUSII"},
- {EISA, "EISA "},
- {UNKNOWN_BUSTYPE, "FUTURE"},
- {UNKNOWN_BUSTYPE, "INTERN"},
- {ISA, "ISA "},
- {UNKNOWN_BUSTYPE, "MBI "},
- {UNKNOWN_BUSTYPE, "MBII "},
- {MCA, "MCA "},
- {UNKNOWN_BUSTYPE, "MPI "},
- {UNKNOWN_BUSTYPE, "MPSA "},
- {UNKNOWN_BUSTYPE, "NUBUS "},
- {PCI, "PCI "},
- {UNKNOWN_BUSTYPE, "PCMCIA"},
- {UNKNOWN_BUSTYPE, "TC "},
- {UNKNOWN_BUSTYPE, "VL "},
- {UNKNOWN_BUSTYPE, "VME "},
- {UNKNOWN_BUSTYPE, "XPRESS"}
-};
-
-/* From MP spec v1.4, table 5-1. */
-static int default_data[7][5] =
-{
-/* nbus, id0, type0, id1, type1 */
- {1, 0, ISA, 255, NOBUS},
- {1, 0, EISA, 255, NOBUS},
- {1, 0, EISA, 255, NOBUS},
- {1, 0, MCA, 255, NOBUS},
- {2, 0, ISA, 1, PCI},
- {2, 0, EISA, 1, PCI},
- {2, 0, MCA, 1, PCI}
-};
-
-struct pci_probe_table_args {
- u_char bus;
- u_char found;
-};
-
-struct pci_route_interrupt_args {
- u_char bus; /* Source bus. */
- u_char irq; /* Source slot:pin. */
- int vector; /* Return value. */
-};
-
-static mpfps_t mpfps;
-static mpcth_t mpct;
-static void *ioapics[NAPICID];
-static bus_datum *busses;
-static int mptable_nioapics, mptable_nbusses, mptable_maxbusid;
-static int pci0 = -1;
-
-MALLOC_DEFINE(M_MPTABLE, "MP Table", "MP Table Items");
-
-static enum intr_polarity conforming_polarity(u_char src_bus,
- u_char src_bus_irq);
-static enum intr_trigger conforming_trigger(u_char src_bus, u_char
src_bus_irq);
-static enum intr_polarity intentry_polarity(int_entry_ptr intr);
-static enum intr_trigger intentry_trigger(int_entry_ptr intr);
-static int lookup_bus_type(char *name);
-static void mptable_count_items(void);
-static void mptable_count_items_handler(u_char *entry, void *arg);
-#ifdef MPTABLE_FORCE_HTT
-static void mptable_hyperthread_fixup(u_int id_mask);
-#endif
-static void mptable_parse_apics_and_busses(void);
-static void mptable_parse_apics_and_busses_handler(u_char *entry,
- void *arg);
-static void mptable_parse_ints(void);
-static void mptable_parse_ints_handler(u_char *entry, void *arg);
-static void mptable_parse_io_int(int_entry_ptr intr);
-static void mptable_parse_local_int(int_entry_ptr intr);
-static void mptable_pci_probe_table_handler(u_char *entry, void *arg);
-static void mptable_pci_route_interrupt_handler(u_char *entry, void *arg);
-static void mptable_pci_setup(void);
-static int mptable_probe(void);
-static int mptable_probe_cpus(void);
-static void mptable_probe_cpus_handler(u_char *entry, void *arg __unused);
-static void mptable_register(void *dummy);
-static int mptable_setup_local(void);
-static int mptable_setup_io(void);
-static void mptable_walk_table(mptable_entry_handler *handler, void *arg);
-static int search_for_sig(u_int32_t target, int count);
-
-static struct apic_enumerator mptable_enumerator = {
- "MPTable",
- mptable_probe,
- mptable_probe_cpus,
- mptable_setup_local,
- mptable_setup_io
-};
-
-/*
- * look for the MP spec signature
- */
-
-static int
-search_for_sig(u_int32_t target, int count)
-{
- int x;
- u_int32_t *addr = (u_int32_t *) (KERNBASE + target);
-
- for (x = 0; x < count; x += 4)
- if (addr[x] == MP_SIG)
- /* make array index a byte index */
- return (target + (x * sizeof(u_int32_t)));
- return (-1);
-}
-
-static int
-lookup_bus_type(char *name)
-{
- int x;
-
- for (x = 0; x < MAX_BUSTYPE; ++x)
- if (strncmp(bus_type_table[x].name, name, 6) == 0)
- return (bus_type_table[x].type);
-
- return (UNKNOWN_BUSTYPE);
-}
-
-/*
- * Look for an Intel MP spec table (ie, SMP capable hardware).
- */
-static int
-mptable_probe(void)
-{
- int x;
- u_long segment;
- u_int32_t target;
-
- /* see if EBDA exists */
- if ((segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) != 0) {
- /* search first 1K of EBDA */
- target = (u_int32_t) (segment << 4);
- if ((x = search_for_sig(target, 1024 / 4)) >= 0)
- goto found;
- } else {
- /* last 1K of base memory, effective 'top of base' passed in */
- target = (u_int32_t) ((basemem * 1024) - 0x400);
- if ((x = search_for_sig(target, 1024 / 4)) >= 0)
- goto found;
- }
-
- /* search the BIOS */
- target = (u_int32_t) BIOS_BASE;
- if ((x = search_for_sig(target, BIOS_COUNT)) >= 0)
- goto found;
-
- /* nothing found */
- return (ENXIO);
-
-found:
- mpfps = (mpfps_t)(KERNBASE + x);
-
- /* Map in the configuration table if it exists. */
- if (mpfps->config_type != 0)
- mpct = NULL;
- else {
- if ((uintptr_t)mpfps->pap >= 1024 * 1024) {
- printf("%s: Unable to map MP Configuration Table\n",
- __func__);
- return (ENXIO);
- }
- mpct = (mpcth_t)(KERNBASE + (uintptr_t)mpfps->pap);
- if (mpct->base_table_length + (uintptr_t)mpfps->pap >=
- 1024 * 1024) {
- printf("%s: Unable to map end of MP Config Table\n",
- __func__);
- return (ENXIO);
- }
- if (mpct->signature[0] != 'P' || mpct->signature[1] != 'C' ||
- mpct->signature[2] != 'M' || mpct->signature[3] != 'P') {
- printf("%s: MP Config Table has bad signature:
%c%c%c%c\n",
- __func__, mpct->signature[0], mpct->signature[1],
- mpct->signature[2], mpct->signature[3]);
- return (ENXIO);
- }
- if (bootverbose)
- printf(
- "MP Configuration Table version 1.%d found at %p\n",
- mpct->spec_rev, mpct);
- }
-
- return (-100);
-}
-
-/*
- * Run through the MP table enumerating CPUs.
- */
-static int
-mptable_probe_cpus(void)
-{
- u_int cpu_mask;
-
- /* Is this a pre-defined config? */
- if (mpfps->config_type != 0) {
- lapic_create(0, 1);
- lapic_create(1, 0);
- } else {
- cpu_mask = 0;
- mptable_walk_table(mptable_probe_cpus_handler, &cpu_mask);
-#ifdef MPTABLE_FORCE_HTT
- mptable_hyperthread_fixup(cpu_mask);
-#endif
- }
- return (0);
-}
-
-/*
- * Initialize the local APIC on the BSP.
- */
-static int
-mptable_setup_local(void)
-{
-
- /* Is this a pre-defined config? */
- printf("MPTable: <");
- if (mpfps->config_type != 0) {
- lapic_init(DEFAULT_APIC_BASE);
- printf("Preset Config %d", mpfps->config_type);
- } else {
- lapic_init((uintptr_t)mpct->apic_address);
- printf("%.*s %.*s", (int)sizeof(mpct->oem_id), mpct->oem_id,
- (int)sizeof(mpct->product_id), mpct->product_id);
- }
- printf(">\n");
- return (0);
-}
-
-/*
- * Run through the MP table enumerating I/O APICs.
- */
-static int
-mptable_setup_io(void)
-{
- int i;
- u_char byte;
-
- /* First, we count individual items and allocate arrays. */
- mptable_count_items();
- busses = malloc((mptable_maxbusid + 1) * sizeof(bus_datum), M_MPTABLE,
- M_WAITOK);
- for (i = 0; i <= mptable_maxbusid; i++)
- busses[i].bus_type = NOBUS;
-
- /* Second, we run through adding I/O APIC's and busses. */
- ioapic_enable_mixed_mode();
- mptable_parse_apics_and_busses();
-
- /* Third, we run through the table tweaking interrupt sources. */
- mptable_parse_ints();
-
- /* Fourth, we register all the I/O APIC's. */
- for (i = 0; i < NAPICID; i++)
- if (ioapics[i] != NULL)
- ioapic_register(ioapics[i]);
-
- /* Fifth, we setup data structures to handle PCI interrupt routing. */
- mptable_pci_setup();
-
- /* Finally, we throw the switch to enable the I/O APIC's. */
- if (mpfps->mpfb2 & MPFB2_IMCR_PRESENT) {
- outb(0x22, 0x70); /* select IMCR */
- byte = inb(0x23); /* current contents */
- byte |= 0x01; /* mask external INTR */
- outb(0x23, byte); /* disconnect 8259s/NMI */
- }
-
- return (0);
-}
-
-static void
-mptable_register(void *dummy __unused)
-{
-
- apic_register_enumerator(&mptable_enumerator);
-}
-SYSINIT(mptable_register, SI_SUB_CPU - 1, SI_ORDER_FIRST, mptable_register,
- NULL)
-
-/*
- * Call the handler routine for each entry in the MP config table.
- */
-static void
-mptable_walk_table(mptable_entry_handler *handler, void *arg)
-{
- u_int i;
- u_char *entry;
-
- entry = (u_char *)(mpct + 1);
- for (i = 0; i < mpct->entry_count; i++) {
- switch (*entry) {
- case MPCT_ENTRY_PROCESSOR:
- case MPCT_ENTRY_IOAPIC:
- case MPCT_ENTRY_BUS:
- case MPCT_ENTRY_INT:
- case MPCT_ENTRY_LOCAL_INT:
- break;
- default:
- panic("%s: Unknown MP Config Entry %d\n", __func__,
- (int)*entry);
- }
- handler(entry, arg);
- entry += basetable_entry_types[*entry].length;
- }
-}
-
-static void
-mptable_probe_cpus_handler(u_char *entry, void *arg)
-{
- proc_entry_ptr proc;
- u_int *cpu_mask;
-
- switch (*entry) {
- case MPCT_ENTRY_PROCESSOR:
- proc = (proc_entry_ptr)entry;
- if (proc->cpu_flags & PROCENTRY_FLAG_EN) {
- lapic_create(proc->apic_id, proc->cpu_flags &
- PROCENTRY_FLAG_BP);
- cpu_mask = (u_int *)arg;
- *cpu_mask |= (1 << proc->apic_id);
- }
- break;
- }
-}
-
-static void
-mptable_count_items_handler(u_char *entry, void *arg __unused)
-{
- io_apic_entry_ptr apic;
- bus_entry_ptr bus;
-
- switch (*entry) {
- case MPCT_ENTRY_BUS:
- bus = (bus_entry_ptr)entry;
- mptable_nbusses++;
- if (bus->bus_id > mptable_maxbusid)
- mptable_maxbusid = bus->bus_id;
- break;
- case MPCT_ENTRY_IOAPIC:
- apic = (io_apic_entry_ptr)entry;
- if (apic->apic_flags & IOAPICENTRY_FLAG_EN)
- mptable_nioapics++;
- break;
- }
-}
-
-/*
- * Count items in the table.
- */
-static void
-mptable_count_items(void)
-{
-
- /* Is this a pre-defined config? */
- if (mpfps->config_type != 0) {
- mptable_nioapics = 1;
- switch (mpfps->config_type) {
- case 1:
- case 2:
- case 3:
- case 4:
- mptable_nbusses = 1;
- break;
- case 5:
- case 6:
- case 7:
- mptable_nbusses = 2;
- break;
- default:
- panic("Unknown pre-defined MP Table config type %d",
- mpfps->config_type);
- }
- mptable_maxbusid = mptable_nbusses - 1;
- } else
- mptable_walk_table(mptable_count_items_handler, NULL);
-}
-
-/*
- * Add a bus or I/O APIC from an entry in the table.
- */
-static void
-mptable_parse_apics_and_busses_handler(u_char *entry, void *arg __unused)
-{
- io_apic_entry_ptr apic;
- bus_entry_ptr bus;
- enum busTypes bus_type;
- int i;
-
-
- switch (*entry) {
- case MPCT_ENTRY_BUS:
- bus = (bus_entry_ptr)entry;
- bus_type = lookup_bus_type(bus->bus_type);
- if (bus_type == UNKNOWN_BUSTYPE) {
- printf("MPTable: Unknown bus %d type \"", bus->bus_id);
- for (i = 0; i < 6; i++)
- printf("%c", bus->bus_type[i]);
- printf("\"\n");
- }
- busses[bus->bus_id].bus_id = bus->bus_id;
- busses[bus->bus_id].bus_type = bus_type;
- break;
- case MPCT_ENTRY_IOAPIC:
- apic = (io_apic_entry_ptr)entry;
- if (!(apic->apic_flags & IOAPICENTRY_FLAG_EN))
- break;
- if (apic->apic_id >= NAPICID)
- panic("%s: I/O APIC ID %d too high", __func__,
- apic->apic_id);
- if (ioapics[apic->apic_id] != NULL)
- panic("%s: Double APIC ID %d", __func__,
- apic->apic_id);
- ioapics[apic->apic_id] = ioapic_create(
- (uintptr_t)apic->apic_address, apic->apic_id, -1);
- break;
- default:
- break;
- }
-}
-
-/*
- * Enumerate I/O APIC's and busses.
- */
-static void
-mptable_parse_apics_and_busses(void)
-{
-
- /* Is this a pre-defined config? */
- if (mpfps->config_type != 0) {
- ioapics[0] = ioapic_create(DEFAULT_IO_APIC_BASE, 2, 0);
- busses[0].bus_id = 0;
- busses[0].bus_type = default_data[mpfps->config_type][2];
- if (mptable_nbusses > 1) {
- busses[1].bus_id = 1;
- busses[1].bus_type =
- default_data[mpfps->config_type][4];
- }
- } else
- mptable_walk_table(mptable_parse_apics_and_busses_handler,
- NULL);
-}
-
-/*
- * Determine conforming polarity for a given bus type.
- */
-static enum intr_polarity
-conforming_polarity(u_char src_bus, u_char src_bus_irq)
-{
-
- KASSERT(src_bus <= mptable_maxbusid, ("bus id %d too large", src_bus));
- switch (busses[src_bus].bus_type) {
- case ISA:
- case EISA:
- return (INTR_POLARITY_HIGH);
- case PCI:
- return (INTR_POLARITY_LOW);
- default:
- panic("%s: unknown bus type %d", __func__,
- busses[src_bus].bus_type);
- }
-}
-
-/*
- * Determine conforming trigger for a given bus type.
- */
-static enum intr_trigger
-conforming_trigger(u_char src_bus, u_char src_bus_irq)
-{
-
- KASSERT(src_bus <= mptable_maxbusid, ("bus id %d too large", src_bus));
- switch (busses[src_bus].bus_type) {
- case ISA:
- return (INTR_TRIGGER_EDGE);
- case PCI:
- return (INTR_TRIGGER_LEVEL);
-#if !defined(PC98) && !defined(XEN)
- case EISA:
- KASSERT(src_bus_irq < 16, ("Invalid EISA IRQ %d", src_bus_irq));
- return (elcr_read_trigger(src_bus_irq));
-#endif
- default:
- panic("%s: unknown bus type %d", __func__,
- busses[src_bus].bus_type);
- }
-}
-
-static enum intr_polarity
-intentry_polarity(int_entry_ptr intr)
-{
-
- switch (intr->int_flags & INTENTRY_FLAGS_POLARITY) {
- case INTENTRY_FLAGS_POLARITY_CONFORM:
- return (conforming_polarity(intr->src_bus_id,
- intr->src_bus_irq));
- case INTENTRY_FLAGS_POLARITY_ACTIVEHI:
- return (INTR_POLARITY_HIGH);
- case INTENTRY_FLAGS_POLARITY_ACTIVELO:
- return (INTR_POLARITY_LOW);
- default:
- panic("Bogus interrupt flags");
- }
-}
-
-static enum intr_trigger
-intentry_trigger(int_entry_ptr intr)
-{
-
- switch (intr->int_flags & INTENTRY_FLAGS_TRIGGER) {
- case INTENTRY_FLAGS_TRIGGER_CONFORM:
- return (conforming_trigger(intr->src_bus_id,
- intr->src_bus_irq));
- case INTENTRY_FLAGS_TRIGGER_EDGE:
- return (INTR_TRIGGER_EDGE);
- case INTENTRY_FLAGS_TRIGGER_LEVEL:
- return (INTR_TRIGGER_LEVEL);
- default:
- panic("Bogus interrupt flags");
- }
-}
-
-/*
- * Parse an interrupt entry for an I/O interrupt routed to a pin on an I/O
APIC.
- */
-static void
-mptable_parse_io_int(int_entry_ptr intr)
-{
- void *ioapic;
- u_int pin;
-
- if (intr->dst_apic_id == 0xff) {
- printf("MPTable: Ignoring global interrupt entry for pin %d\n",
- intr->dst_apic_int);
- return;
- }
- if (intr->dst_apic_id >= NAPICID) {
- printf("MPTable: Ignoring interrupt entry for ioapic%d\n",
- intr->dst_apic_id);
- return;
- }
- ioapic = ioapics[intr->dst_apic_id];
- if (ioapic == NULL) {
- printf(
- "MPTable: Ignoring interrupt entry for missing ioapic%d\n",
- intr->dst_apic_id);
- return;
- }
- pin = intr->dst_apic_int;
- switch (intr->int_type) {
- case INTENTRY_TYPE_INT:
- switch (busses[intr->src_bus_id].bus_type) {
- case NOBUS:
- panic("interrupt from missing bus");
- case ISA:
- case EISA:
- if (busses[intr->src_bus_id].bus_type == ISA)
- ioapic_set_bus(ioapic, pin, APIC_BUS_ISA);
- else
- ioapic_set_bus(ioapic, pin, APIC_BUS_EISA);
- if (intr->src_bus_irq == pin)
- break;
- ioapic_remap_vector(ioapic, pin, intr->src_bus_irq);
- if (ioapic_get_vector(ioapic, intr->src_bus_irq) ==
- intr->src_bus_irq)
- ioapic_disable_pin(ioapic, intr->src_bus_irq);
- break;
- case PCI:
- ioapic_set_bus(ioapic, pin, APIC_BUS_PCI);
- break;
- default:
- ioapic_set_bus(ioapic, pin, APIC_BUS_UNKNOWN);
- break;
- }
- break;
- case INTENTRY_TYPE_NMI:
- ioapic_set_nmi(ioapic, pin);
- break;
- case INTENTRY_TYPE_SMI:
- ioapic_set_smi(ioapic, pin);
- break;
- case INTENTRY_TYPE_EXTINT:
- ioapic_set_extint(ioapic, pin);
- break;
- default:
- panic("%s: invalid interrupt entry type %d\n", __func__,
- intr->int_type);
- }
- if (intr->int_type == INTENTRY_TYPE_INT ||
- (intr->int_flags & INTENTRY_FLAGS_TRIGGER) !=
- INTENTRY_FLAGS_TRIGGER_CONFORM)
- ioapic_set_triggermode(ioapic, pin, intentry_trigger(intr));
- if (intr->int_type == INTENTRY_TYPE_INT ||
- (intr->int_flags & INTENTRY_FLAGS_POLARITY) !=
- INTENTRY_FLAGS_POLARITY_CONFORM)
- ioapic_set_polarity(ioapic, pin, intentry_polarity(intr));
-}
-
-/*
- * Parse an interrupt entry for a local APIC LVT pin.
- */
-static void
-mptable_parse_local_int(int_entry_ptr intr)
-{
- u_int apic_id, pin;
-
- if (intr->dst_apic_id == 0xff)
- apic_id = APIC_ID_ALL;
- else
- apic_id = intr->dst_apic_id;
- if (intr->dst_apic_int == 0)
- pin = LVT_LINT0;
- else
- pin = LVT_LINT1;
- switch (intr->int_type) {
- case INTENTRY_TYPE_INT:
-#if 1
- printf(
- "MPTable: Ignoring vectored local interrupt for LINTIN%d vector %d\n",
- intr->dst_apic_int, intr->src_bus_irq);
- return;
-#else
- lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_FIXED);
- break;
-#endif
- case INTENTRY_TYPE_NMI:
- lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_NMI);
- break;
- case INTENTRY_TYPE_SMI:
- lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_SMI);
- break;
- case INTENTRY_TYPE_EXTINT:
- lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_EXTINT);
- break;
- default:
- panic("%s: invalid interrupt entry type %d\n", __func__,
- intr->int_type);
- }
- if ((intr->int_flags & INTENTRY_FLAGS_TRIGGER) !=
- INTENTRY_FLAGS_TRIGGER_CONFORM)
- lapic_set_lvt_triggermode(apic_id, pin,
- intentry_trigger(intr));
- if ((intr->int_flags & INTENTRY_FLAGS_POLARITY) !=
- INTENTRY_FLAGS_POLARITY_CONFORM)
- lapic_set_lvt_polarity(apic_id, pin, intentry_polarity(intr));
-}
-
-/*
- * Parse interrupt entries.
- */
-static void
-mptable_parse_ints_handler(u_char *entry, void *arg __unused)
-{
- int_entry_ptr intr;
-
- intr = (int_entry_ptr)entry;
- switch (*entry) {
- case MPCT_ENTRY_INT:
- mptable_parse_io_int(intr);
- break;
- case MPCT_ENTRY_LOCAL_INT:
- mptable_parse_local_int(intr);
- break;
- }
-}
-
-/*
- * Configure the interrupt pins
- */
-static void
-mptable_parse_ints(void)
-{
-
- /* Is this a pre-defined config? */
- if (mpfps->config_type != 0) {
- /* Configure LINT pins. */
- lapic_set_lvt_mode(APIC_ID_ALL, LVT_LINT0, APIC_LVT_DM_EXTINT);
- lapic_set_lvt_mode(APIC_ID_ALL, LVT_LINT1, APIC_LVT_DM_NMI);
-
- /* Configure I/O APIC pins. */
- if (mpfps->config_type != 7)
- ioapic_set_extint(ioapics[0], 0);
- else
- ioapic_disable_pin(ioapics[0], 0);
- if (mpfps->config_type != 2)
- ioapic_remap_vector(ioapics[0], 2, 0);
- else
- ioapic_disable_pin(ioapics[0], 2);
- if (mpfps->config_type == 2)
- ioapic_disable_pin(ioapics[0], 13);
- } else
- mptable_walk_table(mptable_parse_ints_handler, NULL);
-}
-
-#ifdef MPTABLE_FORCE_HTT
-/*
- * Perform a hyperthreading "fix-up" to enumerate any logical CPU's
- * that aren't already listed in the table.
- *
- * XXX: We assume that all of the physical CPUs in the
- * system have the same number of logical CPUs.
- *
- * XXX: We assume that APIC ID's are allocated such that
- * the APIC ID's for a physical processor are aligned
- * with the number of logical CPU's in the processor.
- */
-static void
-mptable_hyperthread_fixup(u_int id_mask)
-{
- u_int i, id, logical_cpus;
-
- /* Nothing to do if there is no HTT support. */
- if ((cpu_feature & CPUID_HTT) == 0)
- return;
- logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
- if (logical_cpus <= 1)
- return;
-
- /*
- * For each APIC ID of a CPU that is set in the mask,
- * scan the other candidate APIC ID's for this
- * physical processor. If any of those ID's are
- * already in the table, then kill the fixup.
- */
- for (id = 0; id < NAPICID; id++) {
- if ((id_mask & 1 << id) == 0)
- continue;
- /* First, make sure we are on a logical_cpus boundary. */
- if (id % logical_cpus != 0)
- return;
- for (i = id + 1; i < id + logical_cpus; i++)
- if ((id_mask & 1 << i) != 0)
- return;
- }
-
- /*
- * Ok, the ID's checked out, so perform the fixup by
- * adding the logical CPUs.
- */
- while ((id = ffs(id_mask)) != 0) {
- id--;
- for (i = id + 1; i < id + logical_cpus; i++) {
- if (bootverbose)
- printf(
- "MPTable: Adding logical CPU %d from main CPU %d\n",
- i, id);
- lapic_create(i, 0);
- }
- id_mask &= ~(1 << id);
- }
-}
-#endif /* MPTABLE_FORCE_HTT */
-
-/*
- * Support code for routing PCI interrupts using the MP Table.
- */
-static void
-mptable_pci_setup(void)
-{
- int i;
-
- /*
- * Find the first pci bus and call it 0. Panic if pci0 is not
- * bus zero and there are multiple PCI busses.
- */
- for (i = 0; i <= mptable_maxbusid; i++)
- if (busses[i].bus_type == PCI) {
- if (pci0 == -1)
- pci0 = i;
- else if (pci0 != 0)
- panic(
- "MPTable contains multiple PCI busses but no PCI bus 0");
- }
-}
-
-static void
-mptable_pci_probe_table_handler(u_char *entry, void *arg)
-{
- struct pci_probe_table_args *args;
- int_entry_ptr intr;
-
- if (*entry != MPCT_ENTRY_INT)
- return;
- intr = (int_entry_ptr)entry;
- args = (struct pci_probe_table_args *)arg;
- KASSERT(args->bus <= mptable_maxbusid,
- ("bus %d is too big", args->bus));
- KASSERT(busses[args->bus].bus_type == PCI, ("probing for non-PCI bus"));
- if (intr->src_bus_id == args->bus)
- args->found = 1;
-}
-
-int
-mptable_pci_probe_table(int bus)
-{
- struct pci_probe_table_args args;
-
- if (bus < 0)
- return (EINVAL);
- if (pci0 == -1 || pci0 + bus > mptable_maxbusid)
- return (ENXIO);
- if (busses[pci0 + bus].bus_type != PCI)
- return (ENXIO);
- args.bus = pci0 + bus;
- args.found = 0;
- mptable_walk_table(mptable_pci_probe_table_handler, &args);
- if (args.found == 0)
- return (ENXIO);
- return (0);
-}
-
-static void
-mptable_pci_route_interrupt_handler(u_char *entry, void *arg)
-{
- struct pci_route_interrupt_args *args;
- int_entry_ptr intr;
- int vector;
-
- if (*entry != MPCT_ENTRY_INT)
- return;
- intr = (int_entry_ptr)entry;
- args = (struct pci_route_interrupt_args *)arg;
- if (intr->src_bus_id != args->bus || intr->src_bus_irq != args->irq)
- return;
-
- /* Make sure the APIC maps to a known APIC. */
- KASSERT(ioapics[intr->dst_apic_id] != NULL,
- ("No I/O APIC %d to route interrupt to", intr->dst_apic_id));
-
- /*
- * Look up the vector for this APIC / pin combination. If we
- * have previously matched an entry for this PCI IRQ but it
- * has the same vector as this entry, just return. Otherwise,
- * we use the vector for this APIC / pin combination.
- */
- vector = ioapic_get_vector(ioapics[intr->dst_apic_id],
- intr->dst_apic_int);
- if (args->vector == vector)
- return;
- KASSERT(args->vector == -1,
- ("Multiple IRQs for PCI interrupt %d.%d.INT%c: %d and %d\n",
- args->bus, args->irq >> 2, 'A' + (args->irq & 0x3), args->vector,
- vector));
- args->vector = vector;
-}
-
-int
-mptable_pci_route_interrupt(device_t pcib, device_t dev, int pin)
-{
- struct pci_route_interrupt_args args;
- int slot;
-
- /* Like ACPI, pin numbers are 0-3, not 1-4. */
- pin--;
- KASSERT(pci0 != -1, ("do not know how to route PCI interrupts"));
- args.bus = pci_get_bus(dev) + pci0;
- slot = pci_get_slot(dev);
-
- /*
- * PCI interrupt entries in the MP Table encode both the slot and
- * pin into the IRQ with the pin being the two least significant
- * bits, the slot being the next five bits, and the most significant
- * bit being reserved.
- */
- args.irq = slot << 2 | pin;
- args.vector = -1;
- mptable_walk_table(mptable_pci_route_interrupt_handler, &args);
- if (args.vector < 0) {
- device_printf(pcib, "unable to route slot %d INT%c\n", slot,
- 'A' + pin);
- return (PCI_INVALID_IRQ);
- }
- if (bootverbose)
- device_printf(pcib, "slot %d INT%c routed to irq %d\n", slot,
- 'A' + pin, args.vector);
- return (args.vector);
-}
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,3474 +0,0 @@
-/*-
- * Copyright (c) 1991 Regents of the University of California.
- * All rights reserved.
- * Copyright (c) 1994 John S. Dyson
- * All rights reserved.
- * Copyright (c) 1994 David Greenman
- * All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * the Systems Programming Group of the University of Utah Computer
- * Science Department and William Jolitz of UUNET Technologies Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91
- */
-/*-
- * Copyright (c) 2003 Networks Associates Technology, Inc.
- * All rights reserved.
- *
- * This software was developed for the FreeBSD Project by Jake Burkholder,
- * Safeport Network Services, and Network Associates Laboratories, the
- * Security Research Division of Network Associates, Inc. under
- * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
- * CHATS research program.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/pmap.c,v 1.494.2.6 2004/10/10 19:08:00
alc Exp $");
-
-/*
- * Manages physical address maps.
- * XEN NOTES: page table entries (pt_entry_t) and
- * page directory entries (pd_entry_t) contain machine
- * addresses and not physical addresses. Use PT_GET() before
- * dereferencing these structures to convert them into a
- * physical address. Use the PT_SET_VA operations to commit
- * page changes back to XEN. PT_SET_VA_MA should be used with
- * great care!
- *
- *
- * In addition to hardware address maps, this
- * module is called upon to provide software-use-only
- * maps which may or may not be stored in the same
- * form as hardware maps. These pseudo-maps are
- * used to store intermediate results from copy
- * operations to and from address spaces.
- *
- * Since the information managed by this module is
- * also stored by the logical address mapping module,
- * this module may throw away valid virtual-to-physical
- * mappings at almost any time. However, invalidations
- * of virtual-to-physical mappings must be done as
- * requested.
- *
- * In order to cope with hardware architectures which
- * make virtual-to-physical map invalidates expensive,
- * this module may delay invalidate or reduced protection
- * operations until such time as they are actually
- * necessary. This module is given full information as
- * to which processors are currently using which maps,
- * and to when physical maps must be made correct.
- */
-
-#include "opt_cpu.h"
-#include "opt_pmap.h"
-#include "opt_msgbuf.h"
-#include "opt_kstack_pages.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/lock.h>
-#include <sys/malloc.h>
-#include <sys/mman.h>
-#include <sys/msgbuf.h>
-#include <sys/mutex.h>
-#include <sys/proc.h>
-#include <sys/sx.h>
-#include <sys/user.h>
-#include <sys/vmmeter.h>
-#include <sys/sched.h>
-#include <sys/sysctl.h>
-#ifdef SMP
-#include <sys/smp.h>
-#endif
-
-#include <vm/vm.h>
-#include <vm/vm_param.h>
-#include <vm/vm_kern.h>
-#include <vm/vm_page.h>
-#include <vm/vm_map.h>
-#include <vm/vm_object.h>
-#include <vm/vm_extern.h>
-#include <vm/vm_pageout.h>
-#include <vm/vm_pager.h>
-#include <vm/uma.h>
-
-#include <machine/cpu.h>
-#include <machine/cputypes.h>
-#include <machine/md_var.h>
-#include <machine/specialreg.h>
-#ifdef SMP
-#include <machine/smp.h>
-#endif
-
-#include <machine/xenfunc.h>
-
-#if !defined(CPU_ENABLE_SSE) && defined(I686_CPU)
-#define CPU_ENABLE_SSE
-#endif
-#if defined(CPU_DISABLE_SSE)
-#undef CPU_ENABLE_SSE
-#endif
-
-#ifndef PMAP_SHPGPERPROC
-#define PMAP_SHPGPERPROC 200
-#endif
-
-#if defined(DIAGNOSTIC)
-#define PMAP_DIAGNOSTIC
-#endif
-
-#define MINPV 2048
-
-#if !defined(PMAP_DIAGNOSTIC)
-#define PMAP_INLINE __inline
-#else
-#define PMAP_INLINE
-#endif
-
-/*
- * Get PDEs and PTEs for user/kernel address space
- */
-#define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
-#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT])
-
-#define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0)
-#define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0)
-#define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0)
-#define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0)
-#define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0)
-
-#if 0
-#define pmap_pte_set_w(pte, v) ((v) ? atomic_set_int((u_int *)(pte), PG_W) : \
- atomic_clear_int((u_int *)(pte), PG_W))
-#else
-#define pmap_pte_set_w(pte, v) { \
- if (v) \
- PT_SET_VA_MA(pte, *pte | PG_W, TRUE); \
- else \
- PT_SET_VA_MA(pte, *pte & ~PG_W, TRUE); \
-}
-#endif
-
-struct pmap kernel_pmap_store;
-LIST_HEAD(pmaplist, pmap);
-static struct pmaplist allpmaps;
-static struct mtx allpmaps_lock;
-
-vm_paddr_t avail_end; /* PA of last available physical page */
-vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */
-vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */
-static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? */
-int pgeflag = 0; /* PG_G or-in */
-int pseflag = 0; /* PG_PS or-in */
-
-static int nkpt;
-vm_offset_t kernel_vm_end;
-extern u_int32_t KERNend;
-
-#ifdef PAE
-static uma_zone_t pdptzone;
-#endif
-
-/*
- * Data for the pv entry allocation mechanism
- */
-static uma_zone_t pvzone;
-static struct vm_object pvzone_obj;
-static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
-int pmap_pagedaemon_waken;
-
-/*
- * All those kernel PT submaps that BSD is so fond of
- */
-pt_entry_t *CMAP1 = 0;
-static pt_entry_t *CMAP2, *CMAP3;
-caddr_t CADDR1 = 0, ptvmmap = 0;
-static caddr_t CADDR2, CADDR3;
-static struct mtx CMAPCADDR12_lock;
-struct msgbuf *msgbufp = 0;
-
-/*
- * Crashdump maps.
- */
-static caddr_t crashdumpmap;
-
-#ifdef SMP
-extern pt_entry_t *SMPpt;
-#endif
-static pt_entry_t *PMAP1 = 0, *PMAP2;
-static pt_entry_t *PADDR1 = 0, *PADDR2;
-#ifdef SMP
-static int PMAP1cpu;
-static int PMAP1changedcpu;
-SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD,
- &PMAP1changedcpu, 0,
- "Number of times pmap_pte_quick changed CPU with same PMAP1");
-#endif
-static int PMAP1changed;
-SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD,
- &PMAP1changed, 0,
- "Number of times pmap_pte_quick changed PMAP1");
-static int PMAP1unchanged;
-SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD,
- &PMAP1unchanged, 0,
- "Number of times pmap_pte_quick didn't change PMAP1");
-static struct mtx PMAP2mutex;
-
-static PMAP_INLINE void free_pv_entry(pv_entry_t pv);
-static pv_entry_t get_pv_entry(void);
-static void pmap_clear_ptes(vm_page_t m, int bit);
-
-static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva);
-static void pmap_remove_page(struct pmap *pmap, vm_offset_t va);
-static int pmap_remove_entry(struct pmap *pmap, vm_page_t m,
- vm_offset_t va);
-static void pmap_copy_ma(vm_paddr_t src, vm_paddr_t dst);
-static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
-
-static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
-
-static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags);
-static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m);
-static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va);
-static void pmap_pte_release(pt_entry_t *pte);
-static int pmap_unuse_pt(pmap_t, vm_offset_t);
-static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
-#ifdef PAE
-static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int
wait);
-#endif
-
-CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t));
-CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));
-
-#ifndef DEBUG
-#define DEBUG
-#endif
-#ifdef PMAP_DEBUG
-static void pmap_dec_ref(unsigned long ma);
-static void pmap_mark_privileged(unsigned long pa);
-static void pmap_mark_unprivileged(unsigned long pa);
-static void pmap_dec_ref_page(vm_page_t m);
-int pmap_pid_dump(int pid);
-#endif
-
-void
-pd_set(struct pmap *pmap, vm_paddr_t *ptr, vm_paddr_t val, int type)
-{
- vm_paddr_t shadow_pdir_ma = pmap->pm_pdir[PTDPTDI] & ~0xFFF;
- vm_paddr_t shadow_offset = (vm_paddr_t)(ptr -
pmap->pm_pdir)*sizeof(vm_paddr_t);
-
- switch (type) {
- case SH_PD_SET_VA:
- xen_queue_pt_update(shadow_pdir_ma + shadow_offset,
- xpmap_ptom(val & ~(PG_RW|PG_M)));
- xen_queue_pt_update(vtomach(ptr),
- xpmap_ptom(val));
- break;
- case SH_PD_SET_VA_MA:
- xen_queue_pt_update(shadow_pdir_ma + shadow_offset,
- val & ~(PG_RW|PG_M));
- xen_queue_pt_update(vtomach(ptr), val);
- break;
- case SH_PD_SET_VA_CLEAR:
- xen_queue_pt_update(shadow_pdir_ma + shadow_offset, 0);
- xen_queue_pt_update(vtomach(ptr), 0);
- break;
- }
-}
-
-/*
- * Move the kernel virtual free pointer to the next
- * 4MB. This is used to help improve performance
- * by using a large (4MB) page for much of the kernel
- * (.text, .data, .bss)
- */
-static vm_offset_t
-pmap_kmem_choose(vm_offset_t addr)
-{
- vm_offset_t newaddr = addr;
-
-#ifndef DISABLE_PSE
- if (cpu_feature & CPUID_PSE)
- newaddr = (addr + PDRMASK) & ~PDRMASK;
-#endif
- return newaddr;
-}
-
-/*
- * Bootstrap the system enough to run with virtual memory.
- *
- * On the i386 this is called after mapping has already been enabled
- * and just syncs the pmap module with what has already been done.
- * [We can't call it easily with mapping off since the kernel is not
- * mapped with PA == VA, hence we would have to relocate every address
- * from the linked base (virtual) address "KERNBASE" to the actual
- * (physical) address starting relative to 0]
- */
-void
-pmap_bootstrap(firstaddr, loadaddr)
- vm_paddr_t firstaddr;
- vm_paddr_t loadaddr;
-{
- vm_offset_t va;
- pt_entry_t *pte, *unused;
-
- /*
- * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE
too
- * large. It should instead be correctly calculated in locore.s and
- * not based on 'first' (which is a physical address, not a virtual
- * address, for the start of unused physical memory). The kernel
- * page tables are NOT double mapped and thus should not be included
- * in this calculation.
- */
- virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
- virtual_avail = pmap_kmem_choose(virtual_avail);
-
- virtual_end = VM_MAX_KERNEL_ADDRESS;
-
- /*
- * Initialize the kernel pmap (which is statically allocated).
- */
- PMAP_LOCK_INIT(kernel_pmap);
- kernel_pmap->pm_pdir = (pd_entry_t *) xen_start_info->pt_base;
-#ifdef PAE
- kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT);
-#endif
- kernel_pmap->pm_active = -1; /* don't allow deactivation */
- TAILQ_INIT(&kernel_pmap->pm_pvlist);
- LIST_INIT(&allpmaps);
- mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
- mtx_lock_spin(&allpmaps_lock);
- LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
- mtx_unlock_spin(&allpmaps_lock);
- nkpt = NKPT;
-
- /*
- * Reserve some special page table entries/VA space for temporary
- * mapping of pages.
- */
-#define SYSMAP(c, p, v, n) \
- v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
-
- va = virtual_avail;
- pte = vtopte(va);
-
- /*
- * CMAP1/CMAP2 are used for zeroing and copying pages.
- * CMAP3 is used for the idle process page zeroing.
- */
- SYSMAP(caddr_t, CMAP1, CADDR1, 1);
- SYSMAP(caddr_t, CMAP2, CADDR2, 1);
- SYSMAP(caddr_t, CMAP3, CADDR3, 1);
-
- PT_CLEAR_VA(CMAP3, TRUE);
-
- mtx_init(&CMAPCADDR12_lock, "CMAPCADDR12", NULL, MTX_DEF);
-
- /*
- * Crashdump maps.
- */
- SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS)
-
- /*
- * ptvmmap is used for reading arbitrary physical pages via /dev/mem.
- */
- SYSMAP(caddr_t, unused, ptvmmap, 1)
-
- /*
- * msgbufp is used to map the system message buffer.
- */
- SYSMAP(struct msgbuf *, unused, msgbufp, atop(round_page(MSGBUF_SIZE)))
-
- /*
- * ptemap is used for pmap_pte_quick
- */
- SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1);
- SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1);
-
- mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF);
-
- virtual_avail = va;
- PT_CLEAR_VA(CMAP1, FALSE);
- PT_CLEAR_VA(CMAP2, FALSE);
-
- PT_UPDATES_FLUSH();
-#ifdef XEN_UNNEEDED
- /* Turn on PG_G on kernel page(s) */
- pmap_set_pg();
-#endif
-}
-
-/*
- * Set PG_G on kernel pages. Only the BSP calls this when SMP is turned on.
- */
-void
-pmap_set_pg(void)
-{
- pd_entry_t pdir;
- pt_entry_t *pte;
- vm_offset_t va, endva;
- int i;
-
- if (pgeflag == 0)
- return;
- panic("this won't work");
- i = KERNLOAD/NBPDR;
- endva = KERNBASE + KERNend;
-
- if (pseflag) {
- va = KERNBASE + KERNLOAD;
- while (va < endva) {
- pdir = kernel_pmap->pm_pdir[KPTDI+i];
- pdir |= pgeflag;
- kernel_pmap->pm_pdir[KPTDI+i] = PTD[KPTDI+i] = pdir;
- invltlb(); /* Play it safe, invltlb() every time */
- i++;
- va += NBPDR;
- }
- } else {
- va = (vm_offset_t)btext;
- while (va < endva) {
- pte = vtopte(va);
- if (*pte)
- *pte |= pgeflag;
- invltlb(); /* Play it safe, invltlb() every time */
- va += PAGE_SIZE;
- }
- }
-}
-
-#ifdef PAE
-
-static MALLOC_DEFINE(M_PMAPPDPT, "pmap", "pmap pdpt");
-
-static void *
-pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
-{
- *flags = UMA_SLAB_PRIV;
- return (contigmalloc(PAGE_SIZE, M_PMAPPDPT, 0, 0x0ULL, 0xffffffffULL,
- 1, 0));
-}
-#endif
-
-/*
- * Initialize the pmap module.
- * Called by vm_init, to initialize any structures that the pmap
- * system needs to map virtual memory.
- * pmap_init has been enhanced to support in a fairly consistant
- * way, discontiguous physical memory.
- */
-void
-pmap_init(void)
-{
- int i;
-
- /*
- * Allocate memory for random pmap data structures. Includes the
- * pv_head_table.
- */
-
- for(i = 0; i < vm_page_array_size; i++) {
- vm_page_t m;
-
- m = &vm_page_array[i];
- TAILQ_INIT(&m->md.pv_list);
- m->md.pv_list_count = 0;
- }
-
- /*
- * init the pv free list
- */
- pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry), NULL, NULL,
- NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
- uma_prealloc(pvzone, MINPV);
-
-#ifdef PAE
- pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL,
- NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1,
- UMA_ZONE_VM | UMA_ZONE_NOFREE);
- uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf);
-#endif
-
- /*
- * Now it is safe to enable pv_table recording.
- */
- pmap_initialized = TRUE;
-}
-
-/*
- * Initialize the address space (zone) for the pv_entries. Set a
- * high water mark so that the system can recover from excessive
- * numbers of pv entries.
- */
-void
-pmap_init2()
-{
- int shpgperproc = PMAP_SHPGPERPROC;
-
- TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
- pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
- TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
- pv_entry_high_water = 9 * (pv_entry_max / 10);
- uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
-}
-
-
-/***************************************************
- * Low level helper routines.....
- ***************************************************/
-
-#if defined(PMAP_DIAGNOSTIC)
-
-/*
- * This code checks for non-writeable/modified pages.
- * This should be an invalid condition.
- */
-static int
-pmap_nw_modified(pt_entry_t ptea)
-{
- int pte;
-
- pte = (int) ptea;
-
- if ((pte & (PG_M|PG_RW)) == PG_M)
- return 1;
- else
- return 0;
-}
-#endif
-
-
-/*
- * this routine defines the region(s) of memory that should
- * not be tested for the modified bit.
- */
-static PMAP_INLINE int
-pmap_track_modified(vm_offset_t va)
-{
- if ((va < kmi.clean_sva) || (va >= kmi.clean_eva))
- return 1;
- else
- return 0;
-}
-
-#ifdef I386_CPU
-/*
- * i386 only has "invalidate everything" and no SMP to worry about.
- */
-PMAP_INLINE void
-pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
-{
-
- if (pmap == kernel_pmap || pmap->pm_active)
- invltlb();
-}
-
-PMAP_INLINE void
-pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
-{
-
- if (pmap == kernel_pmap || pmap->pm_active)
- invltlb();
-}
-
-PMAP_INLINE void
-pmap_invalidate_all(pmap_t pmap)
-{
-
- if (pmap == kernel_pmap || pmap->pm_active)
- invltlb();
-}
-#else /* !I386_CPU */
-#ifdef SMP
-/*
- * For SMP, these functions have to use the IPI mechanism for coherence.
- */
-void
-pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
-{
- u_int cpumask;
- u_int other_cpus;
-
- if (smp_started) {
- if (!(read_eflags() & PSL_I))
- panic("%s: interrupts disabled", __func__);
- mtx_lock_spin(&smp_rv_mtx);
- } else
- critical_enter();
- /*
- * We need to disable interrupt preemption but MUST NOT have
- * interrupts disabled here.
- * XXX we may need to hold schedlock to get a coherent pm_active
- * XXX critical sections disable interrupts again
- */
- if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
- invlpg(va);
- smp_invlpg(va);
- } else {
- cpumask = PCPU_GET(cpumask);
- other_cpus = PCPU_GET(other_cpus);
- if (pmap->pm_active & cpumask)
- invlpg(va);
- if (pmap->pm_active & other_cpus)
- smp_masked_invlpg(pmap->pm_active & other_cpus, va);
- }
- if (smp_started)
- mtx_unlock_spin(&smp_rv_mtx);
- else
- critical_exit();
- PT_UPDATES_FLUSH();
-}
-
-void
-pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
-{
- u_int cpumask;
- u_int other_cpus;
- vm_offset_t addr;
-
- if (smp_started) {
- if (!(read_eflags() & PSL_I))
- panic("%s: interrupts disabled", __func__);
- mtx_lock_spin(&smp_rv_mtx);
- } else
- critical_enter();
- /*
- * We need to disable interrupt preemption but MUST NOT have
- * interrupts disabled here.
- * XXX we may need to hold schedlock to get a coherent pm_active
- * XXX critical sections disable interrupts again
- */
- if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
- for (addr = sva; addr < eva; addr += PAGE_SIZE)
- invlpg(addr);
- smp_invlpg_range(sva, eva);
- } else {
- cpumask = PCPU_GET(cpumask);
- other_cpus = PCPU_GET(other_cpus);
- if (pmap->pm_active & cpumask)
- for (addr = sva; addr < eva; addr += PAGE_SIZE)
- invlpg(addr);
- if (pmap->pm_active & other_cpus)
- smp_masked_invlpg_range(pmap->pm_active & other_cpus,
- sva, eva);
- }
- if (smp_started)
- mtx_unlock_spin(&smp_rv_mtx);
- else
- critical_exit();
- PT_UPDATES_FLUSH();
-}
-
-void
-pmap_invalidate_all(pmap_t pmap)
-{
- u_int cpumask;
- u_int other_cpus;
-
- if (smp_started) {
- if (!(read_eflags() & PSL_I))
- panic("%s: interrupts disabled", __func__);
- mtx_lock_spin(&smp_rv_mtx);
- } else
- critical_enter();
- /*
- * We need to disable interrupt preemption but MUST NOT have
- * interrupts disabled here.
- * XXX we may need to hold schedlock to get a coherent pm_active
- * XXX critical sections disable interrupts again
- */
- if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
- invltlb();
- smp_invltlb();
- } else {
- cpumask = PCPU_GET(cpumask);
- other_cpus = PCPU_GET(other_cpus);
- if (pmap->pm_active & cpumask)
- invltlb();
- if (pmap->pm_active & other_cpus)
- smp_masked_invltlb(pmap->pm_active & other_cpus);
- }
- if (smp_started)
- mtx_unlock_spin(&smp_rv_mtx);
- else
- critical_exit();
- PT_UPDATES_FLUSH();
-}
-#else /* !SMP */
-/*
- * Normal, non-SMP, 486+ invalidation functions.
- * We inline these within pmap.c for speed.
- */
-PMAP_INLINE void
-pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
-{
-
- if (pmap == kernel_pmap || pmap->pm_active)
- invlpg(va);
- PT_UPDATES_FLUSH();
-
-}
-
-PMAP_INLINE void
-pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
-{
- vm_offset_t addr;
-
- if (pmap == kernel_pmap || pmap->pm_active)
- for (addr = sva; addr < eva; addr += PAGE_SIZE)
- invlpg(addr);
- PT_UPDATES_FLUSH();
-
-}
-
-PMAP_INLINE void
-pmap_invalidate_all(pmap_t pmap)
-{
-
- if (pmap == kernel_pmap || pmap->pm_active)
- invltlb();
-}
-#endif /* !SMP */
-#endif /* !I386_CPU */
-
-/*
- * Are we current address space or kernel? N.B. We return FALSE when
- * a pmap's page table is in use because a kernel thread is borrowing
- * it. The borrowed page table can change spontaneously, making any
- * dependence on its continued use subject to a race condition.
- */
-static __inline int
-pmap_is_current(pmap_t pmap)
-{
- /* XXX validate */
- return (pmap == kernel_pmap ||
- (pmap == vmspace_pmap(curthread->td_proc->p_vmspace) &&
- (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)));
-}
-
-/*
- * If the given pmap is not the current or kernel pmap, the returned pte must
- * be released by passing it to pmap_pte_release().
- */
-pt_entry_t *
-pmap_pte(pmap_t pmap, vm_offset_t va)
-{
- pd_entry_t tmppf, newpf;
- pd_entry_t *pde;
-
- pde = pmap_pde(pmap, va);
- if (*pde & PG_PS)
- return (pde);
- if (*pde != 0) {
- /* are we current address space or kernel? */
- if (pmap_is_current(pmap))
- return (vtopte(va));
- mtx_lock(&PMAP2mutex);
- newpf = PT_GET(pde) & PG_FRAME;
- tmppf = PT_GET(PMAP2) & PG_FRAME;
- if (tmppf != newpf) {
- PT_SET_VA(PMAP2, newpf | PG_V | PG_A, FALSE);
- pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2);
- }
- return (PADDR2 + (i386_btop(va) & (NPTEPG - 1)));
- }
- return (0);
-}
-
-/*
- * Releases a pte that was obtained from pmap_pte(). Be prepared for the pte
- * being NULL.
- */
-static __inline void
-pmap_pte_release(pt_entry_t *pte)
-{
-
- if ((pt_entry_t *)((vm_offset_t)pte & ~PAGE_MASK) == PADDR2)
- mtx_unlock(&PMAP2mutex);
-}
-
-static __inline void
-invlcaddr(void *caddr)
-{
-#ifdef I386_CPU
- invltlb();
-#else
- invlpg((u_int)caddr);
-#endif
- PT_UPDATES_FLUSH();
-}
-
-/*
- * Super fast pmap_pte routine best used when scanning
- * the pv lists. This eliminates many coarse-grained
- * invltlb calls. Note that many of the pv list
- * scans are across different pmaps. It is very wasteful
- * to do an entire invltlb for checking a single mapping.
- *
- * If the given pmap is not the current pmap, vm_page_queue_mtx
- * must be held and curthread pinned to a CPU.
- */
-static pt_entry_t *
-pmap_pte_quick(pmap_t pmap, vm_offset_t va)
-{
- pd_entry_t tmppf, newpf;
- pd_entry_t *pde;
-
- pde = pmap_pde(pmap, va);
- if (*pde & PG_PS)
- return (pde);
- if (*pde != 0) {
- /* are we current address space or kernel? */
- if (pmap_is_current(pmap))
- return (vtopte(va));
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
- KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
- newpf = PT_GET(pde) & PG_FRAME;
- tmppf = PT_GET(PMAP1) & PG_FRAME;
- if (tmppf != newpf) {
- PT_SET_VA(PMAP1, newpf | PG_V | PG_A, TRUE);
-#ifdef SMP
- PMAP1cpu = PCPU_GET(cpuid);
-#endif
- invlcaddr(PADDR1);
- PMAP1changed++;
- } else
-#ifdef SMP
- if (PMAP1cpu != PCPU_GET(cpuid)) {
- PMAP1cpu = PCPU_GET(cpuid);
- invlcaddr(PADDR1);
- PMAP1changedcpu++;
- } else
-#endif
- PMAP1unchanged++;
- return (PADDR1 + (i386_btop(va) & (NPTEPG - 1)));
- }
- return (0);
-}
-
-/*
- * Routine: pmap_extract
- * Function:
- * Extract the physical page address associated
- * with the given map/virtual_address pair.
- */
-vm_paddr_t
-pmap_extract(pmap_t pmap, vm_offset_t va)
-{
- vm_paddr_t rtval;
- pt_entry_t *pte;
- pd_entry_t pde;
-
- rtval = 0;
- PMAP_LOCK(pmap);
- pde = PT_GET(&pmap->pm_pdir[va >> PDRSHIFT]);
- if (pde != 0) {
- if ((pde & PG_PS) != 0) {
- rtval = (pde & ~PDRMASK) | (va & PDRMASK);
- PMAP_UNLOCK(pmap);
- return rtval;
- }
- pte = pmap_pte(pmap, va);
- rtval = (PT_GET(pte) & PG_FRAME) | (va & PAGE_MASK);
- pmap_pte_release(pte);
- }
- PMAP_UNLOCK(pmap);
- return (rtval);
-}
-
-/*
- * Routine: pmap_extract_and_hold
- * Function:
- * Atomically extract and hold the physical page
- * with the given pmap and virtual address pair
- * if that mapping permits the given protection.
- */
-vm_page_t
-pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
-{
- pd_entry_t pde;
- pt_entry_t pte;
- vm_page_t m;
-
- m = NULL;
- vm_page_lock_queues();
- PMAP_LOCK(pmap);
- pde = PT_GET(pmap_pde(pmap, va));
- if (pde != 0) {
- if (pde & PG_PS) {
- panic("4MB pages not currently supported");
- if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) {
- m = PHYS_TO_VM_PAGE((pde & ~PDRMASK) |
- (va & PDRMASK));
- vm_page_hold(m);
- }
- } else {
- sched_pin();
- pte = PT_GET(pmap_pte_quick(pmap, va));
- if (pte != 0 &&
- ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
- m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
- vm_page_hold(m);
- }
- sched_unpin();
- }
- }
- vm_page_unlock_queues();
- PMAP_UNLOCK(pmap);
- return (m);
-}
-
-/***************************************************
- * Low level mapping routines.....
- ***************************************************/
-
-/*
- * Add a wired page to the kva.
- * Note: not SMP coherent.
- */
-PMAP_INLINE void
-pmap_kenter(vm_offset_t va, vm_paddr_t pa)
-{
- pt_entry_t *pte;
-
- pte = vtopte(va);
- pte_store(pte, pa | PG_RW | PG_V | pgeflag);
-}
-
-/*
- * Add a wired page to the kva.
- * Note: not SMP coherent.
- */
-PMAP_INLINE void
-pmap_kenter_ma(vm_offset_t va, vm_paddr_t ma)
-{
- pt_entry_t *pte;
-
- pte = vtopte(va);
- PT_SET_VA_MA(pte, ma | PG_RW | PG_V | pgeflag, TRUE);
-}
-
-/*
- * Remove a page from the kernel pagetables.
- * Note: not SMP coherent.
- */
-PMAP_INLINE void
-pmap_kremove(vm_offset_t va)
-{
- pt_entry_t *pte;
-
- pte = vtopte(va);
- pte_clear(pte);
-}
-
-/*
- * Used to map a range of physical addresses into kernel
- * virtual address space.
- *
- * The value passed in '*virt' is a suggested virtual address for
- * the mapping. Architectures which can support a direct-mapped
- * physical to virtual region can return the appropriate address
- * within that region, leaving '*virt' unchanged. Other
- * architectures should map the pages starting at '*virt' and
- * update '*virt' with the first usable address after the mapped
- * region.
- */
-vm_offset_t
-pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
-{
- vm_offset_t va, sva;
-
- va = sva = *virt;
- while (start < end) {
- pmap_kenter(va, start);
- va += PAGE_SIZE;
- start += PAGE_SIZE;
- }
- /* invalidate will flush the update queue */
- pmap_invalidate_range(kernel_pmap, sva, va);
- *virt = va;
- return (sva);
-}
-
-
-/*
- * Add a list of wired pages to the kva
- * this routine is only used for temporary
- * kernel mappings that do not need to have
- * page modification or references recorded.
- * Note that old mappings are simply written
- * over. The page *must* be wired.
- * Note: SMP coherent. Uses a ranged shootdown IPI.
- */
-void
-pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
-{
- vm_offset_t va;
-
- va = sva;
- while (count-- > 0) {
- pmap_kenter(va, VM_PAGE_TO_PHYS(*m));
- va += PAGE_SIZE;
- m++;
- }
- /* invalidate will flush the update queue */
- pmap_invalidate_range(kernel_pmap, sva, va);
-}
-
-/*
- * This routine tears out page mappings from the
- * kernel -- it is meant only for temporary mappings.
- * Note: SMP coherent. Uses a ranged shootdown IPI.
- */
-void
-pmap_qremove(vm_offset_t sva, int count)
-{
- vm_offset_t va;
-
- va = sva;
- while (count-- > 0) {
- pmap_kremove(va);
- va += PAGE_SIZE;
- }
- /* invalidate will flush the update queue */
- pmap_invalidate_range(kernel_pmap, sva, va);
-}
-
-/***************************************************
- * Page table page management routines.....
- ***************************************************/
-
-/*
- * This routine unholds page table pages, and if the hold count
- * drops to zero, then it decrements the wire count.
- */
-static PMAP_INLINE int
-pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
-{
-
- --m->wire_count;
- if (m->wire_count == 0)
- return _pmap_unwire_pte_hold(pmap, m);
- else
- return 0;
-}
-
-static int
-_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
-{
- vm_offset_t pteva;
- /*
- * unmap the page table page
- */
- xen_pt_unpin(pmap->pm_pdir[m->pindex]);
- PD_CLEAR_VA(pmap, &pmap->pm_pdir[m->pindex], TRUE);
- --pmap->pm_stats.resident_count;
-
- /*
- * Do an invltlb to make the invalidated mapping
- * take effect immediately.
- */
- pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex);
- pmap_invalidate_page(pmap, pteva);
-
- vm_page_free_zero(m);
- atomic_subtract_int(&cnt.v_wire_count, 1);
- return 1;
-}
-
-/*
- * After removing a page table entry, this routine is used to
- * conditionally free the page, and manage the hold/wire counts.
- */
-static int
-pmap_unuse_pt(pmap_t pmap, vm_offset_t va)
-{
- pd_entry_t ptepde;
- vm_page_t mpte;
-
- if (va >= VM_MAXUSER_ADDRESS)
- return 0;
- ptepde = PT_GET(pmap_pde(pmap, va));
- mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME);
- return pmap_unwire_pte_hold(pmap, mpte);
-}
-
-void
-pmap_pinit0(pmap)
- struct pmap *pmap;
-{
-
- PMAP_LOCK_INIT(pmap);
- pmap->pm_pdir = (pd_entry_t *)(xen_start_info->pt_base);
-#ifdef PAE
- pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
-#endif
- pmap->pm_active = 0;
- PCPU_SET(curpmap, pmap);
- TAILQ_INIT(&pmap->pm_pvlist);
- bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
- mtx_lock_spin(&allpmaps_lock);
- LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
- mtx_unlock_spin(&allpmaps_lock);
-}
-
-/*
- * Initialize a preallocated and zeroed pmap structure,
- * such as one in a vmspace structure.
- */
-void
-pmap_pinit(struct pmap *pmap)
-{
- vm_page_t m, ptdpg[NPGPTD*2];
- vm_paddr_t ma, ma_shadow;
- static int color;
- int i;
-
- PMAP_LOCK_INIT(pmap);
-
- /*
- * No need to allocate page table space yet but we do need a valid
- * page directory table.
- */
- if (pmap->pm_pdir == NULL) {
- pmap->pm_pdir = (pd_entry_t *)kmem_alloc_nofault(kernel_map,
- NBPTD);
-#ifdef PAE
- pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO);
- KASSERT(((vm_offset_t)pmap->pm_pdpt &
- ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0,
- ("pmap_pinit: pdpt misaligned"));
- KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30),
- ("pmap_pinit: pdpt above 4g"));
-#endif
- }
-
- /*
- * allocate the page directory page(s)
- */
- for (i = 0; i < NPGPTD*2;) {
- m = vm_page_alloc(NULL, color++,
- VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
- VM_ALLOC_ZERO);
- if (m == NULL)
- VM_WAIT;
- else {
- pmap_zero_page(m);
- ptdpg[i++] = m;
- }
- }
-#ifdef PAE
- #error "missing shadow handling for PAE"
-#endif
-
- pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD);
-
- mtx_lock_spin(&allpmaps_lock);
- LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
- mtx_unlock_spin(&allpmaps_lock);
- /* Wire in kernel global address entries. */
- /* XXX copies current process, does not fill in MPPTDI */
- bcopy(kernel_pmap->pm_pdir + KPTDI, pmap->pm_pdir + KPTDI,
- nkpt * sizeof(pd_entry_t));
- /* XXX need to copy global address entries to page directory's L1 shadow */
- ma = xpmap_ptom(VM_PAGE_TO_PHYS(ptdpg[NPGPTD]));
- /* L1 pin shadow page director{y,ies} */
- for (i = 0; i < NPGPTD; i++) {
- ma = xpmap_ptom(VM_PAGE_TO_PHYS(ptdpg[NPGPTD + i]));
- pmap_copy_ma(kernel_pmap->pm_pdir[PTDPTDI + i] & ~(PG_RW|PG_M), ma);
- xen_pt_pin(ma);
- }
-
-#ifdef SMP
- pmap->pm_pdir[MPPTDI] = kernel_pmap->pm_pdir[MPPTDI];
-#endif
-
- /* pin and install L1 shadow */
- for (i = 0; i < NPGPTD; i++) {
- ma = xpmap_ptom(VM_PAGE_TO_PHYS(ptdpg[i]));
- ma_shadow = xpmap_ptom(VM_PAGE_TO_PHYS(ptdpg[NPGPTD+i]));
- /* re-map page directory read-only and pin */
- PT_SET_MA(pmap->pm_pdir + i*PAGE_SIZE, ma | PG_V | PG_A);
- xen_pgd_pin(ma);
- /* add L1 shadow of L2 */
- xen_queue_pt_update(vtomach(&pmap->pm_pdir[PTDPTDI + i]),
- ma_shadow | PG_V | PG_A);
- xen_queue_pt_update(ma_shadow + PTDPTDI*sizeof(vm_paddr_t),
- vtomach(pmap->pm_pdir) | PG_V | PG_A);
-
-#ifdef PAE
- #error "unsupported currently"
- pmap->pm_pdpt[i] = ma | PG_V;
-#endif
- }
- xen_flush_queue();
-
- pmap->pm_active = 0;
- TAILQ_INIT(&pmap->pm_pvlist);
- bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
-}
-
-/*
- * this routine is called if the page table page is not
- * mapped correctly.
- */
-static vm_page_t
-_pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags)
-{
- vm_paddr_t ptepa;
- vm_page_t m;
-
- KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
- (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
- ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
-
- /*
- * Allocate a page table page.
- */
- if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
- VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
- if (flags & M_WAITOK) {
- PMAP_UNLOCK(pmap);
- vm_page_unlock_queues();
- VM_WAIT;
- vm_page_lock_queues();
- PMAP_LOCK(pmap);
- }
-
- /*
- * Indicate the need to retry. While waiting, the page table
- * page may have been allocated.
- */
- return (NULL);
- }
- if ((m->flags & PG_ZERO) == 0)
- pmap_zero_page(m);
-
- /*
- * Map the pagetable page into the process address space, if
- * it isn't already there.
- */
-
- pmap->pm_stats.resident_count++;
-
- ptepa = VM_PAGE_TO_PHYS(m);
- xen_pt_pin(xpmap_ptom(ptepa));
- PD_SET_VA(pmap, &pmap->pm_pdir[ptepindex],
- (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M), TRUE);
-
- return m;
-}
-
-static vm_page_t
-pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
-{
- unsigned ptepindex;
- pd_entry_t ptepa;
- vm_page_t m;
-
- KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
- (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
- ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
-
- /*
- * Calculate pagetable page index
- */
- ptepindex = va >> PDRSHIFT;
-retry:
- /*
- * Get the page directory entry
- */
- ptepa = PT_GET(&pmap->pm_pdir[ptepindex]);
-
- /*
- * This supports switching from a 4MB page to a
- * normal 4K page.
- */
- if (ptepa & PG_PS) {
- pmap->pm_pdir[ptepindex] = 0;
- ptepa = 0;
- pmap_invalidate_all(kernel_pmap);
- }
-
- /*
- * If the page table page is mapped, we just increment the
- * hold count, and activate it.
- */
- if (ptepa) {
- m = PHYS_TO_VM_PAGE(ptepa);
- m->wire_count++;
- } else {
- /*
- * Here if the pte page isn't mapped, or if it has
- * been deallocated.
- */
- m = _pmap_allocpte(pmap, ptepindex, flags);
- if (m == NULL && (flags & M_WAITOK))
- goto retry;
- }
- return (m);
-}
-
-
-/***************************************************
-* Pmap allocation/deallocation routines.
- ***************************************************/
-
-#ifdef SMP
-/*
- * Deal with a SMP shootdown of other users of the pmap that we are
- * trying to dispose of. This can be a bit hairy.
- */
-static u_int *lazymask;
-static u_int lazyptd;
-static volatile u_int lazywait;
-
-
-void
-pmap_lazyfix_action(void)
-{
- u_int mymask = PCPU_GET(cpumask);
-
- if (PCPU_GET(curpcb)->pcb_cr3 == lazyptd)
- load_cr3(PCPU_GET(curpcb)->pcb_cr3);
- atomic_clear_int(lazymask, mymask);
- atomic_store_rel_int(&lazywait, 1);
-}
-
-static void
-pmap_lazyfix_self(u_int mymask)
-{
-
- if (PCPU_GET(curpcb)->pcb_cr3 == lazyptd)
- load_cr3(PCPU_GET(curpcb)->pcb_cr3);
- atomic_clear_int(lazymask, mymask);
-}
-
-
-static void
-pmap_lazyfix(pmap_t pmap)
-{
- u_int mymask = PCPU_GET(cpumask);
- u_int mask;
- register u_int spins;
-
- while ((mask = pmap->pm_active) != 0) {
- spins = 50000000;
- mask = mask & -mask; /* Find least significant set bit */
- mtx_lock_spin(&smp_rv_mtx);
-#ifdef PAE
- lazyptd = vtophys(pmap->pm_pdpt);
-#else
- lazyptd = vtophys(pmap->pm_pdir);
-#endif
- if (mask == mymask) {
- lazymask = &pmap->pm_active;
- pmap_lazyfix_self(mymask);
- } else {
- atomic_store_rel_int((u_int *)&lazymask,
- (u_int)&pmap->pm_active);
- atomic_store_rel_int(&lazywait, 0);
- ipi_selected(mask, IPI_LAZYPMAP);
- while (lazywait == 0) {
- ia32_pause();
- if (--spins == 0)
- break;
- }
- }
- mtx_unlock_spin(&smp_rv_mtx);
- if (spins == 0)
- printf("pmap_lazyfix: spun for 50000000\n");
- }
-}
-
-#else /* SMP */
-
-/*
- * Cleaning up on uniprocessor is easy. For various reasons, we're
- * unlikely to have to even execute this code, including the fact
- * that the cleanup is deferred until the parent does a wait(2), which
- * means that another userland process has run.
- */
-static void
-pmap_lazyfix(pmap_t pmap)
-{
- u_int cr3;
-
- cr3 = vtophys(pmap->pm_pdir);
- if (cr3 == PCPU_GET(curpcb)->pcb_cr3) {
- load_cr3(PCPU_GET(curpcb)->pcb_cr3);
- pmap->pm_active &= ~(PCPU_GET(cpumask));
- }
-}
-#endif /* SMP */
-
-/*
- * Release any resources held by the given physical map.
- * Called when a pmap initialized by pmap_pinit is being released.
- * Should only be called if the map contains no valid mappings.
- */
-void
-pmap_release(pmap_t pmap)
-{
- vm_page_t m, ptdpg[NPGPTD + 1];
- vm_paddr_t ma;
- int i;
-
- KASSERT(pmap->pm_stats.resident_count == 0,
- ("pmap_release: pmap resident count %ld != 0",
- pmap->pm_stats.resident_count));
-
- pmap_lazyfix(pmap);
- mtx_lock_spin(&allpmaps_lock);
- LIST_REMOVE(pmap, pm_list);
- mtx_unlock_spin(&allpmaps_lock);
-
- for (i = 0; i < NPGPTD; i++) {
- ptdpg[i] = PHYS_TO_VM_PAGE(PT_GET(&pmap->pm_pdir[PTDPTDI + i]));
- }
- ptdpg[NPGPTD] = PHYS_TO_VM_PAGE(vtophys(pmap->pm_pdir));
- for (i = 0; i < nkpt + NPGPTD; i++)
- PD_CLEAR_VA(pmap, &pmap->pm_pdir[PTDPTDI + i], FALSE);
-
- bzero(pmap->pm_pdir + PTDPTDI, (nkpt + NPGPTD) *
- sizeof(*pmap->pm_pdir));
-#ifdef SMP
- PD_CLEAR_VA(pmap, &pmap->pm_pdir[MPPTDI], FALSE);
-#endif
- PT_UPDATES_FLUSH();
- pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD);
-
- vm_page_lock_queues();
- for (i = 0; i < NPGPTD + 1; i++) {
- m = ptdpg[i];
-
- ma = xpmap_ptom(VM_PAGE_TO_PHYS(m));
- /* unpinning L1 and L2 treated the same */
- xen_pgd_unpin(ma);
-#ifdef PAE
- KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME),
- ("pmap_release: got wrong ptd page"));
-#endif
- m->wire_count--;
- atomic_subtract_int(&cnt.v_wire_count, 1);
-
- vm_page_free_zero(m);
- }
- vm_page_unlock_queues();
- PMAP_LOCK_DESTROY(pmap);
-}
-
-static int
-kvm_size(SYSCTL_HANDLER_ARGS)
-{
- unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE;
-
- return sysctl_handle_long(oidp, &ksize, 0, req);
-}
-SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD,
- 0, 0, kvm_size, "IU", "Size of KVM");
-
-static int
-kvm_free(SYSCTL_HANDLER_ARGS)
-{
- unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
-
- return sysctl_handle_long(oidp, &kfree, 0, req);
-}
-SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
- 0, 0, kvm_free, "IU", "Amount of KVM free");
-
-/*
- * grow the number of kernel page table entries, if needed
- */
-void
-pmap_growkernel(vm_offset_t addr)
-{
- struct pmap *pmap;
- vm_paddr_t ptppaddr;
- vm_page_t nkpg;
- pd_entry_t newpdir;
- pt_entry_t *pde;
-
- mtx_assert(&kernel_map->system_mtx, MA_OWNED);
- if (kernel_vm_end == 0) {
- kernel_vm_end = KERNBASE;
- nkpt = 0;
- while (pdir_pde(PTD, kernel_vm_end)) {
- kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
- nkpt++;
- }
- }
- addr = roundup2(addr, PAGE_SIZE * NPTEPG);
- while (kernel_vm_end < addr) {
- if (pdir_pde(PTD, kernel_vm_end)) {
- kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
- continue;
- }
-
- /*
- * This index is bogus, but out of the way
- */
- nkpg = vm_page_alloc(NULL, nkpt,
- VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED);
- if (!nkpg)
- panic("pmap_growkernel: no memory to grow kernel");
-
- nkpt++;
-
- pmap_zero_page(nkpg);
- ptppaddr = VM_PAGE_TO_PHYS(nkpg);
- newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
- PD_SET_VA(kernel_pmap, &pdir_pde(kernel_pmap->pm_pdir, kernel_vm_end), newpdir, TRUE);
-
- mtx_lock_spin(&allpmaps_lock);
- LIST_FOREACH(pmap, &allpmaps, pm_list) {
- pde = pmap_pde(pmap, kernel_vm_end);
- PD_SET_VA(pmap, pde, newpdir, FALSE);
- }
- PT_UPDATES_FLUSH();
- mtx_unlock_spin(&allpmaps_lock);
- kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
- }
-}
-
-
-/***************************************************
- * page management routines.
- ***************************************************/
-
-/*
- * free the pv_entry back to the free list
- */
-static PMAP_INLINE void
-free_pv_entry(pv_entry_t pv)
-{
- pv_entry_count--;
- uma_zfree(pvzone, pv);
-}
-
-/*
- * get a new pv_entry, allocating a block from the system
- * when needed.
- * the memory allocation is performed bypassing the malloc code
- * because of the possibility of allocations at interrupt time.
- */
-static pv_entry_t
-get_pv_entry(void)
-{
- pv_entry_count++;
- if (pv_entry_high_water &&
- (pv_entry_count > pv_entry_high_water) &&
- (pmap_pagedaemon_waken == 0)) {
- pmap_pagedaemon_waken = 1;
- wakeup (&vm_pages_needed);
- }
- return uma_zalloc(pvzone, M_NOWAIT);
-}
-
-
-static int
-pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
-{
- pv_entry_t pv;
- int rtval;
-
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
- if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
- TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
- if (pmap == pv->pv_pmap && va == pv->pv_va)
- break;
- }
- } else {
- TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
- if (va == pv->pv_va)
- break;
- }
- }
-
- rtval = 0;
- if (pv) {
- rtval = pmap_unuse_pt(pmap, va);
- TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
- m->md.pv_list_count--;
- if (TAILQ_FIRST(&m->md.pv_list) == NULL)
- vm_page_flag_clear(m, PG_WRITEABLE);
-
- TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
- free_pv_entry(pv);
- }
-
- return rtval;
-}
-
-/*
- * Create a pv entry for page at pa for
- * (pmap, va).
- */
-static void
-pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
-{
- pv_entry_t pv;
- pv = get_pv_entry();
- pv->pv_va = va;
- pv->pv_pmap = pmap;
-
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
- TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
- TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
- m->md.pv_list_count++;
-}
-
-/*
- * pmap_remove_pte: do the things to unmap a page in a process
- */
-static int
-pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va)
-{
- pt_entry_t oldpte;
- vm_page_t m;
-
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- oldpte = pte_load_clear(ptq);
- if (oldpte & PG_W)
- pmap->pm_stats.wired_count -= 1;
- /*
- * Machines that don't support invlpg, also don't support
- * PG_G.
- */
- if (oldpte & PG_G)
- pmap_invalidate_page(kernel_pmap, va);
- pmap->pm_stats.resident_count -= 1;
- if (oldpte & PG_MANAGED) {
- m = PHYS_TO_VM_PAGE(oldpte);
- if (oldpte & PG_M) {
-#if defined(PMAP_DIAGNOSTIC)
- if (pmap_nw_modified((pt_entry_t) oldpte)) {
- printf(
- "pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n",
- va, oldpte);
- }
-#endif
- if (pmap_track_modified(va))
- vm_page_dirty(m);
- }
- if (oldpte & PG_A)
- vm_page_flag_set(m, PG_REFERENCED);
- return pmap_remove_entry(pmap, m, va);
- } else {
- return pmap_unuse_pt(pmap, va);
- }
-}
-
-/*
- * Remove a single page from a process address space
- */
-static void
-pmap_remove_page(pmap_t pmap, vm_offset_t va)
-{
- pt_entry_t *pte;
-
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
- KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- if ((pte = pmap_pte_quick(pmap, va)) == NULL || *pte == 0)
- return;
- pmap_remove_pte(pmap, pte, va);
- pmap_invalidate_page(pmap, va);
-}
-
-/*
- * Remove the given range of addresses from the specified map.
- *
- * It is assumed that the start and end are properly
- * rounded to the page size.
- */
-void
-pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
-{
- vm_offset_t pdnxt;
- pd_entry_t ptpaddr;
- pt_entry_t *pte;
- int anyvalid;
-
- /*
- * Perform an unsynchronized read. This is, however, safe.
- */
- if (pmap->pm_stats.resident_count == 0)
- return;
-
- anyvalid = 0;
-
- vm_page_lock_queues();
- sched_pin();
- PMAP_LOCK(pmap);
-
- /*
- * special handling of removing one page. a very
- * common operation and easy to short circuit some
- * code.
- */
- if ((sva + PAGE_SIZE == eva) &&
- ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) {
- pmap_remove_page(pmap, sva);
- goto out;
- }
-
- for (; sva < eva; sva = pdnxt) {
- unsigned pdirindex;
-
- /*
- * Calculate index for next page table.
- */
- pdnxt = (sva + NBPDR) & ~PDRMASK;
- if (pmap->pm_stats.resident_count == 0)
- break;
-
- pdirindex = sva >> PDRSHIFT;
- ptpaddr = PT_GET(&pmap->pm_pdir[pdirindex]);
-
- /*
- * Weed out invalid mappings. Note: we assume that the page
- * directory table is always allocated, and in kernel virtual.
- */
- if (ptpaddr == 0)
- continue;
-
- /*
- * Check for large page.
- */
- if ((ptpaddr & PG_PS) != 0) {
- PD_CLEAR_VA(pmap, &pmap->pm_pdir[pdirindex], TRUE);
- pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
- anyvalid = 1;
- continue;
- }
-
- /*
- * Limit our scan to either the end of the va represented
- * by the current page table page, or to the end of the
- * range being removed.
- */
- if (pdnxt > eva)
- pdnxt = eva;
-
- for (; sva != pdnxt; sva += PAGE_SIZE) {
- if ((pte = pmap_pte_quick(pmap, sva)) == NULL ||
- *pte == 0)
- continue;
- anyvalid = 1;
- if (pmap_remove_pte(pmap, pte, sva))
- break;
- }
- }
-out:
- sched_unpin();
- vm_page_unlock_queues();
- if (anyvalid)
- pmap_invalidate_all(pmap);
- PMAP_UNLOCK(pmap);
-}
-
-/*
- * Routine: pmap_remove_all
- * Function:
- * Removes this physical page from
- * all physical maps in which it resides.
- * Reflects back modify bits to the pager.
- *
- * Notes:
- * Original versions of this routine were very
- * inefficient because they iteratively called
- * pmap_remove (slow...)
- */
-
-void
-pmap_remove_all(vm_page_t m)
-{
- pv_entry_t pv;
- pt_entry_t *pte, tpte;
-
-#if defined(PMAP_DIAGNOSTIC)
- /*
- * XXX This makes pmap_remove_all() illegal for non-managed pages!
- */
- if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
- panic("pmap_remove_all: illegal for unmanaged page, va: 0x%x",
- VM_PAGE_TO_PHYS(m));
- }
-#endif
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
- sched_pin();
- while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
- PMAP_LOCK(pv->pv_pmap);
- pv->pv_pmap->pm_stats.resident_count--;
- pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
- tpte = pte_load_clear(pte);
- if (tpte & PG_W)
- pv->pv_pmap->pm_stats.wired_count--;
- if (tpte & PG_A)
- vm_page_flag_set(m, PG_REFERENCED);
-
- /*
- * Update the vm_page_t clean and reference bits.
- */
- if (tpte & PG_M) {
-#if defined(PMAP_DIAGNOSTIC)
- if (pmap_nw_modified((pt_entry_t) tpte)) {
- printf(
- "pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n",
- pv->pv_va, tpte);
- }
-#endif
- if (pmap_track_modified(pv->pv_va))
- vm_page_dirty(m);
- }
- pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
- TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
- TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
- m->md.pv_list_count--;
- pmap_unuse_pt(pv->pv_pmap, pv->pv_va);
- PMAP_UNLOCK(pv->pv_pmap);
- free_pv_entry(pv);
- }
- vm_page_flag_clear(m, PG_WRITEABLE);
- sched_unpin();
-}
-
-/*
- * Set the physical protection on the
- * specified range of this map as requested.
- */
-void
-pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
-{
- vm_offset_t pdnxt;
- pd_entry_t ptpaddr;
- int anychanged;
-
- if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
- pmap_remove(pmap, sva, eva);
- return;
- }
-
- if (prot & VM_PROT_WRITE)
- return;
-
- anychanged = 0;
-
- vm_page_lock_queues();
- sched_pin();
- PMAP_LOCK(pmap);
- for (; sva < eva; sva = pdnxt) {
- unsigned obits, pbits, pdirindex;
-
- pdnxt = (sva + NBPDR) & ~PDRMASK;
-
- pdirindex = sva >> PDRSHIFT;
- ptpaddr = PT_GET(&pmap->pm_pdir[pdirindex]);
-
- /*
- * Weed out invalid mappings. Note: we assume that the page
- * directory table is always allocated, and in kernel virtual.
- */
- if (ptpaddr == 0)
- continue;
-
- /*
- * Check for large page.
- */
- if ((ptpaddr & PG_PS) != 0) {
- pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW);
- pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
- anychanged = 1;
- continue;
- }
-
- if (pdnxt > eva)
- pdnxt = eva;
-
- for (; sva != pdnxt; sva += PAGE_SIZE) {
- pt_entry_t *pte;
- vm_page_t m;
-
- if ((pte = pmap_pte_quick(pmap, sva)) == NULL)
- continue;
-#ifdef notyet
-retry:
-#endif
- /*
- * Regardless of whether a pte is 32 or 64 bits in
- * size, PG_RW, PG_A, and PG_M are among the least
- * significant 32 bits.
- */
- obits = pbits = PT_GET(pte);
- if (pbits & PG_MANAGED) {
- m = NULL;
- if (pbits & PG_A) {
- m = PHYS_TO_VM_PAGE(pbits);
- vm_page_flag_set(m, PG_REFERENCED);
- pbits &= ~PG_A;
- }
- if ((pbits & PG_M) != 0 &&
- pmap_track_modified(sva)) {
- if (m == NULL)
- m = PHYS_TO_VM_PAGE(pbits);
- vm_page_dirty(m);
- }
- }
-
- pbits &= ~(PG_RW | PG_M);
-
- if (pbits != obits) {
-#ifdef notyet
- if (!atomic_cmpset_int((u_int *)pte, obits,
- pbits))
- goto retry;
-#endif
- PT_SET_VA(pte, pbits, FALSE);
- anychanged = 1;
- }
- }
- }
- sched_unpin();
- vm_page_unlock_queues();
- if (anychanged)
- pmap_invalidate_all(pmap);
- PMAP_UNLOCK(pmap);
-}
-
-/*
- * Insert the given physical page (p) at
- * the specified virtual address (v) in the
- * target physical map with the protection requested.
- *
- * If specified, the page will be wired down, meaning
- * that the related pte can not be reclaimed.
- *
- * NB: This is the only routine which MAY NOT lazy-evaluate
- * or lose information. That is, this routine must actually
- * insert this page into the given map NOW.
- */
-void
-pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
- boolean_t wired)
-{
- vm_paddr_t pa;
- register pt_entry_t *pte;
- vm_paddr_t opa;
- pt_entry_t origpte, newpte;
- vm_page_t mpte, om;
-
- va &= PG_FRAME;
-#ifdef PMAP_DIAGNOSTIC
- if (va > VM_MAX_KERNEL_ADDRESS)
- panic("pmap_enter: toobig");
- if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS))
- panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va);
-#endif
-
- mpte = NULL;
-
- vm_page_lock_queues();
- PMAP_LOCK(pmap);
- sched_pin();
-
- /*
- * In the case that a page table page is not
- * resident, we are creating it here.
- */
- if (va < VM_MAXUSER_ADDRESS) {
- mpte = pmap_allocpte(pmap, va, M_WAITOK);
- }
-#if 0 && defined(PMAP_DIAGNOSTIC)
- else {
- pd_entry_t *pdeaddr = pmap_pde(pmap, va);
- origpte = PT_GET(pdeaddr);
- if ((origpte & PG_V) == 0) {
- panic("pmap_enter: invalid kernel page table page, pdir=%p, pde=%p, va=%p\n",
- pmap->pm_pdir[PTDPTDI], origpte, va);
- }
- }
-#endif
-
- pte = pmap_pte_quick(pmap, va);
-
- /*
- * Page Directory table entry not valid, we need a new PT page
- */
- if (pte == NULL) {
- panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x\n",
- (uintmax_t)pmap->pm_pdir[PTDPTDI], va);
- }
-
- pa = VM_PAGE_TO_PHYS(m);
- om = NULL;
- origpte = PT_GET(pte);
- opa = origpte & PG_FRAME;
-
- if (origpte & PG_PS) {
- /*
- * Yes, I know this will truncate upper address bits for PAE,
- * but I'm actually more interested in the lower bits
- */
- printf("pmap_enter: va %p, pte %p, origpte %p\n",
- (void *)va, (void *)pte, (void *)(uintptr_t)origpte);
- panic("pmap_enter: attempted pmap_enter on 4MB page");
- }
-
- /*
- * Mapping has not changed, must be protection or wiring change.
- */
- if (origpte && (opa == pa)) {
- /*
- * Wiring change, just update stats. We don't worry about
- * wiring PT pages as they remain resident as long as there
- * are valid mappings in them. Hence, if a user page is wired,
- * the PT page will be also.
- */
- if (wired && ((origpte & PG_W) == 0))
- pmap->pm_stats.wired_count++;
- else if (!wired && (origpte & PG_W))
- pmap->pm_stats.wired_count--;
-
-#if defined(PMAP_DIAGNOSTIC)
- if (pmap_nw_modified((pt_entry_t) origpte)) {
- printf(
- "pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n",
- va, origpte);
- }
-#endif
-
- /*
- * Remove extra pte reference
- */
- if (mpte)
- mpte->wire_count--;
-
- /*
- * We might be turning off write access to the page,
- * so we go ahead and sense modify status.
- */
- if (origpte & PG_MANAGED) {
- om = m;
- pa |= PG_MANAGED;
- }
- goto validate;
- }
- /*
- * Mapping has changed, invalidate old range and fall through to
- * handle validating new mapping.
- */
- if (opa) {
- int err;
- if (origpte & PG_W)
- pmap->pm_stats.wired_count--;
- if (origpte & PG_MANAGED) {
- om = PHYS_TO_VM_PAGE(opa);
- err = pmap_remove_entry(pmap, om, va);
- } else
- err = pmap_unuse_pt(pmap, va);
- if (err)
- panic("pmap_enter: pte vanished, va: 0x%x", va);
- } else
- pmap->pm_stats.resident_count++;
-
- /*
- * Enter on the PV list if part of our managed memory. Note that we
- * raise IPL while manipulating pv_table since pmap_enter can be
- * called at interrupt time.
- */
- if (pmap_initialized &&
- (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
- pmap_insert_entry(pmap, va, m);
- pa |= PG_MANAGED;
- }
-
- /*
- * Increment counters
- */
- if (wired)
- pmap->pm_stats.wired_count++;
-
-validate:
- /*
- * Now validate mapping with desired protection/wiring.
- */
- newpte = (pt_entry_t)(pa | PG_V);
- if ((prot & VM_PROT_WRITE) != 0)
- newpte |= PG_RW;
- if (wired)
- newpte |= PG_W;
- if (va < VM_MAXUSER_ADDRESS)
- newpte |= PG_U;
- if (pmap == kernel_pmap)
- newpte |= pgeflag;
-
- /*
- * if the mapping or permission bits are different, we need
- * to update the pte.
- */
- if ((origpte & ~(PG_M|PG_A)) != newpte) {
- if (origpte & PG_MANAGED) {
- origpte = PT_GET(pte);
- PT_SET_VA(pte, newpte | PG_A, TRUE);
- if ((origpte & PG_M) && pmap_track_modified(va))
- vm_page_dirty(om);
- if (origpte & PG_A)
- vm_page_flag_set(om, PG_REFERENCED);
- } else
- PT_SET_VA(pte, newpte | PG_A, TRUE);
- if (origpte) {
- pmap_invalidate_page(pmap, va);
- }
- }
- sched_unpin();
- vm_page_unlock_queues();
- PMAP_UNLOCK(pmap);
-}
-
-/*
- * this code makes some *MAJOR* assumptions:
- * 1. Current pmap & pmap exists.
- * 2. Not wired.
- * 3. Read access.
- * 4. No page table pages.
- * 5. Tlbflush is deferred to calling procedure.
- * 6. Page IS managed.
- * but is *MUCH* faster than pmap_enter...
- */
-
-vm_page_t
-pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte)
-{
- pt_entry_t *pte;
- vm_paddr_t pa;
-
- vm_page_lock_queues();
- PMAP_LOCK(pmap);
-
- /*
- * In the case that a page table page is not
- * resident, we are creating it here.
- */
- if (va < VM_MAXUSER_ADDRESS) {
- unsigned ptepindex;
- pd_entry_t ptepa;
-
- /*
- * Calculate pagetable page index
- */
- ptepindex = va >> PDRSHIFT;
- if (mpte && (mpte->pindex == ptepindex)) {
- mpte->wire_count++;
- } else {
-retry:
- /*
- * Get the page directory entry
- */
- ptepa = PT_GET(&pmap->pm_pdir[ptepindex]);
-
- /*
- * If the page table page is mapped, we just increment
- * the hold count, and activate it.
- */
- if (ptepa) {
- if (ptepa & PG_PS)
- panic("pmap_enter_quick: unexpected mapping into 4MB page");
- mpte = PHYS_TO_VM_PAGE(ptepa);
- mpte->wire_count++;
- } else {
- mpte = _pmap_allocpte(pmap, ptepindex,
- M_WAITOK);
- if (mpte == NULL)
- goto retry;
- }
- }
- } else {
- mpte = NULL;
- }
-
- /*
- * This call to vtopte makes the assumption that we are
- * entering the page into the current pmap. In order to support
- * quick entry into any pmap, one would likely use pmap_pte_quick.
- * But that isn't as quick as vtopte.
- */
- pte = vtopte(va);
- if (PT_GET(pte)) {
- if (mpte != NULL) {
- pmap_unwire_pte_hold(pmap, mpte);
- mpte = NULL;
- }
- goto out;
- }
-
- /*
- * Enter on the PV list if part of our managed memory. Note that we
- * raise IPL while manipulating pv_table since pmap_enter can be
- * called at interrupt time.
- */
- if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0)
- pmap_insert_entry(pmap, va, m);
-
- /*
- * Increment counters
- */
- pmap->pm_stats.resident_count++;
-
- pa = VM_PAGE_TO_PHYS(m);
-
- /*
- * Now validate mapping with RO protection
- */
- if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
- pte_store(pte, pa | PG_V | PG_U);
- else
- pte_store(pte, pa | PG_V | PG_U | PG_MANAGED);
-out:
- vm_page_unlock_queues();
- PMAP_UNLOCK(pmap);
- return mpte;
-}
-
-/*
- * Make a temporary mapping for a physical address. This is only intended
- * to be used for panic dumps.
- */
-void *
-pmap_kenter_temporary(vm_paddr_t pa, int i)
-{
- vm_offset_t va;
-
- va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
- pmap_kenter(va, pa);
-#ifndef I386_CPU
- invlpg(va);
-#else
- invltlb();
-#endif
- return ((void *)crashdumpmap);
-}
-
-/*
- * This code maps large physical mmap regions into the
- * processor address space. Note that some shortcuts
- * are taken, but the code works.
- */
-void
-pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
- vm_object_t object, vm_pindex_t pindex,
- vm_size_t size)
-{
- vm_page_t p;
-
- VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
- KASSERT(object->type == OBJT_DEVICE,
- ("pmap_object_init_pt: non-device object"));
- if (pseflag &&
- ((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) {
- int i;
- vm_page_t m[1];
- unsigned int ptepindex;
- int npdes;
- pd_entry_t ptepa;
-
- PMAP_LOCK(pmap);
- if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)])
- goto out;
- PMAP_UNLOCK(pmap);
-retry:
- p = vm_page_lookup(object, pindex);
- if (p != NULL) {
- vm_page_lock_queues();
- if (vm_page_sleep_if_busy(p, FALSE, "init4p"))
- goto retry;
- } else {
- p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
- if (p == NULL)
- return;
- m[0] = p;
-
- if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) {
- vm_page_lock_queues();
- vm_page_free(p);
- vm_page_unlock_queues();
- return;
- }
-
- p = vm_page_lookup(object, pindex);
- vm_page_lock_queues();
- vm_page_wakeup(p);
- }
- vm_page_unlock_queues();
-
- ptepa = VM_PAGE_TO_PHYS(p);
- if (ptepa & (NBPDR - 1))
- return;
-
- p->valid = VM_PAGE_BITS_ALL;
-
- PMAP_LOCK(pmap);
- pmap->pm_stats.resident_count += size >> PAGE_SHIFT;
- npdes = size >> PDRSHIFT;
- for(i = 0; i < npdes; i++) {
- PD_SET_VA(pmap, &pmap->pm_pdir[ptepindex],
- ptepa | PG_U | PG_RW | PG_V | PG_PS, FALSE);
- ptepa += NBPDR;
- ptepindex += 1;
- }
- pmap_invalidate_all(pmap);
-out:
- PMAP_UNLOCK(pmap);
- }
-}
-
-void
-pmap_map_readonly(pmap_t pmap, vm_offset_t va, int len)
-{
- int i, npages = round_page(len) >> PAGE_SHIFT;
- for (i = 0; i < npages; i++) {
- pt_entry_t *pte;
- pte = pmap_pte(pmap, (vm_offset_t)(va + i*PAGE_SIZE));
- pte_store(pte, xpmap_mtop(*pte & ~(PG_RW|PG_M)));
- PMAP_MARK_PRIV(xpmap_mtop(*pte));
- pmap_pte_release(pte);
- }
- PT_UPDATES_FLUSH();
-}
-
-void
-pmap_map_readwrite(pmap_t pmap, vm_offset_t va, int len)
-{
- int i, npages = round_page(len) >> PAGE_SHIFT;
- for (i = 0; i < npages; i++) {
- pt_entry_t *pte;
- pte = pmap_pte(pmap, (vm_offset_t)(va + i*PAGE_SIZE));
- PMAP_MARK_UNPRIV(xpmap_mtop(*pte));
- pte_store(pte, xpmap_mtop(*pte) | (PG_RW|PG_M));
- pmap_pte_release(pte);
- }
- PT_UPDATES_FLUSH();
-}
-
-/*
- * Routine: pmap_change_wiring
- * Function: Change the wiring attribute for a map/virtual-address
- * pair.
- * In/out conditions:
- * The mapping must already exist in the pmap.
- */
-void
-pmap_change_wiring(pmap, va, wired)
- register pmap_t pmap;
- vm_offset_t va;
- boolean_t wired;
-{
- register pt_entry_t *pte;
-
- PMAP_LOCK(pmap);
- pte = pmap_pte(pmap, va);
-
- if (wired && !pmap_pte_w(pte))
- pmap->pm_stats.wired_count++;
- else if (!wired && pmap_pte_w(pte))
- pmap->pm_stats.wired_count--;
-
- /*
- * Wiring is not a hardware characteristic so there is no need to
- * invalidate TLB.
- */
- pmap_pte_set_w(pte, wired);
- pmap_pte_release(pte);
- PMAP_UNLOCK(pmap);
-}
-
-
-
-/*
- * Copy the range specified by src_addr/len
- * from the source map to the range dst_addr/len
- * in the destination map.
- *
- * This routine is only advisory and need not do anything.
- */
-
-void
-pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
- vm_offset_t src_addr)
-{
- vm_offset_t addr;
- vm_offset_t end_addr = src_addr + len;
- vm_offset_t pdnxt;
- vm_page_t m;
-
- if (dst_addr != src_addr)
- return;
-
- if (!pmap_is_current(src_pmap))
- return;
-
- vm_page_lock_queues();
- if (dst_pmap < src_pmap) {
- PMAP_LOCK(dst_pmap);
- PMAP_LOCK(src_pmap);
- } else {
- PMAP_LOCK(src_pmap);
- PMAP_LOCK(dst_pmap);
- }
- sched_pin();
- for (addr = src_addr; addr < end_addr; addr = pdnxt) {
- pt_entry_t *src_pte, *dst_pte;
- vm_page_t dstmpte, srcmpte;
- pd_entry_t srcptepaddr;
- unsigned ptepindex;
-
- if (addr >= UPT_MIN_ADDRESS)
- panic("pmap_copy: invalid to pmap_copy page tables");
-
- /*
- * Don't let optional prefaulting of pages make us go
- * way below the low water mark of free pages or way
- * above high water mark of used pv entries.
- */
- if (cnt.v_free_count < cnt.v_free_reserved ||
- pv_entry_count > pv_entry_high_water)
- break;
-
- pdnxt = (addr + NBPDR) & ~PDRMASK;
- ptepindex = addr >> PDRSHIFT;
-
- srcptepaddr = PT_GET(&src_pmap->pm_pdir[ptepindex]);
- if (srcptepaddr == 0)
- continue;
-
- if (srcptepaddr & PG_PS) {
- if (dst_pmap->pm_pdir[ptepindex] == 0) {
- PD_SET_VA(dst_pmap, &dst_pmap->pm_pdir[ptepindex], srcptepaddr, TRUE);
- dst_pmap->pm_stats.resident_count +=
- NBPDR / PAGE_SIZE;
- }
- continue;
- }
-
- srcmpte = PHYS_TO_VM_PAGE(srcptepaddr);
- if (srcmpte->wire_count == 0)
- panic("pmap_copy: source page table page is unused");
-
- if (pdnxt > end_addr)
- pdnxt = end_addr;
-
- src_pte = vtopte(addr);
- while (addr < pdnxt) {
- pt_entry_t ptetemp;
- ptetemp = PT_GET(src_pte);
- /*
- * we only virtual copy managed pages
- */
- if ((ptetemp & PG_MANAGED) != 0) {
- /*
- * We have to check after allocpte for the
- * pte still being around... allocpte can
- * block.
- */
- dstmpte = pmap_allocpte(dst_pmap, addr,
- M_NOWAIT);
- if (dstmpte == NULL)
- break;
- dst_pte = pmap_pte_quick(dst_pmap, addr);
- if (*dst_pte == 0) {
- /*
- * Clear the modified and
- * accessed (referenced) bits
- * during the copy.
- */
- m = PHYS_TO_VM_PAGE(ptetemp);
- PT_SET_VA(dst_pte, ptetemp & ~(PG_M | PG_A), FALSE);
- dst_pmap->pm_stats.resident_count++;
- pmap_insert_entry(dst_pmap, addr, m);
- } else
- pmap_unwire_pte_hold(dst_pmap, dstmpte);
- if (dstmpte->wire_count >= srcmpte->wire_count)
- break;
- }
- addr += PAGE_SIZE;
- src_pte++;
- }
- }
- PT_UPDATES_FLUSH();
- sched_unpin();
- vm_page_unlock_queues();
- PMAP_UNLOCK(src_pmap);
- PMAP_UNLOCK(dst_pmap);
-}
-
-static __inline void
-pagezero(void *page)
-{
-#if defined(I686_CPU)
- if (cpu_class == CPUCLASS_686) {
-#if defined(CPU_ENABLE_SSE)
- if (cpu_feature & CPUID_SSE2)
- sse2_pagezero(page);
- else
-#endif
- i686_pagezero(page);
- } else
-#endif
- bzero(page, PAGE_SIZE);
-}
-
-/*
- * pmap_zero_page zeros the specified hardware page by mapping
- * the page into KVM and using bzero to clear its contents.
- */
-void
-pmap_zero_page(vm_page_t m)
-{
-
- mtx_lock(&CMAPCADDR12_lock);
- if (*CMAP2)
- panic("pmap_zero_page: CMAP2 busy");
- sched_pin();
- PT_SET_VA(CMAP2, PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M,
FALSE);
- invlcaddr(CADDR2);
- pagezero(CADDR2);
- PT_CLEAR_VA(CMAP2, TRUE);
- sched_unpin();
- mtx_unlock(&CMAPCADDR12_lock);
-}
-
-/*
- * pmap_zero_page_area zeros the specified hardware page by mapping
- * the page into KVM and using bzero to clear its contents.
- *
- * off and size may not cover an area beyond a single hardware page.
- */
-void
-pmap_zero_page_area(vm_page_t m, int off, int size)
-{
-
- mtx_lock(&CMAPCADDR12_lock);
- if (*CMAP2)
- panic("pmap_zero_page: CMAP2 busy");
- sched_pin();
- PT_SET_VA(CMAP2, PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M,
FALSE);
- invlcaddr(CADDR2);
- if (off == 0 && size == PAGE_SIZE)
- pagezero(CADDR2);
- else
- bzero((char *)CADDR2 + off, size);
- PT_CLEAR_VA(CMAP2, TRUE);
- sched_unpin();
- mtx_unlock(&CMAPCADDR12_lock);
-}
-
-/*
- * pmap_zero_page_idle zeros the specified hardware page by mapping
- * the page into KVM and using bzero to clear its contents. This
- * is intended to be called from the vm_pagezero process only and
- * outside of Giant.
- */
-void
-pmap_zero_page_idle(vm_page_t m)
-{
-
- if (*CMAP3)
- panic("pmap_zero_page: CMAP3 busy");
- sched_pin();
- PT_SET_VA(CMAP3, PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M, TRUE);
- invlcaddr(CADDR3);
- pagezero(CADDR3);
- PT_CLEAR_VA(CMAP3, TRUE);
- sched_unpin();
-}
-
-/*
- * pmap_copy_page copies the specified (machine independent)
- * page by mapping the page into virtual memory and using
- * bcopy to copy the page, one machine dependent page at a
- * time.
- */
-void
-pmap_copy_page(vm_page_t src, vm_page_t dst)
-{
-
- mtx_lock(&CMAPCADDR12_lock);
- if (*CMAP1)
- panic("pmap_copy_page: CMAP1 busy");
- if (*CMAP2)
- panic("pmap_copy_page: CMAP2 busy");
- sched_pin();
-#ifdef I386_CPU
- invltlb();
-#else
- invlpg((u_int)CADDR1);
- invlpg((u_int)CADDR2);
-#endif
- PT_SET_VA(CMAP1, PG_V | VM_PAGE_TO_PHYS(src) | PG_A, FALSE);
- PT_SET_VA(CMAP2, PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M,
TRUE);
-
- bcopy(CADDR1, CADDR2, PAGE_SIZE);
- PT_CLEAR_VA(CMAP1, FALSE);
- PT_CLEAR_VA(CMAP2, TRUE);
- sched_unpin();
- mtx_unlock(&CMAPCADDR12_lock);
-}
-
-void
-pmap_copy_ma(vm_paddr_t src, vm_paddr_t dst)
-{
-
- mtx_lock(&CMAPCADDR12_lock);
- if (*CMAP1)
- panic("pmap_copy_ma: CMAP1 busy");
- if (*CMAP2)
- panic("pmap_copy_ma: CMAP2 busy");
- sched_pin();
-#ifdef I386_CPU
- invltlb();
-#else
- invlpg((u_int)CADDR1);
- invlpg((u_int)CADDR2);
-#endif
- PT_SET_VA_MA(CMAP1, PG_V | src | PG_A, FALSE);
- PT_SET_VA_MA(CMAP2, PG_V | PG_RW | dst | PG_A | PG_M, TRUE);
-
- bcopy(CADDR1, CADDR2, PAGE_SIZE);
- PT_CLEAR_VA(CMAP1, FALSE);
- PT_CLEAR_VA(CMAP2, TRUE);
- sched_unpin();
- mtx_unlock(&CMAPCADDR12_lock);
-}
-
-/*
- * Returns true if the pmap's pv is one of the first
- * 16 pvs linked to from this page. This count may
- * be changed upwards or downwards in the future; it
- * is only necessary that true be returned for a small
- * subset of pmaps for proper page aging.
- */
-boolean_t
-pmap_page_exists_quick(pmap, m)
- pmap_t pmap;
- vm_page_t m;
-{
- pv_entry_t pv;
- int loops = 0;
-
- if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
- return FALSE;
-
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
- TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
- if (pv->pv_pmap == pmap) {
- return TRUE;
- }
- loops++;
- if (loops >= 16)
- break;
- }
- return (FALSE);
-}
-
-#define PMAP_REMOVE_PAGES_CURPROC_ONLY
-/*
- * Remove all pages from specified address space
- * this aids process exit speeds. Also, this code
- * is special cased for current process only, but
- * can have the more generic (and slightly slower)
- * mode enabled. This is much faster than pmap_remove
- * in the case of running down an entire address space.
- */
-void
-pmap_remove_pages(pmap, sva, eva)
- pmap_t pmap;
- vm_offset_t sva, eva;
-{
- pt_entry_t *pte, tpte;
- vm_page_t m;
- pv_entry_t pv, npv;
-
-#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
- if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
- printf("warning: pmap_remove_pages called with non-current
pmap\n");
- return;
- }
-#endif
- vm_page_lock_queues();
- PMAP_LOCK(pmap);
- sched_pin();
-
- for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
- if (pv->pv_va >= eva || pv->pv_va < sva) {
- npv = TAILQ_NEXT(pv, pv_plist);
- continue;
- }
-
-#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
- pte = vtopte(pv->pv_va);
-#else
- pte = pmap_pte_quick(pmap, pv->pv_va);
-#endif
- tpte = PT_GET(pte);
-
- if (tpte == 0) {
- printf("TPTE at %p IS ZERO @ VA %08x\n",
- pte, pv->pv_va);
- panic("bad pte");
- }
-
-/*
- * We cannot remove wired pages from a process' mapping at this time
- */
- if (tpte & PG_W) {
- npv = TAILQ_NEXT(pv, pv_plist);
- continue;
- }
-
- m = PHYS_TO_VM_PAGE(tpte);
- KASSERT(m->phys_addr == (tpte & PG_FRAME),
- ("vm_page_t %p phys_addr mismatch %016jx %016jx",
- m, (uintmax_t)m->phys_addr, (uintmax_t)tpte));
-
- KASSERT(m < &vm_page_array[vm_page_array_size],
- ("pmap_remove_pages: bad tpte %#jx", (uintmax_t)tpte));
-
- pmap->pm_stats.resident_count--;
-
- pte_clear(pte);
-
- /*
- * Update the vm_page_t clean and reference bits.
- */
- if (tpte & PG_M) {
- vm_page_dirty(m);
- }
-
- npv = TAILQ_NEXT(pv, pv_plist);
- TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
-
- m->md.pv_list_count--;
- TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
- if (TAILQ_EMPTY(&m->md.pv_list))
- vm_page_flag_clear(m, PG_WRITEABLE);
-
- pmap_unuse_pt(pmap, pv->pv_va);
- free_pv_entry(pv);
- }
- sched_unpin();
- pmap_invalidate_all(pmap);
- PMAP_UNLOCK(pmap);
- vm_page_unlock_queues();
-}
-
-/*
- * pmap_is_modified:
- *
- * Return whether or not the specified physical page was modified
- * in any physical maps.
- */
-boolean_t
-pmap_is_modified(vm_page_t m)
-{
- pv_entry_t pv;
- pt_entry_t *pte;
- boolean_t rv;
-
- rv = FALSE;
- if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
- return (rv);
-
- sched_pin();
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
- TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
- /*
- * if the bit being tested is the modified bit, then
- * mark clean_map and ptes as never
- * modified.
- */
- if (!pmap_track_modified(pv->pv_va))
- continue;
-#if defined(PMAP_DIAGNOSTIC)
- if (!pv->pv_pmap) {
- printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va);
- continue;
- }
-#endif
- PMAP_LOCK(pv->pv_pmap);
- pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
- rv = (*pte & PG_M) != 0;
- PMAP_UNLOCK(pv->pv_pmap);
- if (rv)
- break;
- }
- sched_unpin();
- return (rv);
-}
-
-/*
- * pmap_is_prefaultable:
- *
- * Return whether or not the specified virtual address is elgible
- * for prefault.
- */
-boolean_t
-pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
-{
- pt_entry_t *pte;
- boolean_t rv;
-
- rv = FALSE;
- /* XXX
- * in order for writable pagetables to help,
- * this has to work - check if we aren't doing
- * an invlpg on the page tables linear mappings
- */
- return (rv);
- PMAP_LOCK(pmap);
- if (pmap_pde(pmap, addr)) {
- pte = vtopte(addr);
- rv = *pte == 0;
- }
- PMAP_UNLOCK(pmap);
- return (rv);
-}
-
-/*
- * Clear the given bit in each of the given page's ptes. The bit is
- * expressed as a 32-bit mask. Consequently, if the pte is 64 bits in
- * size, only a bit within the least significant 32 can be cleared.
- */
-static __inline void
-pmap_clear_ptes(vm_page_t m, int bit)
-{
- register pv_entry_t pv;
- pt_entry_t pbits, *pte;
-
- if (!pmap_initialized || (m->flags & PG_FICTITIOUS) ||
- (bit == PG_RW && (m->flags & PG_WRITEABLE) == 0))
- return;
-
- sched_pin();
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
- /*
- * Loop over all current mappings setting/clearing as appropos If
- * setting RO do we need to clear the VAC?
- */
- TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
- /*
- * don't write protect pager mappings
- */
- if (bit == PG_RW) {
- if (!pmap_track_modified(pv->pv_va))
- continue;
- }
-
-#if defined(PMAP_DIAGNOSTIC)
- if (!pv->pv_pmap) {
- printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va);
- continue;
- }
-#endif
-
- PMAP_LOCK(pv->pv_pmap);
- pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
-#ifdef notyet
-retry:
-#endif
- pbits = PT_GET(pte);
- if (pbits & bit) {
- if (bit == PG_RW) {
- /*
- * Regardless of whether a pte is 32 or 64 bits
- * in size, PG_RW and PG_M are among the least
- * significant 32 bits.
- */
-#ifdef notyet
- if (!atomic_cmpset_int((u_int *)pte, pbits,
- pbits & ~(PG_RW | PG_M)))
- goto retry;
-#endif
- PT_SET_VA(pte, pbits & ~(PG_M|PG_RW), TRUE);
-
-
- if (pbits & PG_M) {
- vm_page_dirty(m);
- }
- } else {
-#ifdef notyet
- atomic_clear_int((u_int *)pte, bit);
-#endif
- /* XXX */
- PT_SET_VA(pte, pbits & ~bit, TRUE);
- }
- pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
- }
- PMAP_UNLOCK(pv->pv_pmap);
- }
- if (bit == PG_RW)
- vm_page_flag_clear(m, PG_WRITEABLE);
- sched_unpin();
-}
-
-/*
- * pmap_page_protect:
- *
- * Lower the permission for all mappings to a given page.
- */
-void
-pmap_page_protect(vm_page_t m, vm_prot_t prot)
-{
- if ((prot & VM_PROT_WRITE) == 0) {
- if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
- pmap_clear_ptes(m, PG_RW);
- } else {
- pmap_remove_all(m);
- }
- }
-}
-
-/*
- * pmap_ts_referenced:
- *
- * Return a count of reference bits for a page, clearing those bits.
- * It is not necessary for every reference bit to be cleared, but it
- * is necessary that 0 only be returned when there are truly no
- * reference bits set.
- *
- * XXX: The exact number of bits to check and clear is a matter that
- * should be tested and standardized at some point in the future for
- * optimal aging of shared pages.
- */
-int
-pmap_ts_referenced(vm_page_t m)
-{
- register pv_entry_t pv, pvf, pvn;
- pt_entry_t *pte;
- pt_entry_t v;
- int rtval = 0;
-
- if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
- return (rtval);
-
- sched_pin();
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
- if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
-
- pvf = pv;
-
- do {
- pvn = TAILQ_NEXT(pv, pv_list);
-
- TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
-
- TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
-
- if (!pmap_track_modified(pv->pv_va))
- continue;
-
- PMAP_LOCK(pv->pv_pmap);
- pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
-
- if (pte && ((v = PT_GET(pte)) & PG_A) != 0) {
-#ifdef notyet
- atomic_clear_int((u_int *)pte, PG_A);
-#endif
- PT_SET_VA(pte, v & ~PG_A, FALSE);
- pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
-
- rtval++;
- if (rtval > 4) {
- PMAP_UNLOCK(pv->pv_pmap);
- break;
- }
- }
- PMAP_UNLOCK(pv->pv_pmap);
- } while ((pv = pvn) != NULL && pv != pvf);
- }
- sched_unpin();
-
- return (rtval);
-}
-
-/*
- * Clear the modify bits on the specified physical page.
- */
-void
-pmap_clear_modify(vm_page_t m)
-{
- pmap_clear_ptes(m, PG_M);
-}
-
-/*
- * pmap_clear_reference:
- *
- * Clear the reference bit on the specified physical page.
- */
-void
-pmap_clear_reference(vm_page_t m)
-{
- pmap_clear_ptes(m, PG_A);
-}
-
-/*
- * Miscellaneous support routines follow
- */
-
-/*
- * Map a set of physical memory pages into the kernel virtual
- * address space. Return a pointer to where it is mapped. This
- * routine is intended to be used for mapping device memory,
- * NOT real memory.
- */
-void *
-pmap_mapdev(pa, size)
- vm_paddr_t pa;
- vm_size_t size;
-{
- vm_offset_t va, tmpva, offset;
-
- offset = pa & PAGE_MASK;
- size = roundup(offset + size, PAGE_SIZE);
- pa = pa & PG_FRAME;
-
- if (pa < KERNLOAD && pa + size <= KERNLOAD)
- va = KERNBASE + pa;
- else
- va = kmem_alloc_nofault(kernel_map, size);
- if (!va)
- panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
-
- for (tmpva = va; size > 0; ) {
- pmap_kenter(tmpva, pa);
- size -= PAGE_SIZE;
- tmpva += PAGE_SIZE;
- pa += PAGE_SIZE;
- }
- pmap_invalidate_range(kernel_pmap, va, tmpva);
- return ((void *)(va + offset));
-}
-
-void
-pmap_unmapdev(va, size)
- vm_offset_t va;
- vm_size_t size;
-{
- vm_offset_t base, offset, tmpva;
- panic("unused");
- if (va >= KERNBASE && va + size <= KERNBASE + KERNLOAD)
- return;
- base = va & PG_FRAME;
- offset = va & PAGE_MASK;
- size = roundup(offset + size, PAGE_SIZE);
- for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE)
- pmap_kremove(tmpva);
- pmap_invalidate_range(kernel_pmap, va, tmpva);
- kmem_free(kernel_map, base, size);
-}
-
-/*
- * perform the pmap work for mincore
- */
-int
-pmap_mincore(pmap, addr)
- pmap_t pmap;
- vm_offset_t addr;
-{
- pt_entry_t *ptep, pte;
- vm_page_t m;
- int val = 0;
-
- PMAP_LOCK(pmap);
- ptep = pmap_pte(pmap, addr);
- pte = (ptep != NULL) ? PT_GET(ptep) : 0;
- pmap_pte_release(ptep);
- PMAP_UNLOCK(pmap);
-
- if (pte != 0) {
- vm_paddr_t pa;
-
- val = MINCORE_INCORE;
- if ((pte & PG_MANAGED) == 0)
- return val;
-
- pa = pte & PG_FRAME;
-
- m = PHYS_TO_VM_PAGE(pa);
-
- /*
- * Modified by us
- */
- if (pte & PG_M)
- val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
- else {
- /*
- * Modified by someone else
- */
- vm_page_lock_queues();
- if (m->dirty || pmap_is_modified(m))
- val |= MINCORE_MODIFIED_OTHER;
- vm_page_unlock_queues();
- }
- /*
- * Referenced by us
- */
- if (pte & PG_A)
- val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
- else {
- /*
- * Referenced by someone else
- */
- vm_page_lock_queues();
- if ((m->flags & PG_REFERENCED) ||
- pmap_ts_referenced(m)) {
- val |= MINCORE_REFERENCED_OTHER;
- vm_page_flag_set(m, PG_REFERENCED);
- }
- vm_page_unlock_queues();
- }
- }
- return val;
-}
-
-void
-pmap_activate(struct thread *td)
-{
- struct proc *p = td->td_proc;
- pmap_t pmap, oldpmap;
- u_int32_t cr3;
-
- critical_enter();
- pmap = vmspace_pmap(td->td_proc->p_vmspace);
- oldpmap = PCPU_GET(curpmap);
-#if defined(SMP)
- atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask));
- atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask));
-#else
- oldpmap->pm_active &= ~1;
- pmap->pm_active |= 1;
-#endif
-#ifdef PAE
- cr3 = vtophys(pmap->pm_pdpt);
-#else
- cr3 = vtophys(pmap->pm_pdir);
-#endif
- /* XXXKSE this is wrong.
- * pmap_activate is for the current thread on the current cpu
- */
- if (p->p_flag & P_SA) {
- /* Make sure all other cr3 entries are updated. */
- /* what if they are running? XXXKSE (maybe abort them) */
- FOREACH_THREAD_IN_PROC(p, td) {
- td->td_pcb->pcb_cr3 = cr3;
- }
- } else {
- td->td_pcb->pcb_cr3 = cr3;
- }
- load_cr3(cr3);
- PCPU_SET(curpmap, pmap);
- critical_exit();
-}
-
-vm_offset_t
-pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
-{
-
- if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) {
- return addr;
- }
-
- addr = (addr + PDRMASK) & ~PDRMASK;
- return addr;
-}
-
-
-#if defined(PMAP_DEBUG)
-extern int init_first;
-void
-pmap_ref(pt_entry_t *pte, unsigned long ma)
-{
- int ind, i, count;
- unsigned long ebp_prev, eip_prev, oma = 0;
- unsigned long pa = xpmap_mtop(ma);
-
- /* are we to the point where mappings are set up? */
- if (!init_first)
- return;
-
- ind = pa >> PAGE_SHIFT;
- /* privileged? */
- if ((pa & PG_RW) && pteinfo_list[ind].pt_ref & (1 << 31))
- BKPT;
-
- /* is MA already mapped ? */
- oma = *pte;
-
- /* old reference being lost */
- if (oma && (oma & PG_RW) && ((oma & PG_FRAME) != (ma & PG_FRAME)))
- pmap_dec_ref(oma);
-
- /* ignore RO mappings - unless were downgrading */
- if (!(ma & PG_RW)) {
- /* downgrading mapping - lose reference */
- if (((oma & PG_FRAME) == (ma & PG_FRAME)) &&
- (oma & PG_RW))
- pmap_dec_ref(ma);
- return;
- }
-
- if (pteinfo_list[ind].pt_ref < 0)
- BKPT;
-
-
- /* same address and not upgrading the mapping */
- if (((oma & PG_FRAME) == (ma & PG_FRAME)) &&
- (oma & PG_RW))
- return;
-
- count = pteinfo_list[ind].pt_ref;
- __asm__("movl %%ebp, %0" : "=r" (ebp_prev));
- for (i = 0; i < XPQ_CALL_DEPTH && ebp_prev > KERNBASE; i++) {
- __asm__("movl 4(%1), %0" : "=r" (eip_prev) : "r" (ebp_prev));
- pteinfo_list[ind].pt_eip[count%XPQ_CALL_COUNT][i] = eip_prev;
- __asm__("movl (%1), %0" : "=r" (ebp_prev) : "r" (ebp_prev));
- }
-
- pteinfo_list[ind].pt_ref++;
-
-}
-
-void
-pmap_dec_ref(unsigned long ma)
-{
- unsigned long pa;
- int ind, count;
-
- if (!ma) BKPT;
-
- pa = xpmap_mtop(ma);
-
- ind = pa >> PAGE_SHIFT;
- if (pteinfo_list[ind].pt_ref & (1 << 31)) BKPT;
-
- count = pteinfo_list[ind].pt_ref & ~(1 << 31);
- if (count < 1) {
- printk("ma: %lx has ref count of 0\n", ma);
- BKPT;
- }
- pteinfo_list[ind].pt_ref = (--count | (pteinfo_list[ind].pt_ref & (1 <<
31)));
-
-}
-
-void
-pmap_dec_ref_page(vm_page_t m)
-{
- unsigned long *pt;
- int i;
- mtx_lock(&CMAPCADDR12_lock);
- if (*CMAP2)
- panic("pmap_zero_page: CMAP2 busy");
- sched_pin();
- PT_SET_VA(CMAP2, PG_V | VM_PAGE_TO_PHYS(m) | PG_A | PG_M, FALSE);
- invlcaddr(CADDR2);
- pt = (unsigned long *)CADDR2;
- for (i = 0; i < 1024; i++)
- if (pt[i] & PG_RW)
- pmap_dec_ref(xpmap_ptom(pt[i]));
- PT_CLEAR_VA(CMAP2, TRUE);
- sched_unpin();
- mtx_unlock(&CMAPCADDR12_lock);
-}
-
-void
-pmap_mark_privileged(unsigned long pa)
-{
- int ind = pa >> PAGE_SHIFT;
-
- if (pteinfo_list[ind].pt_ref & (1 << 31)) BKPT;
- if ((pteinfo_list[ind].pt_ref & ~(1 << 31)) > 0) BKPT;
-
- pteinfo_list[ind].pt_ref |= (1 << 31);
-
-}
-
-void
-pmap_mark_unprivileged(unsigned long pa)
-{
- int ind = pa >> PAGE_SHIFT;
-
- if (pteinfo_list[ind].pt_ref != (1 << 31)) BKPT;
-
- pteinfo_list[ind].pt_ref &= ~(1 << 31);
-
-}
-
-
-int
-pmap_pid_dump(int pid)
-{
- pmap_t pmap;
- struct proc *p;
- int npte = 0;
- int index;
-
- sx_slock(&allproc_lock);
- LIST_FOREACH(p, &allproc, p_list) {
- if (p->p_pid != pid)
- continue;
-
- if (p->p_vmspace) {
- int i,j;
- index = 0;
- pmap = vmspace_pmap(p->p_vmspace);
- for (i = 0; i < NPDEPTD; i++) {
- pd_entry_t *pde;
- pt_entry_t *pte;
- vm_offset_t base = i << PDRSHIFT;
-
- pde = &pmap->pm_pdir[i];
- if (pde && pmap_pde_v(pde)) {
- for (j = 0; j < NPTEPG; j++) {
- vm_offset_t va = base + (j <<
PAGE_SHIFT);
- if (va >= (vm_offset_t)
VM_MIN_KERNEL_ADDRESS) {
- if (index) {
- index = 0;
- printf("\n");
- }
-
sx_sunlock(&allproc_lock);
- return npte;
- }
- pte = pmap_pte(pmap, va);
- if (pte && pmap_pte_v(pte)) {
- pt_entry_t pa;
- vm_page_t m;
- pa = PT_GET(pte);
- m = PHYS_TO_VM_PAGE(pa);
- printf("va: 0x%x, pt:
0x%x, h: %d, w: %d, f: 0x%x",
- va, pa,
m->hold_count, m->wire_count, m->flags);
- npte++;
- index++;
- if (index >= 2) {
- index = 0;
- printf("\n");
- } else {
- printf(" ");
- }
- }
- }
- }
- }
- }
- }
- sx_sunlock(&allproc_lock);
- return npte;
-}
-#endif /* PMAP_DEBUG */
-
-#if defined(DEBUG)
-
-static void pads(pmap_t pm);
-void pmap_pvdump(vm_offset_t pa);
-
-/* print address space of pmap*/
-static void
-pads(pm)
- pmap_t pm;
-{
- int i, j;
- vm_paddr_t va;
- pt_entry_t *ptep;
-
- if (pm == kernel_pmap)
- return;
- for (i = 0; i < NPDEPTD; i++)
- if (pm->pm_pdir[i])
- for (j = 0; j < NPTEPG; j++) {
- va = (i << PDRSHIFT) + (j << PAGE_SHIFT);
- if (pm == kernel_pmap && va < KERNBASE)
- continue;
- if (pm != kernel_pmap && va > UPT_MAX_ADDRESS)
- continue;
- ptep = pmap_pte(pm, va);
- if (pmap_pte_v(ptep))
- printf("%x:%x ", va, *ptep);
- };
-
-}
-
-void
-pmap_pvdump(pa)
- vm_paddr_t pa;
-{
- pv_entry_t pv;
- vm_page_t m;
-
- printf("pa %x", pa);
- m = PHYS_TO_VM_PAGE(pa);
- TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
- printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va);
- pads(pv->pv_pmap);
- }
- printf(" ");
-}
-#endif
-
-/*
- * Local variables:
- * c-basic-offset: 8
- * End:
- */
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/i386-xen/support.s
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/support.s Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,1553 +0,0 @@
-/*-
- * Copyright (c) 1993 The Regents of the University of California.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/i386/i386/support.s,v 1.100 2003/11/03 21:28:54 jhb Exp $
- */
-
-#include "opt_npx.h"
-
-#include <machine/asmacros.h>
-#include <machine/cputypes.h>
-#include <machine/intr_machdep.h>
-#include <machine/pmap.h>
-#include <machine/specialreg.h>
-
-#include "assym.s"
-
-#define IDXSHIFT 10
-
- .data
- .globl bcopy_vector
-bcopy_vector:
- .long generic_bcopy
- .globl bzero_vector
-bzero_vector:
- .long generic_bzero
- .globl copyin_vector
-copyin_vector:
- .long generic_copyin
- .globl copyout_vector
-copyout_vector:
- .long generic_copyout
-#if defined(I586_CPU) && defined(DEV_NPX)
-kernel_fpu_lock:
- .byte 0xfe
- .space 3
-#endif
- ALIGN_DATA
- .globl intrcnt, eintrcnt
-intrcnt:
- .space INTRCNT_COUNT * 4
-eintrcnt:
-
- .globl intrnames, eintrnames
-intrnames:
- .space INTRCNT_COUNT * (MAXCOMLEN + 1)
-eintrnames:
-
- .text
-
-/*
- * bcopy family
- * void bzero(void *buf, u_int len)
- */
-
-ENTRY(bzero)
- MEXITCOUNT
- jmp *bzero_vector
-
-ENTRY(generic_bzero)
- pushl %edi
- movl 8(%esp),%edi
- movl 12(%esp),%ecx
- xorl %eax,%eax
- shrl $2,%ecx
- cld
- rep
- stosl
- movl 12(%esp),%ecx
- andl $3,%ecx
- rep
- stosb
- popl %edi
- ret
-
-#ifdef I486_CPU
-ENTRY(i486_bzero)
- movl 4(%esp),%edx
- movl 8(%esp),%ecx
- xorl %eax,%eax
-/*
- * do 64 byte chunks first
- *
- * XXX this is probably over-unrolled at least for DX2's
- */
-2:
- cmpl $64,%ecx
- jb 3f
- movl %eax,(%edx)
- movl %eax,4(%edx)
- movl %eax,8(%edx)
- movl %eax,12(%edx)
- movl %eax,16(%edx)
- movl %eax,20(%edx)
- movl %eax,24(%edx)
- movl %eax,28(%edx)
- movl %eax,32(%edx)
- movl %eax,36(%edx)
- movl %eax,40(%edx)
- movl %eax,44(%edx)
- movl %eax,48(%edx)
- movl %eax,52(%edx)
- movl %eax,56(%edx)
- movl %eax,60(%edx)
- addl $64,%edx
- subl $64,%ecx
- jnz 2b
- ret
-
-/*
- * do 16 byte chunks
- */
- SUPERALIGN_TEXT
-3:
- cmpl $16,%ecx
- jb 4f
- movl %eax,(%edx)
- movl %eax,4(%edx)
- movl %eax,8(%edx)
- movl %eax,12(%edx)
- addl $16,%edx
- subl $16,%ecx
- jnz 3b
- ret
-
-/*
- * do 4 byte chunks
- */
- SUPERALIGN_TEXT
-4:
- cmpl $4,%ecx
- jb 5f
- movl %eax,(%edx)
- addl $4,%edx
- subl $4,%ecx
- jnz 4b
- ret
-
-/*
- * do 1 byte chunks
- * a jump table seems to be faster than a loop or more range reductions
- *
- * XXX need a const section for non-text
- */
- .data
-jtab:
- .long do0
- .long do1
- .long do2
- .long do3
-
- .text
- SUPERALIGN_TEXT
-5:
- jmp *jtab(,%ecx,4)
-
- SUPERALIGN_TEXT
-do3:
- movw %ax,(%edx)
- movb %al,2(%edx)
- ret
-
- SUPERALIGN_TEXT
-do2:
- movw %ax,(%edx)
- ret
-
- SUPERALIGN_TEXT
-do1:
- movb %al,(%edx)
- ret
-
- SUPERALIGN_TEXT
-do0:
- ret
-#endif
-
-#if defined(I586_CPU) && defined(DEV_NPX)
-ENTRY(i586_bzero)
- movl 4(%esp),%edx
- movl 8(%esp),%ecx
-
- /*
- * The FPU register method is twice as fast as the integer register
- * method unless the target is in the L1 cache and we pre-allocate a
- * cache line for it (then the integer register method is 4-5 times
- * faster). However, we never pre-allocate cache lines, since that
- * would make the integer method 25% or more slower for the common
- * case when the target isn't in either the L1 cache or the L2 cache.
- * Thus we normally use the FPU register method unless the overhead
- * would be too large.
- */
- cmpl $256,%ecx /* empirical; clts, fninit, smsw cost a lot */
- jb intreg_i586_bzero
-
- /*
- * The FPU registers may belong to an application or to fastmove()
- * or to another invocation of bcopy() or ourself in a higher level
- * interrupt or trap handler. Preserving the registers is
- * complicated since we avoid it if possible at all levels. We
- * want to localize the complications even when that increases them.
- * Here the extra work involves preserving CR0_TS in TS.
- * `fpcurthread != NULL' is supposed to be the condition that all the
- * FPU resources belong to an application, but fpcurthread and CR0_TS
- * aren't set atomically enough for this condition to work in
- * interrupt handlers.
- *
- * Case 1: FPU registers belong to the application: we must preserve
- * the registers if we use them, so we only use the FPU register
- * method if the target size is large enough to amortize the extra
- * overhead for preserving them. CR0_TS must be preserved although
- * it is very likely to end up as set.
- *
- * Case 2: FPU registers belong to fastmove(): fastmove() currently
- * makes the registers look like they belong to an application so
- * that cpu_switch() and savectx() don't have to know about it, so
- * this case reduces to case 1.
- *
- * Case 3: FPU registers belong to the kernel: don't use the FPU
- * register method. This case is unlikely, and supporting it would
- * be more complicated and might take too much stack.
- *
- * Case 4: FPU registers don't belong to anyone: the FPU registers
- * don't need to be preserved, so we always use the FPU register
- * method. CR0_TS must be preserved although it is very likely to
- * always end up as clear.
- */
- cmpl $0,PCPU(FPCURTHREAD)
- je i586_bz1
-
- /*
- * XXX don't use the FPU for cases 1 and 2, since preemptive
- * scheduling of ithreads broke these cases. Note that we can
- * no longer get here from an interrupt handler, since the
- * context sitch to the interrupt handler will have saved the
- * FPU state.
- */
- jmp intreg_i586_bzero
-
- cmpl $256+184,%ecx /* empirical; not quite 2*108 more */
- jb intreg_i586_bzero
- sarb $1,kernel_fpu_lock
- jc intreg_i586_bzero
- smsw %ax
- clts
- subl $108,%esp
- fnsave 0(%esp)
- jmp i586_bz2
-
-i586_bz1:
- sarb $1,kernel_fpu_lock
- jc intreg_i586_bzero
- smsw %ax
- clts
- fninit /* XXX should avoid needing this */
-i586_bz2:
- fldz
-
- /*
- * Align to an 8 byte boundary (misalignment in the main loop would
- * cost a factor of >= 2). Avoid jumps (at little cost if it is
- * already aligned) by always zeroing 8 bytes and using the part up
- * to the _next_ alignment position.
- */
- fstl 0(%edx)
- addl %edx,%ecx /* part of %ecx -= new_%edx - %edx */
- addl $8,%edx
- andl $~7,%edx
- subl %edx,%ecx
-
- /*
- * Similarly align `len' to a multiple of 8.
- */
- fstl -8(%edx,%ecx)
- decl %ecx
- andl $~7,%ecx
-
- /*
- * This wouldn't be any faster if it were unrolled, since the loop
- * control instructions are much faster than the fstl and/or done
- * in parallel with it so their overhead is insignificant.
- */
-fpureg_i586_bzero_loop:
- fstl 0(%edx)
- addl $8,%edx
- subl $8,%ecx
- cmpl $8,%ecx
- jae fpureg_i586_bzero_loop
-
- cmpl $0,PCPU(FPCURTHREAD)
- je i586_bz3
-
- /* XXX check that the condition for cases 1-2 stayed false. */
-i586_bzero_oops:
- int $3
- jmp i586_bzero_oops
-
- frstor 0(%esp)
- addl $108,%esp
- lmsw %ax
- movb $0xfe,kernel_fpu_lock
- ret
-
-i586_bz3:
- fstp %st(0)
- lmsw %ax
- movb $0xfe,kernel_fpu_lock
- ret
-
-intreg_i586_bzero:
- /*
- * `rep stos' seems to be the best method in practice for small
- * counts. Fancy methods usually take too long to start up due
- * to cache and BTB misses.
- */
- pushl %edi
- movl %edx,%edi
- xorl %eax,%eax
- shrl $2,%ecx
- cld
- rep
- stosl
- movl 12(%esp),%ecx
- andl $3,%ecx
- jne 1f
- popl %edi
- ret
-
-1:
- rep
- stosb
- popl %edi
- ret
-#endif /* I586_CPU && defined(DEV_NPX) */
-
-ENTRY(sse2_pagezero)
- pushl %ebx
- movl 8(%esp),%ecx
- movl %ecx,%eax
- addl $4096,%eax
- xor %ebx,%ebx
-1:
- movnti %ebx,(%ecx)
- addl $4,%ecx
- cmpl %ecx,%eax
- jne 1b
- sfence
- popl %ebx
- ret
-
-ENTRY(i686_pagezero)
- pushl %edi
- pushl %ebx
-
- movl 12(%esp), %edi
- movl $1024, %ecx
- cld
-
- ALIGN_TEXT
-1:
- xorl %eax, %eax
- repe
- scasl
- jnz 2f
-
- popl %ebx
- popl %edi
- ret
-
- ALIGN_TEXT
-
-2:
- incl %ecx
- subl $4, %edi
-
- movl %ecx, %edx
- cmpl $16, %ecx
-
- jge 3f
-
- movl %edi, %ebx
- andl $0x3f, %ebx
- shrl %ebx
- shrl %ebx
- movl $16, %ecx
- subl %ebx, %ecx
-
-3:
- subl %ecx, %edx
- rep
- stosl
-
- movl %edx, %ecx
- testl %edx, %edx
- jnz 1b
-
- popl %ebx
- popl %edi
- ret
-
-/* fillw(pat, base, cnt) */
-ENTRY(fillw)
- pushl %edi
- movl 8(%esp),%eax
- movl 12(%esp),%edi
- movl 16(%esp),%ecx
- cld
- rep
- stosw
- popl %edi
- ret
-
-ENTRY(bcopyb)
- pushl %esi
- pushl %edi
- movl 12(%esp),%esi
- movl 16(%esp),%edi
- movl 20(%esp),%ecx
- movl %edi,%eax
- subl %esi,%eax
- cmpl %ecx,%eax /* overlapping && src < dst? */
- jb 1f
- cld /* nope, copy forwards */
- rep
- movsb
- popl %edi
- popl %esi
- ret
-
- ALIGN_TEXT
-1:
- addl %ecx,%edi /* copy backwards. */
- addl %ecx,%esi
- decl %edi
- decl %esi
- std
- rep
- movsb
- popl %edi
- popl %esi
- cld
- ret
-
-ENTRY(bcopy)
- MEXITCOUNT
- jmp *bcopy_vector
-
-/*
- * generic_bcopy(src, dst, cnt)
- * ws@xxxxxxxx (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
- */
-ENTRY(generic_bcopy)
- pushl %esi
- pushl %edi
- movl 12(%esp),%esi
- movl 16(%esp),%edi
- movl 20(%esp),%ecx
-
- movl %edi,%eax
- subl %esi,%eax
- cmpl %ecx,%eax /* overlapping && src < dst? */
- jb 1f
-
- shrl $2,%ecx /* copy by 32-bit words */
- cld /* nope, copy forwards */
- rep
- movsl
- movl 20(%esp),%ecx
- andl $3,%ecx /* any bytes left? */
- rep
- movsb
- popl %edi
- popl %esi
- ret
-
- ALIGN_TEXT
-1:
- addl %ecx,%edi /* copy backwards */
- addl %ecx,%esi
- decl %edi
- decl %esi
- andl $3,%ecx /* any fractional bytes? */
- std
- rep
- movsb
- movl 20(%esp),%ecx /* copy remainder by 32-bit
words */
- shrl $2,%ecx
- subl $3,%esi
- subl $3,%edi
- rep
- movsl
- popl %edi
- popl %esi
- cld
- ret
-
-#if defined(I586_CPU) && defined(DEV_NPX)
-ENTRY(i586_bcopy)
- pushl %esi
- pushl %edi
- movl 12(%esp),%esi
- movl 16(%esp),%edi
- movl 20(%esp),%ecx
-
- movl %edi,%eax
- subl %esi,%eax
- cmpl %ecx,%eax /* overlapping && src < dst? */
- jb 1f
-
- cmpl $1024,%ecx
- jb small_i586_bcopy
-
- sarb $1,kernel_fpu_lock
- jc small_i586_bcopy
- cmpl $0,PCPU(FPCURTHREAD)
- je i586_bc1
-
- /* XXX turn off handling of cases 1-2, as above. */
- movb $0xfe,kernel_fpu_lock
- jmp small_i586_bcopy
-
- smsw %dx
- clts
- subl $108,%esp
- fnsave 0(%esp)
- jmp 4f
-
-i586_bc1:
- smsw %dx
- clts
- fninit /* XXX should avoid needing this */
-
- ALIGN_TEXT
-4:
- pushl %ecx
-#define DCACHE_SIZE 8192
- cmpl $(DCACHE_SIZE-512)/2,%ecx
- jbe 2f
- movl $(DCACHE_SIZE-512)/2,%ecx
-2:
- subl %ecx,0(%esp)
- cmpl $256,%ecx
- jb 5f /* XXX should prefetch if %ecx >= 32 */
- pushl %esi
- pushl %ecx
- ALIGN_TEXT
-3:
- movl 0(%esi),%eax
- movl 32(%esi),%eax
- movl 64(%esi),%eax
- movl 96(%esi),%eax
- movl 128(%esi),%eax
- movl 160(%esi),%eax
- movl 192(%esi),%eax
- movl 224(%esi),%eax
- addl $256,%esi
- subl $256,%ecx
- cmpl $256,%ecx
- jae 3b
- popl %ecx
- popl %esi
-5:
- ALIGN_TEXT
-large_i586_bcopy_loop:
- fildq 0(%esi)
- fildq 8(%esi)
- fildq 16(%esi)
- fildq 24(%esi)
- fildq 32(%esi)
- fildq 40(%esi)
- fildq 48(%esi)
- fildq 56(%esi)
- fistpq 56(%edi)
- fistpq 48(%edi)
- fistpq 40(%edi)
- fistpq 32(%edi)
- fistpq 24(%edi)
- fistpq 16(%edi)
- fistpq 8(%edi)
- fistpq 0(%edi)
- addl $64,%esi
- addl $64,%edi
- subl $64,%ecx
- cmpl $64,%ecx
- jae large_i586_bcopy_loop
- popl %eax
- addl %eax,%ecx
- cmpl $64,%ecx
- jae 4b
-
- cmpl $0,PCPU(FPCURTHREAD)
- je i586_bc2
-
- /* XXX check that the condition for cases 1-2 stayed false. */
-i586_bcopy_oops:
- int $3
- jmp i586_bcopy_oops
-
- frstor 0(%esp)
- addl $108,%esp
-i586_bc2:
- lmsw %dx
- movb $0xfe,kernel_fpu_lock
-
-/*
- * This is a duplicate of the main part of generic_bcopy. See the comments
- * there. Jumping into generic_bcopy would cost a whole 0-1 cycles and
- * would mess up high resolution profiling.
- */
- ALIGN_TEXT
-small_i586_bcopy:
- shrl $2,%ecx
- cld
- rep
- movsl
- movl 20(%esp),%ecx
- andl $3,%ecx
- rep
- movsb
- popl %edi
- popl %esi
- ret
-
- ALIGN_TEXT
-1:
- addl %ecx,%edi
- addl %ecx,%esi
- decl %edi
- decl %esi
- andl $3,%ecx
- std
- rep
- movsb
- movl 20(%esp),%ecx
- shrl $2,%ecx
- subl $3,%esi
- subl $3,%edi
- rep
- movsl
- popl %edi
- popl %esi
- cld
- ret
-#endif /* I586_CPU && defined(DEV_NPX) */
-
-/*
- * Note: memcpy does not support overlapping copies
- */
-ENTRY(memcpy)
- pushl %edi
- pushl %esi
- movl 12(%esp),%edi
- movl 16(%esp),%esi
- movl 20(%esp),%ecx
- movl %edi,%eax
- shrl $2,%ecx /* copy by 32-bit words */
- cld /* nope, copy forwards */
- rep
- movsl
- movl 20(%esp),%ecx
- andl $3,%ecx /* any bytes left? */
- rep
- movsb
- popl %esi
- popl %edi
- ret
-
-
-/*****************************************************************************/
-/* copyout and fubyte family */
-/*****************************************************************************/
-/*
- * Access user memory from inside the kernel. These routines and possibly
- * the math- and DOS emulators should be the only places that do this.
- *
- * We have to access the memory with user's permissions, so use a segment
- * selector with RPL 3. For writes to user space we have to additionally
- * check the PTE for write permission, because the 386 does not check
- * write permissions when we are executing with EPL 0. The 486 does check
- * this if the WP bit is set in CR0, so we can use a simpler version here.
- *
- * These routines set curpcb->onfault for the time they execute. When a
- * protection violation occurs inside the functions, the trap handler
- * returns to *curpcb->onfault instead of the function.
- */
-
-/*
- * copyout(from_kernel, to_user, len) - MP SAFE (if not I386_CPU)
- */
-ENTRY(copyout)
- MEXITCOUNT
- jmp *copyout_vector
-
-ENTRY(generic_copyout)
- movl PCPU(CURPCB),%eax
- movl $copyout_fault,PCB_ONFAULT(%eax)
- pushl %esi
- pushl %edi
- pushl %ebx
- movl 16(%esp),%esi
- movl 20(%esp),%edi
- movl 24(%esp),%ebx
- testl %ebx,%ebx /* anything to do? */
- jz done_copyout
-
- /*
- * Check explicitly for non-user addresses. If 486 write protection
- * is being used, this check is essential because we are in kernel
- * mode so the h/w does not provide any protection against writing
- * kernel addresses.
- */
-
- /*
- * First, prevent address wrapping.
- */
- movl %edi,%eax
- addl %ebx,%eax
- jc copyout_fault
-/*
- * XXX STOP USING VM_MAXUSER_ADDRESS.
- * It is an end address, not a max, so every time it is used correctly it
- * looks like there is an off by one error, and of course it caused an off
- * by one error in several places.
- */
- cmpl $VM_MAXUSER_ADDRESS,%eax
- ja copyout_fault
-
- /* bcopy(%esi, %edi, %ebx) */
- movl %ebx,%ecx
-
-#if defined(I586_CPU) && defined(DEV_NPX)
- ALIGN_TEXT
-slow_copyout:
-#endif
- shrl $2,%ecx
- cld
- rep
- movsl
- movb %bl,%cl
- andb $3,%cl
- rep
- movsb
-
-done_copyout:
- popl %ebx
- popl %edi
- popl %esi
- xorl %eax,%eax
- movl PCPU(CURPCB),%edx
- movl %eax,PCB_ONFAULT(%edx)
- ret
-
- ALIGN_TEXT
-copyout_fault:
- popl %ebx
- popl %edi
- popl %esi
- movl PCPU(CURPCB),%edx
- movl $0,PCB_ONFAULT(%edx)
- movl $EFAULT,%eax
- ret
-
-#if defined(I586_CPU) && defined(DEV_NPX)
-ENTRY(i586_copyout)
- /*
- * Duplicated from generic_copyout. Could be done a bit better.
- */
- movl PCPU(CURPCB),%eax
- movl $copyout_fault,PCB_ONFAULT(%eax)
- pushl %esi
- pushl %edi
- pushl %ebx
- movl 16(%esp),%esi
- movl 20(%esp),%edi
- movl 24(%esp),%ebx
- testl %ebx,%ebx /* anything to do? */
- jz done_copyout
-
- /*
- * Check explicitly for non-user addresses. If 486 write protection
- * is being used, this check is essential because we are in kernel
- * mode so the h/w does not provide any protection against writing
- * kernel addresses.
- */
-
- /*
- * First, prevent address wrapping.
- */
- movl %edi,%eax
- addl %ebx,%eax
- jc copyout_fault
-/*
- * XXX STOP USING VM_MAXUSER_ADDRESS.
- * It is an end address, not a max, so every time it is used correctly it
- * looks like there is an off by one error, and of course it caused an off
- * by one error in several places.
- */
- cmpl $VM_MAXUSER_ADDRESS,%eax
- ja copyout_fault
-
- /* bcopy(%esi, %edi, %ebx) */
-3:
- movl %ebx,%ecx
- /*
- * End of duplicated code.
- */
-
- cmpl $1024,%ecx
- jb slow_copyout
-
- pushl %ecx
- call fastmove
- addl $4,%esp
- jmp done_copyout
-#endif /* I586_CPU && defined(DEV_NPX) */
-
-/*
- * copyin(from_user, to_kernel, len) - MP SAFE
- */
-ENTRY(copyin)
- MEXITCOUNT
- jmp *copyin_vector
-
-ENTRY(generic_copyin)
- movl PCPU(CURPCB),%eax
- movl $copyin_fault,PCB_ONFAULT(%eax)
- pushl %esi
- pushl %edi
- movl 12(%esp),%esi /* caddr_t from */
- movl 16(%esp),%edi /* caddr_t to */
- movl 20(%esp),%ecx /* size_t len */
-
- /*
- * make sure address is valid
- */
- movl %esi,%edx
- addl %ecx,%edx
- jc copyin_fault
- cmpl $VM_MAXUSER_ADDRESS,%edx
- ja copyin_fault
-
-#if defined(I586_CPU) && defined(DEV_NPX)
- ALIGN_TEXT
-slow_copyin:
-#endif
- movb %cl,%al
- shrl $2,%ecx /* copy longword-wise */
- cld
- rep
- movsl
- movb %al,%cl
- andb $3,%cl /* copy remaining bytes */
- rep
- movsb
-
-#if defined(I586_CPU) && defined(DEV_NPX)
- ALIGN_TEXT
-done_copyin:
-#endif
- popl %edi
- popl %esi
- xorl %eax,%eax
- movl PCPU(CURPCB),%edx
- movl %eax,PCB_ONFAULT(%edx)
- ret
-
- ALIGN_TEXT
-copyin_fault:
- popl %edi
- popl %esi
- movl PCPU(CURPCB),%edx
- movl $0,PCB_ONFAULT(%edx)
- movl $EFAULT,%eax
- ret
-
-#if defined(I586_CPU) && defined(DEV_NPX)
-ENTRY(i586_copyin)
- /*
- * Duplicated from generic_copyin. Could be done a bit better.
- */
- movl PCPU(CURPCB),%eax
- movl $copyin_fault,PCB_ONFAULT(%eax)
- pushl %esi
- pushl %edi
- movl 12(%esp),%esi /* caddr_t from */
- movl 16(%esp),%edi /* caddr_t to */
- movl 20(%esp),%ecx /* size_t len */
-
- /*
- * make sure address is valid
- */
- movl %esi,%edx
- addl %ecx,%edx
- jc copyin_fault
- cmpl $VM_MAXUSER_ADDRESS,%edx
- ja copyin_fault
- /*
- * End of duplicated code.
- */
-
- cmpl $1024,%ecx
- jb slow_copyin
-
- pushl %ebx /* XXX prepare for fastmove_fault */
- pushl %ecx
- call fastmove
- addl $8,%esp
- jmp done_copyin
-#endif /* I586_CPU && defined(DEV_NPX) */
-
-#if defined(I586_CPU) && defined(DEV_NPX)
-/* fastmove(src, dst, len)
- src in %esi
- dst in %edi
- len in %ecx XXX changed to on stack for profiling
- uses %eax and %edx for tmp. storage
- */
-/* XXX use ENTRY() to get profiling. fastmove() is actually a non-entry. */
-ENTRY(fastmove)
- pushl %ebp
- movl %esp,%ebp
- subl $PCB_SAVEFPU_SIZE+3*4,%esp
-
- movl 8(%ebp),%ecx
- cmpl $63,%ecx
- jbe fastmove_tail
-
- testl $7,%esi /* check if src addr is multiple of 8 */
- jnz fastmove_tail
-
- testl $7,%edi /* check if dst addr is multiple of 8 */
- jnz fastmove_tail
-
- /* XXX grab FPU context atomically. */
- call ni_cli
-
-/* if (fpcurthread != NULL) { */
- cmpl $0,PCPU(FPCURTHREAD)
- je 6f
-/* fnsave(&curpcb->pcb_savefpu); */
- movl PCPU(CURPCB),%eax
- fnsave PCB_SAVEFPU(%eax)
-/* FPCURTHREAD = NULL; */
- movl $0,PCPU(FPCURTHREAD)
-/* } */
-6:
-/* now we own the FPU. */
-
-/*
- * The process' FP state is saved in the pcb, but if we get
- * switched, the cpu_switch() will store our FP state in the
- * pcb. It should be possible to avoid all the copying for
- * this, e.g., by setting a flag to tell cpu_switch() to
- * save the state somewhere else.
- */
-/* tmp = curpcb->pcb_savefpu; */
- movl %ecx,-12(%ebp)
- movl %esi,-8(%ebp)
- movl %edi,-4(%ebp)
- movl %esp,%edi
- movl PCPU(CURPCB),%esi
- addl $PCB_SAVEFPU,%esi
- cld
- movl $PCB_SAVEFPU_SIZE>>2,%ecx
- rep
- movsl
- movl -12(%ebp),%ecx
- movl -8(%ebp),%esi
- movl -4(%ebp),%edi
-/* stop_emulating(); */
- clts
-/* fpcurthread = curthread; */
- movl PCPU(CURTHREAD),%eax
- movl %eax,PCPU(FPCURTHREAD)
- movl PCPU(CURPCB),%eax
-
- /* XXX end of atomic FPU context grab. */
- call ni_sti
-
- movl $fastmove_fault,PCB_ONFAULT(%eax)
-4:
- movl %ecx,-12(%ebp)
- cmpl $1792,%ecx
- jbe 2f
- movl $1792,%ecx
-2:
- subl %ecx,-12(%ebp)
- cmpl $256,%ecx
- jb 5f
- movl %ecx,-8(%ebp)
- movl %esi,-4(%ebp)
- ALIGN_TEXT
-3:
- movl 0(%esi),%eax
- movl 32(%esi),%eax
- movl 64(%esi),%eax
- movl 96(%esi),%eax
- movl 128(%esi),%eax
- movl 160(%esi),%eax
- movl 192(%esi),%eax
- movl 224(%esi),%eax
- addl $256,%esi
- subl $256,%ecx
- cmpl $256,%ecx
- jae 3b
- movl -8(%ebp),%ecx
- movl -4(%ebp),%esi
-5:
- ALIGN_TEXT
-fastmove_loop:
- fildq 0(%esi)
- fildq 8(%esi)
- fildq 16(%esi)
- fildq 24(%esi)
- fildq 32(%esi)
- fildq 40(%esi)
- fildq 48(%esi)
- fildq 56(%esi)
- fistpq 56(%edi)
- fistpq 48(%edi)
- fistpq 40(%edi)
- fistpq 32(%edi)
- fistpq 24(%edi)
- fistpq 16(%edi)
- fistpq 8(%edi)
- fistpq 0(%edi)
- addl $-64,%ecx
- addl $64,%esi
- addl $64,%edi
- cmpl $63,%ecx
- ja fastmove_loop
- movl -12(%ebp),%eax
- addl %eax,%ecx
- cmpl $64,%ecx
- jae 4b
-
- /* XXX ungrab FPU context atomically. */
- call ni_cli
-
-/* curpcb->pcb_savefpu = tmp; */
- movl %ecx,-12(%ebp)
- movl %esi,-8(%ebp)
- movl %edi,-4(%ebp)
- movl PCPU(CURPCB),%edi
- addl $PCB_SAVEFPU,%edi
- movl %esp,%esi
- cld
- movl $PCB_SAVEFPU_SIZE>>2,%ecx
- rep
- movsl
- movl -12(%ebp),%ecx
- movl -8(%ebp),%esi
- movl -4(%ebp),%edi
-
-/* start_emulating(); */
- smsw %ax
- orb $CR0_TS,%al
- lmsw %ax
-/* fpcurthread = NULL; */
- movl $0,PCPU(FPCURTHREAD)
-
- /* XXX end of atomic FPU context ungrab. */
- call ni_sti
-
- ALIGN_TEXT
-fastmove_tail:
- movl PCPU(CURPCB),%eax
- movl $fastmove_tail_fault,PCB_ONFAULT(%eax)
-
- movb %cl,%al
- shrl $2,%ecx /* copy longword-wise */
- cld
- rep
- movsl
- movb %al,%cl
- andb $3,%cl /* copy remaining bytes */
- rep
- movsb
-
- movl %ebp,%esp
- popl %ebp
- ret
-
- ALIGN_TEXT
-fastmove_fault:
- /* XXX ungrab FPU context atomically. */
- call ni_cli
-
- movl PCPU(CURPCB),%edi
- addl $PCB_SAVEFPU,%edi
- movl %esp,%esi
- cld
- movl $PCB_SAVEFPU_SIZE>>2,%ecx
- rep
- movsl
-
- smsw %ax
- orb $CR0_TS,%al
- lmsw %ax
- movl $0,PCPU(FPCURTHREAD)
-
- /* XXX end of atomic FPU context ungrab. */
- call ni_sti
-
-fastmove_tail_fault:
- movl %ebp,%esp
- popl %ebp
- addl $8,%esp
- popl %ebx
- popl %edi
- popl %esi
- movl PCPU(CURPCB),%edx
- movl $0,PCB_ONFAULT(%edx)
- movl $EFAULT,%eax
- ret
-#endif /* I586_CPU && defined(DEV_NPX) */
-
-/*
- * casuptr. Compare and set user pointer. Returns -1 or the current value.
- */
-ENTRY(casuptr)
- movl PCPU(CURPCB),%ecx
- movl $fusufault,PCB_ONFAULT(%ecx)
- movl 4(%esp),%edx /* dst */
- movl 8(%esp),%eax /* old */
- movl 12(%esp),%ecx /* new */
-
- cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address is valid */
- ja fusufault
-
-#ifdef SMP
- lock
-#endif
- cmpxchgl %ecx, (%edx) /* Compare and set. */
-
- /*
- * The old value is in %eax. If the store succeeded it will be the
- * value we expected (old) from before the store, otherwise it will
- * be the current value.
- */
-
- movl PCPU(CURPCB),%ecx
- movl $fusufault,PCB_ONFAULT(%ecx)
- movl $0,PCB_ONFAULT(%ecx)
- ret
-
-/*
- * fu{byte,sword,word} - MP SAFE
- *
- * Fetch a byte (sword, word) from user memory
- */
-ENTRY(fuword)
- movl PCPU(CURPCB),%ecx
- movl $fusufault,PCB_ONFAULT(%ecx)
- movl 4(%esp),%edx /* from */
-
- cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address is valid */
- ja fusufault
-
- movl (%edx),%eax
- movl $0,PCB_ONFAULT(%ecx)
- ret
-
-ENTRY(fuword32)
- jmp fuword
-
-/*
- * These two routines are called from the profiling code, potentially
- * at interrupt time. If they fail, that's okay, good things will
- * happen later. Fail all the time for now - until the trap code is
- * able to deal with this.
- */
-ALTENTRY(suswintr)
-ENTRY(fuswintr)
- movl $-1,%eax
- ret
-
-/*
- * fuword16 - MP SAFE
- */
-ENTRY(fuword16)
- movl PCPU(CURPCB),%ecx
- movl $fusufault,PCB_ONFAULT(%ecx)
- movl 4(%esp),%edx
-
- cmpl $VM_MAXUSER_ADDRESS-2,%edx
- ja fusufault
-
- movzwl (%edx),%eax
- movl $0,PCB_ONFAULT(%ecx)
- ret
-
-/*
- * fubyte - MP SAFE
- */
-ENTRY(fubyte)
- movl PCPU(CURPCB),%ecx
- movl $fusufault,PCB_ONFAULT(%ecx)
- movl 4(%esp),%edx
-
- cmpl $VM_MAXUSER_ADDRESS-1,%edx
- ja fusufault
-
- movzbl (%edx),%eax
- movl $0,PCB_ONFAULT(%ecx)
- ret
-
- ALIGN_TEXT
-fusufault:
- movl PCPU(CURPCB),%ecx
- xorl %eax,%eax
- movl %eax,PCB_ONFAULT(%ecx)
- decl %eax
- ret
-
-/*
- * su{byte,sword,word} - MP SAFE (if not I386_CPU)
- *
- * Write a byte (word, longword) to user memory
- */
-ENTRY(suword)
- movl PCPU(CURPCB),%ecx
- movl $fusufault,PCB_ONFAULT(%ecx)
- movl 4(%esp),%edx
-
- cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address validity */
- ja fusufault
-
- movl 8(%esp),%eax
- movl %eax,(%edx)
- xorl %eax,%eax
- movl PCPU(CURPCB),%ecx
- movl %eax,PCB_ONFAULT(%ecx)
- ret
-
-ENTRY(suword32)
- jmp suword
-
-/*
- * suword16 - MP SAFE (if not I386_CPU)
- */
-ENTRY(suword16)
- movl PCPU(CURPCB),%ecx
- movl $fusufault,PCB_ONFAULT(%ecx)
- movl 4(%esp),%edx
-
- cmpl $VM_MAXUSER_ADDRESS-2,%edx /* verify address validity */
- ja fusufault
-
- movw 8(%esp),%ax
- movw %ax,(%edx)
- xorl %eax,%eax
- movl PCPU(CURPCB),%ecx /* restore trashed register */
- movl %eax,PCB_ONFAULT(%ecx)
- ret
-
-/*
- * subyte - MP SAFE (if not I386_CPU)
- */
-ENTRY(subyte)
- movl PCPU(CURPCB),%ecx
- movl $fusufault,PCB_ONFAULT(%ecx)
- movl 4(%esp),%edx
-
- cmpl $VM_MAXUSER_ADDRESS-1,%edx /* verify address validity */
- ja fusufault
-
- movb 8(%esp),%al
- movb %al,(%edx)
- xorl %eax,%eax
- movl PCPU(CURPCB),%ecx /* restore trashed register */
- movl %eax,PCB_ONFAULT(%ecx)
- ret
-
-/*
- * copyinstr(from, to, maxlen, int *lencopied) - MP SAFE
- *
- * copy a string from from to to, stop when a 0 character is reached.
- * return ENAMETOOLONG if string is longer than maxlen, and
- * EFAULT on protection violations. If lencopied is non-zero,
- * return the actual length in *lencopied.
- */
-ENTRY(copyinstr)
- pushl %esi
- pushl %edi
- movl PCPU(CURPCB),%ecx
- movl $cpystrflt,PCB_ONFAULT(%ecx)
-
- movl 12(%esp),%esi /* %esi = from */
- movl 16(%esp),%edi /* %edi = to */
- movl 20(%esp),%edx /* %edx = maxlen */
-
- movl $VM_MAXUSER_ADDRESS,%eax
-
- /* make sure 'from' is within bounds */
- subl %esi,%eax
- jbe cpystrflt
-
- /* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
- cmpl %edx,%eax
- jae 1f
- movl %eax,%edx
- movl %eax,20(%esp)
-1:
- incl %edx
- cld
-
-2:
- decl %edx
- jz 3f
-
- lodsb
- stosb
- orb %al,%al
- jnz 2b
-
- /* Success -- 0 byte reached */
- decl %edx
- xorl %eax,%eax
- jmp cpystrflt_x
-3:
- /* edx is zero - return ENAMETOOLONG or EFAULT */
- cmpl $VM_MAXUSER_ADDRESS,%esi
- jae cpystrflt
-4:
- movl $ENAMETOOLONG,%eax
- jmp cpystrflt_x
-
-cpystrflt:
- movl $EFAULT,%eax
-
-cpystrflt_x:
- /* set *lencopied and return %eax */
- movl PCPU(CURPCB),%ecx
- movl $0,PCB_ONFAULT(%ecx)
- movl 20(%esp),%ecx
- subl %edx,%ecx
- movl 24(%esp),%edx
- testl %edx,%edx
- jz 1f
- movl %ecx,(%edx)
-1:
- popl %edi
- popl %esi
- ret
-
-
-/*
- * copystr(from, to, maxlen, int *lencopied) - MP SAFE
- */
-ENTRY(copystr)
- pushl %esi
- pushl %edi
-
- movl 12(%esp),%esi /* %esi = from */
- movl 16(%esp),%edi /* %edi = to */
- movl 20(%esp),%edx /* %edx = maxlen */
- incl %edx
- cld
-1:
- decl %edx
- jz 4f
- lodsb
- stosb
- orb %al,%al
- jnz 1b
-
- /* Success -- 0 byte reached */
- decl %edx
- xorl %eax,%eax
- jmp 6f
-4:
- /* edx is zero -- return ENAMETOOLONG */
- movl $ENAMETOOLONG,%eax
-
-6:
- /* set *lencopied and return %eax */
- movl 20(%esp),%ecx
- subl %edx,%ecx
- movl 24(%esp),%edx
- testl %edx,%edx
- jz 7f
- movl %ecx,(%edx)
-7:
- popl %edi
- popl %esi
- ret
-
-ENTRY(bcmp)
- pushl %edi
- pushl %esi
- movl 12(%esp),%edi
- movl 16(%esp),%esi
- movl 20(%esp),%edx
- xorl %eax,%eax
-
- movl %edx,%ecx
- shrl $2,%ecx
- cld /* compare forwards */
- repe
- cmpsl
- jne 1f
-
- movl %edx,%ecx
- andl $3,%ecx
- repe
- cmpsb
- je 2f
-1:
- incl %eax
-2:
- popl %esi
- popl %edi
- ret
-
-
-/*
- * Handling of special 386 registers and descriptor tables etc
- */
-/* void lgdt(struct region_descriptor *rdp); */
-ENTRY(lgdt_finish)
-#if 0
- /* reload the descriptor table */
- movl 4(%esp),%eax
- lgdt (%eax)
-#endif
- /* flush the prefetch q */
- jmp 1f
- nop
-1:
- /* reload "stale" selectors */
- movl $KDSEL,%eax
- movl %eax,%ds
- movl %eax,%es
- movl %eax,%gs
- movl %eax,%ss
- movl $KPSEL,%eax
- movl %eax,%fs
-
- /* reload code selector by turning return into intersegmental return */
- movl (%esp),%eax
- pushl %eax
- movl $KCSEL,4(%esp)
- lret
-
-/* ssdtosd(*ssdp,*sdp) */
-ENTRY(ssdtosd)
- pushl %ebx
- movl 8(%esp),%ecx
- movl 8(%ecx),%ebx
- shll $16,%ebx
- movl (%ecx),%edx
- roll $16,%edx
- movb %dh,%bl
- movb %dl,%bh
- rorl $8,%ebx
- movl 4(%ecx),%eax
- movw %ax,%dx
- andl $0xf0000,%eax
- orl %eax,%ebx
- movl 12(%esp),%ecx
- movl %edx,(%ecx)
- movl %ebx,4(%ecx)
- popl %ebx
- ret
-
-/* void reset_dbregs() */
-ENTRY(reset_dbregs)
- movl $0,%eax
- movl %eax,%dr7 /* disable all breapoints first */
- movl %eax,%dr0
- movl %eax,%dr1
- movl %eax,%dr2
- movl %eax,%dr3
- movl %eax,%dr6
- ret
-
-/*****************************************************************************/
-/* setjump, longjump */
-/*****************************************************************************/
-
-ENTRY(setjmp)
- movl 4(%esp),%eax
- movl %ebx,(%eax) /* save ebx */
- movl %esp,4(%eax) /* save esp */
- movl %ebp,8(%eax) /* save ebp */
- movl %esi,12(%eax) /* save esi */
- movl %edi,16(%eax) /* save edi */
- movl (%esp),%edx /* get rta */
- movl %edx,20(%eax) /* save eip */
- xorl %eax,%eax /* return(0); */
- ret
-
-ENTRY(longjmp)
- movl 4(%esp),%eax
- movl (%eax),%ebx /* restore ebx */
- movl 4(%eax),%esp /* restore esp */
- movl 8(%eax),%ebp /* restore ebp */
- movl 12(%eax),%esi /* restore esi */
- movl 16(%eax),%edi /* restore edi */
- movl 20(%eax),%edx /* get rta */
- movl %edx,(%esp) /* put in return frame */
- xorl %eax,%eax /* return(1); */
- incl %eax
- ret
-
-/*
- * Support for BB-profiling (gcc -a). The kernbb program will extract
- * the data from the kernel.
- */
-
- .data
- ALIGN_DATA
- .globl bbhead
-bbhead:
- .long 0
-
- .text
-NON_GPROF_ENTRY(__bb_init_func)
- movl 4(%esp),%eax
- movl $1,(%eax)
- movl bbhead,%edx
- movl %edx,16(%eax)
- movl %eax,bbhead
- NON_GPROF_RET
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/i386-xen/swtch.s
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/swtch.s Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,445 +0,0 @@
-/*-
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * William Jolitz.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/i386/i386/swtch.s,v 1.143 2003/09/30 08:11:35 jeff Exp $
- */
-
-#include "opt_npx.h"
-
-#include <machine/asmacros.h>
-
-#include "assym.s"
-
-
-/*****************************************************************************/
-/* Scheduling */
-/*****************************************************************************/
-
- .text
-
-/*
- * cpu_throw()
- *
- * This is the second half of cpu_swtch(). It is used when the current
- * thread is either a dummy or slated to die, and we no longer care
- * about its state. This is only a slight optimization and is probably
- * not worth it anymore. Note that we need to clear the pm_active bits so
- * we do need the old proc if it still exists.
- * 0(%esp) = ret
- * 4(%esp) = oldtd
- * 8(%esp) = newtd
- */
-ENTRY(cpu_throw)
- movl PCPU(CPUID), %esi
- movl 4(%esp),%ecx /* Old thread */
- testl %ecx,%ecx /* no thread? */
- jz 1f
- /* release bit from old pm_active */
- movl PCPU(CURPMAP), %ebx
-#ifdef SMP
- lock
-#endif
- btrl %esi, PM_ACTIVE(%ebx) /* clear old */
-1:
- movl 8(%esp),%ecx /* New thread */
- movl TD_PCB(%ecx),%edx
- movl PCB_CR3(%edx),%eax
-
- movl %eax,PCPU(CR3) /* new address space */
-
- pushl %ecx
- pushl %edx
- pushl %esi
- pushl %eax
- call load_cr3
- addl $4,%esp
- popl %esi
- popl %edx
- popl %ecx
-
- /* set bit in new pm_active */
- movl TD_PROC(%ecx),%eax
- movl P_VMSPACE(%eax), %ebx
- addl $VM_PMAP, %ebx
- movl %ebx, PCPU(CURPMAP)
-#ifdef SMP
- lock
-#endif
- btsl %esi, PM_ACTIVE(%ebx) /* set new */
- jmp sw1
-
-/*
- * cpu_switch(old, new)
- *
- * Save the current thread state, then select the next thread to run
- * and load its state.
- * 0(%esp) = ret
- * 4(%esp) = oldtd
- * 8(%esp) = newtd
- */
-ENTRY(cpu_switch)
-
- /* Switch to new thread. First, save context. */
- movl 4(%esp),%ecx
-
-#ifdef INVARIANTS
- testl %ecx,%ecx /* no thread? */
- jz badsw2 /* no, panic */
-#endif
-
- movl TD_PCB(%ecx),%edx
-
- movl (%esp),%eax /* Hardware registers */
- movl %eax,PCB_EIP(%edx)
- movl %ebx,PCB_EBX(%edx)
- movl %esp,PCB_ESP(%edx)
- movl %ebp,PCB_EBP(%edx)
- movl %esi,PCB_ESI(%edx)
- movl %edi,PCB_EDI(%edx)
- movl %gs,PCB_GS(%edx)
-#if 0
- pushfl /* PSL */
- popl PCB_PSL(%edx)
-#endif
- /* Check to see if we need to call a switchout function. */
- movl PCB_SWITCHOUT(%edx),%eax
- cmpl $0, %eax
- je 1f
- call *%eax
-1:
- /* Test if debug registers should be saved. */
- testl $PCB_DBREGS,PCB_FLAGS(%edx)
- jz 1f /* no, skip over */
- movl %dr7,%eax /* yes, do the save */
- movl %eax,PCB_DR7(%edx)
- andl $0x0000fc00, %eax /* disable all watchpoints */
- movl %eax,%dr7
- movl %dr6,%eax
- movl %eax,PCB_DR6(%edx)
- movl %dr3,%eax
- movl %eax,PCB_DR3(%edx)
- movl %dr2,%eax
- movl %eax,PCB_DR2(%edx)
- movl %dr1,%eax
- movl %eax,PCB_DR1(%edx)
- movl %dr0,%eax
- movl %eax,PCB_DR0(%edx)
-1:
-
-#ifdef DEV_NPX
- /* have we used fp, and need a save? */
- cmpl %ecx,PCPU(FPCURTHREAD)
- jne 1f
- addl $PCB_SAVEFPU,%edx /* h/w bugs make saving
complicated */
- pushl %edx
- call npxsave /* do it in a big C function */
- popl %eax
-1:
-#endif
-
-
- /* Save is done. Now fire up new thread. Leave old vmspace. */
- movl %ecx,%edi
- movl 8(%esp),%ecx /* New thread */
-#ifdef INVARIANTS
- testl %ecx,%ecx /* no thread? */
- jz badsw3 /* no, panic */
-#endif
- movl TD_PCB(%ecx),%edx
- movl PCPU(CPUID), %esi
-
- /* switch address space */
- movl PCB_CR3(%edx),%eax
-
- cmpl %eax,IdlePTD /* Kernel address space? */
-
- je sw1
- /* XXX optimize later KMM */
-#if 0
- movl %cr3,%ebx /* The same address space? */
-#else
- movl PCPU(CR3),%ebx
-#endif
- cmpl %ebx,%eax
- je sw1
-
- movl %eax,PCPU(CR3) /* new address space */
-
- pushl %edx
- pushl %ecx
- pushl %esi
- pushl %eax
- call load_cr3 /* inform xen of the switch */
- addl $4,%esp
- popl %esi
- popl %ecx
- popl %edx
-
- /* Release bit from old pmap->pm_active */
- movl PCPU(CURPMAP), %ebx
-
-#ifdef SMP
- lock
-#endif
- btrl %esi, PM_ACTIVE(%ebx) /* clear old */
- /* Set bit in new pmap->pm_active */
- movl TD_PROC(%ecx),%eax /* newproc */
- movl P_VMSPACE(%eax), %ebx
- addl $VM_PMAP, %ebx
- movl %ebx, PCPU(CURPMAP)
-#ifdef SMP
- lock
-#endif
- btsl %esi, PM_ACTIVE(%ebx) /* set new */
-sw1:
-
-#if 0
-
- /* only one task selector under Xen */
- /*
- * At this point, we've switched address spaces and are ready
- * to load up the rest of the next context.
- */
- cmpl $0, PCB_EXT(%edx) /* has pcb extension? */
- je 1f /* If not, use the default */
- btsl %esi, private_tss /* mark use of private tss */
- movl PCB_EXT(%edx), %edi /* new tss descriptor */
- jmp 2f /* Load it up */
-
-1: /*
- * Use the common default TSS instead of our own.
- * Set our stack pointer into the TSS, it's set to just
- * below the PCB. In C, common_tss.tss_esp0 = &pcb - 16;
- */
- leal -16(%edx), %ebx /* leave space for vm86 */
- movl %ebx, PCPU(COMMON_TSS) + TSS_ESP0
-
- /*
- * Test this CPU's bit in the bitmap to see if this
- * CPU was using a private TSS.
- */
- btrl %esi, private_tss /* Already using the common? */
- jae 3f /* if so, skip reloading */
- PCPU_ADDR(COMMON_TSSD, %edi)
-2:
- /* Move correct tss descriptor into GDT slot, then reload tr. */
- movl PCPU(TSS_GDT), %ebx /* entry in GDT */
- movl 0(%edi), %eax
- movl %eax, 0(%ebx)
- movl 4(%edi), %eax
- movl %eax, 4(%ebx)
-
- movl $GPROC0_SEL*8, %esi /* GSEL(entry, SEL_KPL) */
- ltr %si
-#endif /* !XEN */
-3:
- /* notify Xen of task switch */
- pushl %edx /* &pcb is the new stack base */
- pushl $KDSEL
- pushl $HYPERVISOR_STACK_SWITCH
- call ni_queue_multicall2
- addl $12,%esp
- /* XXX handle DOM0 IOPL case here (KMM) */
- /* we currently don't support running FreeBSD */
- /* in DOM0 so we can skip for now */
-
- call ni_execute_multicall_list
-
- /* Restore context. */
- movl PCB_EBX(%edx),%ebx
- movl PCB_ESP(%edx),%esp
- movl PCB_EBP(%edx),%ebp
- movl PCB_ESI(%edx),%esi
- movl PCB_EDI(%edx),%edi
- movl PCB_EIP(%edx),%eax
- movl %eax,(%esp)
-#if 0
- pushl PCB_PSL(%edx)
- popfl
-#endif
- movl %edx, PCPU(CURPCB)
- movl %ecx, PCPU(CURTHREAD) /* into next thread */
-
- /*
- * Determine the LDT to use and load it if is the default one and
- * that is not the current one.
- */
- movl TD_PROC(%ecx),%eax
- cmpl $0,P_MD+MD_LDT(%eax)
- jnz 1f
- movl _default_ldt,%eax
- cmpl PCPU(CURRENTLDT),%eax
- je 2f
- pushl %edx
- pushl %eax
- xorl %eax,%eax
- movl %eax,%gs
- call i386_reset_ldt
- popl %eax
- popl %edx
-
- movl %eax,PCPU(CURRENTLDT)
- jmp 2f
-1:
- /* Load the LDT when it is not the default one. */
- pushl %edx /* Preserve pointer to pcb. */
- addl $P_MD,%eax /* Pointer to mdproc is arg. */
- pushl %eax
- call set_user_ldt
- addl $4,%esp
- popl %edx
-2:
- /* This must be done after loading the user LDT. */
- .globl cpu_switch_load_gs
-cpu_switch_load_gs:
- movl PCB_GS(%edx),%gs
-
- /* XXX evidently setting debug registers needs to be
- * routed through Xen - this appears to work - so I
- * am leaving it as it is for now - (KMM)
- */
-
- /* Test if debug registers should be restored. */
- testl $PCB_DBREGS,PCB_FLAGS(%edx)
- jz 1f
-
- /*
- * Restore debug registers. The special code for dr7 is to
- * preserve the current values of its reserved bits.
- */
- movl PCB_DR6(%edx),%eax
- movl %eax,%dr6
- movl PCB_DR3(%edx),%eax
- movl %eax,%dr3
- movl PCB_DR2(%edx),%eax
- movl %eax,%dr2
- movl PCB_DR1(%edx),%eax
- movl %eax,%dr1
- movl PCB_DR0(%edx),%eax
- movl %eax,%dr0
- movl %dr7,%eax
- andl $0x0000fc00,%eax
- movl PCB_DR7(%edx),%ecx
- andl $~0x0000fc00,%ecx
- orl %ecx,%eax
- movl %eax,%dr7
-1:
- ret
-
-#ifdef INVARIANTS
-badsw1:
- pushal
- pushl $sw0_1
- call panic
-sw0_1: .asciz "cpu_throw: no newthread supplied"
-
-badsw2:
- pushal
- pushl $sw0_2
- call panic
-sw0_2: .asciz "cpu_switch: no curthread supplied"
-
-badsw3:
- pushal
- pushl $sw0_3
- call panic
-sw0_3: .asciz "cpu_switch: no newthread supplied"
-#endif
-
-/*
- * savectx(pcb)
- * Update pcb, saving current processor state.
- */
-ENTRY(savectx)
- /* Fetch PCB. */
- movl 4(%esp),%ecx
-
- /* Save caller's return address. Child won't execute this routine. */
- movl (%esp),%eax
- movl %eax,PCB_EIP(%ecx)
-
-#if 0
- movl %cr3,%eax
-#else
- movl PCPU(CR3),%eax
-#endif
- movl %eax,PCB_CR3(%ecx)
-
- movl %ebx,PCB_EBX(%ecx)
- movl %esp,PCB_ESP(%ecx)
- movl %ebp,PCB_EBP(%ecx)
- movl %esi,PCB_ESI(%ecx)
- movl %edi,PCB_EDI(%ecx)
- movl %gs,PCB_GS(%ecx)
-#if 0
- pushfl
- popl PCB_PSL(%ecx)
-#endif
-#ifdef DEV_NPX
- /*
- * If fpcurthread == NULL, then the npx h/w state is irrelevant and the
- * state had better already be in the pcb. This is true for forks
- * but not for dumps (the old book-keeping with FP flags in the pcb
- * always lost for dumps because the dump pcb has 0 flags).
- *
- * If fpcurthread != NULL, then we have to save the npx h/w state to
- * fpcurthread's pcb and copy it to the requested pcb, or save to the
- * requested pcb and reload. Copying is easier because we would
- * have to handle h/w bugs for reloading. We used to lose the
- * parent's npx state for forks by forgetting to reload.
- */
- pushfl
- call ni_cli
- movl PCPU(FPCURTHREAD),%eax
- testl %eax,%eax
- je 1f
-
- pushl %ecx
- movl TD_PCB(%eax),%eax
- leal PCB_SAVEFPU(%eax),%eax
- pushl %eax
- pushl %eax
- call npxsave
- addl $4,%esp
- popl %eax
- popl %ecx
-
- pushl $PCB_SAVEFPU_SIZE
- leal PCB_SAVEFPU(%ecx),%ecx
- pushl %ecx
- pushl %eax
- call bcopy
- addl $12,%esp
-1:
- popfl
-#endif /* DEV_NPX */
-
- ret
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/i386-xen/symbols.raw
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/symbols.raw Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,75 +0,0 @@
-# @(#)symbols.raw 7.6 (Berkeley) 5/8/91
-#
-# $FreeBSD: src/sys/i386/i386/symbols.raw,v 1.15 1999/08/28 00:43:51 peter Exp
$
-#
-
-
-#gdb
- _IdlePTD
- _PTD
- _panicstr
- _atdevbase
-# _version
-#dmesg
- _msgbufp
-# _msgbuf
-#iostat
- _tk_nin
- _tk_nout
- _cp_time
-# _io_info
-#ps
- _nswap
- _maxslp
- _ccpu
- _fscale
- _avail_start
- _avail_end
-#pstat
-# _cons
- _nswap
- _swapblist
-# _swaplist
-#vmstat
- _cp_time
-# _rate
-# _total
-# _sum
-# _rectime
-# _pgintime
- _boottime
-#w
- _swapdev
- _nswap
- _averunnable
- _boottime
-#netstat
- _mbstat
- _ipstat
- _tcb
- _tcpstat
- _udb
- _udpstat
-# _rawcb
- _ifnet
-# _rthost
-# _rtnet
- _icmpstat
- _filehead
- _nfiles
-# _rthashsize
-# _radix_node_head
-#routed
- _ifnet
-#rwho
- _boottime
-#savecore
- _dumpdev
- _dumplo
- _time_second
- _version
- _dumpsize
- _panicstr
- _dumpmag
-#deprecated
-# _avenrun
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/i386-xen/sys_machdep.c
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/sys_machdep.c Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,703 +0,0 @@
-/*-
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * from: @(#)sys_machdep.c 5.5 (Berkeley) 1/19/91
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/sys_machdep.c,v 1.91 2003/09/07 05:23:28
davidxu Exp $");
-
-#include "opt_kstack_pages.h"
-#include "opt_mac.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/lock.h>
-#include <sys/mac.h>
-#include <sys/malloc.h>
-#include <sys/mutex.h>
-#include <sys/proc.h>
-#include <sys/smp.h>
-#include <sys/sysproto.h>
-#include <sys/user.h>
-
-#include <vm/vm.h>
-#include <vm/pmap.h>
-#include <vm/vm_map.h>
-#include <vm/vm_extern.h>
-
-#include <machine/cpu.h>
-#include <machine/pcb_ext.h> /* pcb.h included by sys/user.h */
-#include <machine/proc.h>
-#include <machine/sysarch.h>
-#include <machine/xenfunc.h>
-
-#include <vm/vm_kern.h> /* for kernel_map */
-
-#define MAX_LD 8192
-#define LD_PER_PAGE 512
-#define NEW_MAX_LD(num) ((num + LD_PER_PAGE) & ~(LD_PER_PAGE-1))
-#define SIZE_FROM_LARGEST_LD(num) (NEW_MAX_LD(num) << 3)
-
-void i386_reset_ldt(struct proc_ldt *pldt);
-
-static int i386_get_ldt(struct thread *, char *);
-static int i386_set_ldt(struct thread *, char *);
-static int i386_set_ldt_data(struct thread *, int start, int num,
- union descriptor *descs);
-static int i386_ldt_grow(struct thread *td, int len);
-static int i386_get_ioperm(struct thread *, char *);
-static int i386_set_ioperm(struct thread *, char *);
-#ifdef SMP
-static void set_user_ldt_rv(struct thread *);
-#endif
-
-#ifndef _SYS_SYSPROTO_H_
-struct sysarch_args {
- int op;
- char *parms;
-};
-#endif
-
-int
-sysarch(td, uap)
- struct thread *td;
- register struct sysarch_args *uap;
-{
- int error;
-
- mtx_lock(&Giant);
- switch(uap->op) {
- case I386_GET_LDT:
- error = i386_get_ldt(td, uap->parms);
- break;
-
- case I386_SET_LDT:
- error = i386_set_ldt(td, uap->parms);
- break;
- case I386_GET_IOPERM:
- error = i386_get_ioperm(td, uap->parms);
- break;
- case I386_SET_IOPERM:
- error = i386_set_ioperm(td, uap->parms);
- break;
-#if 0
- case I386_VM86:
- error = vm86_sysarch(td, uap->parms);
- break;
-#endif
- default:
- error = EINVAL;
- break;
- }
- mtx_unlock(&Giant);
- return (error);
-}
-
-int
-i386_extend_pcb(struct thread *td)
-{
- int i, offset;
- u_long *addr;
- struct pcb_ext *ext;
- struct soft_segment_descriptor ssd = {
- 0, /* segment base address (overwritten) */
- ctob(IOPAGES + 1) - 1, /* length */
- SDT_SYS386TSS, /* segment type */
- 0, /* priority level */
- 1, /* descriptor present */
- 0, 0,
- 0, /* default 32 size */
- 0 /* granularity */
- };
-
- if (td->td_proc->p_flag & P_SA)
- return (EINVAL); /* XXXKSE */
-/* XXXKSE All the code below only works in 1:1 needs changing */
- ext = (struct pcb_ext *)kmem_alloc(kernel_map, ctob(IOPAGES+1));
- if (ext == 0)
- return (ENOMEM);
- bzero(ext, sizeof(struct pcb_ext));
- /* -16 is so we can convert a trapframe into vm86trapframe inplace */
- ext->ext_tss.tss_esp0 = td->td_kstack + ctob(KSTACK_PAGES) -
- sizeof(struct pcb) - 16;
- ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
- /*
- * The last byte of the i/o map must be followed by an 0xff byte.
- * We arbitrarily allocate 16 bytes here, to keep the starting
- * address on a doubleword boundary.
- */
- offset = PAGE_SIZE - 16;
- ext->ext_tss.tss_ioopt =
- (offset - ((unsigned)&ext->ext_tss - (unsigned)ext)) << 16;
- ext->ext_iomap = (caddr_t)ext + offset;
- ext->ext_vm86.vm86_intmap = (caddr_t)ext + offset - 32;
-
- addr = (u_long *)ext->ext_vm86.vm86_intmap;
- for (i = 0; i < (ctob(IOPAGES) + 32 + 16) / sizeof(u_long); i++)
- *addr++ = ~0;
-
- ssd.ssd_base = (unsigned)&ext->ext_tss;
- ssd.ssd_limit -= ((unsigned)&ext->ext_tss - (unsigned)ext);
- ssdtosd(&ssd, &ext->ext_tssd);
-
- KASSERT(td->td_proc == curthread->td_proc, ("giving TSS to !curproc"));
- KASSERT(td->td_pcb->pcb_ext == 0, ("already have a TSS!"));
- mtx_lock_spin(&sched_lock);
- td->td_pcb->pcb_ext = ext;
-
- /* switch to the new TSS after syscall completes */
- td->td_flags |= TDF_NEEDRESCHED;
- mtx_unlock_spin(&sched_lock);
-
- return 0;
-}
-
-static int
-i386_set_ioperm(td, args)
- struct thread *td;
- char *args;
-{
- int i, error;
- struct i386_ioperm_args ua;
- char *iomap;
-
- if ((error = copyin(args, &ua, sizeof(struct i386_ioperm_args))) != 0)
- return (error);
-
-#ifdef MAC
- if ((error = mac_check_sysarch_ioperm(td->td_ucred)) != 0)
- return (error);
-#endif
- if ((error = suser(td)) != 0)
- return (error);
- if ((error = securelevel_gt(td->td_ucred, 0)) != 0)
- return (error);
- /*
- * XXX
- * While this is restricted to root, we should probably figure out
- * whether any other driver is using this i/o address, as so not to
- * cause confusion. This probably requires a global 'usage registry'.
- */
-
- if (td->td_pcb->pcb_ext == 0)
- if ((error = i386_extend_pcb(td)) != 0)
- return (error);
- iomap = (char *)td->td_pcb->pcb_ext->ext_iomap;
-
- if (ua.start + ua.length > IOPAGES * PAGE_SIZE * NBBY)
- return (EINVAL);
-
- for (i = ua.start; i < ua.start + ua.length; i++) {
- if (ua.enable)
- iomap[i >> 3] &= ~(1 << (i & 7));
- else
- iomap[i >> 3] |= (1 << (i & 7));
- }
- return (error);
-}
-
-static int
-i386_get_ioperm(td, args)
- struct thread *td;
- char *args;
-{
- int i, state, error;
- struct i386_ioperm_args ua;
- char *iomap;
-
- if ((error = copyin(args, &ua, sizeof(struct i386_ioperm_args))) != 0)
- return (error);
- if (ua.start >= IOPAGES * PAGE_SIZE * NBBY)
- return (EINVAL);
-
- if (td->td_pcb->pcb_ext == 0) {
- ua.length = 0;
- goto done;
- }
-
- iomap = (char *)td->td_pcb->pcb_ext->ext_iomap;
-
- i = ua.start;
- state = (iomap[i >> 3] >> (i & 7)) & 1;
- ua.enable = !state;
- ua.length = 1;
-
- for (i = ua.start + 1; i < IOPAGES * PAGE_SIZE * NBBY; i++) {
- if (state != ((iomap[i >> 3] >> (i & 7)) & 1))
- break;
- ua.length++;
- }
-
-done:
- error = copyout(&ua, args, sizeof(struct i386_ioperm_args));
- return (error);
-}
-
-/*
- * Update the GDT entry pointing to the LDT to point to the LDT of the
- * current process.
- *
- * This must be called with sched_lock held. Unfortunately, we can't use a
- * mtx_assert() here because cpu_switch() calls this function after changing
- * curproc but before sched_lock's owner is updated in mi_switch().
- */
-void
-set_user_ldt(struct mdproc *mdp)
-{
- struct proc_ldt *pldt;
- pldt = mdp->md_ldt;
- i386_reset_ldt(pldt);
- PCPU_SET(currentldt, (int)pldt);
-
-}
-
-#ifdef SMP
-static void
-set_user_ldt_rv(struct thread *td)
-{
-
- if (td->td_proc != curthread->td_proc)
- return;
-
- set_user_ldt(&td->td_proc->p_md);
-}
-#endif
-
-/*
- * Must be called with either sched_lock free or held but not recursed.
- * If it does not return NULL, it will return with it owned.
- */
-struct proc_ldt *
-user_ldt_alloc(struct mdproc *mdp, int len)
-{
- struct proc_ldt *pldt,*new_ldt;
-
-
- if (mtx_owned(&sched_lock))
- mtx_unlock_spin(&sched_lock);
- mtx_assert(&sched_lock, MA_NOTOWNED);
- MALLOC(new_ldt, struct proc_ldt *, sizeof(struct proc_ldt),
- M_SUBPROC, M_WAITOK);
-
- new_ldt->ldt_len = len = NEW_MAX_LD(len);
- new_ldt->ldt_base = (caddr_t)kmem_alloc(kernel_map,
- round_page(len * sizeof(union descriptor)));
- if (new_ldt->ldt_base == NULL) {
- FREE(new_ldt, M_SUBPROC);
- return NULL;
- }
- new_ldt->ldt_refcnt = 1;
- new_ldt->ldt_active = 0;
-
- mtx_lock_spin(&sched_lock);
-
- if ((pldt = mdp->md_ldt)) {
- if (len > pldt->ldt_len)
- len = pldt->ldt_len;
- bcopy(pldt->ldt_base, new_ldt->ldt_base,
- len * sizeof(union descriptor));
- } else {
- bcopy(ldt, new_ldt->ldt_base, PAGE_SIZE);
- }
- pmap_map_readonly(kernel_pmap, (vm_offset_t)new_ldt->ldt_base,
- new_ldt->ldt_len*sizeof(union descriptor));
- return new_ldt;
-}
-
-/*
- * Must be called either with sched_lock free or held but not recursed.
- * If md_ldt is not NULL, it will return with sched_lock released.
- */
-void
-user_ldt_free(struct thread *td)
-{
- struct mdproc *mdp = &td->td_proc->p_md;
- struct proc_ldt *pldt = mdp->md_ldt;
- if (pldt == NULL)
- return;
-
- if (!mtx_owned(&sched_lock))
- mtx_lock_spin(&sched_lock);
- mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED);
- if (td == PCPU_GET(curthread)) {
- PCPU_SET(currentldt, _default_ldt);
- i386_reset_ldt((struct proc_ldt *)_default_ldt);
- }
-
- mdp->md_ldt = NULL;
- if (--pldt->ldt_refcnt == 0) {
- mtx_unlock_spin(&sched_lock);
-
- pmap_map_readwrite(kernel_pmap,(vm_offset_t) pldt->ldt_base,
- pldt->ldt_len*sizeof(union descriptor));
- kmem_free(kernel_map, (vm_offset_t)pldt->ldt_base,
- pldt->ldt_len * sizeof(union descriptor));
- FREE(pldt, M_SUBPROC);
- } else
- mtx_unlock_spin(&sched_lock);
-}
-
-void
-i386_reset_ldt(struct proc_ldt *pldt)
-{
- xen_set_ldt((vm_offset_t)pldt->ldt_base, pldt->ldt_len);
-}
-
-static int
-i386_get_ldt(td, args)
- struct thread *td;
- char *args;
-{
- int error = 0;
- struct proc_ldt *pldt = td->td_proc->p_md.md_ldt;
- int nldt, num;
- union descriptor *lp;
- struct i386_ldt_args ua, *uap = &ua;
-
- if ((error = copyin(args, uap, sizeof(struct i386_ldt_args))) < 0)
- return(error);
-
-#ifdef DEBUG
- printf("i386_get_ldt: start=%d num=%d descs=%p\n",
- uap->start, uap->num, (void *)uap->descs);
-#endif
-
- /* verify range of LDTs exist */
- if ((uap->start < 0) || (uap->num <= 0))
- return(EINVAL);
-
- if (pldt) {
- nldt = pldt->ldt_len;
- num = min(uap->num, nldt);
- lp = &((union descriptor *)(pldt->ldt_base))[uap->start];
- } else {
- nldt = sizeof(ldt)/sizeof(ldt[0]);
- num = min(uap->num, nldt);
- lp = &ldt[uap->start];
- }
- if (uap->start + num > nldt)
- return(EINVAL);
-
- error = copyout(lp, uap->descs, num * sizeof(union descriptor));
- if (!error)
- td->td_retval[0] = num;
-
- return(error);
-}
-
-static int ldt_warnings;
-#define NUM_LDT_WARNINGS 10
-
-static int
-i386_set_ldt(struct thread *td, char *args)
-{
- int error = 0, i;
- int largest_ld;
- struct mdproc *mdp = &td->td_proc->p_md;
- struct proc_ldt *pldt = 0;
- struct i386_ldt_args ua, *uap = &ua;
- union descriptor *descs, *dp;
- int descs_size;
-
- if ((error = copyin(args, uap, sizeof(struct i386_ldt_args))) < 0)
- return(error);
-#ifdef DEBUG
- printf("i386_set_ldt: start=%d num=%d descs=%p\n",
- uap->start, uap->num, (void *)uap->descs);
-
-#endif
-
- if (uap->descs == NULL) {
- /* Free descriptors */
- if (uap->start == 0 && uap->num == 0) {
- /*
- * Treat this as a special case, so userland needn't
- * know magic number NLDT.
- */
- uap->start = NLDT;
- uap->num = MAX_LD - NLDT;
- }
- if (uap->start <= LUDATA_SEL || uap->num <= 0)
- return (EINVAL);
- mtx_lock_spin(&sched_lock);
- pldt = mdp->md_ldt;
- if (pldt == NULL || uap->start >= pldt->ldt_len) {
- mtx_unlock_spin(&sched_lock);
- return (0);
- }
- largest_ld = uap->start + uap->num;
- if (largest_ld > pldt->ldt_len)
- largest_ld = pldt->ldt_len;
- i = largest_ld - uap->start;
- bzero(&((union descriptor *)(pldt->ldt_base))[uap->start],
- sizeof(union descriptor) * i);
- mtx_unlock_spin(&sched_lock);
- return (0);
- }
-
- if (!(uap->start == LDT_AUTO_ALLOC && uap->num == 1)) {
- /* complain a for a while if using old methods */
- if (ldt_warnings++ < NUM_LDT_WARNINGS) {
- printf("Warning: pid %d used static ldt allocation.\n",
- td->td_proc->p_pid);
- printf("See the i386_set_ldt man page for more info\n");
- }
- /* verify range of descriptors to modify */
- largest_ld = uap->start + uap->num;
- if (uap->start >= MAX_LD ||
- uap->num < 0 || largest_ld > MAX_LD) {
- return (EINVAL);
- }
- }
-
- descs_size = uap->num * sizeof(union descriptor);
- descs = (union descriptor *)kmem_alloc(kernel_map, descs_size);
- if (descs == NULL)
- return (ENOMEM);
- error = copyin(uap->descs, descs, descs_size);
- if (error) {
- kmem_free(kernel_map, (vm_offset_t)descs, descs_size);
- return (error);
- }
-
- /* Check descriptors for access violations */
- for (i = 0; i < uap->num; i++) {
- dp = &descs[i];
-
- switch (dp->sd.sd_type) {
- case SDT_SYSNULL: /* system null */
- dp->sd.sd_p = 0;
- break;
- case SDT_SYS286TSS: /* system 286 TSS available */
- case SDT_SYSLDT: /* system local descriptor table */
- case SDT_SYS286BSY: /* system 286 TSS busy */
- case SDT_SYSTASKGT: /* system task gate */
- case SDT_SYS286IGT: /* system 286 interrupt gate */
- case SDT_SYS286TGT: /* system 286 trap gate */
- case SDT_SYSNULL2: /* undefined by Intel */
- case SDT_SYS386TSS: /* system 386 TSS available */
- case SDT_SYSNULL3: /* undefined by Intel */
- case SDT_SYS386BSY: /* system 386 TSS busy */
- case SDT_SYSNULL4: /* undefined by Intel */
- case SDT_SYS386IGT: /* system 386 interrupt gate */
- case SDT_SYS386TGT: /* system 386 trap gate */
- case SDT_SYS286CGT: /* system 286 call gate */
- case SDT_SYS386CGT: /* system 386 call gate */
- /* I can't think of any reason to allow a user proc
- * to create a segment of these types. They are
- * for OS use only.
- */
- kmem_free(kernel_map, (vm_offset_t)descs, descs_size);
- return (EACCES);
- /*NOTREACHED*/
-
- /* memory segment types */
- case SDT_MEMEC: /* memory execute only conforming */
- case SDT_MEMEAC: /* memory execute only accessed conforming */
- case SDT_MEMERC: /* memory execute read conforming */
- case SDT_MEMERAC: /* memory execute read accessed conforming */
- /* Must be "present" if executable and conforming. */
- if (dp->sd.sd_p == 0) {
- kmem_free(kernel_map, (vm_offset_t)descs,
- descs_size);
- return (EACCES);
- }
- break;
- case SDT_MEMRO: /* memory read only */
- case SDT_MEMROA: /* memory read only accessed */
- case SDT_MEMRW: /* memory read write */
- case SDT_MEMRWA: /* memory read write accessed */
- case SDT_MEMROD: /* memory read only expand dwn limit */
- case SDT_MEMRODA: /* memory read only expand dwn lim accessed */
- case SDT_MEMRWD: /* memory read write expand dwn limit */
- case SDT_MEMRWDA: /* memory read write expand dwn lim acessed */
- case SDT_MEME: /* memory execute only */
- case SDT_MEMEA: /* memory execute only accessed */
- case SDT_MEMER: /* memory execute read */
- case SDT_MEMERA: /* memory execute read accessed */
- break;
- default:
- kmem_free(kernel_map, (vm_offset_t)descs, descs_size);
- return(EINVAL);
- /*NOTREACHED*/
- }
-
- /* Only user (ring-3) descriptors may be present. */
- if ((dp->sd.sd_p != 0) && (dp->sd.sd_dpl != SEL_UPL)) {
- kmem_free(kernel_map, (vm_offset_t)descs, descs_size);
- return (EACCES);
- }
- }
-
- if (uap->start == LDT_AUTO_ALLOC && uap->num == 1) {
- /* Allocate a free slot */
- pldt = mdp->md_ldt;
- if (pldt == NULL) {
- load_gs(0);
- error = i386_ldt_grow(td, NLDT+1);
- if (error) {
- kmem_free(kernel_map, (vm_offset_t)descs,
- descs_size);
- return (error);
- }
- pldt = mdp->md_ldt;
- }
-again:
- mtx_lock_spin(&sched_lock);
- /*
- * start scanning a bit up to leave room for NVidia and
- * Wine, which still user the "Blat" method of allocation.
- */
- dp = &((union descriptor *)(pldt->ldt_base))[NLDT];
- for (i = NLDT; i < pldt->ldt_len; ++i) {
- if (dp->sd.sd_type == SDT_SYSNULL)
- break;
- dp++;
- }
- if (i >= pldt->ldt_len) {
- mtx_unlock_spin(&sched_lock);
- error = i386_ldt_grow(td, pldt->ldt_len+1);
- if (error) {
- kmem_free(kernel_map, (vm_offset_t)descs,
- descs_size);
- return (error);
- }
- goto again;
- }
- uap->start = i;
- error = i386_set_ldt_data(td, i, 1, descs);
- mtx_unlock_spin(&sched_lock);
- } else {
- largest_ld = uap->start + uap->num;
- error = i386_ldt_grow(td, largest_ld);
- if (error == 0) {
- mtx_lock_spin(&sched_lock);
- error = i386_set_ldt_data(td, uap->start, uap->num,
- descs);
- mtx_unlock_spin(&sched_lock);
- }
- }
- kmem_free(kernel_map, (vm_offset_t)descs, descs_size);
- if (error == 0)
- td->td_retval[0] = uap->start;
- return (error);
-}
-typedef struct uint64_lohi {
- unsigned long lo;
- unsigned long hi;
-} uint64_lohi;
-
-static int
-i386_set_ldt_data(struct thread *td, int start, int num,
- union descriptor *descs)
-{
- struct mdproc *mdp = &td->td_proc->p_md;
- struct proc_ldt *pldt = mdp->md_ldt;
- int i, error;
-
- mtx_assert(&sched_lock, MA_OWNED);
-
- /* Fill in range */
- for (i = 0; i < num; i++) {
- error = HYPERVISOR_update_descriptor(vtomach(&((union
descriptor *)(pldt->ldt_base))[start + i]), ((uint64_lohi *)descs)[i].lo,
((uint64_lohi *)descs)[i].hi);
- if (error)
- panic("failed to update ldt: %d", error);
- }
- return (0);
-}
-
-static int
-i386_ldt_grow(struct thread *td, int len)
-{
- struct mdproc *mdp = &td->td_proc->p_md;
- struct proc_ldt *pldt;
- caddr_t old_ldt_base;
- int old_ldt_len;
-
- if (len > MAX_LD)
- return (ENOMEM);
- if (len < NLDT+1)
- len = NLDT+1;
- pldt = mdp->md_ldt;
- /* allocate user ldt */
- if (!pldt || len > pldt->ldt_len) {
- struct proc_ldt *new_ldt = user_ldt_alloc(mdp, len);
- if (new_ldt == NULL)
- return (ENOMEM);
- pldt = mdp->md_ldt;
- /* sched_lock was held by user_ldt_alloc */
- if (pldt) {
- if (new_ldt->ldt_len > pldt->ldt_len) {
- old_ldt_base = pldt->ldt_base;
- old_ldt_len = pldt->ldt_len;
- pldt->ldt_sd = new_ldt->ldt_sd;
- pldt->ldt_base = new_ldt->ldt_base;
- pldt->ldt_len = new_ldt->ldt_len;
- mtx_unlock_spin(&sched_lock);
- pmap_map_readwrite(kernel_pmap,
- (vm_offset_t)old_ldt_base,
- old_ldt_len * sizeof(union descriptor));
- kmem_free(kernel_map, (vm_offset_t)old_ldt_base,
- old_ldt_len * sizeof(union descriptor));
- FREE(new_ldt, M_SUBPROC);
- mtx_lock_spin(&sched_lock);
- } else {
- /*
- * If other threads already did the work,
- * do nothing
- */
- mtx_unlock_spin(&sched_lock);
- pmap_map_readwrite(kernel_pmap,
- (vm_offset_t)new_ldt->ldt_base,
- new_ldt->ldt_len * sizeof(union descriptor));
- kmem_free(kernel_map,
- (vm_offset_t)new_ldt->ldt_base,
- new_ldt->ldt_len * sizeof(union descriptor));
- FREE(new_ldt, M_SUBPROC);
- return (0);
- }
- } else {
- mdp->md_ldt = pldt = new_ldt;
- }
-#ifdef SMP
- mtx_unlock_spin(&sched_lock);
- /* signal other cpus to reload ldt */
- smp_rendezvous(NULL, (void (*)(void *))set_user_ldt_rv,
- NULL, td);
-#else
- set_user_ldt(mdp);
- mtx_unlock_spin(&sched_lock);
-#endif
- }
- return (0);
-}
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/i386-xen/trap.c
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/trap.c Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,998 +0,0 @@
-/*-
- * Copyright (C) 1994, David Greenman
- * Copyright (c) 1990, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * the University of Utah, and William Jolitz.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * from: @(#)trap.c 7.4 (Berkeley) 5/13/91
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/trap.c,v 1.260 2003/11/03 21:53:37 jhb
Exp $");
-
-/*
- * 386 Trap and System call handling
- */
-
-#include "opt_clock.h"
-#include "opt_cpu.h"
-#include "opt_isa.h"
-#include "opt_ktrace.h"
-#include "opt_npx.h"
-#include "opt_trap.h"
-
-#include <sys/param.h>
-#include <sys/bus.h>
-#include <sys/systm.h>
-#include <sys/proc.h>
-#include <sys/pioctl.h>
-#include <sys/ptrace.h>
-#include <sys/kdb.h>
-#include <sys/kernel.h>
-#include <sys/ktr.h>
-#include <sys/lock.h>
-#include <sys/mutex.h>
-#include <sys/resourcevar.h>
-#include <sys/signalvar.h>
-#include <sys/syscall.h>
-#include <sys/sysctl.h>
-#include <sys/sysent.h>
-#include <sys/uio.h>
-#include <sys/vmmeter.h>
-#ifdef KTRACE
-#include <sys/ktrace.h>
-#endif
-
-#include <vm/vm.h>
-#include <vm/vm_param.h>
-#include <vm/pmap.h>
-#include <vm/vm_kern.h>
-#include <vm/vm_map.h>
-#include <vm/vm_page.h>
-#include <vm/vm_extern.h>
-
-#include <machine/cpu.h>
-#include <machine/intr_machdep.h>
-#include <machine/md_var.h>
-#include <machine/pcb.h>
-#ifdef SMP
-#include <machine/smp.h>
-#endif
-#include <machine/tss.h>
-#ifdef POWERFAIL_NMI
-#include <sys/syslog.h>
-#include <machine/clock.h>
-#endif
-
-
-#include <machine/xenfunc.h>
-#include <machine/hypervisor.h>
-#include <machine/xenvar.h>
-#include <machine/hypervisor-ifs.h>
-
-
-extern void trap(struct trapframe frame);
-extern void syscall(struct trapframe frame);
-
-static int trap_pfault(struct trapframe *, int, vm_offset_t);
-static void trap_fatal(struct trapframe *, vm_offset_t);
-void dblfault_handler(void);
-
-extern inthand_t IDTVEC(lcall_syscall);
-
-#define MAX_TRAP_MSG 28
-static char *trap_msg[] = {
- "", /* 0 unused */
- "privileged instruction fault", /* 1 T_PRIVINFLT */
- "", /* 2 unused */
- "breakpoint instruction fault", /* 3 T_BPTFLT */
- "", /* 4 unused */
- "", /* 5 unused */
- "arithmetic trap", /* 6 T_ARITHTRAP */
- "", /* 7 unused */
- "", /* 8 unused */
- "general protection fault", /* 9 T_PROTFLT */
- "trace trap", /* 10 T_TRCTRAP */
- "", /* 11 unused */
- "page fault", /* 12 T_PAGEFLT */
- "", /* 13 unused */
- "alignment fault", /* 14 T_ALIGNFLT */
- "", /* 15 unused */
- "", /* 16 unused */
- "hypervisor callback", /* 17 T_HYPCALLBACK */
- "integer divide fault", /* 18 T_DIVIDE */
- "non-maskable interrupt trap", /* 19 T_NMI */
- "overflow trap", /* 20 T_OFLOW */
- "FPU bounds check fault", /* 21 T_BOUND */
- "FPU device not available", /* 22 T_DNA */
- "double fault", /* 23 T_DOUBLEFLT */
- "FPU operand fetch fault", /* 24 T_FPOPFLT */
- "invalid TSS fault", /* 25 T_TSSFLT */
- "segment not present fault", /* 26 T_SEGNPFLT */
- "stack fault", /* 27 T_STKFLT */
- "machine check trap", /* 28 T_MCHK */
-};
-
-#if defined(I586_CPU) && !defined(NO_F00F_HACK)
-extern int has_f00f_bug;
-#endif
-
-#ifdef KDB
-static int kdb_on_nmi = 1;
-SYSCTL_INT(_machdep, OID_AUTO, kdb_on_nmi, CTLFLAG_RW,
- &kdb_on_nmi, 0, "Go to KDB on NMI");
-#endif
-static int panic_on_nmi = 1;
-SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW,
- &panic_on_nmi, 0, "Panic on NMI");
-
-#ifdef WITNESS
-extern char *syscallnames[];
-#endif
-
-#ifdef DEVICE_POLLING
-extern u_int32_t poll_in_trap;
-extern int ether_poll(int count);
-#endif /* DEVICE_POLLING */
-
-
-/*
- * Exception, fault, and trap interface to the FreeBSD kernel.
- * This common code is called from assembly language IDT gate entry
- * routines that prepare a suitable stack frame, and restore this
- * frame after the exception has been processed.
- */
-
-void
-trap(struct trapframe frame)
-{
- struct thread *td = curthread;
- struct proc *p = td->td_proc;
- u_int sticks = 0;
- int i = 0, ucode = 0, type, code;
- vm_offset_t eva;
-
-#ifdef POWERFAIL_NMI
- static int lastalert = 0;
-#endif
-
- atomic_add_int(&cnt.v_trap, 1);
- type = frame.tf_trapno;
-#ifdef KDB
- if (kdb_active) {
- kdb_reenter();
- goto out;
- }
-#endif
-
- eva = 0;
- code = frame.tf_err;
-
- if (type == T_HYPCALLBACK) {
- evtchn_do_upcall((struct intrframe *)&frame);
- if (ISPL(frame.tf_cs) == SEL_KPL)
- goto out;
- goto userout;
- } else if (type == 0)
- panic("invalid trap type/code %d/%d\n",type, code);
-
-
- if (type == T_PAGEFLT) {
- /*
- * For some Cyrix CPUs, %cr2 is clobbered by
- * interrupts. This problem is worked around by using
- * an interrupt gate for the pagefault handler. We
- * are finally ready to read %cr2 and then must
- * reenable interrupts.
- *
- * If we get a page fault while in a critical section, then
- * it is most likely a fatal kernel page fault. The kernel
- * is already going to panic trying to get a sleep lock to
- * do the VM lookup, so just consider it a fatal trap so the
- * kernel can print out a useful trap message and even get
- * to the debugger.
- */
- eva = PCPU_GET(cr2);
-
- if (td->td_critnest != 0)
- trap_fatal(&frame, eva);
- }
-
-#ifdef DEVICE_POLLING
- if (poll_in_trap)
- ether_poll(poll_in_trap);
-#endif /* DEVICE_POLLING */
-
- if ((ISPL(frame.tf_cs) == SEL_UPL)
- || ((frame.tf_eflags & PSL_VM) &&
- !(PCPU_GET(curpcb)->pcb_flags & PCB_VM86CALL))) {
- /* user trap */
-
- sticks = td->td_sticks;
- td->td_frame = &frame;
- if (td->td_ucred != p->p_ucred)
- cred_update_thread(td);
-
- switch (type) {
- case T_PRIVINFLT: /* privileged instruction fault */
- ucode = type;
- i = SIGILL;
- break;
-
- case T_BPTFLT: /* bpt instruction fault */
- case T_TRCTRAP: /* trace trap */
- enable_intr();
- frame.tf_eflags &= ~PSL_T;
- i = SIGTRAP;
- break;
-
- case T_ARITHTRAP: /* arithmetic trap */
-#ifdef DEV_NPX
- ucode = npxtrap();
- if (ucode == -1)
- goto userout;
-#else
- ucode = code;
-#endif
- i = SIGFPE;
- break;
-
- case T_PROTFLT: /* general protection fault */
- case T_STKFLT: /* stack fault */
- case T_SEGNPFLT: /* segment not present fault */
- case T_TSSFLT: /* invalid TSS fault */
- case T_DOUBLEFLT: /* double fault */
- default:
- ucode = code + BUS_SEGM_FAULT ;
- printf("unexpected trap type/code %d/%d\n",type, code);
/* XXX temporary */
-
- i = SIGBUS;
- break;
-
- case T_PAGEFLT: /* page fault */
- if (td->td_pflags & TDP_SA)
- thread_user_enter(td);
-
- i = trap_pfault(&frame, TRUE, eva);
-#if defined(I586_CPU) && !defined(NO_F00F_HACK)
- if (i == -2) {
- /*
- * The f00f hack workaround has triggered, so
- * treat the fault as an illegal instruction
- * (T_PRIVINFLT) instead of a page fault.
- */
- type = frame.tf_trapno = T_PRIVINFLT;
-
- /* Proceed as in that case. */
- ucode = type;
- i = SIGILL;
- break;
- }
-#endif
- if (i == -1)
- goto userout;
- if (i == 0)
- goto user;
-
- ucode = T_PAGEFLT;
- break;
-
- case T_DIVIDE: /* integer divide fault */
- ucode = FPE_INTDIV;
- i = SIGFPE;
- break;
-
-#ifdef DEV_ISA
- case T_NMI:
-#ifdef POWERFAIL_NMI
-#ifndef TIMER_FREQ
-# define TIMER_FREQ 1193182
-#endif
- mtx_lock(&Giant);
- if (time_second - lastalert > 10) {
- log(LOG_WARNING, "NMI: power fail\n");
- sysbeep(TIMER_FREQ/880, hz);
- lastalert = time_second;
- }
- mtx_unlock(&Giant);
- goto userout;
-#else /* !POWERFAIL_NMI */
- /* machine/parity/power fail/"kitchen sink" faults */
- /* XXX Giant */
- if (isa_nmi(code) == 0) {
-#ifdef KDB
- /*
- * NMI can be hooked up to a pushbutton
- * for debugging.
- */
- if (kdb_on_nmi) {
- printf ("NMI ... going to debugger\n");
- kdb_trap (type, 0, &frame);
- }
-#endif /* KDB */
- goto userout;
- } else if (panic_on_nmi)
- panic("NMI indicates hardware failure");
- break;
-#endif /* POWERFAIL_NMI */
-#endif /* DEV_ISA */
-
- case T_OFLOW: /* integer overflow fault */
- ucode = FPE_INTOVF;
- i = SIGFPE;
- break;
-
- case T_BOUND: /* bounds check fault */
- ucode = FPE_FLTSUB;
- i = SIGFPE;
- break;
-
- case T_DNA:
-#ifdef DEV_NPX
- /* transparent fault (due to context switch "late") */
- if (npxdna())
- goto userout;
-#endif
- i = SIGFPE;
- ucode = FPE_FPU_NP_TRAP;
- break;
-
- case T_FPOPFLT: /* FPU operand fetch fault */
- ucode = T_FPOPFLT;
- i = SIGILL;
- break;
-
- case T_XMMFLT: /* SIMD floating-point exception */
- ucode = 0; /* XXX */
- i = SIGFPE;
- break;
- }
- } else {
- /* kernel trap */
-
- KASSERT(cold || td->td_ucred != NULL,
- ("kernel trap doesn't have ucred"));
- switch (type) {
- case T_PAGEFLT: /* page fault */
- (void) trap_pfault(&frame, FALSE, eva);
- goto out;
-
- case T_DNA:
-#ifdef DEV_NPX
- /*
- * The kernel is apparently using npx for copying.
- * XXX this should be fatal unless the kernel has
- * registered such use.
- */
- if (npxdna())
- goto out;
-#endif
- break;
-
- /*
- * The following two traps can happen in
- * vm86 mode, and, if so, we want to handle
- * them specially.
- */
- case T_PROTFLT: /* general protection fault */
- case T_STKFLT: /* stack fault */
-#if 0
- if (frame.tf_eflags & PSL_VM) {
- i = vm86_emulate((struct vm86frame *)&frame);
- if (i != 0)
- /*
- * returns to original process
- */
- vm86_trap((struct vm86frame *)&frame);
- goto out;
- }
-#endif
- if (type == T_STKFLT)
- break;
-
- /* FALL THROUGH */
-
- case T_SEGNPFLT: /* segment not present fault */
- if (PCPU_GET(curpcb)->pcb_flags & PCB_VM86CALL)
- break;
-
- /*
- * Invalid %fs's and %gs's can be created using
- * procfs or PT_SETREGS or by invalidating the
- * underlying LDT entry. This causes a fault
- * in kernel mode when the kernel attempts to
- * switch contexts. Lose the bad context
- * (XXX) so that we can continue, and generate
- * a signal.
- */
- if (frame.tf_eip == (int)cpu_switch_load_gs) {
- PCPU_GET(curpcb)->pcb_gs = 0;
-#if 0
- PROC_LOCK(p);
- psignal(p, SIGBUS);
- PROC_UNLOCK(p);
-#endif
- goto out;
- }
-
- if (td->td_intr_nesting_level != 0)
- break;
-
- /*
- * Invalid segment selectors and out of bounds
- * %eip's and %esp's can be set up in user mode.
- * This causes a fault in kernel mode when the
- * kernel tries to return to user mode. We want
- * to get this fault so that we can fix the
- * problem here and not have to check all the
- * selectors and pointers when the user changes
- * them.
- */
- if (frame.tf_eip == (int)doreti_iret) {
- frame.tf_eip = (int)doreti_iret_fault;
- goto out;
- }
- if (frame.tf_eip == (int)doreti_popl_ds) {
- frame.tf_eip = (int)doreti_popl_ds_fault;
- goto out;
- }
- if (frame.tf_eip == (int)doreti_popl_es) {
- frame.tf_eip = (int)doreti_popl_es_fault;
- goto out;
- }
- if (frame.tf_eip == (int)doreti_popl_fs) {
- frame.tf_eip = (int)doreti_popl_fs_fault;
- goto out;
- }
- if (PCPU_GET(curpcb)->pcb_onfault != NULL) {
- frame.tf_eip =
- (int)PCPU_GET(curpcb)->pcb_onfault;
- goto out;
- }
- break;
-
- case T_TSSFLT:
- /*
- * PSL_NT can be set in user mode and isn't cleared
- * automatically when the kernel is entered. This
- * causes a TSS fault when the kernel attempts to
- * `iret' because the TSS link is uninitialized. We
- * want to get this fault so that we can fix the
- * problem here and not every time the kernel is
- * entered.
- */
- if (frame.tf_eflags & PSL_NT) {
- frame.tf_eflags &= ~PSL_NT;
- goto out;
- }
- break;
-
- case T_TRCTRAP: /* trace trap */
- if (frame.tf_eip == (int)IDTVEC(lcall_syscall)) {
- /*
- * We've just entered system mode via the
- * syscall lcall. Continue single stepping
- * silently until the syscall handler has
- * saved the flags.
- */
- goto out;
- }
- if (frame.tf_eip == (int)IDTVEC(lcall_syscall) + 1) {
- /*
- * The syscall handler has now saved the
- * flags. Stop single stepping it.
- */
- frame.tf_eflags &= ~PSL_T;
- goto out;
- }
- /*
- * Ignore debug register trace traps due to
- * accesses in the user's address space, which
- * can happen under several conditions such as
- * if a user sets a watchpoint on a buffer and
- * then passes that buffer to a system call.
- * We still want to get TRCTRAPS for addresses
- * in kernel space because that is useful when
- * debugging the kernel.
- */
- /* XXX Giant */
- if (user_dbreg_trap() &&
- !(PCPU_GET(curpcb)->pcb_flags & PCB_VM86CALL)) {
- /*
- * Reset breakpoint bits because the
- * processor doesn't
- */
- load_dr6(rdr6() & 0xfffffff0);
- goto out;
- }
- /*
- * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
- */
- case T_BPTFLT:
- /*
- * If KDB is enabled, let it handle the debugger trap.
- * Otherwise, debugger traps "can't happen".
- */
-#ifdef KDB
- /* XXX Giant */
- if (kdb_trap (type, 0, &frame))
- goto out;
-#endif
- break;
-
-#ifdef DEV_ISA
- case T_NMI:
-#ifdef POWERFAIL_NMI
- mtx_lock(&Giant);
- if (time_second - lastalert > 10) {
- log(LOG_WARNING, "NMI: power fail\n");
- sysbeep(TIMER_FREQ/880, hz);
- lastalert = time_second;
- }
- mtx_unlock(&Giant);
- goto out;
-#else /* !POWERFAIL_NMI */
- /* XXX Giant */
- /* machine/parity/power fail/"kitchen sink" faults */
- if (isa_nmi(code) == 0) {
-#ifdef KDB
- /*
- * NMI can be hooked up to a pushbutton
- * for debugging.
- */
- if (kdb_on_nmi) {
- printf ("NMI ... going to debugger\n");
- kdb_trap (type, 0, &frame);
- }
-#endif /* KDB */
- goto out;
- } else if (panic_on_nmi == 0)
- goto out;
- /* FALLTHROUGH */
-#endif /* POWERFAIL_NMI */
-#endif /* DEV_ISA */
- }
-
- trap_fatal(&frame, eva);
- goto out;
- }
-
- /* Translate fault for emulators (e.g. Linux) */
- if (*p->p_sysent->sv_transtrap)
- i = (*p->p_sysent->sv_transtrap)(i, type);
-
- trapsignal(td, i, ucode);
-
-#if 1 /* DEBUG */
- if (type <= MAX_TRAP_MSG) {
- uprintf("fatal process exception: %s",
- trap_msg[type]);
- if ((type == T_PAGEFLT) || (type == T_PROTFLT))
- uprintf(", fault VA = 0x%lx", (u_long)eva);
- uprintf("\n");
- }
-#endif
-
-user:
- userret(td, &frame, sticks);
- mtx_assert(&Giant, MA_NOTOWNED);
-userout:
-out:
- return;
-}
-
-static int
-trap_pfault(frame, usermode, eva)
- struct trapframe *frame;
- int usermode;
- vm_offset_t eva;
-{
- vm_offset_t va;
- struct vmspace *vm = NULL;
- vm_map_t map = 0;
- int rv = 0;
- vm_prot_t ftype;
- struct thread *td = curthread;
- struct proc *p = td->td_proc;
-
- va = trunc_page(eva);
- if (va >= KERNBASE) {
- /*
- * Don't allow user-mode faults in kernel address space.
- * An exception: if the faulting address is the invalid
- * instruction entry in the IDT, then the Intel Pentium
- * F00F bug workaround was triggered, and we need to
- * treat it is as an illegal instruction, and not a page
- * fault.
- */
-#if defined(I586_CPU) && !defined(NO_F00F_HACK)
- if ((eva == (unsigned int)&idt[6]) && has_f00f_bug)
- return -2;
-#endif
- if (usermode)
- goto nogo;
-
- map = kernel_map;
- } else {
- /*
- * This is a fault on non-kernel virtual memory.
- * vm is initialized above to NULL. If curproc is NULL
- * or curproc->p_vmspace is NULL the fault is fatal.
- */
- if (p != NULL)
- vm = p->p_vmspace;
-
- if (vm == NULL)
- goto nogo;
-
- map = &vm->vm_map;
- }
-
- if (frame->tf_err & PGEX_W)
- ftype = VM_PROT_WRITE;
- else
- ftype = VM_PROT_READ;
-
- if (map != kernel_map) {
- /*
- * Keep swapout from messing with us during this
- * critical time.
- */
- PROC_LOCK(p);
- ++p->p_lock;
- PROC_UNLOCK(p);
-
- /* Fault in the user page: */
- rv = vm_fault(map, va, ftype,
- (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY
- : VM_FAULT_NORMAL);
-
- PROC_LOCK(p);
- --p->p_lock;
- PROC_UNLOCK(p);
- } else {
- /*
- * Don't have to worry about process locking or stacks in the
- * kernel.
- */
- rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
- }
- if (rv == KERN_SUCCESS)
- return (0);
-nogo:
- if (!usermode) {
- if (td->td_intr_nesting_level == 0 &&
- PCPU_GET(curpcb)->pcb_onfault != NULL) {
- frame->tf_eip = (int)PCPU_GET(curpcb)->pcb_onfault;
- return (0);
- }
- trap_fatal(frame, eva);
- return (-1);
- }
-
- /* kludge to pass faulting virtual address to sendsig */
- frame->tf_err = eva;
-
- return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
-}
-
-static void
-trap_fatal(struct trapframe *frame, vm_offset_t eva)
-{
- int code, type, ss, esp;
- struct soft_segment_descriptor softseg;
-
- code = frame->tf_err;
- type = frame->tf_trapno;
-#if 0
- XENPRINTF("trying to read gdt\n");
- sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg);
- XENPRINTF("read gdt\n");
-#endif
- if (type <= MAX_TRAP_MSG)
- printf("\n\nFatal trap %d: %s while in %s mode\n",
- type, trap_msg[type],
- frame->tf_eflags & PSL_VM ? "vm86" :
- ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
-#ifdef SMP
- /* two separate prints in case of a trap on an unmapped page */
- printf("cpuid = %d; ", PCPU_GET(cpuid));
- printf("apic id = %02x\n", PCPU_GET(apic_id));
-#endif
- if (type == T_PAGEFLT) {
- printf("fault virtual address = 0x%x\n", eva);
- printf("fault code = %s %s, %s\n",
- code & PGEX_U ? "user" : "supervisor",
- code & PGEX_W ? "write" : "read",
- code & PGEX_P ? "protection violation" : "page not
present");
- }
- printf("instruction pointer = 0x%x:0x%x\n",
- frame->tf_cs & 0xffff, frame->tf_eip);
- if ((ISPL(frame->tf_cs) == SEL_UPL) || (frame->tf_eflags & PSL_VM)) {
- ss = frame->tf_ss & 0xffff;
- esp = frame->tf_esp;
- } else {
- ss = GSEL(GDATA_SEL, SEL_KPL);
- esp = (int)&frame->tf_esp;
- }
- printf("stack pointer = 0x%x:0x%x\n", ss, esp);
- printf("frame pointer = 0x%x:0x%x\n", ss, frame->tf_ebp);
- printf("code segment = base 0x%x, limit 0x%x, type 0x%x\n",
- softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type);
- printf(" = DPL %d, pres %d, def32 %d, gran %d\n",
- softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32,
- softseg.ssd_gran);
- printf("processor eflags = ");
- if (frame->tf_eflags & PSL_T)
- printf("trace trap, ");
- if (frame->tf_eflags & PSL_I)
- printf("interrupt enabled, ");
- if (frame->tf_eflags & PSL_NT)
- printf("nested task, ");
- if (frame->tf_eflags & PSL_RF)
- printf("resume, ");
- if (frame->tf_eflags & PSL_VM)
- printf("vm86, ");
- printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12);
- printf("current process = ");
- if (curproc) {
- printf("%lu (%s)\n",
- (u_long)curproc->p_pid, curproc->p_comm ?
- curproc->p_comm : "");
- } else {
- printf("Idle\n");
- }
- /* XXX */
-
-#ifdef KDB
- if (kdb_trap(type, 0, frame))
- return;
-#endif
- printf("trap number = %d\n", type);
- if (type <= MAX_TRAP_MSG)
- panic("%s", trap_msg[type]);
- else
- panic("unknown/reserved trap");
-}
-
-/*
- * Double fault handler. Called when a fault occurs while writing
- * a frame for a trap/exception onto the stack. This usually occurs
- * when the stack overflows (such is the case with infinite recursion,
- * for example).
- *
- * XXX Note that the current PTD gets replaced by IdlePTD when the
- * task switch occurs. This means that the stack that was active at
- * the time of the double fault is not available at <kstack> unless
- * the machine was idle when the double fault occurred. The downside
- * of this is that "trace <ebp>" in ddb won't work.
- */
-void
-dblfault_handler()
-{
- printf("\nFatal double fault:\n");
- printf("eip = 0x%x\n", PCPU_GET(common_tss.tss_eip));
- printf("esp = 0x%x\n", PCPU_GET(common_tss.tss_esp));
- printf("ebp = 0x%x\n", PCPU_GET(common_tss.tss_ebp));
-#ifdef SMP
- /* two separate prints in case of a trap on an unmapped page */
- printf("cpuid = %d; ", PCPU_GET(cpuid));
- printf("apic id = %02x\n", PCPU_GET(apic_id));
-#endif
- panic("double fault");
-}
-
-/*
- * syscall - system call request C handler
- *
- * A system call is essentially treated as a trap.
- */
-void
-syscall(frame)
- struct trapframe frame;
-{
- caddr_t params;
- struct sysent *callp;
- struct thread *td = curthread;
- struct proc *p = td->td_proc;
- register_t orig_tf_eflags;
- u_int sticks;
- int error;
- int narg;
- int args[8];
- u_int code;
-
- /*
- * note: PCPU_LAZY_INC() can only be used if we can afford
- * occassional inaccuracy in the count.
- */
- PCPU_LAZY_INC(cnt.v_syscall);
-
-#ifdef DIAGNOSTIC
- if (ISPL(frame.tf_cs) != SEL_UPL) {
- mtx_lock(&Giant); /* try to stabilize the system XXX */
- panic("syscall");
- /* NOT REACHED */
- mtx_unlock(&Giant);
- }
-#endif
-
- sticks = td->td_sticks;
- td->td_frame = &frame;
- if (td->td_ucred != p->p_ucred)
- cred_update_thread(td);
- if (p->p_flag & P_SA)
- thread_user_enter(td);
- params = (caddr_t)frame.tf_esp + sizeof(int);
- code = frame.tf_eax;
- orig_tf_eflags = frame.tf_eflags;
-
- if (p->p_sysent->sv_prepsyscall) {
- /*
- * The prep code is MP aware.
- */
- (*p->p_sysent->sv_prepsyscall)(&frame, args, &code, ¶ms);
- } else {
- /*
- * Need to check if this is a 32 bit or 64 bit syscall.
- * fuword is MP aware.
- */
- if (code == SYS_syscall) {
- /*
- * Code is first argument, followed by actual args.
- */
- code = fuword(params);
- params += sizeof(int);
- } else if (code == SYS___syscall) {
- /*
- * Like syscall, but code is a quad, so as to maintain
- * quad alignment for the rest of the arguments.
- */
- code = fuword(params);
- params += sizeof(quad_t);
- }
- }
-
- if (p->p_sysent->sv_mask)
- code &= p->p_sysent->sv_mask;
-
- if (code >= p->p_sysent->sv_size)
- callp = &p->p_sysent->sv_table[0];
- else
- callp = &p->p_sysent->sv_table[code];
-
- narg = callp->sy_narg & SYF_ARGMASK;
-
- /*
- * copyin and the ktrsyscall()/ktrsysret() code is MP-aware
- */
- if (params != NULL && narg != 0)
- error = copyin(params, (caddr_t)args,
- (u_int)(narg * sizeof(int)));
- else
- error = 0;
-
-#ifdef KTRACE
- if (KTRPOINT(td, KTR_SYSCALL))
- ktrsyscall(code, narg, args);
-#endif
- CTR4(KTR_SYSC, "syscall enter thread %p pid %d proc %s code %d", td,
- td->td_proc->p_pid, td->td_proc->p_comm, code);
-
- /*
- * Try to run the syscall without Giant if the syscall
- * is MP safe.
- */
- if ((callp->sy_narg & SYF_MPSAFE) == 0)
- mtx_lock(&Giant);
-
- if (error == 0) {
- td->td_retval[0] = 0;
- td->td_retval[1] = frame.tf_edx;
-
- STOPEVENT(p, S_SCE, narg);
-
- PTRACESTOP_SC(p, td, S_PT_SCE);
-
- error = (*callp->sy_call)(td, args);
- }
-
- switch (error) {
- case 0:
- frame.tf_eax = td->td_retval[0];
- frame.tf_edx = td->td_retval[1];
- frame.tf_eflags &= ~PSL_C;
- break;
-
- case ERESTART:
- /*
- * Reconstruct pc, assuming lcall $X,y is 7 bytes,
- * int 0x80 is 2 bytes. We saved this in tf_err.
- */
- frame.tf_eip -= frame.tf_err;
- break;
-
- case EJUSTRETURN:
- break;
-
- default:
- if (p->p_sysent->sv_errsize) {
- if (error >= p->p_sysent->sv_errsize)
- error = -1; /* XXX */
- else
- error = p->p_sysent->sv_errtbl[error];
- }
- frame.tf_eax = error;
- frame.tf_eflags |= PSL_C;
- break;
- }
-
- /*
- * Release Giant if we previously set it.
- */
- if ((callp->sy_narg & SYF_MPSAFE) == 0)
- mtx_unlock(&Giant);
-
- /*
- * Traced syscall.
- */
- if ((orig_tf_eflags & PSL_T) && !(orig_tf_eflags & PSL_VM)) {
- frame.tf_eflags &= ~PSL_T;
- trapsignal(td, SIGTRAP, 0);
- }
-
- /*
- * Handle reschedule and other end-of-syscall issues
- */
- userret(td, &frame, sticks);
-
-#ifdef KTRACE
- if (KTRPOINT(td, KTR_SYSRET))
- ktrsysret(code, error, td->td_retval[0]);
-#endif
-
- /*
- * This works because errno is findable through the
- * register set. If we ever support an emulation where this
- * is not the case, this code will need to be revisited.
- */
- STOPEVENT(p, S_SCX, code);
-
- PTRACESTOP_SC(p, td, S_PT_SCX);
-
- WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning",
- (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???");
- mtx_assert(&sched_lock, MA_NOTOWNED);
- mtx_assert(&Giant, MA_NOTOWNED);
-}
-
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/i386-xen/vm_machdep.c
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/vm_machdep.c Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,620 +0,0 @@
-/*-
- * Copyright (c) 1982, 1986 The Regents of the University of California.
- * Copyright (c) 1989, 1990 William Jolitz
- * Copyright (c) 1994 John Dyson
- * All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * the Systems Programming Group of the University of Utah Computer
- * Science Department, and William Jolitz.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91
- * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.219 2003/11/17 18:22:24
alc Exp $");
-
-#include "opt_npx.h"
-#ifdef PC98
-#include "opt_pc98.h"
-#endif
-#include "opt_reset.h"
-#include "opt_cpu.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/bio.h>
-#include <sys/buf.h>
-#include <sys/kse.h>
-#include <sys/kernel.h>
-#include <sys/ktr.h>
-#include <sys/lock.h>
-#include <sys/malloc.h>
-#include <sys/mbuf.h>
-#include <sys/mutex.h>
-#include <sys/proc.h>
-#include <sys/sf_buf.h>
-#include <sys/smp.h>
-#include <sys/sysctl.h>
-#include <sys/unistd.h>
-#include <sys/user.h>
-#include <sys/vnode.h>
-#include <sys/vmmeter.h>
-
-#include <machine/cpu.h>
-#include <machine/cputypes.h>
-#include <machine/md_var.h>
-#include <machine/pcb.h>
-#include <machine/pcb_ext.h>
-
-#include <vm/vm.h>
-#include <vm/vm_extern.h>
-#include <vm/vm_kern.h>
-#include <vm/vm_page.h>
-#include <vm/vm_map.h>
-#include <vm/vm_param.h>
-
-#ifdef PC98
-#include <pc98/pc98/pc98.h>
-#else
-#include <i386/isa/isa.h>
-#endif
-
-#ifndef NSFBUFS
-#define NSFBUFS (512 + maxusers * 16)
-#endif
-
-#include <machine/xenfunc.h>
-#if 0
-#ifdef SMP
-static void cpu_reset_proxy(void);
-static u_int cpu_reset_proxyid;
-static volatile u_int cpu_reset_proxy_active;
-#endif
-#endif
-static void sf_buf_init(void *arg);
-SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
-
-LIST_HEAD(sf_head, sf_buf);
-
-/*
- * A hash table of active sendfile(2) buffers
- */
-static TAILQ_HEAD(, sf_buf) sf_buf_freelist;
-
-
-static struct sf_head *sf_buf_active;
-static u_long sf_buf_hashmask;
-
-
-#define SF_BUF_HASH(m) (((m) - vm_page_array) & sf_buf_hashmask)
-
-static u_int sf_buf_alloc_want;
-
-/*
- * A lock used to synchronize access to the hash table and free list
- */
-static struct mtx sf_buf_lock;
-
-extern int _ucodesel, _udatasel;
-
-/*
- * Finish a fork operation, with process p2 nearly set up.
- * Copy and update the pcb, set up the stack so that the child
- * ready to run and return to user mode.
- */
-void
-cpu_fork(struct thread *td1,
- struct proc *p2,
- struct thread *td2,
- int flags)
-{
- register struct proc *p1;
- struct pcb *pcb2;
- struct mdproc *mdp2;
-#ifdef DEV_NPX
- register_t savecrit;
-#endif
-
- p1 = td1->td_proc;
- if ((flags & RFPROC) == 0) {
- if ((flags & RFMEM) == 0) {
- /* unshare user LDT */
- struct mdproc *mdp1 = &p1->p_md;
- struct proc_ldt *pldt = mdp1->md_ldt;
- if (pldt && pldt->ldt_refcnt > 1) {
- pldt = user_ldt_alloc(mdp1, pldt->ldt_len);
- if (pldt == NULL)
- panic("could not copy LDT");
- mdp1->md_ldt = pldt;
- set_user_ldt(mdp1);
- user_ldt_free(td1);
- }
- }
- return;
- }
-
- /* Ensure that p1's pcb is up to date. */
-#ifdef DEV_NPX
- if (td1 == curthread)
- td1->td_pcb->pcb_gs = rgs();
- savecrit = intr_disable();
- if (PCPU_GET(fpcurthread) == td1)
- npxsave(&td1->td_pcb->pcb_save);
- intr_restore(savecrit);
-#endif
-
- /* Point the pcb to the top of the stack */
- pcb2 = (struct pcb *)(td2->td_kstack + td2->td_kstack_pages *
PAGE_SIZE) - 1;
- td2->td_pcb = pcb2;
-
- /* Copy p1's pcb */
- bcopy(td1->td_pcb, pcb2, sizeof(*pcb2));
-
- /* Point mdproc and then copy over td1's contents */
- mdp2 = &p2->p_md;
- bcopy(&p1->p_md, mdp2, sizeof(*mdp2));
-
- /*
- * Create a new fresh stack for the new process.
- * Copy the trap frame for the return to user mode as if from a
- * syscall. This copies most of the user mode register values.
- */
- td2->td_frame = (struct trapframe *)((caddr_t)td2->td_pcb) - 1;
- bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe));
-
- td2->td_frame->tf_eax = 0; /* Child returns zero */
- td2->td_frame->tf_eflags &= ~PSL_C; /* success */
- td2->td_frame->tf_edx = 1;
- /*
- * Set registers for trampoline to user mode. Leave space for the
- * return address on stack. These are the kernel mode register values.
- */
- pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdir);
- pcb2->pcb_edi = 0;
- pcb2->pcb_esi = (int)fork_return; /* fork_trampoline argument */
- pcb2->pcb_ebp = 0;
- pcb2->pcb_esp = (int)td2->td_frame - sizeof(void *);
- pcb2->pcb_ebx = (int)td2; /* fork_trampoline argument */
- pcb2->pcb_eip = (int)fork_trampoline;
- pcb2->pcb_psl = PSL_KERNEL; /* ints disabled */
- pcb2->pcb_gs = rgs();
- /*-
- * pcb2->pcb_dr*: cloned above.
- * pcb2->pcb_savefpu: cloned above.
- * pcb2->pcb_flags: cloned above.
- * pcb2->pcb_onfault: cloned above (always NULL here?).
- * pcb2->pcb_gs: cloned above.
- * pcb2->pcb_ext: cleared below.
- */
-
- /*
- * XXX don't copy the i/o pages. this should probably be fixed.
- */
- pcb2->pcb_ext = 0;
-
- /* Copy the LDT, if necessary. */
- mtx_lock_spin(&sched_lock);
-
- if (mdp2->md_ldt != 0) {
- if (flags & RFMEM) {
- mdp2->md_ldt->ldt_refcnt++;
- } else {
- mdp2->md_ldt = user_ldt_alloc(mdp2,
- mdp2->md_ldt->ldt_len);
- if (mdp2->md_ldt == NULL)
- panic("could not copy LDT");
- }
- }
- mtx_unlock_spin(&sched_lock);
-
- /*
- * Now, cpu_switch() can schedule the new process.
- * pcb_esp is loaded pointing to the cpu_switch() stack frame
- * containing the return address when exiting cpu_switch.
- * This will normally be to fork_trampoline(), which will have
- * %ebx loaded with the new proc's pointer. fork_trampoline()
- * will set up a stack to call fork_return(p, frame); to complete
- * the return to user-mode.
- */
-}
-
-/*
- * Intercept the return address from a freshly forked process that has NOT
- * been scheduled yet.
- *
- * This is needed to make kernel threads stay in kernel mode.
- */
-void
-cpu_set_fork_handler(td, func, arg)
- struct thread *td;
- void (*func)(void *);
- void *arg;
-{
- /*
- * Note that the trap frame follows the args, so the function
- * is really called like this: func(arg, frame);
- */
- td->td_pcb->pcb_esi = (int) func; /* function */
- td->td_pcb->pcb_ebx = (int) arg; /* first arg */
-}
-
-void
-cpu_exit(struct thread *td)
-{
- struct mdproc *mdp;
- struct pcb *pcb = td->td_pcb;
-
-
- /* Reset pc->pcb_gs and %gs before possibly invalidating it. */
- mdp = &td->td_proc->p_md;
- if (mdp->md_ldt) {
- td->td_pcb->pcb_gs = _udatasel;
- load_gs(_udatasel);
- user_ldt_free(td);
- }
- if (pcb->pcb_flags & PCB_DBREGS) {
- /* disable all hardware breakpoints */
- reset_dbregs();
- pcb->pcb_flags &= ~PCB_DBREGS;
- }
-}
-
-void
-cpu_thread_exit(struct thread *td)
-{
- struct pcb *pcb = td->td_pcb;
-#ifdef DEV_NPX
- if (td == PCPU_GET(fpcurthread))
- npxdrop();
-#endif
- if (pcb->pcb_flags & PCB_DBREGS) {
- /* disable all hardware breakpoints */
- reset_dbregs();
- pcb->pcb_flags &= ~PCB_DBREGS;
- }
-}
-
-void
-cpu_thread_clean(struct thread *td)
-{
- struct pcb *pcb;
-
- pcb = td->td_pcb;
- if (pcb->pcb_ext != 0) {
- /* XXXKSE XXXSMP not SMP SAFE.. what locks do we have? */
- /* if (pcb->pcb_ext->ext_refcount-- == 1) ?? */
- /*
- * XXX do we need to move the TSS off the allocated pages
- * before freeing them? (not done here)
- */
- kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ext,
- ctob(IOPAGES + 1));
- pcb->pcb_ext = 0;
- }
-}
-
-void
-cpu_thread_swapin(struct thread *td)
-{
-}
-
-void
-cpu_thread_swapout(struct thread *td)
-{
-}
-
-void
-cpu_thread_setup(struct thread *td)
-{
-
- td->td_pcb =
- (struct pcb *)(td->td_kstack + td->td_kstack_pages * PAGE_SIZE) -
1;
- td->td_frame = (struct trapframe *)((caddr_t)td->td_pcb - 16) - 1;
- td->td_pcb->pcb_ext = NULL;
-}
-
-/*
- * Initialize machine state (pcb and trap frame) for a new thread about to
- * upcall. Pu t enough state in the new thread's PCB to get it to go back
- * userret(), where we can intercept it again to set the return (upcall)
- * Address and stack, along with those from upcals that are from other sources
- * such as those generated in thread_userret() itself.
- */
-void
-cpu_set_upcall(struct thread *td, struct thread *td0)
-{
- struct pcb *pcb2;
-
- /* Point the pcb to the top of the stack. */
- pcb2 = td->td_pcb;
-
- /*
- * Copy the upcall pcb. This loads kernel regs.
- * Those not loaded individually below get their default
- * values here.
- *
- * XXXKSE It might be a good idea to simply skip this as
- * the values of the other registers may be unimportant.
- * This would remove any requirement for knowing the KSE
- * at this time (see the matching comment below for
- * more analysis) (need a good safe default).
- */
- bcopy(td0->td_pcb, pcb2, sizeof(*pcb2));
- pcb2->pcb_flags &= ~(PCB_NPXTRAP|PCB_NPXINITDONE);
-
- /*
- * Create a new fresh stack for the new thread.
- * Don't forget to set this stack value into whatever supplies
- * the address for the fault handlers.
- * The contexts are filled in at the time we actually DO the
- * upcall as only then do we know which KSE we got.
- */
- bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe));
-
- /*
- * Set registers for trampoline to user mode. Leave space for the
- * return address on stack. These are the kernel mode register values.
- */
-#ifdef PAE
- pcb2->pcb_cr3 = vtophys(vmspace_pmap(td->td_proc->p_vmspace)->pm_pdpt);
-#else
- pcb2->pcb_cr3 = vtophys(vmspace_pmap(td->td_proc->p_vmspace)->pm_pdir);
-#endif
- pcb2->pcb_edi = 0;
- pcb2->pcb_esi = (int)fork_return; /* trampoline arg */
- pcb2->pcb_ebp = 0;
- pcb2->pcb_esp = (int)td->td_frame - sizeof(void *); /* trampoline arg */
- pcb2->pcb_ebx = (int)td; /* trampoline arg */
- pcb2->pcb_eip = (int)fork_trampoline;
- pcb2->pcb_psl &= ~(PSL_I); /* interrupts must be disabled */
- pcb2->pcb_gs = rgs();
- /*
- * If we didn't copy the pcb, we'd need to do the following registers:
- * pcb2->pcb_dr*: cloned above.
- * pcb2->pcb_savefpu: cloned above.
- * pcb2->pcb_flags: cloned above.
- * pcb2->pcb_onfault: cloned above (always NULL here?).
- * pcb2->pcb_gs: cloned above. XXXKSE ???
- * pcb2->pcb_ext: cleared below.
- */
- pcb2->pcb_ext = NULL;
-}
-
-/*
- * Set that machine state for performing an upcall that has to
- * be done in thread_userret() so that those upcalls generated
- * in thread_userret() itself can be done as well.
- */
-void
-cpu_set_upcall_kse(struct thread *td, struct kse_upcall *ku)
-{
-
- /*
- * Do any extra cleaning that needs to be done.
- * The thread may have optional components
- * that are not present in a fresh thread.
- * This may be a recycled thread so make it look
- * as though it's newly allocated.
- */
- cpu_thread_clean(td);
-
- /*
- * Set the trap frame to point at the beginning of the uts
- * function.
- */
- td->td_frame->tf_ebp = 0;
- td->td_frame->tf_esp =
- (int)ku->ku_stack.ss_sp + ku->ku_stack.ss_size - 16;
- td->td_frame->tf_eip = (int)ku->ku_func;
-
- /*
- * Pass the address of the mailbox for this kse to the uts
- * function as a parameter on the stack.
- */
- suword((void *)(td->td_frame->tf_esp + sizeof(void *)),
- (int)ku->ku_mailbox);
-}
-
-/*
- * Convert kernel VA to physical address
- */
-vm_paddr_t
-kvtop(void *addr)
-{
- vm_paddr_t pa;
-
- pa = pmap_kextract((vm_offset_t)addr);
- if (pa == 0)
- panic("kvtop: zero page frame");
- return (pa);
-}
-
-/*
- * Force reset the processor by invalidating the entire address space!
- */
-
-#if 0
-#ifdef SMP
-static void
-cpu_reset_proxy()
-{
-
- cpu_reset_proxy_active = 1;
- while (cpu_reset_proxy_active == 1)
- ; /* Wait for other cpu to see that we've started */
- stop_cpus((1<<cpu_reset_proxyid));
- printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid);
- DELAY(1000000);
- cpu_reset();
-}
-#endif
-#endif
-void
-cpu_reset()
-{
- HYPERVISOR_shutdown();
-}
-
-
-/*
- * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
- */
-static void
-sf_buf_init(void *arg)
-{
- struct sf_buf *sf_bufs;
- vm_offset_t sf_base;
- int i;
-
- nsfbufs = NSFBUFS;
- TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
-
- sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask);
- TAILQ_INIT(&sf_buf_freelist);
- sf_base = kmem_alloc_nofault(kernel_map, nsfbufs * PAGE_SIZE);
- sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
- M_NOWAIT | M_ZERO);
- for (i = 0; i < nsfbufs; i++) {
- sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
- TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry);
- }
- sf_buf_alloc_want = 0;
- mtx_init(&sf_buf_lock, "sf_buf", NULL, MTX_DEF);
-}
-
-/*
- * Get an sf_buf from the freelist. Will block if none are available.
- */
-struct sf_buf *
-sf_buf_alloc(struct vm_page *m, int pri)
-{
- struct sf_head *hash_list;
- struct sf_buf *sf;
- int error;
-
- hash_list = &sf_buf_active[SF_BUF_HASH(m)];
- mtx_lock(&sf_buf_lock);
- LIST_FOREACH(sf, hash_list, list_entry) {
- if (sf->m == m) {
- sf->ref_count++;
- if (sf->ref_count == 1) {
- TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
- nsfbufsused++;
- nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
- }
- goto done;
- }
- }
- while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) {
- sf_buf_alloc_want++;
- mbstat.sf_allocwait++;
- error = msleep(&sf_buf_freelist, &sf_buf_lock, PVM | pri,
- "sfbufa", 0);
- sf_buf_alloc_want--;
-
- /*
- * If we got a signal, don't risk going back to sleep.
- */
- if (error)
- goto done;
- }
- TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
- if (sf->m != NULL)
- LIST_REMOVE(sf, list_entry);
- LIST_INSERT_HEAD(hash_list, sf, list_entry);
- sf->ref_count = 1;
- sf->m = m;
- nsfbufsused++;
- nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
- pmap_qenter(sf->kva, &sf->m, 1);
-done:
- mtx_unlock(&sf_buf_lock);
- return (sf);
-}
-
-/*
- * Detatch mapped page and release resources back to the system.
- */
-void
-sf_buf_free(struct sf_buf *sf)
-{
- mtx_lock(&sf_buf_lock);
- sf->ref_count--;
- if (sf->ref_count == 0) {
- TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry);
- nsfbufsused--;
- /* XEN only */
- pmap_qremove(sf->kva, 1);
- sf->m = NULL;
- LIST_REMOVE(sf, list_entry);
- /* ----- */
- if (sf_buf_alloc_want > 0)
- wakeup_one(&sf_buf_freelist);
- }
- mtx_unlock(&sf_buf_lock);
-}
-
-/*
- * Software interrupt handler for queued VM system processing.
- */
-void
-swi_vm(void *dummy)
-{
- if (busdma_swi_pending != 0)
- busdma_swi();
-}
-
-/*
- * Tell whether this address is in some physical memory region.
- * Currently used by the kernel coredump code in order to avoid
- * dumping the ``ISA memory hole'' which could cause indefinite hangs,
- * or other unpredictable behaviour.
- */
-
-int
-is_physical_memory(vm_paddr_t addr)
-{
-
-#ifdef DEV_ISA
- /* The ISA ``memory hole''. */
- if (addr >= 0xa0000 && addr < 0x100000)
- return 0;
-#endif
-
- /*
- * stuff other tests for known memory-mapped devices (PCI?)
- * here
- */
-
- return 1;
-}
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_bus.c
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_bus.c Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,238 +0,0 @@
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/bus.h>
-#include <sys/malloc.h>
-#include <sys/module.h>
-#include <sys/kernel.h>
-#include <machine/bus.h>
-#include <sys/rman.h>
-#include <sys/lock.h>
-#include <sys/mutex.h>
-
-#include <machine/frame.h>
-#include <machine/intr_machdep.h>
-#include <machine/resource.h>
-
-#include <machine/xen-os.h>
-#include <machine/hypervisor.h>
-#include <machine/xen_intr.h>
-
-static MALLOC_DEFINE(M_XENDEV, "xenintrdrv", "xen system device");
-
-struct xenbus_device {
- struct resource_list xen_resources;
-};
-
-#define DEVTOXEN(dev) ((struct xenbus_device *)device_get_ivars(dev))
-
-static void xenbus_identify(driver_t *, device_t);
-static int xenbus_probe(device_t);
-static int xenbus_attach(device_t);
-static int xenbus_print_child(device_t, device_t);
-static device_t xenbus_add_child(device_t bus, int order, const char *name,
- int unit);
-static struct resource *xenbus_alloc_resource(device_t, device_t, int, int *,
- u_long, u_long, u_long, u_int);
-static int xenbus_release_resource(device_t, device_t, int, int,
- struct resource *);
-static int xenbus_set_resource(device_t, device_t, int, int, u_long, u_long);
-static int xenbus_get_resource(device_t, device_t, int, int, u_long *, u_long
*);
-static void xenbus_delete_resource(device_t, device_t, int, int);
-
-
-static device_method_t xenbus_methods[] = {
- /* Device interface */
- DEVMETHOD(device_identify, xenbus_identify),
- DEVMETHOD(device_probe, xenbus_probe),
- DEVMETHOD(device_attach, xenbus_attach),
- DEVMETHOD(device_detach, bus_generic_detach),
- DEVMETHOD(device_shutdown, bus_generic_shutdown),
- DEVMETHOD(device_suspend, bus_generic_suspend),
- DEVMETHOD(device_resume, bus_generic_resume),
-
- /* Bus interface */
- DEVMETHOD(bus_print_child, xenbus_print_child),
- DEVMETHOD(bus_add_child, xenbus_add_child),
- DEVMETHOD(bus_read_ivar, bus_generic_read_ivar),
- DEVMETHOD(bus_write_ivar, bus_generic_write_ivar),
- DEVMETHOD(bus_set_resource, xenbus_set_resource),
- DEVMETHOD(bus_get_resource, xenbus_get_resource),
- DEVMETHOD(bus_alloc_resource, xenbus_alloc_resource),
- DEVMETHOD(bus_release_resource, xenbus_release_resource),
- DEVMETHOD(bus_delete_resource, xenbus_delete_resource),
- DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
- DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
- DEVMETHOD(bus_setup_intr, bus_generic_setup_intr),
- DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr),
-
- { 0, 0 }
-};
-
-
-static driver_t xenbus_driver = {
- "xenbus",
- xenbus_methods,
- 1, /* no softc */
-};
-static devclass_t xenbus_devclass;
-static device_t xenbus_dev;
-static boolean_t xenbus_probe_delay = TRUE; /* delay child probes */
-
-DRIVER_MODULE(xenbus, nexus, xenbus_driver, xenbus_devclass, 0, 0);
-
-static void
-xenbus_identify(driver_t *driver, device_t parent)
-{
-
- /*
- * Add child device with order of 0 so it gets probed
- * first
- */
- xenbus_dev = BUS_ADD_CHILD(parent, 0, "xenbus", 0);
- if (xenbus_dev == NULL)
- panic("xenbus: could not attach");
-}
-
-static int
-xenbus_probe(device_t dev)
-{
- device_set_desc(dev, "xen system");
- device_quiet(dev);
- return (0);
-}
-
-static int
-xenbus_attach(device_t dev)
-{
- /*
- * First, let our child driver's identify any child devices that
- * they can find. Once that is done attach any devices that we
- * found.
- */
- if (!xenbus_probe_delay) {
- bus_generic_probe(dev);
- bus_generic_attach(dev);
- }
-
- return 0;
-}
-
-
-static int
-xenbus_print_all_resources(device_t dev)
-{
- struct xenbus_device *xdev = device_get_ivars(dev);
- struct resource_list *rl = &xdev->xen_resources;
- int retval = 0;
-
- if (SLIST_FIRST(rl))
- retval += printf(" at");
-
- retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
- retval += resource_list_print_type(rl, "iomem", SYS_RES_MEMORY, "%#lx");
- retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
-
- return retval;
-}
-
-
-static int
-xenbus_print_child(device_t bus, device_t child)
-{
- int retval = 0;
-
- retval += bus_print_child_header(bus, child);
- retval += xenbus_print_all_resources(child);
- retval += printf(" on motherboard\n"); /* XXX "motherboard", ick */
-
- return (retval);
-}
-
-static device_t
-xenbus_add_child(device_t bus, int order, const char *name, int unit)
-{
- device_t child;
- struct xenbus_device *xendev;
-
- xendev = malloc(sizeof(struct xenbus_device), M_XENDEV,
- M_NOWAIT | M_ZERO);
- if (!xendev)
- return(0);
- resource_list_init(&xendev->xen_resources);
-
- child = device_add_child_ordered(bus, order, name, unit);
-
- /* should we free this in xenbus_child_detached? */
- device_set_ivars(child, xendev);
-
- return(child);
-}
-
-static struct resource *
-xenbus_alloc_resource(device_t bus, device_t child, int type, int *rid,
- u_long start, u_long end, u_long count, u_int flags)
-{
- struct xenbus_device *xendev = DEVTOXEN(child);
- struct resource_list *rl = &xendev->xen_resources;
-
- return (resource_list_alloc(rl, bus, child, type, rid, start, end,
- count, flags));
-}
-
-
-static int
-xenbus_release_resource(device_t bus, device_t child, int type, int rid,
- struct resource *r)
-{
- struct xenbus_device *xendev = DEVTOXEN(child);
- struct resource_list *rl = &xendev->xen_resources;
-
- return (resource_list_release(rl, bus, child, type, rid, r));
-}
-
-static int
-xenbus_set_resource(device_t dev, device_t child, int type, int rid,
- u_long start, u_long count)
-{
- struct xenbus_device *xendev = DEVTOXEN(child);
- struct resource_list *rl = &xendev->xen_resources;
-
- resource_list_add(rl, type, rid, start, start + count - 1, count);
- return(0);
-}
-
-static int
-xenbus_get_resource(device_t dev, device_t child, int type, int rid,
- u_long *startp, u_long *countp)
-{
- struct xenbus_device *xendev = DEVTOXEN(child);
- struct resource_list *rl = &xendev->xen_resources;
- struct resource_list_entry *rle;
-
- rle = resource_list_find(rl, type, rid);
- if (!rle)
- return(ENOENT);
- if (startp)
- *startp = rle->start;
- if (countp)
- *countp = rle->count;
- return(0);
-}
-
-static void
-xenbus_delete_resource(device_t dev, device_t child, int type, int rid)
-{
- struct xenbus_device *xendev = DEVTOXEN(child);
- struct resource_list *rl = &xendev->xen_resources;
-
- resource_list_delete(rl, type, rid);
-}
-
-static void
-xenbus_init(void *unused)
-{
- xenbus_probe_delay = FALSE;
- xenbus_attach(xenbus_dev);
-}
-SYSINIT(xenbusdev, SI_SUB_PSEUDO, SI_ORDER_FIRST, xenbus_init, NULL);
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,605 +0,0 @@
-/*
- *
- * Copyright (c) 2004 Christian Limpach.
- * Copyright (c) 2004,2005 Kip Macy
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Christian Limpach.
- * 4. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-#include <sys/cdefs.h>
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/mount.h>
-#include <sys/malloc.h>
-#include <sys/kernel.h>
-#include <sys/reboot.h>
-
-
-#include <vm/vm.h>
-#include <vm/pmap.h>
-#include <machine/stdarg.h>
-#include <machine/xenfunc.h>
-#include <machine/xenpmap.h>
-#include <machine/vmparam.h>
-#include <machine/cpu.h>
-#include <machine/xenvar.h>
-
-#include <sys/socket.h>
-#include <sys/sockio.h>
-#include <net/if.h>
-#include <net/if_dl.h>
-#include <net/if_types.h>
-#include <net/if_var.h>
-#include <net/ethernet.h>
-#include <netinet/in.h>
-#include <sys/mbuf.h>
-#include <nfs/rpcv2.h>
-#include <nfsclient/krpc.h>
-#include <nfs/nfsproto.h>
-
-
-shared_info_t *HYPERVISOR_shared_info;
-
-void ni_cli(void);
-void ni_sti(void);
-#ifdef NFS_ROOT
-
-static int
-xdr_opaque_decode(struct mbuf **mptr, u_char *buf, int len)
-{
- struct mbuf *m;
- int alignedlen;
-
- m = *mptr;
- alignedlen = ( len + 3 ) & ~3;
-
- if (m->m_len < alignedlen) {
- m = m_pullup(m, alignedlen);
- if (m == NULL) {
- *mptr = NULL;
- return EBADRPC;
- }
- }
- bcopy(mtod(m, u_char *), buf, len);
- m_adj(m, alignedlen);
- *mptr = m;
- return 0;
-}
-
-
-static int
-getdec(char **ptr)
-{
- char *p;
- int ret;
-
- p = *ptr;
- ret = 0;
- if ((*p < '0') || (*p > '9'))
- return -1;
- while ((*p >= '0') && (*p <= '9')) {
- ret = ret * 10 + (*p - '0');
- p++;
- }
- *ptr = p;
- return ret;
-}
-
-int
-setinaddr(struct sockaddr_in *addr, char *ipstr)
-{
- unsigned int ip;
- int val;
-
- ip = 0;
- if (((val = getdec(&ipstr)) < 0) || (val > 255))
- return 1;
- ip = val << 24;
- if (*ipstr != '.')
- return 1;
- ipstr++;
- if (((val = getdec(&ipstr)) < 0) || (val > 255))
- return 1;
- ip |= (val << 16);
- if (*ipstr != '.')
- return 1;
- ipstr++;
- if (((val = getdec(&ipstr)) < 0) || (val > 255))
- return 1;
- ip |= (val << 8);
- if (*ipstr != '.')
- return 1;
- ipstr++;
- if (((val = getdec(&ipstr)) < 0) || (val > 255))
- return 1;
- ip |= val;
-
- addr->sin_addr.s_addr = htonl(ip);
- addr->sin_len = sizeof(struct sockaddr_in);
- addr->sin_family = AF_INET;
-
- return 0;
-}
-
-static int
-hwaddr_to_sockaddr(char *ev, struct sockaddr_dl *sa)
-{
- char *cp;
- u_int32_t a[6];
- int count;
-
- bzero(sa, sizeof(*sa));
- sa->sdl_len = sizeof(*sa);
- sa->sdl_family = AF_LINK;
- sa->sdl_type = IFT_ETHER;
- sa->sdl_alen = ETHER_ADDR_LEN;
- if ((cp = getenv(ev)) == NULL)
- return (1);
- count = sscanf(cp, "%x:%x:%x:%x:%x:%x",
- &a[0], &a[1], &a[2], &a[3], &a[4], &a[5]);
- freeenv(cp);
- if (count != 6)
- return (1);
- sa->sdl_data[0] = a[0];
- sa->sdl_data[1] = a[1];
- sa->sdl_data[2] = a[2];
- sa->sdl_data[3] = a[3];
- sa->sdl_data[4] = a[4];
- sa->sdl_data[5] = a[5];
- return (0);
-}
-extern int in_control(struct socket *so, u_long cmd,
- caddr_t data, struct ifnet *ifp,
- struct thread *td);
-
-static int
-xen_setnetwork(void)
-{
- int error = 0;
- struct ifaddr *ifa;
- struct ifnet *ifp;
- struct sockaddr_dl *sdl, ourdl;
-
- if (sizeof(struct sockaddr) != sizeof(struct sockaddr_in))
- panic("sizes not equal\n");
-
- if (hwaddr_to_sockaddr("boot.netif.hwaddr", &ourdl)) {
- printf("nfs_diskless: no hardware address\n");
- return -1;
- }
-
-
- ifa = NULL;
- IFNET_RLOCK();
- TAILQ_FOREACH(ifp, &ifnet, if_link) {
- TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
- if ((ifa->ifa_addr->sa_family == AF_LINK) &&
- (sdl = ((struct sockaddr_dl *)ifa->ifa_addr))) {
- if ((sdl->sdl_type == ourdl.sdl_type) &&
- (sdl->sdl_alen == ourdl.sdl_alen) &&
- !bcmp(sdl->sdl_data + sdl->sdl_nlen,
- ourdl.sdl_data + ourdl.sdl_nlen,
- sdl->sdl_alen)) {
- IFNET_RUNLOCK();
- goto match_done;
- }
- }
- }
- }
- IFNET_RUNLOCK();
- printf("nfs_diskless: no interface\n");
- return -1; /* no matching interface */
- match_done:
-
- if (getenv("boot.netif.ip") && getenv("boot.netif.gateway") &&
- getenv("boot.netif.netmask")) {
- struct ifaliasreq ifra;
- char *ip;
-
- bzero(&ifra, sizeof(ifra));
- strcpy(ifra.ifra_name, "xn0");
- ip = getenv("boot.netif.ip");
- setinaddr((struct sockaddr_in *)&(ifra.ifra_addr), ip);
- printf("setting ip to %s\n", ip);
- ip = getenv("boot.netif.netmask");
- setinaddr((struct sockaddr_in *)&ifra.ifra_mask, ip);
- setinaddr((struct sockaddr_in *)&ifra.ifra_broadaddr,
"255.255.255.255");
-
-
- if ((error = in_control(NULL, SIOCAIFADDR, (caddr_t) &ifra, ifp,
curthread)))
- printf("couldn't set interface address %d\n", error);
-#if 0
- if ((error = xn_ioctl(ifp, SIOCSIFNETMASK, (caddr_t)&ifa)))
- printf("couldn't set interface netmask %d\n", error);
-#endif
- }
- return error;
-}
-
-int
-xen_setnfshandle(void)
-{
- char *path, *ip;
- u_char fhp[NFSX_V2FH];
- int error = 0;
- struct sockaddr_in sin_local, *sin ;
- struct mbuf *m;
-
- if ((error = xen_setnetwork()))
- return error;
-
- sin = &sin_local;
-
- path = getenv("boot.nfsroot.path");
- ip = getenv("boot.nfsroot.server");
-
- /* we aren't configured for NFS root */
- if (!path || !ip)
- return 0;
-
- error = setinaddr(sin, ip);
- if (error) {
- printf("invalid ip address %s\n", ip);
- return error;
- }
-
- error = krpc_portmap(sin, RPCPROG_MNT, RPCMNT_VER1,
- &sin->sin_port, curthread);
- if (error) {
- printf("failed to find port number for mountd\n");
- return error;
- }
- m = xdr_string_encode(path, strlen(path));
-
- /* Do RPC to mountd */
- error = krpc_call(sin, RPCPROG_MNT, RPCMNT_VER1,
- RPCMNT_MOUNT, &m, NULL, curthread);
- if (error) {
- printf("call to mountd failed\n");
- return error;
- }
-
- if (xdr_opaque_decode(&m, fhp, NFSX_V2FH) != 0) {
- printf("failed to decode nfs file handle\n");
- return error;
- }
-
- setenv("boot.nfsroot.nfshandle", fhp);
-
- return 0;
-}
-#endif
-void
-ni_cli(void)
-{
- __asm__("pushl %edx;"
- "pushl %eax;"
- );
- __cli();
- __asm__("popl %eax;"
- "popl %edx;"
- );
-}
-
-
-void
-ni_sti(void)
-{
- __asm__("pushl %edx;"
- "pushl %esi;"
- "pushl %eax;"
- );
- __sti();
- __asm__("popl %eax;"
- "popl %esi;"
- "popl %edx;"
- );
-}
-
-/*
- * Modify the cmd_line by converting ',' to NULLs so that it is in a format
- * suitable for the static env vars.
- */
-char *
-xen_setbootenv(char *cmd_line)
-{
- char *cmd_line_next;
-
- for (cmd_line_next = cmd_line; strsep(&cmd_line_next, ",") != NULL;);
- return cmd_line;
-}
-
-static struct
-{
- const char *ev;
- int mask;
-} howto_names[] = {
- {"boot_askname", RB_ASKNAME},
- {"boot_cdrom", RB_CDROM},
- {"boot_userconfig", RB_CONFIG},
- {"boot_ddb", RB_KDB},
- {"boot_gdb", RB_GDB},
- {"boot_gdb_pause", RB_GDB_PAUSE},
- {"boot_single", RB_SINGLE},
- {"boot_verbose", RB_VERBOSE},
- {"boot_multicons", RB_MULTIPLE},
- {"boot_serial", RB_SERIAL},
- {NULL, 0}
-};
-
-int
-xen_boothowto(char *envp)
-{
- int i, howto = 0;
-
- /* get equivalents from the environment */
- for (i = 0; howto_names[i].ev != NULL; i++)
- if (getenv(howto_names[i].ev) != NULL)
- howto |= howto_names[i].mask;
- return howto;
-}
-
-#define PRINTK_BUFSIZE 1024
-void
-printk(const char *fmt, ...)
-{
- __va_list ap;
- int ret;
- static char buf[PRINTK_BUFSIZE];
-
- va_start(ap, fmt);
- ret = vsnprintf(buf, PRINTK_BUFSIZE - 1, fmt, ap);
- va_end(ap);
- buf[ret] = 0;
- (void)HYPERVISOR_console_write(buf, ret);
-}
-
-
-#define XPQUEUE_SIZE 128
-#ifdef SMP
-/* per-cpu queues and indices */
-static mmu_update_t xpq_queue[MAX_VIRT_CPUS][XPQUEUE_SIZE];
-static int xpq_idx[MAX_VIRT_CPUS];
-
-#define XPQ_QUEUE xpq_queue[vcpu]
-#define XPQ_IDX xpq_idx[vcpu]
-#define SET_VCPU() int vcpu = smp_processor_id()
-#else
-static mmu_update_t xpq_queue[XPQUEUE_SIZE];
-static int xpq_idx = 0;
-
-#define XPQ_QUEUE xpq_queue
-#define XPQ_IDX xpq_idx
-#define SET_VCPU()
-#endif
-#define XPQ_IDX_INC atomic_add_int(&XPQ_IDX, 1);
-
-
-static __inline void
-_xen_flush_queue(void)
-{
- SET_VCPU();
- int _xpq_idx = XPQ_IDX;
- int error, i;
- /* window of vulnerability here? */
-
- XPQ_IDX = 0;
- /* Make sure index is cleared first to avoid double updates. */
- error = HYPERVISOR_mmu_update((mmu_update_t *)&XPQ_QUEUE,
- _xpq_idx, NULL, DOMID_SELF);
-
- if (__predict_false(error < 0)) {
- for (i = 0; i < _xpq_idx; i++)
- printk("val: %x ptr: %p\n", XPQ_QUEUE[i].val, XPQ_QUEUE[i].ptr);
- panic("Failed to execute MMU updates: %d", error);
- }
-
-}
-
-void
-xen_flush_queue(void)
-{
- SET_VCPU();
- if (XPQ_IDX != 0) _xen_flush_queue();
-}
-
-static __inline void
-xen_increment_idx(void)
-{
- SET_VCPU();
-
- XPQ_IDX++;
- if (__predict_false(XPQ_IDX == XPQUEUE_SIZE))
- xen_flush_queue();
-}
-
-void
-xen_invlpg(vm_offset_t va)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_INVLPG_LOCAL;
- op.linear_addr = va & ~PAGE_MASK;
- xen_flush_queue();
- PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void
-load_cr3(uint32_t val)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_NEW_BASEPTR;
- op.mfn = xpmap_ptom(val) >> PAGE_SHIFT;
- xen_flush_queue();
- PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-
-void
-xen_machphys_update(unsigned long mfn, unsigned long pfn)
-{
- SET_VCPU();
-
- XPQ_QUEUE[XPQ_IDX].ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
- XPQ_QUEUE[XPQ_IDX].val = pfn;
- xen_increment_idx();
- _xen_flush_queue();
-}
-
-void
-xen_queue_pt_update(vm_paddr_t ptr, vm_paddr_t val)
-{
- SET_VCPU();
-
- XPQ_QUEUE[XPQ_IDX].ptr = (memory_t)ptr;
- XPQ_QUEUE[XPQ_IDX].val = (memory_t)val;
- xen_increment_idx();
-}
-
-void
-xen_pgd_pin(unsigned long ma)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_PIN_L2_TABLE;
- op.mfn = ma >> PAGE_SHIFT;
- xen_flush_queue();
- PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void
-xen_pgd_unpin(unsigned long ma)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_UNPIN_TABLE;
- op.mfn = ma >> PAGE_SHIFT;
- xen_flush_queue();
- PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void
-xen_pt_pin(unsigned long ma)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_PIN_L1_TABLE;
- op.mfn = ma >> PAGE_SHIFT;
- xen_flush_queue();
- PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void
-xen_pt_unpin(unsigned long ma)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_UNPIN_TABLE;
- op.mfn = ma >> PAGE_SHIFT;
- xen_flush_queue();
- PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void
-xen_set_ldt(unsigned long ptr, unsigned long len)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_SET_LDT;
- op.linear_addr = ptr;
- op.nr_ents = len;
- xen_flush_queue();
- PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void xen_tlb_flush(void)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
- xen_flush_queue();
- PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-
-/********** CODE WORTH KEEPING ABOVE HERE *****************/
-
-void xen_failsafe_handler(void);
-
-void
-xen_failsafe_handler(void)
-{
-
- panic("xen_failsafe_handler called!\n");
-}
-
-
-void
-xen_update_descriptor(union descriptor *table, union descriptor *entry)
-{
- vm_paddr_t pa;
- pt_entry_t *ptp;
- uint32_t raw[2];
-
- bcopy(entry, raw, 2*sizeof(int32_t));
- ptp = vtopte((vm_offset_t)table);
- pa = (*ptp & PG_FRAME) | ((vm_offset_t)table & PAGE_MASK);
- if (HYPERVISOR_update_descriptor(pa, raw[0], raw[1]))
- panic("HYPERVISOR_update_descriptor failed\n");
-}
-
-
-
-#if defined(XENDEBUG)
-static void
-xpmap_dump_pt(pt_entry_t *ptp, int p)
-{
- pt_entry_t pte;
- int j;
- int bufpos;
-
- pte = xpmap_ptom((uint32_t)ptp - KERNTEXTOFF);
- PRINTK(("%03x: %p(%p) %08x\n", p, ptp, (void *)pte, p << PDRSHIFT));
-
- bufpos = 0;
- for (j = 0; j < PTES_PER_PTP; j++) {
- if ((ptp[j] & PG_V) == 0)
- continue;
- pte = ptp[j] /* & PG_FRAME */;
- bufpos += sprintf(XBUF + bufpos, "%x:%03x:%08x ",
- p, j, pte);
- if (bufpos > 70) {
- int k;
- sprintf(XBUF + bufpos, "\n");
- PRINTK((XBUF));
- bufpos = 0;
- for (k = 0; k < 1000000; k++);
- }
- }
- if (bufpos) {
- PRINTK((XBUF));
- bufpos = 0;
- }
-}
-#endif
-
-
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/include/cpufunc.h
--- a/freebsd-5.3-xen-sparse/i386-xen/include/cpufunc.h Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,601 +0,0 @@
-/*-
- * Copyright (c) 1993 The Regents of the University of California.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/i386/include/cpufunc.h,v 1.135 2003/08/06 18:21:27 bde
Exp $
- */
-
-/*
- * Functions to provide access to special i386 instructions.
- * This in included in sys/systm.h, and that file should be
- * used in preference to this.
- */
-
-#ifndef _MACHINE_CPUFUNC_H_
-#define _MACHINE_CPUFUNC_H_
-
-#include <sys/cdefs.h>
-#include <machine/psl.h>
-#define NO_EXCHANGE
-#include <machine/xen-os.h>
-#include <machine/evtchn.h>
-#include <machine/xenvar.h>
-struct thread;
-struct region_descriptor;
-
-__BEGIN_DECLS
-#define readb(va) (*(volatile u_int8_t *) (va))
-#define readw(va) (*(volatile u_int16_t *) (va))
-#define readl(va) (*(volatile u_int32_t *) (va))
-
-#define writeb(va, d) (*(volatile u_int8_t *) (va) = (d))
-#define writew(va, d) (*(volatile u_int16_t *) (va) = (d))
-#define writel(va, d) (*(volatile u_int32_t *) (va) = (d))
-
-static __inline u_int
-read_eflags(void)
-{
- u_int ef;
- __asm __volatile("pushfl; popl %0" : "=r" (ef));
- return (ef);
-}
-
-static __inline void
-write_eflags(u_int ef)
-{
- __asm __volatile("pushl %0; popfl" : : "r" (ef));
-}
-#ifdef __GNUC__
-
-static __inline void
-breakpoint(void)
-{
- __asm __volatile("int $3");
-}
-
-static __inline u_int
-bsfl(u_int mask)
-{
- u_int result;
-
- __asm __volatile("bsfl %1,%0" : "=r" (result) : "rm" (mask));
- return (result);
-}
-
-static __inline u_int
-bsrl(u_int mask)
-{
- u_int result;
-
- __asm __volatile("bsrl %1,%0" : "=r" (result) : "rm" (mask));
- return (result);
-}
-static __inline void
-disable_intr(void)
-{
- __cli();
-}
-static __inline void
-do_cpuid(u_int ax, u_int *p)
-{
- __asm __volatile("cpuid"
- : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
- : "0" (ax));
-}
-
-static __inline void
-enable_intr(void)
-{
- __sti();
-}
-
-
-#define HAVE_INLINE_FFS
-
-static __inline int
-ffs(int mask)
-{
- /*
- * Note that gcc-2's builtin ffs would be used if we didn't declare
- * this inline or turn off the builtin. The builtin is faster but
- * broken in gcc-2.4.5 and slower but working in gcc-2.5 and later
- * versions.
- */
- return (mask == 0 ? mask : (int)bsfl((u_int)mask) + 1);
-}
-
-#define HAVE_INLINE_FLS
-
-static __inline int
-fls(int mask)
-{
- return (mask == 0 ? mask : (int)bsrl((u_int)mask) + 1);
-}
-
-static __inline void
-halt(void)
-{
- __asm __volatile("hlt");
-}
-
-#if __GNUC__ < 2
-
-#define inb(port) inbv(port)
-#define outb(port, data) outbv(port, data)
-
-#else /* __GNUC >= 2 */
-
-/*
- * The following complications are to get around gcc not having a
- * constraint letter for the range 0..255. We still put "d" in the
- * constraint because "i" isn't a valid constraint when the port
- * isn't constant. This only matters for -O0 because otherwise
- * the non-working version gets optimized away.
- *
- * Use an expression-statement instead of a conditional expression
- * because gcc-2.6.0 would promote the operands of the conditional
- * and produce poor code for "if ((inb(var) & const1) == const2)".
- *
- * The unnecessary test `(port) < 0x10000' is to generate a warning if
- * the `port' has type u_short or smaller. Such types are pessimal.
- * This actually only works for signed types. The range check is
- * careful to avoid generating warnings.
- */
-#define inb(port) __extension__ ({
\
- u_char _data; \
- if (__builtin_constant_p(port) && ((port) & 0xffff) < 0x100 \
- && (port) < 0x10000) \
- _data = inbc(port); \
- else \
- _data = inbv(port); \
- _data; })
-
-#define outb(port, data) (
\
- __builtin_constant_p(port) && ((port) & 0xffff) < 0x100 \
- && (port) < 0x10000 \
- ? outbc(port, data) : outbv(port, data))
-
-static __inline u_char
-inbc(u_int port)
-{
- u_char data;
-
- __asm __volatile("inb %1,%0" : "=a" (data) : "id" ((u_short)(port)));
- return (data);
-}
-
-static __inline void
-outbc(u_int port, u_char data)
-{
- __asm __volatile("outb %0,%1" : : "a" (data), "id" ((u_short)(port)));
-}
-
-#endif /* __GNUC <= 2 */
-
-static __inline u_char
-inbv(u_int port)
-{
- u_char data;
- /*
- * We use %%dx and not %1 here because i/o is done at %dx and not at
- * %edx, while gcc generates inferior code (movw instead of movl)
- * if we tell it to load (u_short) port.
- */
- __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
- return (data);
-}
-
-static __inline u_int
-inl(u_int port)
-{
- u_int data;
-
- __asm __volatile("inl %%dx,%0" : "=a" (data) : "d" (port));
- return (data);
-}
-
-static __inline void
-insb(u_int port, void *addr, size_t cnt)
-{
- __asm __volatile("cld; rep; insb"
- : "+D" (addr), "+c" (cnt)
- : "d" (port)
- : "memory");
-}
-
-static __inline void
-insw(u_int port, void *addr, size_t cnt)
-{
- __asm __volatile("cld; rep; insw"
- : "+D" (addr), "+c" (cnt)
- : "d" (port)
- : "memory");
-}
-
-static __inline void
-insl(u_int port, void *addr, size_t cnt)
-{
- __asm __volatile("cld; rep; insl"
- : "+D" (addr), "+c" (cnt)
- : "d" (port)
- : "memory");
-}
-
-static __inline void
-invd(void)
-{
- __asm __volatile("invd");
-}
-
-static __inline u_short
-inw(u_int port)
-{
- u_short data;
-
- __asm __volatile("inw %%dx,%0" : "=a" (data) : "d" (port));
- return (data);
-}
-
-static __inline void
-outbv(u_int port, u_char data)
-{
- u_char al;
- /*
- * Use an unnecessary assignment to help gcc's register allocator.
- * This make a large difference for gcc-1.40 and a tiny difference
- * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for
- * best results. gcc-2.6.0 can't handle this.
- */
- al = data;
- __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
-}
-
-static __inline void
-outl(u_int port, u_int data)
-{
- /*
- * outl() and outw() aren't used much so we haven't looked at
- * possible micro-optimizations such as the unnecessary
- * assignment for them.
- */
- __asm __volatile("outl %0,%%dx" : : "a" (data), "d" (port));
-}
-
-static __inline void
-outsb(u_int port, const void *addr, size_t cnt)
-{
- __asm __volatile("cld; rep; outsb"
- : "+S" (addr), "+c" (cnt)
- : "d" (port));
-}
-
-static __inline void
-outsw(u_int port, const void *addr, size_t cnt)
-{
- __asm __volatile("cld; rep; outsw"
- : "+S" (addr), "+c" (cnt)
- : "d" (port));
-}
-
-static __inline void
-outsl(u_int port, const void *addr, size_t cnt)
-{
- __asm __volatile("cld; rep; outsl"
- : "+S" (addr), "+c" (cnt)
- : "d" (port));
-}
-
-static __inline void
-outw(u_int port, u_short data)
-{
- __asm __volatile("outw %0,%%dx" : : "a" (data), "d" (port));
-}
-
-static __inline void
-ia32_pause(void)
-{
- __asm __volatile("pause");
-}
-
-static __inline u_int64_t
-rdmsr(u_int msr)
-{
- u_int64_t rv;
-
- __asm __volatile("rdmsr" : "=A" (rv) : "c" (msr));
- return (rv);
-}
-
-static __inline u_int64_t
-rdpmc(u_int pmc)
-{
- u_int64_t rv;
-
- __asm __volatile("rdpmc" : "=A" (rv) : "c" (pmc));
- return (rv);
-}
-
-static __inline u_int64_t
-rdtsc(void)
-{
- u_int64_t rv;
-
- __asm __volatile("rdtsc" : "=A" (rv));
- return (rv);
-}
-
-static __inline void
-wbinvd(void)
-{
- __asm __volatile("wbinvd");
-}
-
-static __inline void
-wrmsr(u_int msr, u_int64_t newval)
-{
- __asm __volatile("wrmsr" : : "A" (newval), "c" (msr));
-}
-
-static __inline u_int
-rfs(void)
-{
- u_int sel;
- __asm __volatile("movl %%fs,%0" : "=rm" (sel));
- return (sel);
-}
-
-static __inline u_int
-rgs(void)
-{
- u_int sel;
- __asm __volatile("movl %%gs,%0" : "=rm" (sel));
- return (sel);
-}
-
-static __inline void
-load_fs(u_int sel)
-{
- __asm __volatile("movl %0,%%fs" : : "rm" (sel));
-}
-
-static __inline void
-load_gs(u_int sel)
-{
- __asm __volatile("movl %0,%%gs" : : "rm" (sel));
-}
-
-/* void lidt(struct region_descriptor *addr); */
-static __inline void
-lidt(struct region_descriptor *addr)
-{
- __asm __volatile("lidt (%0)" : : "r" (addr));
-}
-
-static __inline u_int
-rdr0(void)
-{
- u_int data;
- __asm __volatile("movl %%dr0,%0" : "=r" (data));
- return (data);
-}
-
-static __inline void
-load_dr0(u_int dr0)
-{
- __asm __volatile("movl %0,%%dr0" : : "r" (dr0));
-}
-
-static __inline u_int
-rdr1(void)
-{
- u_int data;
- __asm __volatile("movl %%dr1,%0" : "=r" (data));
- return (data);
-}
-
-static __inline void
-load_dr1(u_int dr1)
-{
- __asm __volatile("movl %0,%%dr1" : : "r" (dr1));
-}
-
-static __inline u_int
-rdr2(void)
-{
- u_int data;
- __asm __volatile("movl %%dr2,%0" : "=r" (data));
- return (data);
-}
-
-static __inline void
-load_dr2(u_int dr2)
-{
- __asm __volatile("movl %0,%%dr2" : : "r" (dr2));
-}
-
-static __inline u_int
-rdr3(void)
-{
- u_int data;
- __asm __volatile("movl %%dr3,%0" : "=r" (data));
- return (data);
-}
-
-static __inline void
-load_dr3(u_int dr3)
-{
- __asm __volatile("movl %0,%%dr3" : : "r" (dr3));
-}
-
-static __inline u_int
-rdr4(void)
-{
- u_int data;
- __asm __volatile("movl %%dr4,%0" : "=r" (data));
- return (data);
-}
-
-static __inline void
-load_dr4(u_int dr4)
-{
- __asm __volatile("movl %0,%%dr4" : : "r" (dr4));
-}
-
-static __inline u_int
-rdr5(void)
-{
- u_int data;
- __asm __volatile("movl %%dr5,%0" : "=r" (data));
- return (data);
-}
-
-static __inline void
-load_dr5(u_int dr5)
-{
- __asm __volatile("movl %0,%%dr5" : : "r" (dr5));
-}
-
-static __inline u_int
-rdr6(void)
-{
- u_int data;
- __asm __volatile("movl %%dr6,%0" : "=r" (data));
- return (data);
-}
-
-static __inline void
-load_dr6(u_int dr6)
-{
- __asm __volatile("movl %0,%%dr6" : : "r" (dr6));
-}
-
-static __inline u_int
-rdr7(void)
-{
- u_int data;
- __asm __volatile("movl %%dr7,%0" : "=r" (data));
- return (data);
-}
-
-static __inline void
-load_dr7(u_int dr7)
-{
- __asm __volatile("movl %0,%%dr7" : : "r" (dr7));
-}
-
-static __inline register_t
-intr_disable(void)
-{
- register_t eflags;
-
- __save_and_cli(eflags);
- return (eflags);
-}
-
-static __inline void
-intr_restore(register_t eflags)
-{
- __restore_flags(eflags);
-}
-
-#else /* !__GNUC__ */
-
-int breakpoint(void);
-u_int bsfl(u_int mask);
-u_int bsrl(u_int mask);
-void cpu_invlpg(u_int addr);
-void cpu_invlpg_range(u_int start, u_int end);
-void disable_intr(void);
-void do_cpuid(u_int ax, u_int *p);
-void enable_intr(void);
-void halt(void);
-u_char inb(u_int port);
-u_int inl(u_int port);
-void insb(u_int port, void *addr, size_t cnt);
-void insl(u_int port, void *addr, size_t cnt);
-void insw(u_int port, void *addr, size_t cnt);
-void invd(void);
-void invlpg(u_int addr);
-void invlpg_range(u_int start, u_int end);
-void invltlb(void);
-u_short inw(u_int port);
-void load_cr3(u_int cr3);
-void load_cr4(u_int cr4);
-void load_fs(u_int sel);
-void load_gs(u_int sel);
-struct region_descriptor;
-void lidt(struct region_descriptor *addr);
-void ltr(u_short sel);
-void outb(u_int port, u_char data);
-void outl(u_int port, u_int data);
-void outsb(u_int port, void *addr, size_t cnt);
-void outsl(u_int port, void *addr, size_t cnt);
-void outsw(u_int port, void *addr, size_t cnt);
-void outw(u_int port, u_short data);
-void ia32_pause(void);
-u_int rcr2(void);
-u_int rcr3(void);
-u_int rcr4(void);
-u_int rfs(void);
-u_int rgs(void);
-u_int64_t rdmsr(u_int msr);
-u_int64_t rdpmc(u_int pmc);
-u_int64_t rdtsc(void);
-u_int read_eflags(void);
-void wbinvd(void);
-void write_eflags(u_int ef);
-void wrmsr(u_int msr, u_int64_t newval);
-u_int rdr0(void);
-void load_dr0(u_int dr0);
-u_int rdr1(void);
-void load_dr1(u_int dr1);
-u_int rdr2(void);
-void load_dr2(u_int dr2);
-u_int rdr3(void);
-void load_dr3(u_int dr3);
-u_int rdr4(void);
-void load_dr4(u_int dr4);
-u_int rdr5(void);
-void load_dr5(u_int dr5);
-u_int rdr6(void);
-void load_dr6(u_int dr6);
-u_int rdr7(void);
-void load_dr7(u_int dr7);
-register_t intr_disable(void);
-void intr_restore(register_t ef);
-
-#endif /* __GNUC__ */
-
-void reset_dbregs(void);
-
-__END_DECLS
-
-#endif /* !_MACHINE_CPUFUNC_H_ */
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/include/ctrl_if.h
--- a/freebsd-5.3-xen-sparse/i386-xen/include/ctrl_if.h Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,120 +0,0 @@
-/******************************************************************************
- * ctrl_if.h
- *
- * Management functions for special interface to the domain controller.
- *
- * Copyright (c) 2004, K A Fraser
- */
-
-#ifndef __I386_XENO__CTRL_IF_H__
-#define __I386_XENO__CTRL_IF_H__
-
-#include <sys/taskqueue.h>
-#include <machine/hypervisor.h>
-
-
-typedef control_msg_t ctrl_msg_t;
-
-/*
- * Callback function type. Called for asynchronous processing of received
- * request messages, and responses to previously-transmitted request messages.
- * The parameters are (@msg, @id).
- * @msg: Original request/response message (not a copy). The message can be
- * modified in-place by the handler (e.g., a response callback can
- * turn a request message into a response message in place). The message
- * is no longer accessible after the callback handler returns -- if the
- * message is required to persist for longer then it must be copied.
- * @id: (Response callbacks only) The 'id' that was specified when the
- * original request message was queued for transmission.
- */
-typedef void (*ctrl_msg_handler_t)(ctrl_msg_t *, unsigned long);
-
-/*
- * Send @msg to the domain controller. Execute @hnd when a response is
- * received, passing the response message and the specified @id. This
- * operation will not block: it will return -EAGAIN if there is no space.
- * Notes:
- * 1. The @msg is copied if it is transmitted and so can be freed after this
- * function returns.
- * 2. If @hnd is NULL then no callback is executed.
- */
-int ctrl_if_send_message_noblock(
- ctrl_msg_t *msg,
- ctrl_msg_handler_t hnd,
- unsigned long id);
-
-/*
- * Send @msg to the domain controller. Execute @hnd when a response is
- * received, passing the response message and the specified @id. This
- * operation will block until the message is sent, or a signal is received
- * for the calling process (unless @wait_state is TASK_UNINTERRUPTIBLE).
- * Notes:
- * 1. The @msg is copied if it is transmitted and so can be freed after this
- * function returns.
- * 2. If @hnd is NULL then no callback is executed.
- */
-int ctrl_if_send_message_block(
- ctrl_msg_t *msg,
- ctrl_msg_handler_t hnd,
- unsigned long id,
- long wait_state);
-
-/*
- * Request a callback when there is /possibly/ space to immediately send a
- * message to the domain controller. This function returns 0 if there is
- * already space to trasnmit a message --- in this case the callback task /may/
- * still be executed. If this function returns 1 then the callback /will/ be
- * executed when space becomes available.
- */
-int ctrl_if_enqueue_space_callback(struct task *task);
-
-/*
- * Send a response (@msg) to a message from the domain controller. This will
- * never block.
- * Notes:
- * 1. The @msg is copied and so can be freed after this function returns.
- * 2. The @msg may be the original request message, modified in-place.
- */
-void ctrl_if_send_response(ctrl_msg_t *msg);
-
-/*
- * Register a receiver for typed messages from the domain controller. The
- * handler (@hnd) is called for every received message of specified @type.
- * Returns TRUE (non-zero) if the handler was successfully registered.
- * If CALLBACK_IN_BLOCKING CONTEXT is specified in @flags then callbacks will
- * occur in a context in which it is safe to yield (i.e., process context).
- */
-#define CALLBACK_IN_BLOCKING_CONTEXT 1
-int ctrl_if_register_receiver(
- uint8_t type,
- ctrl_msg_handler_t hnd,
- unsigned int flags);
-
-/*
- * Unregister a receiver for typed messages from the domain controller. The
- * handler (@hnd) will not be executed after this function returns.
- */
-void ctrl_if_unregister_receiver(uint8_t type, ctrl_msg_handler_t hnd);
-
-/* Suspend/resume notifications. */
-void ctrl_if_suspend(void);
-void ctrl_if_resume(void);
-
-
-/*
- * Returns TRUE if there are no outstanding message requests at the domain
- * controller. This can be used to ensure that messages have really flushed
- * through when it is not possible to use the response-callback interface.
- * WARNING: If other subsystems are using the control interface then this
- * function might never return TRUE!
- */
-int ctrl_if_transmitter_empty(void); /* !! DANGEROUS FUNCTION !! */
-
-/*
- * Manually discard response messages from the domain controller.
- * WARNING: This is usually done automatically -- this function should only
- * be called when normal interrupt mechanisms are disabled!
- */
-void ctrl_if_discard_responses(void); /* !! DANGEROUS FUNCTION !! */
-
-#endif /* __ASM_XEN__CONTROL_IF_H__ */
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/include/evtchn.h
--- a/freebsd-5.3-xen-sparse/i386-xen/include/evtchn.h Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,110 +0,0 @@
-/******************************************************************************
- * evtchn.h
- *
- * Communication via Xen event channels.
- * Also definitions for the device that demuxes notifications to userspace.
- *
- * Copyright (c) 2004, K A Fraser
- */
-
-#ifndef __ASM_EVTCHN_H__
-#define __ASM_EVTCHN_H__
-#include <machine/pcpu.h>
-#include <machine/hypervisor.h>
-#include <machine/synch_bitops.h>
-#include <machine/hypervisor-ifs.h>
-
-#ifdef SMP
-#include <sys/param.h> /* XXX for time.h */
-#include <sys/time.h> /* XXX for pcpu.h */
-#include <sys/pcpu.h> /* XXX for PCPU_GET */
-extern int gdt_set;
-static inline int
-smp_processor_id(void)
-{
- if (likely(gdt_set))
- return PCPU_GET(cpuid);
- return 0;
-}
-
-#else
-#define smp_processor_id() 0
-#endif
-
-/*
- * LOW-LEVEL DEFINITIONS
- */
-
-/* Force a proper event-channel callback from Xen. */
-void force_evtchn_callback(void);
-
-/* Entry point for notifications into Linux subsystems. */
-void evtchn_do_upcall(struct intrframe *frame);
-
-/* Entry point for notifications into the userland character device. */
-void evtchn_device_upcall(int port);
-
-static inline void
-mask_evtchn(int port)
-{
- shared_info_t *s = HYPERVISOR_shared_info;
- synch_set_bit(port, &s->evtchn_mask[0]);
-}
-
-static inline void
-unmask_evtchn(int port)
-{
- shared_info_t *s = HYPERVISOR_shared_info;
- vcpu_info_t *vcpu_info = &s->vcpu_data[smp_processor_id()];
-
- synch_clear_bit(port, &s->evtchn_mask[0]);
-
- /*
- * The following is basically the equivalent of 'hw_resend_irq'. Just like
- * a real IO-APIC we 'lose the interrupt edge' if the channel is masked.
- */
- if ( synch_test_bit (port, &s->evtchn_pending[0]) &&
- !synch_test_and_set_bit(port>>5, &vcpu_info->evtchn_pending_sel) )
- {
- s->vcpu_data[0].evtchn_upcall_pending = 1;
- if ( !s->vcpu_data[0].evtchn_upcall_mask )
- force_evtchn_callback();
- }
-}
-
-static inline void
-clear_evtchn(int port)
-{
- shared_info_t *s = HYPERVISOR_shared_info;
- synch_clear_bit(port, &s->evtchn_pending[0]);
-}
-
-static inline void
-notify_via_evtchn(int port)
-{
- evtchn_op_t op;
- op.cmd = EVTCHNOP_send;
- op.u.send.local_port = port;
- (void)HYPERVISOR_event_channel_op(&op);
-}
-
-/*
- * CHARACTER-DEVICE DEFINITIONS
- */
-
-#define PORT_NORMAL 0x0000
-#define PORT_EXCEPTION 0x8000
-#define PORTIDX_MASK 0x7fff
-
-/* /dev/xen/evtchn resides at device number major=10, minor=200 */
-#define EVTCHN_MINOR 200
-
-/* /dev/xen/evtchn ioctls: */
-/* EVTCHN_RESET: Clear and reinit the event buffer. Clear error condition. */
-#define EVTCHN_RESET _IO('E', 1)
-/* EVTCHN_BIND: Bind to the specified event-channel port. */
-#define EVTCHN_BIND _IO('E', 2)
-/* EVTCHN_UNBIND: Unbind from the specified event-channel port. */
-#define EVTCHN_UNBIND _IO('E', 3)
-
-#endif /* __ASM_EVTCHN_H__ */
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/include/gnttab.h
--- a/freebsd-5.3-xen-sparse/i386-xen/include/gnttab.h Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,71 +0,0 @@
-/******************************************************************************
- * gnttab.h
- *
- * Two sets of functionality:
- * 1. Granting foreign access to our memory reservation.
- * 2. Accessing others' memory reservations via grant references.
- * (i.e., mechanisms for both sender and recipient of grant references)
- *
- * Copyright (c) 2004, K A Fraser
- * Copyright (c) 2005, Christopher Clark
- */
-
-#ifndef __ASM_GNTTAB_H__
-#define __ASM_GNTTAB_H__
-
-#include <machine/hypervisor.h>
-#include <machine/hypervisor-ifs.h>
-
-/* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */
-#define NR_GRANT_FRAMES 4
-#define NR_GRANT_ENTRIES (NR_GRANT_FRAMES * PAGE_SIZE / sizeof(grant_entry_t))
-
-int
-gnttab_grant_foreign_access(
- domid_t domid, unsigned long frame, int readonly);
-
-void
-gnttab_end_foreign_access(
- grant_ref_t ref, int readonly);
-
-int
-gnttab_grant_foreign_transfer(
- domid_t domid, unsigned long pfn);
-
-unsigned long
-gnttab_end_foreign_transfer(
- grant_ref_t ref);
-
-int
-gnttab_query_foreign_access(
- grant_ref_t ref );
-
-/*
- * operations on reserved batches of grant references
- */
-int
-gnttab_alloc_grant_references(
- uint16_t count, grant_ref_t *pprivate_head, grant_ref_t *private_terminal
);
-
-void
-gnttab_free_grant_references(
- uint16_t count, grant_ref_t private_head );
-
-int
-gnttab_claim_grant_reference( grant_ref_t *pprivate_head, grant_ref_t terminal
-);
-
-void
-gnttab_release_grant_reference(
- grant_ref_t *private_head, grant_ref_t release );
-
-void
-gnttab_grant_foreign_access_ref(
- grant_ref_t ref, domid_t domid, unsigned long frame, int readonly);
-
-void
-gnttab_grant_foreign_transfer_ref(
- grant_ref_t, domid_t domid, unsigned long pfn);
-
-
-#endif /* __ASM_GNTTAB_H__ */
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/include/hypervisor-ifs.h
--- a/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor-ifs.h Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,38 +0,0 @@
-#ifndef _HYPERVISOR_IFS_H_
-#define _HYPERVISOR_IFS_H_
-
-#define s8 int8_t
-#define s16 int16_t
-#define s32 int32_t
-#define s64 int64_t
-
-#define u8 uint8_t
-#define u16 uint16_t
-#define u32 uint32_t
-#define u64 uint64_t
-
-#define CONFIG_XEN_BLKDEV_GRANT
-#include <machine/xen-public/xen.h>
-#include <machine/xen-public/io/domain_controller.h>
-#include <machine/xen-public/io/netif.h>
-#include <machine/xen-public/io/blkif.h>
-#include <machine/xen-public/dom0_ops.h>
-#include <machine/xen-public/event_channel.h>
-#include <machine/xen-public/sched_ctl.h>
-#include <machine/xen-public/physdev.h>
-#include <machine/xen-public/grant_table.h>
-#undef blkif_sector_t /* XXX pre-processor didn't do the */
-#define blkif_sector_t uint64_t /* right thing */
-
-#undef s8
-#undef s16
-#undef s32
-#undef s64
-
-#undef u8
-#undef u16
-#undef u32
-#undef u64
-
-
-#endif
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h
--- a/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,448 +0,0 @@
-/******************************************************************************
- * hypervisor.h
- *
- * Linux-specific hypervisor handling.
- *
- * Copyright (c) 2002, K A Fraser
- */
-
-#ifndef __HYPERVISOR_H__
-#define __HYPERVISOR_H__
-
-
-#include <machine/hypervisor-ifs.h>
-#include <machine/frame.h>
-#include "opt_xen.h"
-
-extern start_info_t *xen_start_info;
-
-/* arch/xen/mm/hypervisor.c */
-/*
- * NB. ptr values should be PHYSICAL, not MACHINE. 'vals' should be already
- * be MACHINE addresses.
- */
-
-static inline void HYPERVISOR_crash(void) __dead2;
-
-void MULTICALL_flush_page_update_queue(void);
-
-#ifdef CONFIG_XEN_PHYSDEV_ACCESS
-/* Allocate a contiguous empty region of low memory. Return virtual start. */
-unsigned long allocate_empty_lowmem_region(unsigned long pages);
-/* Deallocate a contiguous region of low memory. Return it to the allocator. */
-void deallocate_lowmem_region(unsigned long vstart, unsigned long pages);
-#endif
-
-typedef struct { unsigned long pte_low, pte_high; } pte_t;
-
-/*
- * Assembler stubs for hyper-calls.
- */
-
-static inline int
-HYPERVISOR_set_trap_table(trap_info_t *table)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_set_trap_table),
- "b" (table) : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_mmu_update(mmu_update_t *req, int count,
- int *success_count, domid_t domid)
-{
- int ret;
- unsigned long ign1, ign2, ign3, ign4;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
- : "0" (__HYPERVISOR_mmu_update), "1" (req), "2" (count),
- "3" (success_count), "4" (domid)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_mmuext_op(
- struct mmuext_op *op, int count, int *success_count,
domid_t domid)
-{
- int ret;
- unsigned long ign1, ign2, ign3, ign4;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
- : "0" (__HYPERVISOR_mmuext_op), "1" (op), "2" (count),
- "3" (success_count), "4" (domid)
- : "memory" );
-
- return ret;
-}
-
-
-
-static inline int
-HYPERVISOR_set_gdt(unsigned long *frame_list, int entries)
-{
- int ret;
- unsigned long ign1, ign2;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2)
- : "0" (__HYPERVISOR_set_gdt), "1" (frame_list), "2" (entries)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_stack_switch),
- "b" (ss), "c" (esp) : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_set_callbacks(
- unsigned long event_selector, unsigned long event_address,
- unsigned long failsafe_selector, unsigned long failsafe_address)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_set_callbacks),
- "b" (event_selector), "c" (event_address),
- "d" (failsafe_selector), "S" (failsafe_address) : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_fpu_taskswitch(void)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_fpu_taskswitch) : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_yield(void)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_sched_op),
- "b" (SCHEDOP_yield) : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_block(void)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_sched_op),
- "b" (SCHEDOP_block) : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_shutdown(void)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_sched_op),
- "b" (SCHEDOP_shutdown | (SHUTDOWN_poweroff << SCHEDOP_reasonshift))
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_reboot(void)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_sched_op),
- "b" (SCHEDOP_shutdown | (SHUTDOWN_reboot << SCHEDOP_reasonshift))
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_suspend(unsigned long srec)
-{
- int ret;
- /* NB. On suspend, control software expects a suspend record in %esi. */
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_sched_op),
- "b" (SCHEDOP_shutdown | (SHUTDOWN_suspend << SCHEDOP_reasonshift)),
- "S" (srec) : "memory" );
-
- return ret;
-}
-
-
-static inline void
-HYPERVISOR_crash(void)
-{
- int ret;
- unsigned long ign1;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1)
- : "0" (__HYPERVISOR_sched_op),
- "1" (SCHEDOP_shutdown | (SHUTDOWN_crash << SCHEDOP_reasonshift))
- : "memory" );
-
- for (;;) ; /* eliminate noreturn error */
-
-}
-
-static inline long
-HYPERVISOR_set_timer_op(uint64_t timeout)
-{
- int ret;
- unsigned long timeout_hi = (unsigned long)(timeout>>32);
- unsigned long timeout_lo = (unsigned long)timeout;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_set_timer_op),
- "b" (timeout_lo), "c" (timeout_hi) : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_dom0_op(dom0_op_t *dom0_op)
-{
- int ret;
- dom0_op->interface_version = DOM0_INTERFACE_VERSION;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_dom0_op),
- "b" (dom0_op) : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_set_debugreg(int reg, unsigned long value)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_set_debugreg),
- "b" (reg), "c" (value) : "memory" );
-
- return ret;
-}
-
-static inline unsigned long
-HYPERVISOR_get_debugreg(int reg)
-{
- unsigned long ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_get_debugreg),
- "b" (reg) : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_update_descriptor(
- unsigned long pa, unsigned long word1, unsigned long word2)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_update_descriptor),
- "b" (pa), "c" (word1), "d" (word2) : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_dom_mem_op(unsigned int op,
- unsigned long *pages,
- unsigned long nr_pages)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_dom_mem_op),
- "b" (op), "c" (pages), "d" (nr_pages) : "memory" );
- return ret;
-}
-
-static inline int
-HYPERVISOR_multicall(void *call_list, int nr_calls)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_multicall),
- "b" (call_list), "c" (nr_calls) : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_update_va_mapping(
- unsigned long page_nr, unsigned long new_val, unsigned long flags)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_update_va_mapping),
- "b" (page_nr), "c" (new_val), "d" (flags):
- "memory" );
- /* XXX */
-#if 0
- if ( unlikely(ret < 0) )
- panic("Failed update VA mapping: %08lx, %08lx, %08lx",
- page_nr, (new_val).pte_low, flags);
-#endif
- return ret;
-}
-
-static inline int
-HYPERVISOR_event_channel_op(void *op)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_event_channel_op),
- "b" (op) : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_xen_version(int cmd)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_xen_version),
- "b" (cmd) : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_console_io(int cmd, int count, char *str)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_console_io),
- "b" (cmd), "c" (count), "d" (str) : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_console_write(char *str, int count)
-{
- return HYPERVISOR_console_io(CONSOLEIO_write, count, str);
-}
-
-static inline int
-HYPERVISOR_physdev_op(void *physdev_op)
-{
- int ret;
- unsigned long ign;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign)
- : "0" (__HYPERVISOR_physdev_op), "1" (physdev_op)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_grant_table_op(
- unsigned int cmd, void *uop, unsigned int count)
-{
- int ret;
- unsigned long ign1, ign2, ign3;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
- : "0" (__HYPERVISOR_grant_table_op), "1" (cmd), "2" (uop), "3" (count)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_update_va_mapping_otherdomain(
- unsigned long va, pte_t new_val, unsigned long flags, domid_t domid)
-{
- int ret;
- unsigned long ign1, ign2, ign3, ign4;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
- : "0" (__HYPERVISOR_update_va_mapping_otherdomain),
- "1" (va), "2" ((new_val).pte_low), "3" (flags), "4" (domid) :
- "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_vm_assist),
- "b" (cmd), "c" (type) : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_boot_vcpu(
- unsigned long vcpu, vcpu_guest_context_t *ctxt)
-{
- int ret;
- unsigned long ign1, ign2;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2)
- : "0" (__HYPERVISOR_boot_vcpu), "1" (vcpu), "2" (ctxt)
- : "memory");
-
- return ret;
-}
-
-#endif /* __HYPERVISOR_H__ */
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/include/md_var.h
--- a/freebsd-5.3-xen-sparse/i386-xen/include/md_var.h Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,108 +0,0 @@
-/*-
- * Copyright (c) 1995 Bruce D. Evans.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the author nor the names of contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/i386/include/md_var.h,v 1.66 2003/11/03 22:37:28 jhb Exp $
- */
-
-#ifndef _MACHINE_MD_VAR_H_
-#define _MACHINE_MD_VAR_H_
-
-/*
- * Miscellaneous machine-dependent declarations.
- */
-
-extern void (*bcopy_vector)(const void *from, void *to, size_t len);
-extern void (*bzero_vector)(void *buf, size_t len);
-extern int (*copyin_vector)(const void *udaddr, void *kaddr, size_t len);
-extern int (*copyout_vector)(const void *kaddr, void *udaddr, size_t len);
-
-extern long Maxmem;
-extern u_int atdevbase; /* offset in virtual memory of ISA io mem */
-extern u_int basemem; /* PA of original top of base memory */
-extern int busdma_swi_pending;
-extern u_int cpu_exthigh;
-extern u_int cpu_feature;
-extern u_int cpu_fxsr;
-extern u_int cpu_high;
-extern u_int cpu_id;
-extern u_int cpu_procinfo;
-extern char cpu_vendor[];
-extern u_int cyrix_did;
-extern uint16_t *elan_mmcr;
-extern char kstack[];
-#ifdef PC98
-extern int need_pre_dma_flush;
-extern int need_post_dma_flush;
-#endif
-extern char sigcode[];
-extern int szsigcode;
-#ifdef COMPAT_FREEBSD4
-extern int szfreebsd4_sigcode;
-#endif
-#ifdef COMPAT_43
-extern int szosigcode;
-#endif
-
-typedef void alias_for_inthand_t(u_int cs, u_int ef, u_int esp, u_int ss);
-struct thread;
-struct reg;
-struct fpreg;
-struct dbreg;
-
-void bcopyb(const void *from, void *to, size_t len);
-void busdma_swi(void);
-void cpu_setregs(void);
-void cpu_switch_load_gs(void) __asm(__STRING(cpu_switch_load_gs));
-void doreti_iret(void) __asm(__STRING(doreti_iret));
-void doreti_iret_fault(void) __asm(__STRING(doreti_iret_fault));
-void doreti_popl_ds(void) __asm(__STRING(doreti_popl_ds));
-void doreti_popl_ds_fault(void) __asm(__STRING(doreti_popl_ds_fault));
-void doreti_popl_es(void) __asm(__STRING(doreti_popl_es));
-void doreti_popl_es_fault(void) __asm(__STRING(doreti_popl_es_fault));
-void doreti_popl_fs(void) __asm(__STRING(doreti_popl_fs));
-void doreti_popl_fs_fault(void) __asm(__STRING(doreti_popl_fs_fault));
-void scrit(void) __asm(__STRING(scrit));
-void ecrit(void) __asm(__STRING(ecrit));
-void critical_region_fixup(void) __asm(__STRING(critical_region_fixup));
-void enable_sse(void);
-void fillw(int /*u_short*/ pat, void *base, size_t cnt);
-void i486_bzero(void *buf, size_t len);
-void i586_bcopy(const void *from, void *to, size_t len);
-void i586_bzero(void *buf, size_t len);
-int i586_copyin(const void *udaddr, void *kaddr, size_t len);
-int i586_copyout(const void *kaddr, void *udaddr, size_t len);
-void i686_pagezero(void *addr);
-void sse2_pagezero(void *addr);
-void init_AMD_Elan_sc520(void);
-int is_physical_memory(vm_offset_t addr);
-int isa_nmi(int cd);
-vm_paddr_t kvtop(void *addr);
-void setidt(int idx, alias_for_inthand_t *func, int typ, int dpl, int selec);
-int user_dbreg_trap(void);
-
-#endif /* !_MACHINE_MD_VAR_H_ */
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/include/multicall.h
--- a/freebsd-5.3-xen-sparse/i386-xen/include/multicall.h Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,98 +0,0 @@
-/******************************************************************************
- * multicall.h
- */
-
-#ifndef __MULTICALL_H__
-#define __MULTICALL_H__
-
-#include <machine/hypervisor.h>
-#define MAX_MULTICALL_ENTS 8
-extern multicall_entry_t multicall_list[];
-extern int nr_multicall_ents;
-
-static inline void execute_multicall_list(void)
-{
- if ( unlikely(nr_multicall_ents == 0) ) return;
- (void)HYPERVISOR_multicall(multicall_list, nr_multicall_ents);
- nr_multicall_ents = 0;
-}
-
-
-static inline void handle_edge(void)
-{
- if (unlikely(nr_multicall_ents == MAX_MULTICALL_ENTS))
- execute_multicall_list();
-}
-
-static inline void queue_multicall0(unsigned long op)
-{
- int i = nr_multicall_ents;
- multicall_list[i].op = op;
- nr_multicall_ents = i+1;
- handle_edge();
-}
-
-static inline void queue_multicall1(unsigned long op, unsigned long arg1)
-{
- int i = nr_multicall_ents;
- multicall_list[i].op = op;
- multicall_list[i].args[0] = arg1;
- nr_multicall_ents = i+1;
- handle_edge();
-}
-
-static inline void queue_multicall2(
- unsigned long op, unsigned long arg1, unsigned long arg2)
-{
- int i = nr_multicall_ents;
- multicall_list[i].op = op;
- multicall_list[i].args[0] = arg1;
- multicall_list[i].args[1] = arg2;
- nr_multicall_ents = i+1;
- handle_edge();
-}
-
-static inline void queue_multicall3(
- unsigned long op, unsigned long arg1, unsigned long arg2,
- unsigned long arg3)
-{
- int i = nr_multicall_ents;
- multicall_list[i].op = op;
- multicall_list[i].args[0] = arg1;
- multicall_list[i].args[1] = arg2;
- multicall_list[i].args[2] = arg3;
- nr_multicall_ents = i+1;
- handle_edge();
-}
-
-static inline void queue_multicall4(
- unsigned long op, unsigned long arg1, unsigned long arg2,
- unsigned long arg3, unsigned long arg4)
-{
- int i = nr_multicall_ents;
- multicall_list[i].op = op;
- multicall_list[i].args[0] = arg1;
- multicall_list[i].args[1] = arg2;
- multicall_list[i].args[2] = arg3;
- multicall_list[i].args[3] = arg4;
- nr_multicall_ents = i+1;
- handle_edge();
-}
-
-static inline void queue_multicall5(
- unsigned long op, unsigned long arg1, unsigned long arg2,
- unsigned long arg3, unsigned long arg4, unsigned long arg5)
-{
- int i = nr_multicall_ents;
- multicall_list[i].op = op;
- multicall_list[i].args[0] = arg1;
- multicall_list[i].args[1] = arg2;
- multicall_list[i].args[2] = arg3;
- multicall_list[i].args[3] = arg4;
- multicall_list[i].args[4] = arg5;
- nr_multicall_ents = i+1;
- handle_edge();
-}
-
-
-#endif /* __MULTICALL_H__ */
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/include/param.h
--- a/freebsd-5.3-xen-sparse/i386-xen/include/param.h Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,146 +0,0 @@
-/*-
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * William Jolitz.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * from: @(#)param.h 5.8 (Berkeley) 6/28/91
- * $FreeBSD: src/sys/i386/include/param.h,v 1.69 2003/06/14 23:23:53 alc Exp $
- */
-
-/*
- * Machine dependent constants for Intel 386.
- */
-
-/*
- * Round p (pointer or byte index) up to a correctly-aligned value
- * for all data types (int, long, ...). The result is unsigned int
- * and must be cast to any desired pointer type.
- */
-#ifndef _ALIGNBYTES
-#define _ALIGNBYTES (sizeof(int) - 1)
-#endif
-#ifndef _ALIGN
-#define _ALIGN(p) (((unsigned)(p) + _ALIGNBYTES) & ~_ALIGNBYTES)
-#endif
-
-#ifndef _MACHINE
-#define _MACHINE i386-xen
-#endif
-#ifndef _MACHINE_ARCH
-#define _MACHINE_ARCH i386-xen
-#endif
-
-#ifndef _NO_NAMESPACE_POLLUTION
-
-#ifndef _MACHINE_PARAM_H_
-#define _MACHINE_PARAM_H_
-
-#ifndef MACHINE
-#define MACHINE "i386"
-#endif
-#ifndef MACHINE_ARCH
-#define MACHINE_ARCH "i386"
-#endif
-#define MID_MACHINE MID_I386
-
-#ifdef SMP
-#define MAXCPU 16
-#else
-#define MAXCPU 1
-#endif /* SMP */
-
-#define ALIGNBYTES _ALIGNBYTES
-#define ALIGN(p) _ALIGN(p)
-
-#define PAGE_SHIFT 12 /* LOG2(PAGE_SIZE) */
-#define PAGE_SIZE (1<<PAGE_SHIFT) /* bytes/page */
-#define PAGE_MASK (PAGE_SIZE-1)
-#define NPTEPG (PAGE_SIZE/(sizeof (pt_entry_t)))
-
-#ifdef PAE
-#define NPGPTD 4
-#define PDRSHIFT 21 /* LOG2(NBPDR) */
-#else
-#define NPGPTD 1
-#define PDRSHIFT 22 /* LOG2(NBPDR) */
-#endif
-
-#define NBPTD (NPGPTD<<PAGE_SHIFT)
-#define NPDEPTD (NBPTD/(sizeof (pd_entry_t)))
-#define NPDEPG (PAGE_SIZE/(sizeof (pd_entry_t)))
-#define NBPDR (1<<PDRSHIFT) /* bytes/page dir */
-#define PDRMASK (NBPDR-1)
-
-#define IOPAGES 2 /* pages of i/o permission bitmap */
-
-#ifndef KSTACK_PAGES
-#define KSTACK_PAGES 2 /* Includes pcb! */
-#endif
-#define KSTACK_GUARD_PAGES 1 /* pages of kstack guard; 0 disables */
-#define UAREA_PAGES 1 /* holds struct user WITHOUT PCB (see def.) */
-
-/*
- * Ceiling on amount of swblock kva space, can be changed via
- * the kern.maxswzone /boot/loader.conf variable.
- */
-#ifndef VM_SWZONE_SIZE_MAX
-#define VM_SWZONE_SIZE_MAX (32 * 1024 * 1024)
-#endif
-
-/*
- * Ceiling on size of buffer cache (really only effects write queueing,
- * the VM page cache is not effected), can be changed via
- * the kern.maxbcache /boot/loader.conf variable.
- */
-#ifndef VM_BCACHE_SIZE_MAX
-#define VM_BCACHE_SIZE_MAX (200 * 1024 * 1024)
-#endif
-
-/*
- * Mach derived conversion macros
- */
-#define trunc_page(x) ((x) & ~PAGE_MASK)
-#define round_page(x) (((x) + PAGE_MASK) & ~PAGE_MASK)
-#define trunc_4mpage(x) ((x) & ~PDRMASK)
-#define round_4mpage(x) ((((x)) + PDRMASK) & ~PDRMASK)
-
-#define atop(x) ((x) >> PAGE_SHIFT)
-#define ptoa(x) ((x) << PAGE_SHIFT)
-
-#define i386_btop(x) ((x) >> PAGE_SHIFT)
-#define i386_ptob(x) ((x) << PAGE_SHIFT)
-
-#define pgtok(x) ((x) * (PAGE_SIZE / 1024))
-
-#endif /* !_MACHINE_PARAM_H_ */
-#endif /* !_NO_NAMESPACE_POLLUTION */
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/include/pcb.h
--- a/freebsd-5.3-xen-sparse/i386-xen/include/pcb.h Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,96 +0,0 @@
-/*-
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * William Jolitz.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * from: @(#)pcb.h 5.10 (Berkeley) 5/12/91
- * $FreeBSD: src/sys/i386/include/pcb.h,v 1.50 2003/09/30 08:11:36 jeff Exp $
- */
-
-#ifndef _I386_PCB_H_
-#define _I386_PCB_H_
-
-/*
- * Intel 386 process control block
- */
-#include <machine/npx.h>
-
-struct pcb {
- int pcb_cr3;
- int pcb_edi;
- int pcb_esi;
- int pcb_ebp;
- int pcb_esp;
- int pcb_eax;
- int pcb_ebx;
- int pcb_ecx;
- int pcb_edx;
- int pcb_eip;
-
- int pcb_dr0;
- int pcb_dr1;
- int pcb_dr2;
- int pcb_dr3;
- int pcb_dr6;
- int pcb_dr7;
-
- union savefpu pcb_save;
- u_int pcb_flags;
-#define FP_SOFTFP 0x01 /* process using software fltng pnt
emulator */
-#define PCB_DBREGS 0x02 /* process using debug registers */
-#define PCB_NPXTRAP 0x04 /* npx trap pending */
-#define PCB_NPXINITDONE 0x08 /* fpu state is initialized */
-#define PCB_VM86CALL 0x10 /* in vm86 call */
-
- caddr_t pcb_onfault; /* copyin/out fault recovery */
- int pcb_cs;
- int pcb_ds;
- int pcb_ss;
- int pcb_es;
- int pcb_gs;
- int pcb_fs;
- struct pcb_ext *pcb_ext; /* optional pcb extension */
- int pcb_psl; /* process status long */
- void (*pcb_switchout)(void); /* Special switchout function. */
- u_long __pcb_spare[2]; /* adjust to avoid core dump size changes */
-};
-
-#ifdef _KERNEL
-struct trapframe;
-
-void makectx(struct trapframe *, struct pcb *);
-
-void savectx(struct pcb *);
-#endif
-
-#endif /* _I386_PCB_H_ */
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h
--- a/freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,179 +0,0 @@
-/*-
- * Copyright (c) Peter Wemm <peter@xxxxxxxxxxxxxx>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/i386/include/pcpu.h,v 1.41 2003/11/20 23:23:22 peter Exp $
- */
-
-#ifndef _MACHINE_PCPU_H_
-#define _MACHINE_PCPU_H_
-
-#ifdef _KERNEL
-
-#include <machine/segments.h>
-#include <machine/tss.h>
-
-/*
- * The SMP parts are setup in pmap.c and locore.s for the BSP, and
- * mp_machdep.c sets up the data for the AP's to "see" when they awake.
- * The reason for doing it via a struct is so that an array of pointers
- * to each CPU's data can be set up for things like "check curproc on all
- * other processors"
- */
-#define PCPU_MD_FIELDS
\
- struct pcpu *pc_prvspace; /* Self-reference */ \
- struct pmap *pc_curpmap; \
- struct i386tss pc_common_tss; \
- struct segment_descriptor pc_common_tssd; \
- struct segment_descriptor *pc_tss_gdt; \
- int pc_currentldt; \
- u_int pc_acpi_id; \
- u_int pc_apic_id; \
- int *pc_ipi_to_evtchn; \
- int *pc_virq_to_irq; \
- u_int pc_cr2; \
- u_int pc_pdir; \
- u_int pc_lazypmap; \
- u_int pc_rendezvous; \
- u_int pc_cpuast; \
- u_int pc_time_irq; \
- uint64_t pc_processed_system_time;
-
-#if defined(lint)
-
-extern struct pcpu *pcpup;
-
-#define PCPU_GET(member) (pcpup->pc_ ## member)
-#define PCPU_PTR(member) (&pcpup->pc_ ## member)
-#define PCPU_SET(member,value) (pcpup->pc_ ## member = (value))
-
-#elif defined(__GNUC__)
-
-/*
- * Evaluates to the byte offset of the per-cpu variable name.
- */
-#define __pcpu_offset(name)
\
- __offsetof(struct pcpu, name)
-
-/*
- * Evaluates to the type of the per-cpu variable name.
- */
-#define __pcpu_type(name)
\
- __typeof(((struct pcpu *)0)->name)
-
-/*
- * Evaluates to the address of the per-cpu variable name.
- */
-#define __PCPU_PTR(name) __extension__ ({
\
- __pcpu_type(name) *__p; \
- \
- __asm __volatile("movl %%fs:%1,%0; addl %2,%0" \
- : "=r" (__p) \
- : "m" (*(struct pcpu *)(__pcpu_offset(pc_prvspace))), \
- "i" (__pcpu_offset(name))); \
- \
- __p; \
-})
-
-/*
- * Evaluates to the value of the per-cpu variable name.
- */
-#define __PCPU_GET(name) __extension__ ({
\
- __pcpu_type(name) __result; \
- \
- if (sizeof(__result) == 1) { \
- u_char __b; \
- __asm __volatile("movb %%fs:%1,%0" \
- : "=r" (__b) \
- : "m" (*(u_char *)(__pcpu_offset(name)))); \
- __result = *(__pcpu_type(name) *)(void *)&__b; \
- } else if (sizeof(__result) == 2) { \
- u_short __w; \
- __asm __volatile("movw %%fs:%1,%0" \
- : "=r" (__w) \
- : "m" (*(u_short *)(__pcpu_offset(name)))); \
- __result = *(__pcpu_type(name) *)(void *)&__w; \
- } else if (sizeof(__result) == 4) { \
- u_int __i; \
- __asm __volatile("movl %%fs:%1,%0" \
- : "=r" (__i) \
- : "m" (*(u_int *)(__pcpu_offset(name)))); \
- __result = *(__pcpu_type(name) *)(void *)&__i; \
- } else { \
- __result = *__PCPU_PTR(name); \
- } \
- \
- __result; \
-})
-
-/*
- * Sets the value of the per-cpu variable name to value val.
- */
-#define __PCPU_SET(name, val) {
\
- __pcpu_type(name) __val = (val); \
- \
- if (sizeof(__val) == 1) { \
- u_char __b; \
- __b = *(u_char *)&__val; \
- __asm __volatile("movb %1,%%fs:%0" \
- : "=m" (*(u_char *)(__pcpu_offset(name))) \
- : "r" (__b)); \
- } else if (sizeof(__val) == 2) { \
- u_short __w; \
- __w = *(u_short *)&__val; \
- __asm __volatile("movw %1,%%fs:%0" \
- : "=m" (*(u_short *)(__pcpu_offset(name))) \
- : "r" (__w)); \
- } else if (sizeof(__val) == 4) { \
- u_int __i; \
- __i = *(u_int *)&__val; \
- __asm __volatile("movl %1,%%fs:%0" \
- : "=m" (*(u_int *)(__pcpu_offset(name))) \
- : "r" (__i)); \
- } else { \
- *__PCPU_PTR(name) = __val; \
- } \
-}
-
-#define PCPU_GET(member) __PCPU_GET(pc_ ## member)
-#define PCPU_PTR(member) __PCPU_PTR(pc_ ## member)
-#define PCPU_SET(member, val) __PCPU_SET(pc_ ## member, val)
-
-static __inline struct thread *
-__curthread(void)
-{
- struct thread *td;
-
- __asm __volatile("movl %%fs:0,%0" : "=r" (td));
- return (td);
-}
-#define curthread (__curthread())
-
-#else
-#error gcc or lint is required to use this file
-#endif
-
-#endif /* _KERNEL */
-
-#endif /* ! _MACHINE_PCPU_H_ */
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/include/pmap.h
--- a/freebsd-5.3-xen-sparse/i386-xen/include/pmap.h Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,356 +0,0 @@
-/*
- * Copyright (c) 1991 Regents of the University of California.
- * All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * the Systems Programming Group of the University of Utah Computer
- * Science Department and William Jolitz of UUNET Technologies Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * Derived from hp300 version by Mike Hibler, this version by William
- * Jolitz uses a recursive map [a pde points to the page directory] to
- * map the page tables using the pagetables themselves. This is done to
- * reduce the impact on kernel virtual memory for lots of sparse address
- * space, and to reduce the cost of memory to each process.
- *
- * from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90
- * from: @(#)pmap.h 7.4 (Berkeley) 5/12/91
- * $FreeBSD: src/sys/i386/include/pmap.h,v 1.103 2003/11/08 03:01:26 alc Exp $
- */
-
-#ifndef _MACHINE_PMAP_H_
-#define _MACHINE_PMAP_H_
-
-/*
- * Page-directory and page-table entires follow this format, with a few
- * of the fields not present here and there, depending on a lot of things.
- */
- /* ---- Intel Nomenclature ---- */
-#define PG_V 0x001 /* P Valid */
-#define PG_RW 0x002 /* R/W Read/Write */
-#define PG_U 0x004 /* U/S User/Supervisor */
-#define PG_NC_PWT 0x008 /* PWT Write through */
-#define PG_NC_PCD 0x010 /* PCD Cache disable */
-#define PG_A 0x020 /* A Accessed */
-#define PG_M 0x040 /* D Dirty */
-#define PG_PS 0x080 /* PS Page size (0=4k,1=4M) */
-#define PG_G 0x100 /* G Global */
-#define PG_AVAIL1 0x200 /* / Available for system */
-#define PG_AVAIL2 0x400 /* < programmers use */
-#define PG_AVAIL3 0x800 /* \ */
-
-
-/* Our various interpretations of the above */
-#define PG_W PG_AVAIL1 /* "Wired" pseudoflag */
-#define PG_MANAGED PG_AVAIL2
-#define PG_FRAME (~((vm_paddr_t)PAGE_MASK))
-#define PG_PROT (PG_RW|PG_U) /* all protection bits . */
-#define PG_N (PG_NC_PWT|PG_NC_PCD) /* Non-cacheable */
-
-#define PG_KERNEL (PG_V | PG_RW | PG_M | PG_A)
-#define PG_KERNEL_NC (PG_KERNEL | PG_N)
-#define PG_KERNEL_RO (PG_VALID | PG_M | PG_A)
-
-/*
- * Page Protection Exception bits
- */
-
-#define PGEX_P 0x01 /* Protection violation vs. not present */
-#define PGEX_W 0x02 /* during a Write cycle */
-#define PGEX_U 0x04 /* access from User mode (UPL) */
-#define XEN_PAGES 16
-
-/*
- * Size of Kernel address space. This is the number of page table pages
- * (4MB each) to use for the kernel. 256 pages == 1 Gigabyte.
- * This **MUST** be a multiple of 4 (eg: 252, 256, 260, etc).
- */
-
-#ifndef KVA_PAGES
-#ifdef PAE
-#define KVA_PAGES 512
-#else
-#define KVA_PAGES 256
-#endif
-#endif
-
-/*
- * Pte related macros
- */
-#define VADDR(pdi, pti) ((vm_offset_t)(((pdi)<<PDRSHIFT)|((pti)<<PAGE_SHIFT)))
-
-#ifndef NKPT
-#ifdef PAE
-#define NKPT 120 /* actual number of kernel page tables
*/
-#else
-#define NKPT 30 /* actual number of kernel page tables
*/
-#endif
-#endif
-
-/*
- * XEN NOTE: Xen consumes 64MB of memory, so subtract that from the number
- * of page available to the kernel virutal address space.
- */
-#ifndef NKPDE
-#ifdef SMP
-#define NKPDE (KVA_PAGES - 1 - XEN_PAGES) /* number of page tables/pde's */
-#else
-#define NKPDE (KVA_PAGES - XEN_PAGES) /* number of page tables/pde's */
-#endif
-#endif
-
-/*
- * The *PTDI values control the layout of virtual memory
- *
- * XXX This works for now, but I am not real happy with it, I'll fix it
- * right after I fix locore.s and the magic 28K hole
- *
- * SMP_PRIVPAGES: The per-cpu address space is 0xff80000 -> 0xffbfffff
- */
-
-/*
- * XEN NOTE: We need to shift down the start of KVA by 64MB to account for
- * Xen using the upper 64MB.
- *
- * The layout of VA for XenoBSD is:
- * | USER | PTDPTDI | KVA | XEN |
- * | 0x00000000 | 0xbfc00000 | 0xc0000000 | 0xfc000000 - 0xffffffff|
- *
- * Normally it is just:
- * | USER | PTDPTDI | KVA |
- * | 0x00000000 | 0xbfc00000 | 0xc0000000 - 0xffffffff |
- */
-
-#ifdef SMP
-#define MPPTDI (NPDEPTD-1-XEN_PAGES) /* per cpu ptd entry */
-#define KPTDI (MPPTDI-NKPDE) /* start of kernel virtual pde's */
-#else
-#define KPTDI (NPDEPTD-NKPDE-XEN_PAGES) /* start of kernel virtual
pde's */
-#endif /* SMP */
-
-#define PTDPTDI (KPTDI-NPGPTD) /* ptd entry that points to
ptd! */
-
-/*
- * XXX doesn't really belong here I guess...
- */
-#define ISA_HOLE_START 0xa0000
-#define ISA_HOLE_LENGTH (0x100000-ISA_HOLE_START)
-
-#ifndef LOCORE
-
-#include <sys/queue.h>
-#include <sys/_lock.h>
-#include <sys/_mutex.h>
-
-
-typedef uint32_t pd_entry_t;
-typedef uint32_t pt_entry_t;
-
-#define PTESHIFT (2)
-#define PDESHIFT (2)
-
-
-/*
- * Address of current and alternate address space page table maps
- * and directories.
- */
-#ifdef _KERNEL
-extern pt_entry_t PTmap[];
-extern pd_entry_t PTD[];
-extern pd_entry_t PTDpde[];
-
-extern pd_entry_t *IdlePTD; /* physical address of "Idle" state directory */
-
-#include <machine/xen-os.h>
-#include <machine/xenvar.h>
-#include <machine/xenpmap.h>
-
-
-/*
- * virtual address to page table entry and
- * to physical address. Likewise for alternate address space.
- * Note: these work recursively, thus vtopte of a pte will give
- * the corresponding pde that in turn maps it.
- */
-#define vtopte(va) (PTmap + i386_btop(va))
-
-/*
- * Given a virtual address, return the machine address of its PTE
- *
- */
-#define vtoptema(va) pmap_kextract_ma((vm_offset_t) vtopte(va))
-
-/*
- * Routine: pmap_kextract/pmap_kextract_ma
- * Function:
- * Extract the physical/machine page address associated
- * kernel virtual address.
- */
-
-static __inline vm_paddr_t
-pmap_kextract_ma(vm_offset_t va)
-{
- vm_paddr_t ma;
- if ((ma = PTD[va >> PDRSHIFT]) & PG_PS) {
- ma = (ma & ~(NBPDR - 1)) | (va & (NBPDR - 1));
- } else {
- ma = (*vtopte(va) & PG_FRAME) | (va & PAGE_MASK);
- }
- return ma;
-}
-
-static __inline vm_paddr_t
-pmap_kextract(vm_offset_t va)
-{
- return xpmap_mtop(pmap_kextract_ma(va));
-}
-
-#define vtophys(va) pmap_kextract(((vm_offset_t) (va)))
-#define vtomach(va) pmap_kextract_ma(((vm_offset_t) (va)))
-
-static __inline pt_entry_t
-pte_load_clear(pt_entry_t *ptep)
-{
- pt_entry_t r;
-
- r = PT_GET(ptep);
- PT_CLEAR_VA(ptep, TRUE);
- return (r);
-}
-static __inline pt_entry_t
-pte_load_store(pt_entry_t *ptep, pt_entry_t v)
-{
- pt_entry_t r;
- r = PT_GET(ptep);
- PT_SET_VA_MA(ptep, v, TRUE);
- return (r);
-}
-
-#define pte_store(ptep, pte) PT_SET_VA(ptep, pte, TRUE)
-#define pte_clear(pte) PT_CLEAR_VA(pte, TRUE)
-
-
-#endif /* _KERNEL */
-
-/*
- * Pmap stuff
- */
-struct pv_entry;
-
-struct md_page {
- int pv_list_count;
- TAILQ_HEAD(,pv_entry) pv_list;
-};
-
-struct pmap {
- struct mtx pm_mtx;
- pd_entry_t *pm_pdir; /* KVA of page directory */
- TAILQ_HEAD(,pv_entry) pm_pvlist; /* list of mappings in pmap */
- u_int pm_active; /* active on cpus */
- struct pmap_statistics pm_stats; /* pmap statistics */
- LIST_ENTRY(pmap) pm_list; /* List of all pmaps */
-};
-
-
-typedef struct pmap *pmap_t;
-
-#ifdef _KERNEL
-extern struct pmap kernel_pmap_store;
-#define kernel_pmap (&kernel_pmap_store)
-
-#define PMAP_LOCK(pmap)mtx_lock(&(pmap)->pm_mtx)
-#define PMAP_LOCK_ASSERT(pmap, type) \
-mtx_assert(&(pmap)->pm_mtx, (type))
-#define PMAP_LOCK_DESTROY(pmap)mtx_destroy(&(pmap)->pm_mtx)
-#define PMAP_LOCK_INIT(pmap)mtx_init(&(pmap)->pm_mtx, "pmap", \
- NULL, MTX_DEF | MTX_DUPOK)
-#define PMAP_LOCKED(pmap)mtx_owned(&(pmap)->pm_mtx)
-#define PMAP_MTX(pmap)(&(pmap)->pm_mtx)
-#define PMAP_TRYLOCK(pmap)mtx_trylock(&(pmap)->pm_mtx)
-#define PMAP_UNLOCK(pmap)mtx_unlock(&(pmap)->pm_mtx)
-
-#endif
-
-/*
- * For each vm_page_t, there is a list of all currently valid virtual
- * mappings of that page. An entry is a pv_entry_t, the list is pv_table.
- */
-typedef struct pv_entry {
- pmap_t pv_pmap; /* pmap where mapping lies */
- vm_offset_t pv_va; /* virtual address for mapping */
- TAILQ_ENTRY(pv_entry) pv_list;
- TAILQ_ENTRY(pv_entry) pv_plist;
-} *pv_entry_t;
-
-#ifdef _KERNEL
-
-#define NPPROVMTRR 8
-#define PPRO_VMTRRphysBase0 0x200
-#define PPRO_VMTRRphysMask0 0x201
-struct ppro_vmtrr {
- u_int64_t base, mask;
-};
-extern struct ppro_vmtrr PPro_vmtrr[NPPROVMTRR];
-
-extern caddr_t CADDR1;
-extern pt_entry_t *CMAP1;
-extern vm_paddr_t avail_end;
-extern vm_paddr_t phys_avail[];
-extern int pseflag;
-extern int pgeflag;
-extern char *ptvmmap; /* poor name! */
-extern vm_offset_t virtual_avail;
-extern vm_offset_t virtual_end;
-
-#define pmap_page_is_mapped(m)(!TAILQ_EMPTY(&(m)->md.pv_list))
-
-void pmap_bootstrap(vm_paddr_t, vm_paddr_t);
-void pmap_kenter(vm_offset_t va, vm_paddr_t pa);
-void pmap_kenter_ma(vm_offset_t va, vm_paddr_t pa);
-void *pmap_kenter_temporary(vm_paddr_t pa, int i);
-void pmap_kremove(vm_offset_t);
-void *pmap_mapdev(vm_paddr_t, vm_size_t);
-void pmap_unmapdev(vm_offset_t, vm_size_t);
-pt_entry_t *pmap_pte(pmap_t, vm_offset_t) __pure2;
-void pmap_set_pg(void);
-void pmap_invalidate_page(pmap_t, vm_offset_t);
-void pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t);
-void pmap_invalidate_all(pmap_t);
-void pmap_lazyfix_action(void);
-
-void pmap_map_readonly(pmap_t pmap, vm_offset_t va, int len);
-void pmap_map_readwrite(pmap_t pmap, vm_offset_t va, int len);
-
-
-#endif /* _KERNEL */
-
-#endif /* !LOCORE */
-
-#endif /* !_MACHINE_PMAP_H_ */
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/include/segments.h
--- a/freebsd-5.3-xen-sparse/i386-xen/include/segments.h Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,260 +0,0 @@
-/*-
- * Copyright (c) 1989, 1990 William F. Jolitz
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * William Jolitz.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * from: @(#)segments.h 7.1 (Berkeley) 5/9/91
- * $FreeBSD: src/sys/i386/include/segments.h,v 1.36 2003/11/03 21:12:04 jhb
Exp $
- */
-
-#ifndef _MACHINE_SEGMENTS_H_
-#define _MACHINE_SEGMENTS_H_
-
-/*
- * 386 Segmentation Data Structures and definitions
- * William F. Jolitz (william@xxxxxxxxxxxxxxxxxx) 6/20/1989
- */
-
-/*
- * Selectors
- */
-
-#define ISPL(s) ((s)&3) /* what is the priority level of a
selector */
-#define SEL_KPL 1 /* kernel priority level */
-#define SEL_UPL 3 /* user priority level */
-#define ISLDT(s) ((s)&SEL_LDT) /* is it local or global */
-#define SEL_LDT 4 /* local descriptor table */
-#define IDXSEL(s) (((s)>>3) & 0x1fff) /* index of
selector */
-#define LSEL(s,r) (((s)<<3) | SEL_LDT | r) /* a local
selector */
-#define GSEL(s,r) (((s)<<3) | r) /* a global
selector */
-
-/*
- * Memory and System segment descriptors
- */
-struct segment_descriptor {
- unsigned sd_lolimit:16 ; /* segment extent (lsb) */
- unsigned sd_lobase:24 __packed; /* segment base address (lsb) */
- unsigned sd_type:5 ; /* segment type */
- unsigned sd_dpl:2 ; /* segment descriptor priority level */
- unsigned sd_p:1 ; /* segment descriptor present */
- unsigned sd_hilimit:4 ; /* segment extent (msb) */
- unsigned sd_xx:2 ; /* unused */
- unsigned sd_def32:1 ; /* default 32 vs 16 bit size */
- unsigned sd_gran:1 ; /* limit granularity (byte/page units)*/
- unsigned sd_hibase:8 ; /* segment base address (msb) */
-} ;
-
-/*
- * Gate descriptors (e.g. indirect descriptors)
- */
-struct gate_descriptor {
- unsigned gd_looffset:16 ; /* gate offset (lsb) */
- unsigned gd_selector:16 ; /* gate segment selector */
- unsigned gd_stkcpy:5 ; /* number of stack wds to cpy */
- unsigned gd_xx:3 ; /* unused */
- unsigned gd_type:5 ; /* segment type */
- unsigned gd_dpl:2 ; /* segment descriptor priority level */
- unsigned gd_p:1 ; /* segment descriptor present */
- unsigned gd_hioffset:16 ; /* gate offset (msb) */
-} ;
-
-/*
- * Generic descriptor
- */
-union descriptor {
- struct segment_descriptor sd;
- struct gate_descriptor gd;
-};
-
- /* system segments and gate types */
-#define SDT_SYSNULL 0 /* system null */
-#define SDT_SYS286TSS 1 /* system 286 TSS available */
-#define SDT_SYSLDT 2 /* system local descriptor table */
-#define SDT_SYS286BSY 3 /* system 286 TSS busy */
-#define SDT_SYS286CGT 4 /* system 286 call gate */
-#define SDT_SYSTASKGT 5 /* system task gate */
-#define SDT_SYS286IGT 6 /* system 286 interrupt gate */
-#define SDT_SYS286TGT 7 /* system 286 trap gate */
-#define SDT_SYSNULL2 8 /* system null again */
-#define SDT_SYS386TSS 9 /* system 386 TSS available */
-#define SDT_SYSNULL3 10 /* system null again */
-#define SDT_SYS386BSY 11 /* system 386 TSS busy */
-#define SDT_SYS386CGT 12 /* system 386 call gate */
-#define SDT_SYSNULL4 13 /* system null again */
-#define SDT_SYS386IGT 14 /* system 386 interrupt gate */
-#define SDT_SYS386TGT 15 /* system 386 trap gate */
-
- /* memory segment types */
-#define SDT_MEMRO 16 /* memory read only */
-#define SDT_MEMROA 17 /* memory read only accessed */
-#define SDT_MEMRW 18 /* memory read write */
-#define SDT_MEMRWA 19 /* memory read write accessed */
-#define SDT_MEMROD 20 /* memory read only expand dwn limit */
-#define SDT_MEMRODA 21 /* memory read only expand dwn limit
accessed */
-#define SDT_MEMRWD 22 /* memory read write expand dwn limit */
-#define SDT_MEMRWDA 23 /* memory read write expand dwn limit
accessed */
-#define SDT_MEME 24 /* memory execute only */
-#define SDT_MEMEA 25 /* memory execute only accessed */
-#define SDT_MEMER 26 /* memory execute read */
-#define SDT_MEMERA 27 /* memory execute read accessed */
-#define SDT_MEMEC 28 /* memory execute only conforming */
-#define SDT_MEMEAC 29 /* memory execute only accessed
conforming */
-#define SDT_MEMERC 30 /* memory execute read conforming */
-#define SDT_MEMERAC 31 /* memory execute read accessed
conforming */
-
-/*
- * Software definitions are in this convenient format,
- * which are translated into inconvenient segment descriptors
- * when needed to be used by the 386 hardware
- */
-
-struct soft_segment_descriptor {
- unsigned ssd_base ; /* segment base address */
- unsigned ssd_limit ; /* segment extent */
- unsigned ssd_type:5 ; /* segment type */
- unsigned ssd_dpl:2 ; /* segment descriptor priority level */
- unsigned ssd_p:1 ; /* segment descriptor present */
- unsigned ssd_xx:4 ; /* unused */
- unsigned ssd_xx1:2 ; /* unused */
- unsigned ssd_def32:1 ; /* default 32 vs 16 bit size */
- unsigned ssd_gran:1 ; /* limit granularity (byte/page units)*/
-};
-
-/*
- * region descriptors, used to load gdt/idt tables before segments yet exist.
- */
-struct region_descriptor {
- unsigned rd_limit:16; /* segment extent */
- unsigned rd_base:32 __packed; /* base address */
-};
-
-/*
- * Segment Protection Exception code bits
- */
-
-#define SEGEX_EXT 0x01 /* recursive or externally induced */
-#define SEGEX_IDT 0x02 /* interrupt descriptor table */
-#define SEGEX_TI 0x04 /* local descriptor table */
- /* other bits are affected descriptor index */
-#define SEGEX_IDX(s) (((s)>>3)&0x1fff)
-
-/*
- * Size of IDT table
- */
-
-#define NIDT 256 /* 32 reserved, 0x80 syscall, most are
h/w */
-#define NRSVIDT 32 /* reserved entries for cpu exceptions
*/
-
-/*
- * Entries in the Interrupt Descriptor Table (IDT)
- */
-#define IDT_DE 0 /* #DE: Divide Error */
-#define IDT_DB 1 /* #DB: Debug */
-#define IDT_NMI 2 /* Nonmaskable External Interrupt */
-#define IDT_BP 3 /* #BP: Breakpoint */
-#define IDT_OF 4 /* #OF: Overflow */
-#define IDT_BR 5 /* #BR: Bound Range Exceeded */
-#define IDT_UD 6 /* #UD: Undefined/Invalid Opcode */
-#define IDT_NM 7 /* #NM: No Math Coprocessor */
-#define IDT_DF 8 /* #DF: Double Fault */
-#define IDT_FPUGP 9 /* Coprocessor Segment Overrun */
-#define IDT_TS 10 /* #TS: Invalid TSS */
-#define IDT_NP 11 /* #NP: Segment Not Present */
-#define IDT_SS 12 /* #SS: Stack Segment Fault */
-#define IDT_GP 13 /* #GP: General Protection Fault */
-#define IDT_PF 14 /* #PF: Page Fault */
-#define IDT_MF 16 /* #MF: FPU Floating-Point Error */
-#define IDT_AC 17 /* #AC: Alignment Check */
-#define IDT_MC 18 /* #MC: Machine Check */
-#define IDT_XF 19 /* #XF: SIMD Floating-Point Exception */
-#define IDT_IO_INTS NRSVIDT /* Base of IDT entries for I/O
interrupts. */
-#define IDT_SYSCALL 0x80 /* System Call Interrupt Vector */
-
-/*
- * Entries in the Global Descriptor Table (GDT)
- */
-#define GNULL_SEL 0 /* Null Descriptor */
-#if 0
-#define GCODE_SEL 1 /* Kernel Code Descriptor */
-#define GDATA_SEL 2 /* Kernel Data Descriptor */
-#else
-#define GCODE_SEL (__KERNEL_CS >> 3) /* Kernel Code
Descriptor */
-#define GDATA_SEL (__KERNEL_DS >> 3) /* Kernel Data
Descriptor */
-#endif
-#define GPRIV_SEL 3 /* SMP Per-Processor Private Data */
-#define GPROC0_SEL 4 /* Task state process slot zero and up
*/
-#define GLDT_SEL 5 /* LDT - eventually one per process */
-#define GUSERLDT_SEL 6 /* User LDT */
-#define GTGATE_SEL 7 /* Process task switch gate */
-#define GBIOSLOWMEM_SEL 8 /* BIOS low memory access (must be
entry 8) */
-#define GPANIC_SEL 9 /* Task state to consider panic from */
-#define GBIOSCODE32_SEL 10 /* BIOS interface (32bit Code) */
-#define GBIOSCODE16_SEL 11 /* BIOS interface (16bit Code) */
-#define GBIOSDATA_SEL 12 /* BIOS interface (Data) */
-#define GBIOSUTIL_SEL 13 /* BIOS interface (Utility) */
-#define GBIOSARGS_SEL 14 /* BIOS interface (Arguments) */
-
-#define NGDT 4
-
-/*
- * Entries in the Local Descriptor Table (LDT)
- */
-#define LSYS5CALLS_SEL 0 /* forced by intel BCS */
-#define LSYS5SIGR_SEL 1
-#define L43BSDCALLS_SEL 2 /* notyet */
-#define LUCODE_SEL 3
-#define LSOL26CALLS_SEL 4 /* Solaris >= 2.6 system call gate */
-#define LUDATA_SEL 5
-/* separate stack, es,fs,gs sels ? */
-/* #define LPOSIXCALLS_SEL 5*/ /* notyet */
-#define LBSDICALLS_SEL 16 /* BSDI system call gate */
-#define NLDT (LBSDICALLS_SEL + 1)
-
-#ifdef _KERNEL
-extern int _default_ldt;
-extern union descriptor *gdt;
-extern struct soft_segment_descriptor gdt_segs[];
-extern struct gate_descriptor *idt;
-extern union descriptor *ldt;
-extern struct region_descriptor r_gdt, r_idt;
-
-void lgdt(struct region_descriptor *rdp);
-void lgdt_finish(void);
-void sdtossd(struct segment_descriptor *sdp,
- struct soft_segment_descriptor *ssdp);
-void ssdtosd(struct soft_segment_descriptor *ssdp,
- struct segment_descriptor *sdp);
-#endif /* _KERNEL */
-
-#endif /* !_MACHINE_SEGMENTS_H_ */
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/include/synch_bitops.h
--- a/freebsd-5.3-xen-sparse/i386-xen/include/synch_bitops.h Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,82 +0,0 @@
-#ifndef __XEN_SYNCH_BITOPS_H__
-#define __XEN_SYNCH_BITOPS_H__
-
-/*
- * Copyright 1992, Linus Torvalds.
- * Heavily modified to provide guaranteed strong synchronisation
- * when communicating with Xen or other guest OSes running on other CPUs.
- */
-
-
-#define ADDR (*(volatile long *) addr)
-
-static __inline__ void synch_set_bit(int nr, volatile void * addr)
-{
- __asm__ __volatile__ (
- "lock btsl %1,%0"
- : "=m" (ADDR) : "Ir" (nr) : "memory" );
-}
-
-static __inline__ void synch_clear_bit(int nr, volatile void * addr)
-{
- __asm__ __volatile__ (
- "lock btrl %1,%0"
- : "=m" (ADDR) : "Ir" (nr) : "memory" );
-}
-
-static __inline__ void synch_change_bit(int nr, volatile void * addr)
-{
- __asm__ __volatile__ (
- "lock btcl %1,%0"
- : "=m" (ADDR) : "Ir" (nr) : "memory" );
-}
-
-static __inline__ int synch_test_and_set_bit(int nr, volatile void * addr)
-{
- int oldbit;
- __asm__ __volatile__ (
- "lock btsl %2,%1\n\tsbbl %0,%0"
- : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory");
- return oldbit;
-}
-
-static __inline__ int synch_test_and_clear_bit(int nr, volatile void * addr)
-{
- int oldbit;
- __asm__ __volatile__ (
- "lock btrl %2,%1\n\tsbbl %0,%0"
- : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory");
- return oldbit;
-}
-
-static __inline__ int synch_test_and_change_bit(int nr, volatile void * addr)
-{
- int oldbit;
-
- __asm__ __volatile__ (
- "lock btcl %2,%1\n\tsbbl %0,%0"
- : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory");
- return oldbit;
-}
-
-static __inline__ int synch_const_test_bit(int nr, const volatile void * addr)
-{
- return ((1UL << (nr & 31)) &
- (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
-}
-
-static __inline__ int synch_var_test_bit(int nr, volatile void * addr)
-{
- int oldbit;
- __asm__ __volatile__ (
- "btl %2,%1\n\tsbbl %0,%0"
- : "=r" (oldbit) : "m" (ADDR), "Ir" (nr) );
- return oldbit;
-}
-
-#define synch_test_bit(nr,addr) \
-(__builtin_constant_p(nr) ? \
- synch_const_test_bit((nr),(addr)) : \
- synch_var_test_bit((nr),(addr)))
-
-#endif /* __XEN_SYNCH_BITOPS_H__ */
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/include/trap.h
--- a/freebsd-5.3-xen-sparse/i386-xen/include/trap.h Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,111 +0,0 @@
-/*-
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * William Jolitz.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * from: @(#)trap.h 5.4 (Berkeley) 5/9/91
- * $FreeBSD: src/sys/i386/include/trap.h,v 1.13 2001/07/12 06:32:51 peter Exp $
- */
-
-#ifndef _MACHINE_TRAP_H_
-#define _MACHINE_TRAP_H_
-
-/*
- * Trap type values
- * also known in trap.c for name strings
- */
-
-#define T_PRIVINFLT 1 /* privileged instruction */
-#define T_BPTFLT 3 /* breakpoint instruction */
-#define T_ARITHTRAP 6 /* arithmetic trap */
-#define T_PROTFLT 9 /* protection fault */
-#define T_TRCTRAP 10 /* debug exception (sic) */
-#define T_PAGEFLT 12 /* page fault */
-#define T_ALIGNFLT 14 /* alignment fault */
-
-#define T_NESTED 16
-#define T_HYPCALLBACK 17 /* hypervisor callback */
-
-
-#define T_DIVIDE 18 /* integer divide fault */
-#define T_NMI 19 /* non-maskable trap */
-#define T_OFLOW 20 /* overflow trap */
-#define T_BOUND 21 /* bound instruction fault */
-#define T_DNA 22 /* device not available fault */
-#define T_DOUBLEFLT 23 /* double fault */
-#define T_FPOPFLT 24 /* fp coprocessor operand fetch fault */
-#define T_TSSFLT 25 /* invalid tss fault */
-#define T_SEGNPFLT 26 /* segment not present fault */
-#define T_STKFLT 27 /* stack fault */
-#define T_MCHK 28 /* machine check trap */
-#define T_XMMFLT 29 /* SIMD floating-point exception */
-#define T_RESERVED 30 /* reserved (unknown) */
-
-/* XXX most of the following codes aren't used, but could be. */
-
-/* definitions for <sys/signal.h> */
-#define ILL_RESAD_FAULT T_RESADFLT
-#define ILL_PRIVIN_FAULT T_PRIVINFLT
-#define ILL_RESOP_FAULT T_RESOPFLT
-#define ILL_ALIGN_FAULT T_ALIGNFLT
-#define ILL_FPOP_FAULT T_FPOPFLT /* coprocessor operand
fault */
-
-/* portable macros for SIGFPE/ARITHTRAP */
-#define FPE_INTOVF 1 /* integer overflow */
-#define FPE_INTDIV 2 /* integer divide by zero */
-#define FPE_FLTDIV 3 /* floating point divide by zero */
-#define FPE_FLTOVF 4 /* floating point overflow */
-#define FPE_FLTUND 5 /* floating point underflow */
-#define FPE_FLTRES 6 /* floating point inexact result */
-#define FPE_FLTINV 7 /* invalid floating point operation */
-#define FPE_FLTSUB 8 /* subscript out of range */
-
-/* old FreeBSD macros, deprecated */
-#define FPE_INTOVF_TRAP 0x1 /* integer overflow */
-#define FPE_INTDIV_TRAP 0x2 /* integer divide by zero */
-#define FPE_FLTDIV_TRAP 0x3 /* floating/decimal divide by zero */
-#define FPE_FLTOVF_TRAP 0x4 /* floating overflow */
-#define FPE_FLTUND_TRAP 0x5 /* floating underflow */
-#define FPE_FPU_NP_TRAP 0x6 /* floating point unit not present */
-#define FPE_SUBRNG_TRAP 0x7 /* subrange out of bounds */
-
-/* codes for SIGBUS */
-#define BUS_PAGE_FAULT T_PAGEFLT /* page fault
protection base */
-#define BUS_SEGNP_FAULT T_SEGNPFLT /* segment not present
*/
-#define BUS_STK_FAULT T_STKFLT /* stack segment */
-#define BUS_SEGM_FAULT T_RESERVED /* segment protection
base */
-
-/* Trap's coming from user mode */
-#define T_USER 0x100
-
-#endif /* !_MACHINE_TRAP_H_ */
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/include/vmparam.h
--- a/freebsd-5.3-xen-sparse/i386-xen/include/vmparam.h Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,141 +0,0 @@
-/*-
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
- * Copyright (c) 1994 John S. Dyson
- * All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * William Jolitz.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * from: @(#)vmparam.h 5.9 (Berkeley) 5/12/91
- * $FreeBSD: src/sys/i386/include/vmparam.h,v 1.37 2003/10/01 23:46:08 peter
Exp $
- */
-
-
-#ifndef _MACHINE_VMPARAM_H_
-#define _MACHINE_VMPARAM_H_ 1
-
-/*
- * Machine dependent constants for 386.
- */
-
-#define VM_PROT_READ_IS_EXEC /* if you can read -- then you can exec */
-
-/*
- * Virtual memory related constants, all in bytes
- */
-#define MAXTSIZ (128UL*1024*1024) /* max text size */
-#ifndef DFLDSIZ
-#define DFLDSIZ (128UL*1024*1024) /* initial data size
limit */
-#endif
-#ifndef MAXDSIZ
-#define MAXDSIZ (512UL*1024*1024) /* max data size */
-#endif
-#ifndef DFLSSIZ
-#define DFLSSIZ (8UL*1024*1024) /* initial stack size
limit */
-#endif
-#ifndef MAXSSIZ
-#define MAXSSIZ (64UL*1024*1024) /* max stack size */
-#endif
-#ifndef SGROWSIZ
-#define SGROWSIZ (128UL*1024) /* amount to grow stack */
-#endif
-
-#define USRTEXT (1*PAGE_SIZE) /* base of user text
XXX bogus */
-
-/*
- * The time for a process to be blocked before being very swappable.
- * This is a number of seconds which the system takes as being a non-trivial
- * amount of real time. You probably shouldn't change this;
- * it is used in subtle ways (fractions and multiples of it are, that is, like
- * half of a ``long time'', almost a long time, etc.)
- * It is related to human patience and other factors which don't really
- * change over time.
- */
-#define MAXSLP 20
-
-
-/*
- * Kernel physical load address.
- */
-#ifndef KERNLOAD
-#define KERNLOAD (1 << PDRSHIFT)
-#endif
-
-/*
- * Virtual addresses of things. Derived from the page directory and
- * page table indexes from pmap.h for precision.
- * Because of the page that is both a PD and PT, it looks a little
- * messy at times, but hey, we'll do anything to save a page :-)
- */
-
-#define VM_MAX_KERNEL_ADDRESS VADDR(KPTDI+NKPDE-1, NPTEPG-1)
-#define VM_MIN_KERNEL_ADDRESS VADDR(PTDPTDI, PTDPTDI)
-
-#define KERNBASE VADDR(KPTDI, 0)
-
-#define UPT_MAX_ADDRESS VADDR(PTDPTDI, PTDPTDI)
-#define UPT_MIN_ADDRESS VADDR(PTDPTDI, 0)
-
-#define VM_MAXUSER_ADDRESS VADDR(PTDPTDI, 0)
-
-#define USRSTACK VM_MAXUSER_ADDRESS
-
-#define VM_MAX_ADDRESS VADDR(PTDPTDI, PTDPTDI)
-#define VM_MIN_ADDRESS ((vm_offset_t)0)
-
-/* virtual sizes (bytes) for various kernel submaps */
-#ifndef VM_KMEM_SIZE
-#define VM_KMEM_SIZE (12 * 1024 * 1024)
-#endif
-
-/*
- * How many physical pages per KVA page allocated.
- * min(max(VM_KMEM_SIZE, Physical memory/VM_KMEM_SIZE_SCALE), VM_KMEM_SIZE_MAX)
- * is the total KVA space allocated for kmem_map.
- */
-#ifndef VM_KMEM_SIZE_SCALE
-#define VM_KMEM_SIZE_SCALE (3)
-#endif
-
-/*
- * Ceiling on amount of kmem_map kva space.
- */
-#ifndef VM_KMEM_SIZE_MAX
-#define VM_KMEM_SIZE_MAX (320 * 1024 * 1024)
-#endif
-
-/* initial pagein size of beginning of executable file */
-#ifndef VM_INITIAL_PAGEIN
-#define VM_INITIAL_PAGEIN 16
-#endif
-
-#endif /* _MACHINE_VMPARAM_H_ */
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/include/xen-os.h
--- a/freebsd-5.3-xen-sparse/i386-xen/include/xen-os.h Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,314 +0,0 @@
-/******************************************************************************
- * os.h
- *
- * random collection of macros and definition
- */
-
-#ifndef _OS_H_
-#define _OS_H_
-#include <machine/param.h>
-
-#ifndef NULL
-#define NULL (void *)0
-#endif
-
-/* Somewhere in the middle of the GCC 2.96 development cycle, we implemented
- a mechanism by which the user can annotate likely branch directions and
- expect the blocks to be reordered appropriately. Define __builtin_expect
- to nothing for earlier compilers. */
-
-#if __GNUC__ == 2 && __GNUC_MINOR__ < 96
-#define __builtin_expect(x, expected_value) (x)
-#endif
-
-
-
-/*
- * These are the segment descriptors provided for us by the hypervisor.
- * For now, these are hardwired -- guest OSes cannot update the GDT
- * or LDT.
- *
- * It shouldn't be hard to support descriptor-table frobbing -- let me
- * know if the BSD or XP ports require flexibility here.
- */
-
-
-/*
- * these are also defined in hypervisor-if.h but can't be pulled in as
- * they are used in start of day assembly. Need to clean up the .h files
- * a bit more...
- */
-
-#ifndef FLAT_RING1_CS
-#define FLAT_RING1_CS 0x0819
-#define FLAT_RING1_DS 0x0821
-#define FLAT_RING3_CS 0x082b
-#define FLAT_RING3_DS 0x0833
-#endif
-
-#define __KERNEL_CS FLAT_RING1_CS
-#define __KERNEL_DS FLAT_RING1_DS
-
-/* Everything below this point is not included by assembler (.S) files. */
-#ifndef __ASSEMBLY__
-#include <sys/types.h>
-
-#include <machine/hypervisor-ifs.h>
-void printk(const char *fmt, ...);
-
-/* some function prototypes */
-void trap_init(void);
-
-extern int preemptable;
-#define preempt_disable() (preemptable = 0)
-#define preempt_enable() (preemptable = 1)
-#define preempt_enable_no_resched() (preemptable = 1)
-
-
-/*
- * STI/CLI equivalents. These basically set and clear the virtual
- * event_enable flag in teh shared_info structure. Note that when
- * the enable bit is set, there may be pending events to be handled.
- * We may therefore call into do_hypervisor_callback() directly.
- */
-#define likely(x) __builtin_expect((x),1)
-#define unlikely(x) __builtin_expect((x),0)
-
-
-
-#define __cli() \
-do { \
- vcpu_info_t *_vcpu; \
- preempt_disable(); \
- _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
- _vcpu->evtchn_upcall_mask = 1; \
- preempt_enable_no_resched(); \
- barrier(); \
-} while (0)
-
-#define __sti() \
-do { \
- vcpu_info_t *_vcpu; \
- barrier(); \
- preempt_disable(); \
- _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
- _vcpu->evtchn_upcall_mask = 0; \
- barrier(); /* unmask then check (avoid races) */ \
- if ( unlikely(_vcpu->evtchn_upcall_pending) ) \
- force_evtchn_callback(); \
- preempt_enable(); \
-} while (0)
-
-
-#define __save_flags(x) \
-do { \
- vcpu_info_t *vcpu; \
- vcpu = HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
- (x) = _vcpu->evtchn_upcall_mask; \
-} while (0)
-
-#define __restore_flags(x) \
-do { \
- vcpu_info_t *_vcpu; \
- barrier(); \
- preempt_disable(); \
- _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
- if ((_vcpu->evtchn_upcall_mask = (x)) == 0) { \
- barrier(); /* unmask then check (avoid races) */ \
- if ( unlikely(_vcpu->evtchn_upcall_pending) ) \
- force_evtchn_callback(); \
- preempt_enable(); \
- } else \
- preempt_enable_no_resched(); \
-} while (0)
-
-
-#define __save_and_cli(x) \
-do { \
- vcpu_info_t *_vcpu; \
- preempt_disable(); \
- _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
- (x) = _vcpu->evtchn_upcall_mask; \
- _vcpu->evtchn_upcall_mask = 1; \
- preempt_enable_no_resched(); \
- barrier(); \
-} while (0)
-
-
-#define cli() __cli()
-#define sti() __sti()
-#define save_flags(x) __save_flags(x)
-#define restore_flags(x) __restore_flags(x)
-#define save_and_cli(x) __save_and_cli(x)
-
-#define local_irq_save(x) __save_and_cli(x)
-#define local_irq_restore(x) __restore_flags(x)
-#define local_irq_disable() __cli()
-#define local_irq_enable() __sti()
-
-#define mtx_lock_irqsave(lock, x) {local_irq_save((x)); mtx_lock_spin((lock));}
-#define mtx_unlock_irqrestore(lock, x) {mtx_unlock_spin((lock));
local_irq_restore((x)); }
-
-#define mb()
-#define rmb()
-#define wmb()
-#ifdef SMP
-#define smp_mb() mb()
-#define smp_rmb() rmb()
-#define smp_wmb() wmb()
-#define smp_read_barrier_depends() read_barrier_depends()
-#define set_mb(var, value) do { xchg(&var, value); } while (0)
-#else
-#define smp_mb() barrier()
-#define smp_rmb() barrier()
-#define smp_wmb() barrier()
-#define smp_read_barrier_depends() do { } while(0)
-#define set_mb(var, value) do { var = value; barrier(); } while (0)
-#endif
-
-
-/* This is a barrier for the compiler only, NOT the processor! */
-#define barrier() __asm__ __volatile__("": : :"memory")
-
-#define LOCK_PREFIX ""
-#define LOCK ""
-#define ADDR (*(volatile long *) addr)
-/*
- * Make sure gcc doesn't try to be clever and move things around
- * on us. We need to use _exactly_ the address the user gave us,
- * not some alias that contains the same information.
- */
-typedef struct { volatile int counter; } atomic_t;
-
-
-
-#define xen_xchg(ptr,v) \
- ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
-struct __xchg_dummy { unsigned long a[100]; };
-#define __xg(x) ((volatile struct __xchg_dummy *)(x))
-static __inline unsigned long __xchg(unsigned long x, volatile void * ptr,
- int size)
-{
- switch (size) {
- case 1:
- __asm__ __volatile__("xchgb %b0,%1"
- :"=q" (x)
- :"m" (*__xg(ptr)), "0" (x)
- :"memory");
- break;
- case 2:
- __asm__ __volatile__("xchgw %w0,%1"
- :"=r" (x)
- :"m" (*__xg(ptr)), "0" (x)
- :"memory");
- break;
- case 4:
- __asm__ __volatile__("xchgl %0,%1"
- :"=r" (x)
- :"m" (*__xg(ptr)), "0" (x)
- :"memory");
- break;
- }
- return x;
-}
-
-/**
- * test_and_clear_bit - Clear a bit and return its old value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.
- * It also implies a memory barrier.
- */
-static __inline__ int test_and_clear_bit(int nr, volatile void * addr)
-{
- int oldbit;
-
- __asm__ __volatile__( LOCK_PREFIX
- "btrl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),"=m" (ADDR)
- :"Ir" (nr) : "memory");
- return oldbit;
-}
-
-static __inline__ int constant_test_bit(int nr, const volatile void * addr)
-{
- return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >>
5])) != 0;
-}
-
-static __inline__ int variable_test_bit(int nr, volatile void * addr)
-{
- int oldbit;
-
- __asm__ __volatile__(
- "btl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit)
- :"m" (ADDR),"Ir" (nr));
- return oldbit;
-}
-
-#define test_bit(nr,addr) \
-(__builtin_constant_p(nr) ? \
- constant_test_bit((nr),(addr)) : \
- variable_test_bit((nr),(addr)))
-
-
-/**
- * set_bit - Atomically set a bit in memory
- * @nr: the bit to set
- * @addr: the address to start counting from
- *
- * This function is atomic and may not be reordered. See __set_bit()
- * if you do not require the atomic guarantees.
- * Note that @nr may be almost arbitrarily large; this function is not
- * restricted to acting on a single-word quantity.
- */
-static __inline__ void set_bit(int nr, volatile void * addr)
-{
- __asm__ __volatile__( LOCK_PREFIX
- "btsl %1,%0"
- :"=m" (ADDR)
- :"Ir" (nr));
-}
-
-/**
- * clear_bit - Clears a bit in memory
- * @nr: Bit to clear
- * @addr: Address to start counting from
- *
- * clear_bit() is atomic and may not be reordered. However, it does
- * not contain a memory barrier, so if it is used for locking purposes,
- * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
- * in order to ensure changes are visible on other processors.
- */
-static __inline__ void clear_bit(int nr, volatile void * addr)
-{
- __asm__ __volatile__( LOCK_PREFIX
- "btrl %1,%0"
- :"=m" (ADDR)
- :"Ir" (nr));
-}
-
-/**
- * atomic_inc - increment atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1. Note that the guaranteed
- * useful range of an atomic_t is only 24 bits.
- */
-static __inline__ void atomic_inc(atomic_t *v)
-{
- __asm__ __volatile__(
- LOCK "incl %0"
- :"=m" (v->counter)
- :"m" (v->counter));
-}
-
-
-#define rdtscll(val) \
- __asm__ __volatile__("rdtsc" : "=A" (val))
-
-
-#endif /* !__ASSEMBLY__ */
-
-#endif /* _OS_H_ */
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/include/xen_intr.h
--- a/freebsd-5.3-xen-sparse/i386-xen/include/xen_intr.h Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,53 +0,0 @@
-/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- */
-#ifndef _XEN_INTR_H_
-#define _XEN_INTR_H_
-
-/*
-* The flat IRQ space is divided into two regions:
-* 1. A one-to-one mapping of real physical IRQs. This space is only used
-* if we have physical device-access privilege. This region is at the
-* start of the IRQ space so that existing device drivers do not need
-* to be modified to translate physical IRQ numbers into our IRQ space.
-* 3. A dynamic mapping of inter-domain and Xen-sourced virtual IRQs. These
-* are bound using the provided bind/unbind functions.
-*/
-
-#define PIRQ_BASE 0
-#define NR_PIRQS 128
-
-#define DYNIRQ_BASE (PIRQ_BASE + NR_PIRQS)
-#define NR_DYNIRQS 128
-
-#define NR_IRQS (NR_PIRQS + NR_DYNIRQS)
-
-#define pirq_to_irq(_x) ((_x) + PIRQ_BASE)
-#define irq_to_pirq(_x) ((_x) - PIRQ_BASE)
-
-#define dynirq_to_irq(_x) ((_x) + DYNIRQ_BASE)
-#define irq_to_dynirq(_x) ((_x) - DYNIRQ_BASE)
-
-/* Dynamic binding of event channels and VIRQ sources to Linux IRQ space. */
-extern int bind_virq_to_irq(int virq);
-extern void unbind_virq_from_irq(int virq);
-extern int bind_evtchn_to_irq(int evtchn);
-extern void unbind_evtchn_from_irq(int evtchn);
-extern int bind_ipi_on_cpu_to_irq(int cpu, int ipi);
-extern void unbind_ipi_on_cpu_from_irq(int cpu, int ipi);
-extern void ap_evtchn_init(int cpu);
-
-static __inline__ int irq_cannonicalize(int irq)
-{
- return (irq == 2) ? 9 : irq;
-}
-
-extern void disable_irq(unsigned int);
-extern void disable_irq_nosync(unsigned int);
-extern void enable_irq(unsigned int);
-
-extern void irq_suspend(void);
-extern void irq_resume(void);
-
-extern void idle_block(void);
-
-
-#endif /* _XEN_INTR_H_ */
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h
--- a/freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,87 +0,0 @@
-/*
- *
- * Copyright (c) 2004 Christian Limpach.
- * Copyright (c) 2004,2005 Kip Macy
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Christian Limpach.
- * 4. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-#ifndef _XEN_XENFUNC_H_
-#define _XEN_XENFUNC_H_
-
-#include <machine/xen-os.h>
-#include <machine/hypervisor.h>
-#include <machine/xenpmap.h>
-#include <machine/segments.h>
-#include <sys/pcpu.h>
-#define BKPT __asm__("int3");
-#define XPQ_CALL_DEPTH 5
-#define XPQ_CALL_COUNT 2
-#define PG_PRIV PG_AVAIL3
-typedef struct {
- unsigned long pt_ref;
- unsigned long pt_eip[XPQ_CALL_COUNT][XPQ_CALL_DEPTH];
-} pteinfo_t;
-
-extern pteinfo_t *pteinfo_list;
-#ifdef XENDEBUG_LOW
-#define __PRINTK(x) printk x
-#else
-#define __PRINTK(x)
-#endif
-
-char *xen_setbootenv(char *cmd_line);
-int xen_boothowto(char *envp);
-void load_cr3(uint32_t val);
-void xen_machphys_update(unsigned long, unsigned long);
-void xen_update_descriptor(union descriptor *, union descriptor *);
-void lldt(u_short sel);
-void ap_cpu_initclocks(void);
-
-
-/*
- * Invalidate a patricular VA on all cpus
- *
- * N.B. Made these global for external loadable modules to reference.
- */
-static __inline void
-invlpg(u_int addr)
-{
- xen_invlpg(addr);
-}
-
-static __inline void
-invltlb(void)
-{
- xen_tlb_flush();
-
-}
-
-#define PANIC_IF(exp) if (unlikely(exp)) {printk("%s failed\n",#exp);
panic("%s: %s:%d", #exp, __FILE__, __LINE__);}
-
-#endif /* _XEN_XENFUNC_H_ */
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/include/xenpmap.h
--- a/freebsd-5.3-xen-sparse/i386-xen/include/xenpmap.h Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,180 +0,0 @@
-/*
- *
- * Copyright (c) 2004 Christian Limpach.
- * Copyright (c) 2004,2005 Kip Macy
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Christian Limpach.
- * 4. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-#ifndef _XEN_XENPMAP_H_
-#define _XEN_XENPMAP_H_
-#include <machine/xenvar.h>
-void xen_invlpg(vm_offset_t);
-void xen_queue_pt_update(vm_paddr_t, vm_paddr_t);
-void xen_pt_switch(uint32_t);
-void xen_set_ldt(unsigned long, unsigned long);
-void xen_tlb_flush(void);
-void xen_pgd_pin(unsigned long);
-void xen_pgd_unpin(unsigned long);
-void xen_pt_pin(unsigned long);
-void xen_pt_unpin(unsigned long);
-void xen_flush_queue(void);
-void pmap_ref(pt_entry_t *pte, unsigned long ma);
-
-
-#ifdef PMAP_DEBUG
-#define PMAP_REF pmap_ref
-#define PMAP_DEC_REF_PAGE pmap_dec_ref_page
-#define PMAP_MARK_PRIV pmap_mark_privileged
-#define PMAP_MARK_UNPRIV pmap_mark_unprivileged
-#else
-#define PMAP_MARK_PRIV(a)
-#define PMAP_MARK_UNPRIV(a)
-#define PMAP_REF(a, b)
-#define PMAP_DEC_REF_PAGE(a)
-#endif
-
-#if 0
-#define WRITABLE_PAGETABLES
-#endif
-#define ALWAYS_SYNC 0
-
-#ifdef PT_DEBUG
-#define PT_LOG() printk("WP PT_SET %s:%d\n", __FILE__, __LINE__)
-#else
-#define PT_LOG()
-#endif
-
-#define pmap_valid_entry(E) ((E) & PG_V) /* is PDE or PTE valid? */
-
-#define SH_PD_SET_VA 1
-#define SH_PD_SET_VA_MA 2
-#define SH_PD_SET_VA_CLEAR 3
-
-struct pmap;
-void pd_set(struct pmap *pmap, vm_paddr_t *ptr, vm_paddr_t val, int type);
-
-#define PT_GET(_ptp) \
- (pmap_valid_entry(*(_ptp)) ? xpmap_mtop(*(_ptp)) : *(_ptp))
-
-#ifdef WRITABLE_PAGETABLES
-#define PT_SET_VA(_ptp,_npte,sync) do { \
- PMAP_REF((_ptp), xpmap_ptom(_npte)); \
- PT_LOG(); \
- *(_ptp) = xpmap_ptom((_npte)); \
-} while (/*CONSTCOND*/0)
-#define PT_SET_VA_MA(_ptp,_npte,sync) do { \
- PMAP_REF((_ptp), (_npte)); \
- PT_LOG(); \
- *(_ptp) = (_npte); \
-} while (/*CONSTCOND*/0)
-#define PT_CLEAR_VA(_ptp, sync) do { \
- PMAP_REF((pt_entry_t *)(_ptp), 0); \
- PT_LOG(); \
- *(_ptp) = 0; \
-} while (/*CONSTCOND*/0)
-
-#define PD_SET_VA(_pmap, _ptp, _npte, sync) do { \
- PMAP_REF((_ptp), xpmap_ptom(_npte)); \
- pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PD_SET_VA_MA(_pmap, _ptp, _npte, sync) do { \
- PMAP_REF((_ptp), (_npte)); \
- pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA_MA); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PD_CLEAR_VA(_pmap, _ptp, sync) do { \
- PMAP_REF((pt_entry_t *)(_ptp), 0); \
- pd_set((_pmap),(_ptp), 0, SH_PD_SET_VA_CLEAR); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-
-
-#else /* !WRITABLE_PAGETABLES */
-
-#define PT_SET_VA(_ptp,_npte,sync) do { \
- PMAP_REF((_ptp), xpmap_ptom(_npte)); \
- xen_queue_pt_update(vtomach(_ptp), \
- xpmap_ptom(_npte)); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PT_SET_VA_MA(_ptp,_npte,sync) do { \
- PMAP_REF((_ptp), (_npte)); \
- xen_queue_pt_update(vtomach(_ptp), _npte); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PT_CLEAR_VA(_ptp, sync) do { \
- PMAP_REF((pt_entry_t *)(_ptp), 0); \
- xen_queue_pt_update(vtomach(_ptp), 0); \
- if (sync || ALWAYS_SYNC) \
- xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-
-#define PD_SET_VA(_pmap, _ptp,_npte,sync) do { \
- PMAP_REF((_ptp), xpmap_ptom(_npte)); \
- pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PD_SET_VA_MA(_pmap, _ptp,_npte,sync) do { \
- PMAP_REF((_ptp), (_npte)); \
- pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA_MA); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PD_CLEAR_VA(_pmap, _ptp, sync) do { \
- PMAP_REF((pt_entry_t *)(_ptp), 0); \
- pd_set((_pmap),(_ptp), 0, SH_PD_SET_VA_CLEAR); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-
-#endif
-
-#define PT_SET_MA(_va, _ma) \
- HYPERVISOR_update_va_mapping(((unsigned long)_va), \
- ((unsigned long)_ma), \
- UVMF_INVLPG| UVMF_LOCAL)\
-
-#define PT_UPDATES_FLUSH() do { \
- xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-
-
-static __inline uint32_t
-xpmap_mtop(uint32_t mpa)
-{
- return (((xen_machine_phys[(mpa >> PAGE_SHIFT)]) << PAGE_SHIFT)
- | (mpa & ~PG_FRAME));
-}
-
-static __inline vm_paddr_t
-xpmap_ptom(uint32_t ppa)
-{
- return phystomach(ppa) | (ppa & ~PG_FRAME);
-}
-
-#endif /* _XEN_XENPMAP_H_ */
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/include/xenvar.h
--- a/freebsd-5.3-xen-sparse/i386-xen/include/xenvar.h Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,30 +0,0 @@
-#ifndef XENVAR_H_
-#define XENVAR_H_
-
-#define XBOOTUP 0x1
-#define XPMAP 0x2
-extern int xendebug_flags;
-#ifndef NOXENDEBUG
-#define XENPRINTF printk
-#else
-#define XENPRINTF(x...)
-#endif
-extern unsigned long *xen_phys_machine;
-#define TRACE_ENTER XENPRINTF("(file=%s, line=%d) entered %s\n", __FILE__,
__LINE__, __FUNCTION__)
-#define TRACE_EXIT XENPRINTF("(file=%s, line=%d) exiting %s\n", __FILE__,
__LINE__, __FUNCTION__)
-#define TRACE_DEBUG(argflags, _f, _a...) \
-if (xendebug_flags & argflags) XENPRINTF("(file=%s, line=%d) " _f "\n",
__FILE__, __LINE__, ## _a);
-
-extern unsigned long *xen_machine_phys;
-#define PTOM(i) (((unsigned long *)xen_phys_machine)[i])
-#define phystomach(pa) ((((unsigned long *)xen_phys_machine)[(pa >>
PAGE_SHIFT)]) << PAGE_SHIFT)
-void xpq_init(void);
-
-struct sockaddr_in;
-
-int xen_setnfshandle(void);
-int setinaddr(struct sockaddr_in *addr, char *ipstr);
-
-#define RB_GDB_PAUSE RB_RESERVED1
-
-#endif
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/xen/blkfront/xb_blkfront.c
--- a/freebsd-5.3-xen-sparse/i386-xen/xen/blkfront/xb_blkfront.c Sun Dec
4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,1048 +0,0 @@
-/*-
- * All rights reserved.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- */
-
-/*
- * XenoBSD block device driver
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/malloc.h>
-#include <sys/kernel.h>
-#include <vm/vm.h>
-#include <vm/pmap.h>
-
-#include <sys/bio.h>
-#include <sys/bus.h>
-#include <sys/conf.h>
-
-#include <machine/bus.h>
-#include <sys/rman.h>
-#include <machine/resource.h>
-#include <machine/intr_machdep.h>
-#include <machine/vmparam.h>
-
-#include <machine/hypervisor.h>
-#include <machine/hypervisor-ifs.h>
-#include <machine/xen-os.h>
-#include <machine/xen_intr.h>
-#include <machine/evtchn.h>
-
-#include <geom/geom_disk.h>
-#include <machine/ctrl_if.h>
-#include <machine/xenfunc.h>
-
-
-
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-#include <machine/gnttab.h>
-#endif
-
-/* prototypes */
-struct xb_softc;
-static void xb_startio(struct xb_softc *sc);
-static void xb_vbdinit(void);
-static void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp);
-static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id);
-static void blkif_control_probe_send(blkif_request_t *req, blkif_response_t
*rsp, unsigned long address);
-
-struct xb_softc {
- device_t xb_dev;
- struct disk xb_disk; /* disk params */
- struct bio_queue_head xb_bioq; /* sort queue */
- struct resource *xb_irq;
- void *xb_resp_handler;
- int xb_unit;
- int xb_flags;
- struct xb_softc *xb_next_blocked;
-#define XB_OPEN (1<<0) /* drive is open (can't shut down) */
-};
-
-/* Control whether runtime update of vbds is enabled. */
-#define ENABLE_VBD_UPDATE 1
-
-#if ENABLE_VBD_UPDATE
-static void vbd_update(void);
-#else
-static void vbd_update(void){};
-#endif
-
-#define BLKIF_STATE_CLOSED 0
-#define BLKIF_STATE_DISCONNECTED 1
-#define BLKIF_STATE_CONNECTED 2
-
-static char *blkif_state_name[] = {
- [BLKIF_STATE_CLOSED] = "closed",
- [BLKIF_STATE_DISCONNECTED] = "disconnected",
- [BLKIF_STATE_CONNECTED] = "connected",
-};
-
-static char * blkif_status_name[] = {
- [BLKIF_INTERFACE_STATUS_CLOSED] = "closed",
- [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
- [BLKIF_INTERFACE_STATUS_CONNECTED] = "connected",
- [BLKIF_INTERFACE_STATUS_CHANGED] = "changed",
-};
-
-#define WPRINTK(fmt, args...) printk("[XEN] " fmt, ##args)
-
-static int blkif_handle;
-static unsigned int blkif_state = BLKIF_STATE_CLOSED;
-static unsigned int blkif_evtchn;
-static unsigned int blkif_irq;
-
-static int blkif_control_rsp_valid;
-static blkif_response_t blkif_control_rsp;
-
-static blkif_front_ring_t blk_ring;
-
-#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
-
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-static domid_t rdomid = 0;
-static grant_ref_t gref_head, gref_terminal;
-#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
- (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE)
-#endif
-
-static struct xb_softc *xb_kick_pending_head = NULL;
-static struct xb_softc *xb_kick_pending_tail = NULL;
-static struct mtx blkif_io_block_lock;
-
-static unsigned long rec_ring_free;
-blkif_request_t rec_ring[BLK_RING_SIZE];
-
-/* XXX move to xb_vbd.c when VBD update support is added */
-#define MAX_VBDS 64
-static vdisk_t xb_diskinfo[MAX_VBDS];
-static int xb_ndisks;
-
-#define XBD_SECTOR_SIZE 512 /* XXX: assume for now */
-#define XBD_SECTOR_SHFT 9
-
-static unsigned int xb_kick_pending;
-
-static struct mtx blkif_io_lock;
-
-
-static int xb_recovery = 0; /* "Recovery in progress" flag.
Protected
- * by the blkif_io_lock */
-
-
-void blkif_completion(blkif_request_t *req);
-void xb_response_intr(void *);
-
-/* XXX: This isn't supported in FreeBSD, so ignore it for now. */
-#define TASK_UNINTERRUPTIBLE 0
-
-static inline int
-GET_ID_FROM_FREELIST( void )
-{
- unsigned long free = rec_ring_free;
-
- KASSERT(free <= BLK_RING_SIZE, ("free %lu > RING_SIZE", free));
-
- rec_ring_free = rec_ring[free].id;
-
- rec_ring[free].id = 0x0fffffee; /* debug */
-
- return free;
-}
-
-static inline void
-ADD_ID_TO_FREELIST( unsigned long id )
-{
- rec_ring[id].id = rec_ring_free;
- rec_ring_free = id;
-}
-
-static inline void
-translate_req_to_pfn(blkif_request_t *xreq,
- blkif_request_t *req)
-{
- int i;
-
- xreq->operation = req->operation;
- xreq->nr_segments = req->nr_segments;
- xreq->device = req->device;
- /* preserve id */
- xreq->sector_number = req->sector_number;
-
- for ( i = 0; i < req->nr_segments; i++ ){
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- xreq->frame_and_sects[i] = req->frame_and_sects[i];
-#else
- xreq->frame_and_sects[i] = xpmap_mtop(req->frame_and_sects[i]);
-#endif
- }
-}
-
-static inline void translate_req_to_mfn(blkif_request_t *xreq,
- blkif_request_t *req)
-{
- int i;
-
- xreq->operation = req->operation;
- xreq->nr_segments = req->nr_segments;
- xreq->device = req->device;
- xreq->id = req->id; /* copy id (unlike above) */
- xreq->sector_number = req->sector_number;
-
- for ( i = 0; i < req->nr_segments; i++ ){
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- xreq->frame_and_sects[i] = req->frame_and_sects[i];
-#else
- xreq->frame_and_sects[i] = xpmap_ptom(req->frame_and_sects[i]);
-#endif
- }
-}
-
-
-static inline void flush_requests(void)
-{
- RING_PUSH_REQUESTS(&blk_ring);
- notify_via_evtchn(blkif_evtchn);
-}
-
-
-#if ENABLE_VBD_UPDATE
-static void vbd_update()
-{
- XENPRINTF(">\n");
- XENPRINTF("<\n");
-}
-#endif /* ENABLE_VBD_UPDATE */
-
-void
-xb_response_intr(void *xsc)
-{
- struct xb_softc *sc = NULL;
- struct bio *bp;
- blkif_response_t *bret;
- RING_IDX i, rp;
- unsigned long flags;
-
- mtx_lock_irqsave(&blkif_io_lock, flags);
-
- if ( unlikely(blkif_state == BLKIF_STATE_CLOSED) ||
- unlikely(xb_recovery) ) {
- mtx_unlock_irqrestore(&blkif_io_lock, flags);
- return;
- }
-
- rp = blk_ring.sring->rsp_prod;
- rmb(); /* Ensure we see queued responses up to 'rp'. */
-
- /* sometimes we seem to lose i/o. stay in the interrupt handler while
- * there is stuff to process: continually recheck the response producer.
- */
- process_rcvd:
- for ( i = blk_ring.rsp_cons; i != (rp = blk_ring.sring->rsp_prod); i++ ) {
- unsigned long id;
- bret = RING_GET_RESPONSE(&blk_ring, i);
-
- id = bret->id;
- bp = (struct bio *)rec_ring[id].id;
-
- blkif_completion(&rec_ring[id]);
-
- ADD_ID_TO_FREELIST(id); /* overwrites req */
-
- switch ( bret->operation ) {
- case BLKIF_OP_READ:
- /* had an unaligned buffer that needs to be copied */
- if (bp->bio_driver1)
- bcopy(bp->bio_data, bp->bio_driver1, bp->bio_bcount);
- case BLKIF_OP_WRITE:
-
- /* free the copy buffer */
- if (bp->bio_driver1) {
- free(bp->bio_data, M_DEVBUF);
- bp->bio_data = bp->bio_driver1;
- bp->bio_driver1 = NULL;
- }
-
- if ( unlikely(bret->status != BLKIF_RSP_OKAY) ) {
- XENPRINTF("Bad return from blkdev data request: %x\n",
- bret->status);
- bp->bio_flags |= BIO_ERROR;
- }
-
- sc = (struct xb_softc *)bp->bio_disk->d_drv1;
-
- if (bp->bio_flags & BIO_ERROR)
- bp->bio_error = EIO;
- else
- bp->bio_resid = 0;
-
- biodone(bp);
- break;
- case BLKIF_OP_PROBE:
- memcpy(&blkif_control_rsp, bret, sizeof(*bret));
- blkif_control_rsp_valid = 1;
- break;
- default:
- panic("received invalid operation");
- break;
- }
- }
-
- blk_ring.rsp_cons = i;
-
- if (xb_kick_pending) {
- unsigned long flags;
- mtx_lock_irqsave(&blkif_io_block_lock, flags);
- xb_kick_pending = FALSE;
- /* Run as long as there are blocked devs or queue fills again */
- while ((NULL != xb_kick_pending_head) && (FALSE == xb_kick_pending)) {
- struct xb_softc *xb_cur = xb_kick_pending_head;
- xb_kick_pending_head = xb_cur->xb_next_blocked;
- if(NULL == xb_kick_pending_head) {
- xb_kick_pending_tail = NULL;
- }
- xb_cur->xb_next_blocked = NULL;
- mtx_unlock_irqrestore(&blkif_io_block_lock, flags);
- xb_startio(xb_cur);
- mtx_lock_irqsave(&blkif_io_block_lock, flags);
- }
- mtx_unlock_irqrestore(&blkif_io_block_lock, flags);
-
- if(blk_ring.rsp_cons != blk_ring.sring->rsp_prod) {
- /* Consume those, too */
- goto process_rcvd;
- }
- }
-
- mtx_unlock_irqrestore(&blkif_io_lock, flags);
-}
-
-static int
-xb_open(struct disk *dp)
-{
- struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;
-
- if (sc == NULL) {
- printk("xb%d: not found", sc->xb_unit);
- return (ENXIO);
- }
-
- /* block dev not active */
- if (blkif_state != BLKIF_STATE_CONNECTED) {
- printk("xb%d: bad state: %dn", sc->xb_unit, blkif_state);
- return(ENXIO);
- }
-
- sc->xb_flags |= XB_OPEN;
- return (0);
-}
-
-static int
-xb_close(struct disk *dp)
-{
- struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;
-
- if (sc == NULL)
- return (ENXIO);
- sc->xb_flags &= ~XB_OPEN;
- return (0);
-}
-
-static int
-xb_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td)
-{
- struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;
-
- if (sc == NULL)
- return (ENXIO);
-
- return (ENOTTY);
-}
-
-/*
- * Dequeue buffers and place them in the shared communication ring.
- * Return when no more requests can be accepted or all buffers have
- * been queued.
- *
- * Signal XEN once the ring has been filled out.
- */
-static void
-xb_startio(struct xb_softc *sc)
-{
- struct bio *bp;
- unsigned long buffer_ma;
- blkif_request_t *req;
- int s, queued = 0;
- unsigned long id;
- unsigned int fsect, lsect;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- int ref;
-#endif
-
-
- if (unlikely(blkif_state != BLKIF_STATE_CONNECTED))
- return;
-
- s = splbio();
-
- for (bp = bioq_first(&sc->xb_bioq);
- bp && !RING_FULL(&blk_ring);
- blk_ring.req_prod_pvt++, queued++, bp = bioq_first(&sc->xb_bioq)) {
-
- /* Check if the buffer is properly aligned */
- if ((vm_offset_t)bp->bio_data & PAGE_MASK) {
- int align = (bp->bio_bcount < PAGE_SIZE/2) ? XBD_SECTOR_SIZE :
- PAGE_SIZE;
- caddr_t newbuf = malloc(bp->bio_bcount + align, M_DEVBUF,
- M_WAITOK);
- caddr_t alignbuf = (char *)roundup2((u_long)newbuf, align);
-
- /* save a copy of the current buffer */
- bp->bio_driver1 = bp->bio_data;
-
- /* Copy the data for a write */
- if (bp->bio_cmd == BIO_WRITE)
- bcopy(bp->bio_data, alignbuf, bp->bio_bcount);
- bp->bio_data = alignbuf;
- }
-
- bioq_remove(&sc->xb_bioq, bp);
- buffer_ma = vtomach(bp->bio_data);
- fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT;
- lsect = fsect + (bp->bio_bcount >> XBD_SECTOR_SHFT) - 1;
-
- KASSERT((buffer_ma & (XBD_SECTOR_SIZE-1)) == 0,
- ("XEN buffer must be sector aligned"));
- KASSERT(lsect <= 7,
- ("XEN disk driver data cannot cross a page boundary"));
-
- buffer_ma &= ~PAGE_MASK;
-
- /* Fill out a communications ring structure. */
- req = RING_GET_REQUEST(&blk_ring,
- blk_ring.req_prod_pvt);
- id = GET_ID_FROM_FREELIST();
- rec_ring[id].id= (unsigned long)bp;
-
- req->id = id;
- req->operation = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ :
- BLKIF_OP_WRITE;
-
- req->sector_number= (blkif_sector_t)bp->bio_pblkno;
- req->device = xb_diskinfo[sc->xb_unit].device;
-
- req->nr_segments = 1; /* not doing scatter/gather since buffer
- * chaining is not supported.
- */
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- /* install a grant reference. */
- ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
- KASSERT( ref != -ENOSPC, ("grant_reference failed") );
-
- gnttab_grant_foreign_access_ref(
- ref,
- rdomid,
- buffer_ma >> PAGE_SHIFT,
- req->operation & 1 ); /* ??? */
-
- req->frame_and_sects[0] =
- (((uint32_t) ref) << 16) | (fsect << 3) | lsect;
-#else
- /*
- * upper bits represent the machine address of the buffer and the
- * lower bits is the number of sectors to be read/written.
- */
- req->frame_and_sects[0] = buffer_ma | (fsect << 3) | lsect;
-#endif
- /* Keep a private copy so we can reissue requests when recovering. */
- translate_req_to_pfn( &rec_ring[id], req);
-
- }
-
- if (RING_FULL(&blk_ring)) {
- unsigned long flags;
- mtx_lock_irqsave(&blkif_io_block_lock, flags);
- xb_kick_pending = TRUE;
- /* If we are not already on blocked list, add us */
- if((NULL == sc->xb_next_blocked) && (xb_kick_pending_tail != sc)) {
-
- if(NULL == xb_kick_pending_head) {
- xb_kick_pending_head = xb_kick_pending_tail = sc;
- } else {
- xb_kick_pending_tail->xb_next_blocked = sc;
- xb_kick_pending_tail = sc;
- }
- }
- mtx_unlock_irqrestore(&blkif_io_block_lock, flags);
- }
-
- if (queued != 0)
- flush_requests();
- splx(s);
-}
-
-/*
- * Read/write routine for a buffer. Finds the proper unit, place it on
- * the sortq and kick the controller.
- */
-static void
-xb_strategy(struct bio *bp)
-{
- struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1;
- int s;
-
- /* bogus disk? */
- if (sc == NULL) {
- bp->bio_error = EINVAL;
- bp->bio_flags |= BIO_ERROR;
- goto bad;
- }
-
- s = splbio();
- /*
- * Place it in the queue of disk activities for this disk
- */
- bioq_disksort(&sc->xb_bioq, bp);
- splx(s);
-
- xb_startio(sc);
- return;
-
- bad:
- /*
- * Correctly set the bio to indicate a failed tranfer.
- */
- bp->bio_resid = bp->bio_bcount;
- biodone(bp);
- return;
-}
-
-
-static int
-xb_create(int unit)
-{
- struct xb_softc *sc;
- int error = 0;
-
- sc = (struct xb_softc *)malloc(sizeof(*sc), M_DEVBUF, M_WAITOK);
- sc->xb_unit = unit;
- sc->xb_next_blocked = NULL;
-
- memset(&sc->xb_disk, 0, sizeof(sc->xb_disk));
- sc->xb_disk.d_unit = unit;
- sc->xb_disk.d_open = xb_open;
- sc->xb_disk.d_close = xb_close;
- sc->xb_disk.d_ioctl = xb_ioctl;
- sc->xb_disk.d_strategy = xb_strategy;
- sc->xb_disk.d_name = "xbd";
- sc->xb_disk.d_drv1 = sc;
- sc->xb_disk.d_sectorsize = XBD_SECTOR_SIZE;
- sc->xb_disk.d_mediasize = xb_diskinfo[sc->xb_unit].capacity
- << XBD_SECTOR_SHFT;
-#if 0
- sc->xb_disk.d_maxsize = DFLTPHYS;
-#else /* XXX: xen can't handle large single i/o requests */
- sc->xb_disk.d_maxsize = 4096;
-#endif
-
- XENPRINTF("attaching device 0x%x unit %d capacity %llu\n",
- xb_diskinfo[sc->xb_unit].device, sc->xb_unit,
- sc->xb_disk.d_mediasize);
-
- disk_create(&sc->xb_disk, DISK_VERSION_00);
- bioq_init(&sc->xb_bioq);
-
- return error;
-}
-
-/* XXX move to xb_vbd.c when vbd update support is added */
-static void
-xb_vbdinit(void)
-{
- int i;
- blkif_request_t req;
- blkif_response_t rsp;
- vdisk_t *buf;
-
- buf = (vdisk_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
-
- /* Probe for disk information. */
- memset(&req, 0, sizeof(req));
- req.operation = BLKIF_OP_PROBE;
- req.nr_segments = 1;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- blkif_control_probe_send(&req, &rsp,
- (unsigned long)(vtomach(buf)));
-
-#else
- req.frame_and_sects[0] = vtomach(buf) | 7;
- blkif_control_send(&req, &rsp);
-#endif
- if ( rsp.status <= 0 ) {
- printk("xb_identify: Could not identify disks (%d)\n", rsp.status);
- free(buf, M_DEVBUF);
- return;
- }
-
- if ((xb_ndisks = rsp.status) > MAX_VBDS)
- xb_ndisks = MAX_VBDS;
-
- memcpy(xb_diskinfo, buf, xb_ndisks * sizeof(vdisk_t));
-
- for (i = 0; i < xb_ndisks; i++)
- xb_create(i);
-
- free(buf, M_DEVBUF);
-}
-
-
-/***************************** COMMON CODE *******************************/
-
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-static void
-blkif_control_probe_send(blkif_request_t *req, blkif_response_t *rsp,
- unsigned long address)
-{
- int ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
- KASSERT( ref != -ENOSPC, ("couldn't get grant reference") );
-
- gnttab_grant_foreign_access_ref( ref, rdomid, address >> PAGE_SHIFT, 0 );
-
- req->frame_and_sects[0] = (((uint32_t) ref) << 16) | 7;
-
- blkif_control_send(req, rsp);
-}
-#endif
-
-void
-blkif_control_send(blkif_request_t *req, blkif_response_t *rsp)
-{
- unsigned long flags, id;
- blkif_request_t *req_d;
-
- retry:
- while ( RING_FULL(&blk_ring) )
- {
- tsleep( req, PWAIT | PCATCH, "blkif", hz);
- }
-
- mtx_lock_irqsave(&blkif_io_lock, flags);
- if ( RING_FULL(&blk_ring) )
- {
- mtx_unlock_irqrestore(&blkif_io_lock, flags);
- goto retry;
- }
-
- req_d = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt);
- *req_d = *req;
-
- id = GET_ID_FROM_FREELIST();
- req_d->id = id;
- rec_ring[id].id = (unsigned long) req;
-
- translate_req_to_pfn( &rec_ring[id], req );
-
- blk_ring.req_prod_pvt++;
- flush_requests();
-
- mtx_unlock_irqrestore(&blkif_io_lock, flags);
-
- while ( !blkif_control_rsp_valid )
- {
- tsleep( &blkif_control_rsp_valid, PWAIT | PCATCH, "blkif", hz);
- }
-
- memcpy(rsp, &blkif_control_rsp, sizeof(*rsp));
- blkif_control_rsp_valid = 0;
-}
-
-
-/* Send a driver status notification to the domain controller. */
-static void
-send_driver_status(int ok)
-{
- ctrl_msg_t cmsg = {
- .type = CMSG_BLKIF_FE,
- .subtype = CMSG_BLKIF_FE_DRIVER_STATUS,
- .length = sizeof(blkif_fe_driver_status_t),
- };
- blkif_fe_driver_status_t *msg = (void*)cmsg.msg;
-
- msg->status = (ok ? BLKIF_DRIVER_STATUS_UP : BLKIF_DRIVER_STATUS_DOWN);
-
- ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-}
-
-/* Tell the controller to bring up the interface. */
-static void
-blkif_send_interface_connect(void)
-{
- ctrl_msg_t cmsg = {
- .type = CMSG_BLKIF_FE,
- .subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT,
- .length = sizeof(blkif_fe_interface_connect_t),
- };
- blkif_fe_interface_connect_t *msg = (void*)cmsg.msg;
-
- msg->handle = 0;
- msg->shmem_frame = (vtomach(blk_ring.sring) >> PAGE_SHIFT);
-
- ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-}
-
-static void
-blkif_free(void)
-{
-
- unsigned long flags;
-
- printk("[XEN] Recovering virtual block device driver\n");
-
- /* Prevent new requests being issued until we fix things up. */
- mtx_lock_irqsave(&blkif_io_lock, flags);
- xb_recovery = 1;
- blkif_state = BLKIF_STATE_DISCONNECTED;
- mtx_unlock_irqrestore(&blkif_io_lock, flags);
-
- /* Free resources associated with old device channel. */
- if (blk_ring.sring != NULL) {
- free(blk_ring.sring, M_DEVBUF);
- blk_ring.sring = NULL;
- }
- /* free_irq(blkif_irq, NULL);*/
- blkif_irq = 0;
-
- unbind_evtchn_from_irq(blkif_evtchn);
- blkif_evtchn = 0;
-}
-
-static void
-blkif_close(void)
-{
-}
-
-/* Move from CLOSED to DISCONNECTED state. */
-static void
-blkif_disconnect(void)
-{
- if (blk_ring.sring) free(blk_ring.sring, M_DEVBUF);
- blk_ring.sring = (blkif_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
- SHARED_RING_INIT(blk_ring.sring);
- FRONT_RING_INIT(&blk_ring, blk_ring.sring, PAGE_SIZE);
- blkif_state = BLKIF_STATE_DISCONNECTED;
- blkif_send_interface_connect();
-}
-
-static void
-blkif_reset(void)
-{
- printk("[XEN] Recovering virtual block device driver\n");
- blkif_free();
- blkif_disconnect();
-}
-
-static void
-blkif_recover(void)
-{
-
- int i;
- blkif_request_t *req;
-
- /* Hmm, requests might be re-ordered when we re-issue them.
- * This will need to be fixed once we have barriers */
-
- /* Stage 1 : Find active and move to safety. */
- for ( i = 0; i < BLK_RING_SIZE; i++ ) {
- if ( rec_ring[i].id >= KERNBASE ) {
- req = RING_GET_REQUEST(&blk_ring,
- blk_ring.req_prod_pvt);
- translate_req_to_mfn(req, &rec_ring[i]);
- blk_ring.req_prod_pvt++;
- }
- }
-
- printk("blkfront: recovered %d descriptors\n",blk_ring.req_prod_pvt);
-
- /* Stage 2 : Set up shadow list. */
- for ( i = 0; i < blk_ring.req_prod_pvt; i++ ) {
- req = RING_GET_REQUEST(&blk_ring, i);
- rec_ring[i].id = req->id;
- req->id = i;
- translate_req_to_pfn(&rec_ring[i], req);
- }
-
- /* Stage 3 : Set up free list. */
- for ( ; i < BLK_RING_SIZE; i++ ){
- rec_ring[i].id = i+1;
- }
- rec_ring_free = blk_ring.req_prod_pvt;
- rec_ring[BLK_RING_SIZE-1].id = 0x0fffffff;
-
- /* blk_ring.req_prod will be set when we flush_requests().*/
- wmb();
-
- /* Switch off recovery mode, using a memory barrier to ensure that
- * it's seen before we flush requests - we don't want to miss any
- * interrupts. */
- xb_recovery = 0;
- wmb();
-
- /* Kicks things back into life. */
- flush_requests();
-
- /* Now safe to left other peope use interface. */
- blkif_state = BLKIF_STATE_CONNECTED;
-}
-
-static void
-blkif_connect(blkif_fe_interface_status_t *status)
-{
- int err = 0;
-
- blkif_evtchn = status->evtchn;
- blkif_irq = bind_evtchn_to_irq(blkif_evtchn);
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- rdomid = status->domid;
-#endif
-
-
- err = intr_add_handler("xbd", blkif_irq,
- (driver_intr_t *)xb_response_intr, NULL,
- INTR_TYPE_BIO | INTR_MPSAFE, NULL);
- if(err){
- printk("[XEN] blkfront request_irq failed (err=%d)\n", err);
- return;
- }
-
- if ( xb_recovery ) {
- blkif_recover();
- } else {
- /* Probe for discs attached to the interface. */
- xb_vbdinit();
-
- /* XXX: transition state after probe */
- blkif_state = BLKIF_STATE_CONNECTED;
- }
-
- /* Kick pending requests. */
-#if 0 /* XXX: figure out sortq logic */
- mtx_lock_irq(&blkif_io_lock);
- kick_pending_request_queues();
- mtx_unlock_irq(&blkif_io_lock);
-#endif
-}
-
-static void
-unexpected(blkif_fe_interface_status_t *status)
-{
- WPRINTK(" Unexpected blkif status %s in state %s\n",
- blkif_status_name[status->status],
- blkif_state_name[blkif_state]);
-}
-
-static void
-blkif_status(blkif_fe_interface_status_t *status)
-{
- if (status->handle != blkif_handle) {
- WPRINTK(" Invalid blkif: handle=%u", status->handle);
- return;
- }
-
- switch (status->status) {
-
- case BLKIF_INTERFACE_STATUS_CLOSED:
- switch(blkif_state){
- case BLKIF_STATE_CLOSED:
- unexpected(status);
- break;
- case BLKIF_STATE_DISCONNECTED:
- case BLKIF_STATE_CONNECTED:
- unexpected(status);
- blkif_close();
- break;
- }
- break;
-
- case BLKIF_INTERFACE_STATUS_DISCONNECTED:
- switch(blkif_state){
- case BLKIF_STATE_CLOSED:
- blkif_disconnect();
- break;
- case BLKIF_STATE_DISCONNECTED:
- case BLKIF_STATE_CONNECTED:
- unexpected(status);
- blkif_reset();
- break;
- }
- break;
-
- case BLKIF_INTERFACE_STATUS_CONNECTED:
- switch(blkif_state){
- case BLKIF_STATE_CLOSED:
- unexpected(status);
- blkif_disconnect();
- blkif_connect(status);
- break;
- case BLKIF_STATE_DISCONNECTED:
- blkif_connect(status);
- break;
- case BLKIF_STATE_CONNECTED:
- unexpected(status);
- blkif_connect(status);
- break;
- }
- break;
-
- case BLKIF_INTERFACE_STATUS_CHANGED:
- switch(blkif_state){
- case BLKIF_STATE_CLOSED:
- case BLKIF_STATE_DISCONNECTED:
- unexpected(status);
- break;
- case BLKIF_STATE_CONNECTED:
- vbd_update();
- break;
- }
- break;
-
- default:
- WPRINTK("Invalid blkif status: %d\n", status->status);
- break;
- }
-}
-
-
-static void
-blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
-{
- switch ( msg->subtype )
- {
- case CMSG_BLKIF_FE_INTERFACE_STATUS:
- if ( msg->length != sizeof(blkif_fe_interface_status_t) )
- goto parse_error;
- blkif_status((blkif_fe_interface_status_t *)
- &msg->msg[0]);
- break;
- default:
- goto parse_error;
- }
-
- ctrl_if_send_response(msg);
- return;
-
- parse_error:
- msg->length = 0;
- ctrl_if_send_response(msg);
-}
-
-static int
-wait_for_blkif(void)
-{
- int err = 0;
- int i;
- send_driver_status(1);
-
- /*
- * We should read 'nr_interfaces' from response message and wait
- * for notifications before proceeding. For now we assume that we
- * will be notified of exactly one interface.
- */
- for ( i=0; (blkif_state != BLKIF_STATE_CONNECTED) && (i < 10*hz); i++ )
- {
- tsleep(&blkif_state, PWAIT | PCATCH, "blkif", hz);
- }
-
- if (blkif_state != BLKIF_STATE_CONNECTED){
- printk("[XEN] Timeout connecting block device driver!\n");
- err = -ENOSYS;
- }
- return err;
-}
-
-
-static void
-xb_init(void *unused)
-{
- int i;
-
- printk("[XEN] Initialising virtual block device driver\n");
-
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- if ( 0 > gnttab_alloc_grant_references( MAXIMUM_OUTSTANDING_BLOCK_REQS,
- &gref_head, &gref_terminal ))
- return;
- printk("Blkif frontend is using grant tables.\n");
-#endif
-
- xb_kick_pending = FALSE;
- xb_kick_pending_head = NULL;
- xb_kick_pending_tail = NULL;
-
- rec_ring_free = 0;
- for (i = 0; i < BLK_RING_SIZE; i++) {
- rec_ring[i].id = i+1;
- }
- rec_ring[BLK_RING_SIZE-1].id = 0x0fffffff;
-
- (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx, 0);
-
- wait_for_blkif();
-}
-
-#if 0 /* XXX not yet */
-void
-blkdev_suspend(void)
-{
-}
-
-void
-blkdev_resume(void)
-{
- send_driver_status(1);
-}
-#endif
-
-void
-blkif_completion(blkif_request_t *req)
-{
- int i;
-
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- grant_ref_t gref;
-
- for ( i = 0; i < req->nr_segments; i++ )
- {
- gref = blkif_gref_from_fas(req->frame_and_sects[i]);
- gnttab_release_grant_reference(&gref_head, gref);
- }
-#else
- /* This is a hack to get the dirty logging bits set */
- switch ( req->operation )
- {
- case BLKIF_OP_READ:
- for ( i = 0; i < req->nr_segments; i++ )
- {
- unsigned long pfn = req->frame_and_sects[i] >> PAGE_SHIFT;
- unsigned long mfn = xen_phys_machine[pfn];
- xen_machphys_update(mfn, pfn);
- }
- break;
- }
-#endif
-}
-MTX_SYSINIT(ioreq, &blkif_io_lock, "BIO LOCK", MTX_SPIN | MTX_NOWITNESS); /*
XXX how does one enroll a lock? */
- MTX_SYSINIT(ioreq_block, &blkif_io_block_lock, "BIO BLOCK LOCK", MTX_SPIN |
MTX_NOWITNESS);
-SYSINIT(xbdev, SI_SUB_PSEUDO, SI_ORDER_ANY, xb_init, NULL)
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/xen/char/console.c
--- a/freebsd-5.3-xen-sparse/i386-xen/xen/char/console.c Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,536 +0,0 @@
-#include <sys/cdefs.h>
-
-
-#include <sys/param.h>
-#include <sys/module.h>
-#include <sys/systm.h>
-#include <sys/consio.h>
-#include <sys/proc.h>
-#include <sys/uio.h>
-#include <sys/tty.h>
-#include <sys/systm.h>
-#include <sys/taskqueue.h>
-#include <sys/conf.h>
-#include <sys/kernel.h>
-#include <sys/bus.h>
-#include <machine/stdarg.h>
-#include <machine/xen-os.h>
-#include <machine/hypervisor.h>
-#include <machine/ctrl_if.h>
-#include <sys/cons.h>
-
-#include "opt_ddb.h"
-#ifdef DDB
-#include <ddb/ddb.h>
-#endif
-
-static char driver_name[] = "xc";
-devclass_t xc_devclass;
-static void xcstart (struct tty *);
-static int xcparam (struct tty *, struct termios *);
-static void xcstop (struct tty *, int);
-static void xc_timeout(void *);
-static void xencons_tx_flush_task_routine(void *,int );
-static void __xencons_tx_flush(void);
-static void xencons_rx(ctrl_msg_t *msg,unsigned long id);
-static boolean_t xcons_putc(int c);
-
-/* switch console so that shutdown can occur gracefully */
-static void xc_shutdown(void *arg, int howto);
-static int xc_mute;
-
-void xcons_force_flush(void);
-
-static cn_probe_t xccnprobe;
-static cn_init_t xccninit;
-static cn_getc_t xccngetc;
-static cn_putc_t xccnputc;
-static cn_checkc_t xccncheckc;
-
-#define XC_POLLTIME (hz/10)
-
-CONS_DRIVER(xc, xccnprobe, xccninit, NULL, xccngetc,
- xccncheckc, xccnputc, NULL);
-
-static int xen_console_up;
-static boolean_t xc_tx_task_queued;
-static boolean_t xc_start_needed;
-static struct callout xc_callout;
-struct mtx cn_mtx;
-
-#define RBUF_SIZE 1024
-#define RBUF_MASK(_i) ((_i)&(RBUF_SIZE-1))
-#define WBUF_SIZE 4096
-#define WBUF_MASK(_i) ((_i)&(WBUF_SIZE-1))
-static char wbuf[WBUF_SIZE];
-static char rbuf[RBUF_SIZE];
-static int rc, rp;
-static int cnsl_evt_reg;
-static unsigned int wc, wp; /* write_cons, write_prod */
-static struct task xencons_tx_flush_task = {
{NULL},0,0,&xencons_tx_flush_task_routine,NULL };
-
-
-#define CDEV_MAJOR 12
-#define XCUNIT(x) (minor(x))
-#define ISTTYOPEN(tp) ((tp) && ((tp)->t_state & TS_ISOPEN))
-#define CN_LOCK_INIT(x, _name) \
- mtx_init(&x, _name, _name, MTX_SPIN)
-#define CN_LOCK(l, f) mtx_lock_irqsave(&(l), (f))
-#define CN_UNLOCK(l, f) mtx_unlock_irqrestore(&(l), (f))
-#define CN_LOCK_ASSERT(x) mtx_assert(&x, MA_OWNED)
-#define CN_LOCK_DESTROY(x) mtx_destroy(&x)
-
-
-static struct tty *xccons;
-
-struct xc_softc {
- int xc_unit;
- struct cdev *xc_dev;
-};
-
-
-static d_open_t xcopen;
-static d_close_t xcclose;
-static d_ioctl_t xcioctl;
-
-static struct cdevsw xc_cdevsw = {
- /* version */ D_VERSION_00,
- /* maj */ CDEV_MAJOR,
- /* flags */ D_TTY | D_NEEDGIANT,
- /* name */ driver_name,
-
- /* open */ xcopen,
- /* fdopen */ 0,
- /* close */ xcclose,
- /* read */ ttyread,
- /* write */ ttywrite,
- /* ioctl */ xcioctl,
- /* poll */ ttypoll,
- /* mmap */ 0,
- /* strategy */ 0,
- /* dump */ 0,
- /* kqfilter */ ttykqfilter
-};
-
-static void
-xccnprobe(struct consdev *cp)
-{
- cp->cn_pri = CN_REMOTE;
- cp->cn_tp = xccons;
- sprintf(cp->cn_name, "%s0", driver_name);
-}
-
-
-static void
-xccninit(struct consdev *cp)
-{
- CN_LOCK_INIT(cn_mtx,"XCONS LOCK");
-
-}
-int
-xccngetc(struct consdev *dev)
-{
- int c;
- if (xc_mute)
- return 0;
- do {
- if ((c = xccncheckc(dev)) == -1) {
- /* polling without sleeping in Xen doesn't work well.
- * Sleeping gives other things like clock a chance to
- * run
- */
- tsleep(&cn_mtx, PWAIT | PCATCH, "console sleep",
- XC_POLLTIME);
- }
- } while( c == -1 );
- return c;
-}
-
-int
-xccncheckc(struct consdev *dev)
-{
- int ret = (xc_mute ? 0 : -1);
- int flags;
- CN_LOCK(cn_mtx, flags);
- if ( (rp - rc) ){
- /* we need to return only one char */
- ret = (int)rbuf[RBUF_MASK(rc)];
- rc++;
- }
- CN_UNLOCK(cn_mtx, flags);
- return(ret);
-}
-
-static void
-xccnputc(struct consdev *dev, int c)
-{
- int flags;
- CN_LOCK(cn_mtx, flags);
- xcons_putc(c);
- CN_UNLOCK(cn_mtx, flags);
-}
-
-static boolean_t
-xcons_putc(int c)
-{
- int force_flush = xc_mute ||
-#ifdef DDB
- db_active ||
-#endif
- panicstr; /* we're not gonna recover, so force
- * flush
- */
-
- if ( (wp-wc) < (WBUF_SIZE-1) ){
- if ( (wbuf[WBUF_MASK(wp++)] = c) == '\n' ) {
- wbuf[WBUF_MASK(wp++)] = '\r';
- if (force_flush)
- xcons_force_flush();
- }
- } else if (force_flush) {
- xcons_force_flush();
-
- }
- if (cnsl_evt_reg)
- __xencons_tx_flush();
-
- /* inform start path that we're pretty full */
- return ((wp - wc) >= WBUF_SIZE - 100) ? TRUE : FALSE;
-}
-
-static void
-xc_identify(driver_t *driver, device_t parent)
-{
- device_t child;
- child = BUS_ADD_CHILD(parent, 0, driver_name, 0);
- device_set_driver(child, driver);
- device_set_desc(child, "Xen Console");
-}
-
-static int
-xc_probe(device_t dev)
-{
- struct xc_softc *sc = (struct xc_softc *)device_get_softc(dev);
-
- sc->xc_unit = device_get_unit(dev);
- return (0);
-}
-
-static int
-xc_attach(device_t dev)
-{
- struct xc_softc *sc = (struct xc_softc *)device_get_softc(dev);
-
- sc->xc_dev = make_dev(&xc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "xc%r", 0);
- xccons = ttymalloc(NULL);
-
- sc->xc_dev->si_drv1 = (void *)sc;
- sc->xc_dev->si_tty = xccons;
-
- xccons->t_oproc = xcstart;
- xccons->t_param = xcparam;
- xccons->t_stop = xcstop;
- xccons->t_dev = sc->xc_dev;
-
- callout_init(&xc_callout, 0);
-
- /* Ensure that we don't attach before the event channel is able to receive
- * a registration. The XenBus code delays the probe/attach order until
- * this has occurred.
- */
- (void)ctrl_if_register_receiver(CMSG_CONSOLE, xencons_rx, 0);
- cnsl_evt_reg = 1;
-
- callout_reset(&xc_callout, XC_POLLTIME, xc_timeout, xccons);
-
- /* register handler to flush console on shutdown */
- if ((EVENTHANDLER_REGISTER(shutdown_post_sync, xc_shutdown,
- NULL, SHUTDOWN_PRI_DEFAULT)) == NULL)
- printf("xencons: shutdown event registration failed!\n");
-
- return (0);
-}
-
-/*
- * return 0 for all console input, force flush all output.
- */
-static void
-xc_shutdown(void *arg, int howto)
-{
- xc_mute = 1;
- xcons_force_flush();
-
-}
-
-static void
-xencons_rx(ctrl_msg_t *msg,unsigned long id)
-{
- int i, flags;
- struct tty *tp = xccons;
-
- CN_LOCK(cn_mtx, flags);
- for ( i = 0; i < msg->length; i++ ) {
- if ( xen_console_up )
- (*linesw[tp->t_line]->l_rint)(msg->msg[i], tp);
- else
- rbuf[RBUF_MASK(rp++)] = msg->msg[i];
- }
- CN_UNLOCK(cn_mtx, flags);
- msg->length = 0;
- ctrl_if_send_response(msg);
-}
-
-static void
-__xencons_tx_flush(void)
-{
- int sz, work_done = 0;
- ctrl_msg_t msg;
-
- while ( wc != wp )
- {
- sz = wp - wc;
- if ( sz > sizeof(msg.msg) )
- sz = sizeof(msg.msg);
- if ( sz > (WBUF_SIZE - WBUF_MASK(wc)) )
- sz = WBUF_SIZE - WBUF_MASK(wc);
-
- msg.type = CMSG_CONSOLE;
- msg.subtype = CMSG_CONSOLE_DATA;
- msg.length = sz;
- memcpy(msg.msg, &wbuf[WBUF_MASK(wc)], sz);
-
- if ( ctrl_if_send_message_noblock(&msg, NULL, 0) == 0 ){
- wc += sz;
- }
- else if (xc_tx_task_queued) {
- /* avoid the extra enqueue check if we know we're already queued */
- break;
- } else if (ctrl_if_enqueue_space_callback(&xencons_tx_flush_task)) {
- xc_tx_task_queued = TRUE;
- break;
- }
-
- work_done = 1;
- }
-
- if ( work_done && xen_console_up )
- ttwakeup(xccons);
-}
-static void
-xencons_tx_flush_task_routine(void * data, int arg)
-{
- int flags;
- CN_LOCK(cn_mtx, flags);
- xc_tx_task_queued = FALSE;
- __xencons_tx_flush();
- CN_UNLOCK(cn_mtx, flags);
-}
-
-int
-xcopen(struct cdev *dev, int flag, int mode, struct thread *td)
-{
- struct xc_softc *sc;
- int unit = XCUNIT(dev);
- struct tty *tp;
- int s, error;
-
- sc = (struct xc_softc *)device_get_softc(
- devclass_get_device(xc_devclass, unit));
- if (sc == NULL)
- return (ENXIO);
-
- tp = dev->si_tty;
- s = spltty();
- if (!ISTTYOPEN(tp)) {
- tp->t_state |= TS_CARR_ON;
- ttychars(tp);
- tp->t_iflag = TTYDEF_IFLAG;
- tp->t_oflag = TTYDEF_OFLAG;
- tp->t_cflag = TTYDEF_CFLAG|CLOCAL;
- tp->t_lflag = TTYDEF_LFLAG;
- tp->t_ispeed = tp->t_ospeed = TTYDEF_SPEED;
- xcparam(tp, &tp->t_termios);
- ttsetwater(tp);
- } else if (tp->t_state & TS_XCLUDE && suser(td)) {
- splx(s);
- return (EBUSY);
- }
- splx(s);
-
- xen_console_up = 1;
-
- error = (*linesw[tp->t_line]->l_open)(dev, tp);
-
- return error;
-}
-
-int
-xcclose(struct cdev *dev, int flag, int mode, struct thread *td)
-{
- struct tty *tp = dev->si_tty;
-
- if (tp == NULL)
- return (0);
- xen_console_up = 0;
-
- spltty();
- (*linesw[tp->t_line]->l_close)(tp, flag);
- tty_close(tp);
- spl0();
- return (0);
-}
-
-
-int
-xcioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread
*td)
-{
- struct tty *tp = dev->si_tty;
- int error;
-
- error = (*linesw[tp->t_line]->l_ioctl)(tp, cmd, data, flag, td);
- if (error != ENOIOCTL)
- return (error);
- error = ttioctl(tp, cmd, data, flag);
- if (error != ENOIOCTL)
- return (error);
- return (ENOTTY);
-}
-
-static inline int
-__xencons_put_char(int ch)
-{
- char _ch = (char)ch;
- if ( (wp - wc) == WBUF_SIZE )
- return 0;
- wbuf[WBUF_MASK(wp++)] = _ch;
- return 1;
-}
-
-
-static void
-xcstart(struct tty *tp)
-{
- int flags;
- int s;
- boolean_t cons_full = FALSE;
-
- s = spltty();
- CN_LOCK(cn_mtx, flags);
- if (tp->t_state & (TS_TIMEOUT | TS_TTSTOP)) {
- ttwwakeup(tp);
- CN_UNLOCK(cn_mtx, flags);
- return;
- }
-
- tp->t_state |= TS_BUSY;
- while (tp->t_outq.c_cc != 0 && !cons_full)
- cons_full = xcons_putc(getc(&tp->t_outq));
-
- /* if the console is close to full leave our state as busy */
- if (!cons_full) {
- tp->t_state &= ~TS_BUSY;
- ttwwakeup(tp);
- } else {
- /* let the timeout kick us in a bit */
- xc_start_needed = TRUE;
- }
- CN_UNLOCK(cn_mtx, flags);
- splx(s);
-}
-
-static void
-xcstop(struct tty *tp, int flag)
-{
-
- if (tp->t_state & TS_BUSY) {
- if ((tp->t_state & TS_TTSTOP) == 0) {
- tp->t_state |= TS_FLUSH;
- }
- }
-}
-
-static void
-xc_timeout(void *v)
-{
- struct tty *tp;
- int c;
-
- tp = (struct tty *)v;
-
- while ((c = xccncheckc(NULL)) != -1) {
- if (tp->t_state & TS_ISOPEN) {
- (*linesw[tp->t_line]->l_rint)(c, tp);
- }
- }
-
- if (xc_start_needed) {
- xc_start_needed = FALSE;
- xcstart(tp);
- }
-
- callout_reset(&xc_callout, XC_POLLTIME, xc_timeout, tp);
-}
-
-/*
- * Set line parameters.
- */
-int
-xcparam(struct tty *tp, struct termios *t)
-{
- tp->t_ispeed = t->c_ispeed;
- tp->t_ospeed = t->c_ospeed;
- tp->t_cflag = t->c_cflag;
- return (0);
-}
-
-
-static device_method_t xc_methods[] = {
- DEVMETHOD(device_identify, xc_identify),
- DEVMETHOD(device_probe, xc_probe),
- DEVMETHOD(device_attach, xc_attach),
- {0, 0}
-};
-
-static driver_t xc_driver = {
- driver_name,
- xc_methods,
- sizeof(struct xc_softc),
-};
-
-/*** Forcibly flush console data before dying. ***/
-void
-xcons_force_flush(void)
-{
- ctrl_msg_t msg;
- int sz;
-
- /*
- * We use dangerous control-interface functions that require a quiescent
- * system and no interrupts. Try to ensure this with a global cli().
- */
- cli();
-
- /* Spin until console data is flushed through to the domain controller. */
- while ( (wc != wp) && !ctrl_if_transmitter_empty() )
- {
- /* Interrupts are disabled -- we must manually reap responses. */
- ctrl_if_discard_responses();
-
- if ( (sz = wp - wc) == 0 )
- continue;
- if ( sz > sizeof(msg.msg) )
- sz = sizeof(msg.msg);
- if ( sz > (WBUF_SIZE - WBUF_MASK(wc)) )
- sz = WBUF_SIZE - WBUF_MASK(wc);
-
- msg.type = CMSG_CONSOLE;
- msg.subtype = CMSG_CONSOLE_DATA;
- msg.length = sz;
- memcpy(msg.msg, &wbuf[WBUF_MASK(wc)], sz);
-
- if ( ctrl_if_send_message_noblock(&msg, NULL, 0) == 0 )
- wc += sz;
- }
-}
-
-DRIVER_MODULE(xc, xenbus, xc_driver, xc_devclass, 0, 0);
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/xen/misc/evtchn_dev.c
--- a/freebsd-5.3-xen-sparse/i386-xen/xen/misc/evtchn_dev.c Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,412 +0,0 @@
-/******************************************************************************
- * evtchn.c
- *
- * Xenolinux driver for receiving and demuxing event-channel signals.
- *
- * Copyright (c) 2004, K A Fraser
- */
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/uio.h>
-#include <sys/bus.h>
-#include <sys/malloc.h>
-#include <sys/kernel.h>
-#include <sys/lock.h>
-#include <sys/mutex.h>
-#include <sys/selinfo.h>
-#include <sys/poll.h>
-#include <sys/conf.h>
-#include <sys/fcntl.h>
-#include <sys/ioccom.h>
-
-#include <machine/cpufunc.h>
-#include <machine/intr_machdep.h>
-#include <machine/xen-os.h>
-#include <machine/xen_intr.h>
-#include <machine/bus.h>
-#include <sys/rman.h>
-#include <machine/resource.h>
-#include <machine/synch_bitops.h>
-
-#include <machine/hypervisor.h>
-
-
-typedef struct evtchn_sotfc {
-
- struct selinfo ev_rsel;
-} evtchn_softc_t;
-
-
-#ifdef linuxcrap
-/* NB. This must be shared amongst drivers if more things go in /dev/xen */
-static devfs_handle_t xen_dev_dir;
-#endif
-
-/* Only one process may open /dev/xen/evtchn at any time. */
-static unsigned long evtchn_dev_inuse;
-
-/* Notification ring, accessed via /dev/xen/evtchn. */
-
-#define EVTCHN_RING_SIZE 2048 /* 2048 16-bit entries */
-
-#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1))
-static uint16_t *ring;
-static unsigned int ring_cons, ring_prod, ring_overflow;
-
-/* Which ports is user-space bound to? */
-static uint32_t bound_ports[32];
-
-/* Unique address for processes to sleep on */
-static void *evtchn_waddr = ˚
-
-static struct mtx lock, upcall_lock;
-
-static d_read_t evtchn_read;
-static d_write_t evtchn_write;
-static d_ioctl_t evtchn_ioctl;
-static d_poll_t evtchn_poll;
-static d_open_t evtchn_open;
-static d_close_t evtchn_close;
-
-
-void
-evtchn_device_upcall(int port)
-{
- mtx_lock(&upcall_lock);
-
- mask_evtchn(port);
- clear_evtchn(port);
-
- if ( ring != NULL ) {
- if ( (ring_prod - ring_cons) < EVTCHN_RING_SIZE ) {
- ring[EVTCHN_RING_MASK(ring_prod)] = (uint16_t)port;
- if ( ring_cons == ring_prod++ ) {
- wakeup(evtchn_waddr);
- }
- }
- else {
- ring_overflow = 1;
- }
- }
-
- mtx_unlock(&upcall_lock);
-}
-
-static void
-__evtchn_reset_buffer_ring(void)
-{
- /* Initialise the ring to empty. Clear errors. */
- ring_cons = ring_prod = ring_overflow = 0;
-}
-
-static int
-evtchn_read(struct cdev *dev, struct uio *uio, int ioflag)
-{
- int rc;
- unsigned int count, c, p, sst = 0, bytes1 = 0, bytes2 = 0;
- count = uio->uio_resid;
-
- count &= ~1; /* even number of bytes */
-
- if ( count == 0 )
- {
- rc = 0;
- goto out;
- }
-
- if ( count > PAGE_SIZE )
- count = PAGE_SIZE;
-
- for ( ; ; ) {
- if ( (c = ring_cons) != (p = ring_prod) )
- break;
-
- if ( ring_overflow ) {
- rc = EFBIG;
- goto out;
- }
-
- if (sst != 0) {
- rc = EINTR;
- goto out;
- }
-
- /* PCATCH == check for signals before and after sleeping
- * PWAIT == priority of waiting on resource
- */
- sst = tsleep(evtchn_waddr, PWAIT|PCATCH, "evchwt", 10);
- }
-
- /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
- if ( ((c ^ p) & EVTCHN_RING_SIZE) != 0 ) {
- bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) * sizeof(uint16_t);
- bytes2 = EVTCHN_RING_MASK(p) * sizeof(uint16_t);
- }
- else {
- bytes1 = (p - c) * sizeof(uint16_t);
- bytes2 = 0;
- }
-
- /* Truncate chunks according to caller's maximum byte count. */
- if ( bytes1 > count ) {
- bytes1 = count;
- bytes2 = 0;
- }
- else if ( (bytes1 + bytes2) > count ) {
- bytes2 = count - bytes1;
- }
-
- if ( uiomove(&ring[EVTCHN_RING_MASK(c)], bytes1, uio) ||
- ((bytes2 != 0) && uiomove(&ring[0], bytes2, uio)))
- /* keeping this around as its replacement is not equivalent
- * copyout(&ring[0], &buf[bytes1], bytes2)
- */
- {
- rc = EFAULT;
- goto out;
- }
-
- ring_cons += (bytes1 + bytes2) / sizeof(uint16_t);
-
- rc = bytes1 + bytes2;
-
- out:
-
- return rc;
-}
-
-static int
-evtchn_write(struct cdev *dev, struct uio *uio, int ioflag)
-{
- int rc, i, count;
-
- count = uio->uio_resid;
-
- uint16_t *kbuf = (uint16_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
-
-
- if ( kbuf == NULL )
- return ENOMEM;
-
- count &= ~1; /* even number of bytes */
-
- if ( count == 0 ) {
- rc = 0;
- goto out;
- }
-
- if ( count > PAGE_SIZE )
- count = PAGE_SIZE;
-
- if ( uiomove(kbuf, count, uio) != 0 ) {
- rc = EFAULT;
- goto out;
- }
-
- mtx_lock_spin(&lock);
- for ( i = 0; i < (count/2); i++ )
- if ( test_bit(kbuf[i], &bound_ports[0]) )
- unmask_evtchn(kbuf[i]);
- mtx_unlock_spin(&lock);
-
- rc = count;
-
- out:
- free(kbuf, M_DEVBUF);
- return rc;
-}
-
-static int
-evtchn_ioctl(struct cdev *dev, unsigned long cmd, caddr_t arg,
- int mode, struct thread *td __unused)
-{
- int rc = 0;
-
- mtx_lock_spin(&lock);
-
- switch ( cmd )
- {
- case EVTCHN_RESET:
- __evtchn_reset_buffer_ring();
- break;
- case EVTCHN_BIND:
- if ( !synch_test_and_set_bit((int)arg, &bound_ports[0]) )
- unmask_evtchn((int)arg);
- else
- rc = EINVAL;
- break;
- case EVTCHN_UNBIND:
- if ( synch_test_and_clear_bit((int)arg, &bound_ports[0]) )
- mask_evtchn((int)arg);
- else
- rc = EINVAL;
- break;
- default:
- rc = ENOSYS;
- break;
- }
-
- mtx_unlock_spin(&lock);
-
- return rc;
-}
-
-static int
-evtchn_poll(struct cdev *dev, int poll_events, struct thread *td)
-{
-
- evtchn_softc_t *sc;
- unsigned int mask = POLLOUT | POLLWRNORM;
-
- sc = dev->si_drv1;
-
- if ( ring_cons != ring_prod )
- mask |= POLLIN | POLLRDNORM;
- else if ( ring_overflow )
- mask = POLLERR;
- else
- selrecord(td, &sc->ev_rsel);
-
-
- return mask;
-}
-
-
-static int
-evtchn_open(struct cdev *dev, int flag, int otyp, struct thread *td)
-{
- uint16_t *_ring;
-
- if (flag & O_NONBLOCK)
- return EBUSY;
-
- if ( synch_test_and_set_bit(0, &evtchn_dev_inuse) )
- return EBUSY;
-
- if ( (_ring = (uint16_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK)) == NULL )
- return ENOMEM;
-
- mtx_lock_spin(&lock);
- ring = _ring;
- __evtchn_reset_buffer_ring();
- mtx_unlock_spin(&lock);
-
-
- return 0;
-}
-
-static int
-evtchn_close(struct cdev *dev, int flag, int otyp, struct thread *td __unused)
-{
- int i;
-
- mtx_lock_spin(&lock);
- if (ring != NULL) {
- free(ring, M_DEVBUF);
- ring = NULL;
- }
- for ( i = 0; i < NR_EVENT_CHANNELS; i++ )
- if ( synch_test_and_clear_bit(i, &bound_ports[0]) )
- mask_evtchn(i);
- mtx_unlock_spin(&lock);
-
- evtchn_dev_inuse = 0;
-
- return 0;
-}
-
-
-
-/* XXX wild assed guess as to a safe major number */
-#define EVTCHN_MAJOR 140
-
-static struct cdevsw evtchn_devsw = {
- d_version: D_VERSION_00,
- d_open: evtchn_open,
- d_close: evtchn_close,
- d_read: evtchn_read,
- d_write: evtchn_write,
- d_ioctl: evtchn_ioctl,
- d_poll: evtchn_poll,
- d_name: "evtchn",
- d_maj: EVTCHN_MAJOR,
- d_flags: 0,
-};
-
-
-/* XXX - if this device is ever supposed to support use by more than one
process
- * this global static will have to go away
- */
-static struct cdev *evtchn_dev;
-
-
-
-static int
-evtchn_init(void *dummy __unused)
-{
- /* XXX I believe we don't need these leaving them here for now until we
- * have some semblance of it working
- */
-#if 0
- devfs_handle_t symlink_handle;
- int err, pos;
- char link_dest[64];
-#endif
- mtx_init(&upcall_lock, "evtchup", NULL, MTX_DEF);
-
- /* (DEVFS) create '/dev/misc/evtchn'. */
- evtchn_dev = make_dev(&evtchn_devsw, 0, UID_ROOT, GID_WHEEL, 0600,
"xen/evtchn");
-
- mtx_init(&lock, "evch", NULL, MTX_SPIN | MTX_NOWITNESS);
-
- evtchn_dev->si_drv1 = malloc(sizeof(evtchn_softc_t), M_DEVBUF, M_WAITOK);
- bzero(evtchn_dev->si_drv1, sizeof(evtchn_softc_t));
-
- /* XXX I don't think we need any of this rubbish */
-#if 0
- if ( err != 0 )
- {
- printk(KERN_ALERT "Could not register /dev/misc/evtchn\n");
- return err;
- }
-
- /* (DEVFS) create directory '/dev/xen'. */
- xen_dev_dir = devfs_mk_dir(NULL, "xen", NULL);
-
- /* (DEVFS) &link_dest[pos] == '../misc/evtchn'. */
- pos = devfs_generate_path(evtchn_miscdev.devfs_handle,
- &link_dest[3],
- sizeof(link_dest) - 3);
- if ( pos >= 0 )
- strncpy(&link_dest[pos], "../", 3);
- /* (DEVFS) symlink '/dev/xen/evtchn' -> '../misc/evtchn'. */
- (void)devfs_mk_symlink(xen_dev_dir,
- "evtchn",
- DEVFS_FL_DEFAULT,
- &link_dest[pos],
- &symlink_handle,
- NULL);
-
- /* (DEVFS) automatically destroy the symlink with its destination. */
- devfs_auto_unregister(evtchn_miscdev.devfs_handle, symlink_handle);
-#endif
- printk("Event-channel device installed.\n");
-
- return 0;
-}
-
-
-SYSINIT(evtchn_init, SI_SUB_DRIVERS, SI_ORDER_FIRST, evtchn_init, NULL);
-
-
-#if 0
-
-static void cleanup_module(void)
-{
- destroy_dev(evtchn_dev);
-;
-}
-
-module_init(init_module);
-module_exit(cleanup_module);
-#endif
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/xen/misc/npx.c
--- a/freebsd-5.3-xen-sparse/i386-xen/xen/misc/npx.c Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,1109 +0,0 @@
-/*-
- * Copyright (c) 1990 William Jolitz.
- * Copyright (c) 1991 The Regents of the University of California.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * from: @(#)npx.c 7.2 (Berkeley) 5/12/91
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/i386/isa/npx.c,v 1.144 2003/11/03 21:53:38 jhb Exp
$");
-
-#include "opt_cpu.h"
-#include "opt_debug_npx.h"
-#include "opt_isa.h"
-#include "opt_npx.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/bus.h>
-#include <sys/kernel.h>
-#include <sys/lock.h>
-#include <sys/malloc.h>
-#include <sys/module.h>
-#include <sys/mutex.h>
-#include <sys/mutex.h>
-#include <sys/proc.h>
-#include <sys/smp.h>
-#include <sys/sysctl.h>
-#include <machine/bus.h>
-#include <sys/rman.h>
-#ifdef NPX_DEBUG
-#include <sys/syslog.h>
-#endif
-#include <sys/signalvar.h>
-#include <sys/user.h>
-
-#include <machine/asmacros.h>
-#include <machine/cputypes.h>
-#include <machine/frame.h>
-#include <machine/md_var.h>
-#include <machine/pcb.h>
-#include <machine/psl.h>
-#include <machine/clock.h>
-#include <machine/resource.h>
-#include <machine/specialreg.h>
-#include <machine/segments.h>
-#include <machine/ucontext.h>
-
-#include <machine/multicall.h>
-
-#include <i386/isa/icu.h>
-#ifdef PC98
-#include <pc98/pc98/pc98.h>
-#else
-#include <i386/isa/isa.h>
-#endif
-#include <machine/intr_machdep.h>
-#ifdef DEV_ISA
-#include <isa/isavar.h>
-#endif
-
-#if !defined(CPU_ENABLE_SSE) && defined(I686_CPU)
-#define CPU_ENABLE_SSE
-#endif
-#if defined(CPU_DISABLE_SSE)
-#undef CPU_ENABLE_SSE
-#endif
-
-/*
- * 387 and 287 Numeric Coprocessor Extension (NPX) Driver.
- */
-
-/* Configuration flags. */
-#define NPX_DISABLE_I586_OPTIMIZED_BCOPY (1 << 0)
-#define NPX_DISABLE_I586_OPTIMIZED_BZERO (1 << 1)
-#define NPX_DISABLE_I586_OPTIMIZED_COPYIO (1 << 2)
-
-#if defined(__GNUC__) && !defined(lint)
-
-#define fldcw(addr) __asm("fldcw %0" : : "m" (*(addr)))
-#define fnclex() __asm("fnclex")
-#define fninit() __asm("fninit")
-#define fnsave(addr) __asm __volatile("fnsave %0" : "=m"
(*(addr)))
-#define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m"
(*(addr)))
-#define fnstsw(addr) __asm __volatile("fnstsw %0" : "=m"
(*(addr)))
-#define fp_divide_by_0() __asm("fldz; fld1; fdiv %st,%st(1);
fnop")
-#define frstor(addr) __asm("frstor %0" : : "m" (*(addr)))
-#ifdef CPU_ENABLE_SSE
-#define fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr)))
-#define fxsave(addr) __asm __volatile("fxsave %0" : "=m"
(*(addr)))
-#endif
-#define start_emulating() __asm("smsw %%ax; orb %0,%%al; lmsw
%%ax" \
- : : "n" (CR0_TS) : "ax")
-#define stop_emulating() __asm("clts")
-
-#else /* not __GNUC__ */
-
-void fldcw(caddr_t addr);
-void fnclex(void);
-void fninit(void);
-void fnsave(caddr_t addr);
-void fnstcw(caddr_t addr);
-void fnstsw(caddr_t addr);
-void fp_divide_by_0(void);
-void frstor(caddr_t addr);
-#ifdef CPU_ENABLE_SSE
-void fxsave(caddr_t addr);
-void fxrstor(caddr_t addr);
-#endif
-void start_emulating(void);
-void stop_emulating(void);
-
-#endif /* __GNUC__ */
-
-#ifdef CPU_ENABLE_SSE
-#define GET_FPU_CW(thread) \
- (cpu_fxsr ? \
- (thread)->td_pcb->pcb_save.sv_xmm.sv_env.en_cw : \
- (thread)->td_pcb->pcb_save.sv_87.sv_env.en_cw)
-#define GET_FPU_SW(thread) \
- (cpu_fxsr ? \
- (thread)->td_pcb->pcb_save.sv_xmm.sv_env.en_sw : \
- (thread)->td_pcb->pcb_save.sv_87.sv_env.en_sw)
-#else /* CPU_ENABLE_SSE */
-#define GET_FPU_CW(thread) \
- (thread->td_pcb->pcb_save.sv_87.sv_env.en_cw)
-#define GET_FPU_SW(thread) \
- (thread->td_pcb->pcb_save.sv_87.sv_env.en_sw)
-#endif /* CPU_ENABLE_SSE */
-
-typedef u_char bool_t;
-
-static void fpusave(union savefpu *);
-static void fpurstor(union savefpu *);
-static int npx_attach(device_t dev);
-static void npx_identify(driver_t *driver, device_t parent);
-#if 0
-static void npx_intr(void *);
-#endif
-static int npx_probe(device_t dev);
-#ifdef I586_CPU_XXX
-static long timezero(const char *funcname,
- void (*func)(void *buf, size_t len));
-#endif /* I586_CPU */
-
-int hw_float; /* XXX currently just alias for npx_exists */
-
-SYSCTL_INT(_hw,HW_FLOATINGPT, floatingpoint,
- CTLFLAG_RD, &hw_float, 0,
- "Floatingpoint instructions executed in hardware");
-#if 0
-static volatile u_int npx_intrs_while_probing;
-#endif
-static union savefpu npx_cleanstate;
-static bool_t npx_cleanstate_ready;
-static bool_t npx_ex16;
-static bool_t npx_exists;
-static bool_t npx_irq13;
-
-alias_for_inthand_t probetrap;
-#if 0
-__asm(" \n\
- .text \n\
- .p2align 2,0x90 \n\
- .type " __XSTRING(CNAME(probetrap)) ",@function \n\
-" __XSTRING(CNAME(probetrap)) ": \n\
- ss \n\
- incl " __XSTRING(CNAME(npx_traps_while_probing)) " \n\
- fnclex \n\
- iret \n\
-");
-#endif
-/*
- * Identify routine. Create a connection point on our parent for probing.
- */
-static void
-npx_identify(driver, parent)
- driver_t *driver;
- device_t parent;
-{
- device_t child;
-
- child = BUS_ADD_CHILD(parent, 0, "npx", 0);
- if (child == NULL)
- panic("npx_identify");
-}
-#if 0
-/*
- * Do minimal handling of npx interrupts to convert them to traps.
- */
-static void
-npx_intr(dummy)
- void *dummy;
-{
- struct thread *td;
-
- npx_intrs_while_probing++;
-
- /*
- * The BUSY# latch must be cleared in all cases so that the next
- * unmasked npx exception causes an interrupt.
- */
-#ifdef PC98
- outb(0xf8, 0);
-#else
- outb(0xf0, 0);
-#endif
-
- /*
- * fpcurthread is normally non-null here. In that case, schedule an
- * AST to finish the exception handling in the correct context
- * (this interrupt may occur after the thread has entered the
- * kernel via a syscall or an interrupt). Otherwise, the npx
- * state of the thread that caused this interrupt must have been
- * pushed to the thread's pcb, and clearing of the busy latch
- * above has finished the (essentially null) handling of this
- * interrupt. Control will eventually return to the instruction
- * that caused it and it will repeat. We will eventually (usually
- * soon) win the race to handle the interrupt properly.
- */
- td = PCPU_GET(fpcurthread);
- if (td != NULL) {
- td->td_pcb->pcb_flags |= PCB_NPXTRAP;
- mtx_lock_spin(&sched_lock);
- td->td_flags |= TDF_ASTPENDING;
- mtx_unlock_spin(&sched_lock);
- }
-}
-#endif
-
-static int
-npx_probe(device_t dev)
-{
-
- return 1;
-}
-
-#if 0
-/*
- * Probe routine. Initialize cr0 to give correct behaviour for [f]wait
- * whether the device exists or not (XXX should be elsewhere). Set flags
- * to tell npxattach() what to do. Modify device struct if npx doesn't
- * need to use interrupts. Return 0 if device exists.
- */
-static int
-npx_probe(device_t dev)
-{
- struct gate_descriptor save_idt_npxtrap;
- struct resource *ioport_res, *irq_res;
- void *irq_cookie;
- int ioport_rid, irq_num, irq_rid;
- u_short control;
- u_short status;
-
- save_idt_npxtrap = idt[IDT_MF];
- setidt(IDT_MF, probetrap, SDT_SYS386TGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
- ioport_rid = 0;
- ioport_res = bus_alloc_resource(dev, SYS_RES_IOPORT, &ioport_rid,
- IO_NPX, IO_NPX, IO_NPXSIZE, RF_ACTIVE);
- if (ioport_res == NULL)
- panic("npx: can't get ports");
-#ifdef PC98
- if (resource_int_value("npx", 0, "irq", &irq_num) != 0)
- irq_num = 8;
-#else
- if (resource_int_value("npx", 0, "irq", &irq_num) != 0)
- irq_num = 13;
-#endif
- irq_rid = 0;
- irq_res = bus_alloc_resource(dev, SYS_RES_IRQ, &irq_rid, irq_num,
- irq_num, 1, RF_ACTIVE);
- if (irq_res == NULL)
- panic("npx: can't get IRQ");
- if (bus_setup_intr(dev, irq_res, INTR_TYPE_MISC | INTR_FAST, npx_intr,
- NULL, &irq_cookie) != 0)
- panic("npx: can't create intr");
-
- /*
- * Partially reset the coprocessor, if any. Some BIOS's don't reset
- * it after a warm boot.
- */
-#ifdef PC98
- outb(0xf8,0);
-#else
- outb(0xf1, 0); /* full reset on some systems, NOP on others */
- outb(0xf0, 0); /* clear BUSY# latch */
-#endif
- /*
- * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT
- * instructions. We must set the CR0_MP bit and use the CR0_TS
- * bit to control the trap, because setting the CR0_EM bit does
- * not cause WAIT instructions to trap. It's important to trap
- * WAIT instructions - otherwise the "wait" variants of no-wait
- * control instructions would degenerate to the "no-wait" variants
- * after FP context switches but work correctly otherwise. It's
- * particularly important to trap WAITs when there is no NPX -
- * otherwise the "wait" variants would always degenerate.
- *
- * Try setting CR0_NE to get correct error reporting on 486DX's.
- * Setting it should fail or do nothing on lesser processors.
- */
- load_cr0(rcr0() | CR0_MP | CR0_NE);
- /*
- * But don't trap while we're probing.
- */
- stop_emulating();
- /*
- * Finish resetting the coprocessor, if any. If there is an error
- * pending, then we may get a bogus IRQ13, but npx_intr() will handle
- * it OK. Bogus halts have never been observed, but we enabled
- * IRQ13 and cleared the BUSY# latch early to handle them anyway.
- */
- fninit();
-
- device_set_desc(dev, "math processor");
-
- /*
- * Don't use fwait here because it might hang.
- * Don't use fnop here because it usually hangs if there is no FPU.
- */
- DELAY(1000); /* wait for any IRQ13 */
-#ifdef DIAGNOSTIC
- if (npx_intrs_while_probing != 0)
- printf("fninit caused %u bogus npx interrupt(s)\n",
- npx_intrs_while_probing);
- if (npx_traps_while_probing != 0)
- printf("fninit caused %u bogus npx trap(s)\n",
- npx_traps_while_probing);
-#endif
- /*
- * Check for a status of mostly zero.
- */
- status = 0x5a5a;
- fnstsw(&status);
- if ((status & 0xb8ff) == 0) {
- /*
- * Good, now check for a proper control word.
- */
- control = 0x5a5a;
- fnstcw(&control);
- if ((control & 0x1f3f) == 0x033f) {
- hw_float = npx_exists = 1;
- /*
- * We have an npx, now divide by 0 to see if exception
- * 16 works.
- */
- control &= ~(1 << 2); /* enable divide by 0 trap */
- fldcw(&control);
-#ifdef FPU_ERROR_BROKEN
- /*
- * FPU error signal doesn't work on some CPU
- * accelerator board.
- */
- npx_ex16 = 1;
- return (0);
-#endif
- npx_traps_while_probing = npx_intrs_while_probing = 0;
- fp_divide_by_0();
- if (npx_traps_while_probing != 0) {
- /*
- * Good, exception 16 works.
- */
- npx_ex16 = 1;
- goto no_irq13;
- }
- if (npx_intrs_while_probing != 0) {
- /*
- * Bad, we are stuck with IRQ13.
- */
- npx_irq13 = 1;
- idt[IDT_MF] = save_idt_npxtrap;
-#ifdef SMP
- if (mp_ncpus > 1)
- panic("npx0 cannot use IRQ 13 on an SMP
system");
-#endif
- return (0);
- }
- /*
- * Worse, even IRQ13 is broken. Use emulator.
- */
- }
- }
- /*
- * Probe failed, but we want to get to npxattach to initialize the
- * emulator and say that it has been installed. XXX handle devices
- * that aren't really devices better.
- */
-#ifdef SMP
- if (mp_ncpus > 1)
- panic("npx0 cannot be emulated on an SMP system");
-#endif
- /* FALLTHROUGH */
-no_irq13:
- idt[IDT_MF] = save_idt_npxtrap;
- bus_teardown_intr(dev, irq_res, irq_cookie);
-
- /*
- * XXX hack around brokenness of bus_teardown_intr(). If we left the
- * irq active then we would get it instead of exception 16.
- */
- {
- struct intsrc *isrc;
-
- isrc = intr_lookup_source(irq_num);
- isrc->is_pic->pic_disable_source(isrc);
- }
-
- bus_release_resource(dev, SYS_RES_IRQ, irq_rid, irq_res);
- bus_release_resource(dev, SYS_RES_IOPORT, ioport_rid, ioport_res);
- return (0);
-}
-#endif
-
-/*
- * Attach routine - announce which it is, and wire into system
- */
-static int
-npx_attach(device_t dev)
-{
- int flags;
- register_t s;
-
- if (resource_int_value("npx", 0, "flags", &flags) != 0)
- flags = 0;
-
- if (flags)
- device_printf(dev, "flags 0x%x ", flags);
- if (npx_irq13) {
- device_printf(dev, "using IRQ 13 interface\n");
- } else {
- if (npx_ex16)
- device_printf(dev, "INT 16 interface\n");
- else
- device_printf(dev, "WARNING: no FPU!\n");
- }
- npxinit(__INITIAL_NPXCW__);
-
- if (npx_cleanstate_ready == 0) {
- s = intr_disable();
- stop_emulating();
- fpusave(&npx_cleanstate);
- start_emulating();
- npx_cleanstate_ready = 1;
- intr_restore(s);
- }
-#ifdef I586_CPU_XXX
- if (cpu_class == CPUCLASS_586 && npx_ex16 && npx_exists &&
- timezero("i586_bzero()", i586_bzero) <
- timezero("bzero()", bzero) * 4 / 5) {
- if (!(flags & NPX_DISABLE_I586_OPTIMIZED_BCOPY))
- bcopy_vector = i586_bcopy;
- if (!(flags & NPX_DISABLE_I586_OPTIMIZED_BZERO))
- bzero_vector = i586_bzero;
- if (!(flags & NPX_DISABLE_I586_OPTIMIZED_COPYIO)) {
- copyin_vector = i586_copyin;
- copyout_vector = i586_copyout;
- }
- }
-#endif
-
- return (0); /* XXX unused */
-}
-
-/*
- * Initialize floating point unit.
- */
-void
-npxinit(control)
- u_short control;
-{
- static union savefpu dummy;
- register_t savecrit;
-
- if (!npx_exists)
- return;
- /*
- * fninit has the same h/w bugs as fnsave. Use the detoxified
- * fnsave to throw away any junk in the fpu. npxsave() initializes
- * the fpu and sets fpcurthread = NULL as important side effects.
- */
- savecrit = intr_disable();
- npxsave(&dummy);
- stop_emulating();
-#ifdef CPU_ENABLE_SSE
- /* XXX npxsave() doesn't actually initialize the fpu in the SSE case. */
- if (cpu_fxsr)
- fninit();
-#endif
- fldcw(&control);
- start_emulating();
- intr_restore(savecrit);
-}
-
-/*
- * Free coprocessor (if we have it).
- */
-void
-npxexit(td)
- struct thread *td;
-{
- register_t savecrit;
-
- savecrit = intr_disable();
- if (curthread == PCPU_GET(fpcurthread))
- npxsave(&PCPU_GET(curpcb)->pcb_save);
- intr_restore(savecrit);
-#ifdef NPX_DEBUG
- if (npx_exists) {
- u_int masked_exceptions;
-
- masked_exceptions = GET_FPU_CW(td) & GET_FPU_SW(td) & 0x7f;
- /*
- * Log exceptions that would have trapped with the old
- * control word (overflow, divide by 0, and invalid operand).
- */
- if (masked_exceptions & 0x0d)
- log(LOG_ERR,
- "pid %d (%s) exited with masked floating point exceptions 0x%02x\n",
- td->td_proc->p_pid, td->td_proc->p_comm,
- masked_exceptions);
- }
-#endif
-}
-
-int
-npxformat()
-{
-
- if (!npx_exists)
- return (_MC_FPFMT_NODEV);
-#ifdef CPU_ENABLE_SSE
- if (cpu_fxsr)
- return (_MC_FPFMT_XMM);
-#endif
- return (_MC_FPFMT_387);
-}
-
-/*
- * The following mechanism is used to ensure that the FPE_... value
- * that is passed as a trapcode to the signal handler of the user
- * process does not have more than one bit set.
- *
- * Multiple bits may be set if the user process modifies the control
- * word while a status word bit is already set. While this is a sign
- * of bad coding, we have no choise than to narrow them down to one
- * bit, since we must not send a trapcode that is not exactly one of
- * the FPE_ macros.
- *
- * The mechanism has a static table with 127 entries. Each combination
- * of the 7 FPU status word exception bits directly translates to a
- * position in this table, where a single FPE_... value is stored.
- * This FPE_... value stored there is considered the "most important"
- * of the exception bits and will be sent as the signal code. The
- * precedence of the bits is based upon Intel Document "Numerical
- * Applications", Chapter "Special Computational Situations".
- *
- * The macro to choose one of these values does these steps: 1) Throw
- * away status word bits that cannot be masked. 2) Throw away the bits
- * currently masked in the control word, assuming the user isn't
- * interested in them anymore. 3) Reinsert status word bit 7 (stack
- * fault) if it is set, which cannot be masked but must be presered.
- * 4) Use the remaining bits to point into the trapcode table.
- *
- * The 6 maskable bits in order of their preference, as stated in the
- * above referenced Intel manual:
- * 1 Invalid operation (FP_X_INV)
- * 1a Stack underflow
- * 1b Stack overflow
- * 1c Operand of unsupported format
- * 1d SNaN operand.
- * 2 QNaN operand (not an exception, irrelavant here)
- * 3 Any other invalid-operation not mentioned above or zero divide
- * (FP_X_INV, FP_X_DZ)
- * 4 Denormal operand (FP_X_DNML)
- * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL)
- * 6 Inexact result (FP_X_IMP)
- */
-static char fpetable[128] = {
- 0,
- FPE_FLTINV, /* 1 - INV */
- FPE_FLTUND, /* 2 - DNML */
- FPE_FLTINV, /* 3 - INV | DNML */
- FPE_FLTDIV, /* 4 - DZ */
- FPE_FLTINV, /* 5 - INV | DZ */
- FPE_FLTDIV, /* 6 - DNML | DZ */
- FPE_FLTINV, /* 7 - INV | DNML | DZ */
- FPE_FLTOVF, /* 8 - OFL */
- FPE_FLTINV, /* 9 - INV | OFL */
- FPE_FLTUND, /* A - DNML | OFL */
- FPE_FLTINV, /* B - INV | DNML | OFL */
- FPE_FLTDIV, /* C - DZ | OFL */
- FPE_FLTINV, /* D - INV | DZ | OFL */
- FPE_FLTDIV, /* E - DNML | DZ | OFL */
- FPE_FLTINV, /* F - INV | DNML | DZ | OFL */
- FPE_FLTUND, /* 10 - UFL */
- FPE_FLTINV, /* 11 - INV | UFL */
- FPE_FLTUND, /* 12 - DNML | UFL */
- FPE_FLTINV, /* 13 - INV | DNML | UFL */
- FPE_FLTDIV, /* 14 - DZ | UFL */
- FPE_FLTINV, /* 15 - INV | DZ | UFL */
- FPE_FLTDIV, /* 16 - DNML | DZ | UFL */
- FPE_FLTINV, /* 17 - INV | DNML | DZ | UFL */
- FPE_FLTOVF, /* 18 - OFL | UFL */
- FPE_FLTINV, /* 19 - INV | OFL | UFL */
- FPE_FLTUND, /* 1A - DNML | OFL | UFL */
- FPE_FLTINV, /* 1B - INV | DNML | OFL | UFL */
- FPE_FLTDIV, /* 1C - DZ | OFL | UFL */
- FPE_FLTINV, /* 1D - INV | DZ | OFL | UFL */
- FPE_FLTDIV, /* 1E - DNML | DZ | OFL | UFL */
- FPE_FLTINV, /* 1F - INV | DNML | DZ | OFL | UFL */
- FPE_FLTRES, /* 20 - IMP */
- FPE_FLTINV, /* 21 - INV | IMP */
- FPE_FLTUND, /* 22 - DNML | IMP */
- FPE_FLTINV, /* 23 - INV | DNML | IMP */
- FPE_FLTDIV, /* 24 - DZ | IMP */
- FPE_FLTINV, /* 25 - INV | DZ | IMP */
- FPE_FLTDIV, /* 26 - DNML | DZ | IMP */
- FPE_FLTINV, /* 27 - INV | DNML | DZ | IMP */
- FPE_FLTOVF, /* 28 - OFL | IMP */
- FPE_FLTINV, /* 29 - INV | OFL | IMP */
- FPE_FLTUND, /* 2A - DNML | OFL | IMP */
- FPE_FLTINV, /* 2B - INV | DNML | OFL | IMP */
- FPE_FLTDIV, /* 2C - DZ | OFL | IMP */
- FPE_FLTINV, /* 2D - INV | DZ | OFL | IMP */
- FPE_FLTDIV, /* 2E - DNML | DZ | OFL | IMP */
- FPE_FLTINV, /* 2F - INV | DNML | DZ | OFL | IMP */
- FPE_FLTUND, /* 30 - UFL | IMP */
- FPE_FLTINV, /* 31 - INV | UFL | IMP */
- FPE_FLTUND, /* 32 - DNML | UFL | IMP */
- FPE_FLTINV, /* 33 - INV | DNML | UFL | IMP */
- FPE_FLTDIV, /* 34 - DZ | UFL | IMP */
- FPE_FLTINV, /* 35 - INV | DZ | UFL | IMP */
- FPE_FLTDIV, /* 36 - DNML | DZ | UFL | IMP */
- FPE_FLTINV, /* 37 - INV | DNML | DZ | UFL | IMP */
- FPE_FLTOVF, /* 38 - OFL | UFL | IMP */
- FPE_FLTINV, /* 39 - INV | OFL | UFL | IMP */
- FPE_FLTUND, /* 3A - DNML | OFL | UFL | IMP */
- FPE_FLTINV, /* 3B - INV | DNML | OFL | UFL | IMP */
- FPE_FLTDIV, /* 3C - DZ | OFL | UFL | IMP */
- FPE_FLTINV, /* 3D - INV | DZ | OFL | UFL | IMP */
- FPE_FLTDIV, /* 3E - DNML | DZ | OFL | UFL | IMP */
- FPE_FLTINV, /* 3F - INV | DNML | DZ | OFL | UFL | IMP */
- FPE_FLTSUB, /* 40 - STK */
- FPE_FLTSUB, /* 41 - INV | STK */
- FPE_FLTUND, /* 42 - DNML | STK */
- FPE_FLTSUB, /* 43 - INV | DNML | STK */
- FPE_FLTDIV, /* 44 - DZ | STK */
- FPE_FLTSUB, /* 45 - INV | DZ | STK */
- FPE_FLTDIV, /* 46 - DNML | DZ | STK */
- FPE_FLTSUB, /* 47 - INV | DNML | DZ | STK */
- FPE_FLTOVF, /* 48 - OFL | STK */
- FPE_FLTSUB, /* 49 - INV | OFL | STK */
- FPE_FLTUND, /* 4A - DNML | OFL | STK */
- FPE_FLTSUB, /* 4B - INV | DNML | OFL | STK */
- FPE_FLTDIV, /* 4C - DZ | OFL | STK */
- FPE_FLTSUB, /* 4D - INV | DZ | OFL | STK */
- FPE_FLTDIV, /* 4E - DNML | DZ | OFL | STK */
- FPE_FLTSUB, /* 4F - INV | DNML | DZ | OFL | STK */
- FPE_FLTUND, /* 50 - UFL | STK */
- FPE_FLTSUB, /* 51 - INV | UFL | STK */
- FPE_FLTUND, /* 52 - DNML | UFL | STK */
- FPE_FLTSUB, /* 53 - INV | DNML | UFL | STK */
- FPE_FLTDIV, /* 54 - DZ | UFL | STK */
- FPE_FLTSUB, /* 55 - INV | DZ | UFL | STK */
- FPE_FLTDIV, /* 56 - DNML | DZ | UFL | STK */
- FPE_FLTSUB, /* 57 - INV | DNML | DZ | UFL | STK */
- FPE_FLTOVF, /* 58 - OFL | UFL | STK */
- FPE_FLTSUB, /* 59 - INV | OFL | UFL | STK */
- FPE_FLTUND, /* 5A - DNML | OFL | UFL | STK */
- FPE_FLTSUB, /* 5B - INV | DNML | OFL | UFL | STK */
- FPE_FLTDIV, /* 5C - DZ | OFL | UFL | STK */
- FPE_FLTSUB, /* 5D - INV | DZ | OFL | UFL | STK */
- FPE_FLTDIV, /* 5E - DNML | DZ | OFL | UFL | STK */
- FPE_FLTSUB, /* 5F - INV | DNML | DZ | OFL | UFL | STK */
- FPE_FLTRES, /* 60 - IMP | STK */
- FPE_FLTSUB, /* 61 - INV | IMP | STK */
- FPE_FLTUND, /* 62 - DNML | IMP | STK */
- FPE_FLTSUB, /* 63 - INV | DNML | IMP | STK */
- FPE_FLTDIV, /* 64 - DZ | IMP | STK */
- FPE_FLTSUB, /* 65 - INV | DZ | IMP | STK */
- FPE_FLTDIV, /* 66 - DNML | DZ | IMP | STK */
- FPE_FLTSUB, /* 67 - INV | DNML | DZ | IMP | STK */
- FPE_FLTOVF, /* 68 - OFL | IMP | STK */
- FPE_FLTSUB, /* 69 - INV | OFL | IMP | STK */
- FPE_FLTUND, /* 6A - DNML | OFL | IMP | STK */
- FPE_FLTSUB, /* 6B - INV | DNML | OFL | IMP | STK */
- FPE_FLTDIV, /* 6C - DZ | OFL | IMP | STK */
- FPE_FLTSUB, /* 6D - INV | DZ | OFL | IMP | STK */
- FPE_FLTDIV, /* 6E - DNML | DZ | OFL | IMP | STK */
- FPE_FLTSUB, /* 6F - INV | DNML | DZ | OFL | IMP | STK */
- FPE_FLTUND, /* 70 - UFL | IMP | STK */
- FPE_FLTSUB, /* 71 - INV | UFL | IMP | STK */
- FPE_FLTUND, /* 72 - DNML | UFL | IMP | STK */
- FPE_FLTSUB, /* 73 - INV | DNML | UFL | IMP | STK */
- FPE_FLTDIV, /* 74 - DZ | UFL | IMP | STK */
- FPE_FLTSUB, /* 75 - INV | DZ | UFL | IMP | STK */
- FPE_FLTDIV, /* 76 - DNML | DZ | UFL | IMP | STK */
- FPE_FLTSUB, /* 77 - INV | DNML | DZ | UFL | IMP | STK */
- FPE_FLTOVF, /* 78 - OFL | UFL | IMP | STK */
- FPE_FLTSUB, /* 79 - INV | OFL | UFL | IMP | STK */
- FPE_FLTUND, /* 7A - DNML | OFL | UFL | IMP | STK */
- FPE_FLTSUB, /* 7B - INV | DNML | OFL | UFL | IMP | STK */
- FPE_FLTDIV, /* 7C - DZ | OFL | UFL | IMP | STK */
- FPE_FLTSUB, /* 7D - INV | DZ | OFL | UFL | IMP | STK */
- FPE_FLTDIV, /* 7E - DNML | DZ | OFL | UFL | IMP | STK */
- FPE_FLTSUB, /* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */
-};
-
-/*
- * Preserve the FP status word, clear FP exceptions, then generate a SIGFPE.
- *
- * Clearing exceptions is necessary mainly to avoid IRQ13 bugs. We now
- * depend on longjmp() restoring a usable state. Restoring the state
- * or examining it might fail if we didn't clear exceptions.
- *
- * The error code chosen will be one of the FPE_... macros. It will be
- * sent as the second argument to old BSD-style signal handlers and as
- * "siginfo_t->si_code" (second argument) to SA_SIGINFO signal handlers.
- *
- * XXX the FP state is not preserved across signal handlers. So signal
- * handlers cannot afford to do FP unless they preserve the state or
- * longjmp() out. Both preserving the state and longjmp()ing may be
- * destroyed by IRQ13 bugs. Clearing FP exceptions is not an acceptable
- * solution for signals other than SIGFPE.
- */
-int
-npxtrap()
-{
- register_t savecrit;
- u_short control, status;
-
- if (!npx_exists) {
- printf("npxtrap: fpcurthread = %p, curthread = %p, npx_exists =
%d\n",
- PCPU_GET(fpcurthread), curthread, npx_exists);
- panic("npxtrap from nowhere");
- }
- savecrit = intr_disable();
-
- /*
- * Interrupt handling (for another interrupt) may have pushed the
- * state to memory. Fetch the relevant parts of the state from
- * wherever they are.
- */
- if (PCPU_GET(fpcurthread) != curthread) {
- control = GET_FPU_CW(curthread);
- status = GET_FPU_SW(curthread);
- } else {
- fnstcw(&control);
- fnstsw(&status);
- }
-
- if (PCPU_GET(fpcurthread) == curthread)
- fnclex();
- intr_restore(savecrit);
- return (fpetable[status & ((~control & 0x3f) | 0x40)]);
-}
-
-/*
- * Implement device not available (DNA) exception
- *
- * It would be better to switch FP context here (if curthread != fpcurthread)
- * and not necessarily for every context switch, but it is too hard to
- * access foreign pcb's.
- */
-
-static int err_count = 0;
-
-int
-npxdna()
-{
- struct pcb *pcb;
- register_t s;
- u_short control;
-
- if (!npx_exists)
- return (0);
- if (PCPU_GET(fpcurthread) == curthread) {
- printf("npxdna: fpcurthread == curthread %d times\n",
- ++err_count);
- stop_emulating();
- return (1);
- }
- if (PCPU_GET(fpcurthread) != NULL) {
- printf("npxdna: fpcurthread = %p (%d), curthread = %p (%d)\n",
- PCPU_GET(fpcurthread),
- PCPU_GET(fpcurthread)->td_proc->p_pid,
- curthread, curthread->td_proc->p_pid);
- panic("npxdna");
- }
- s = intr_disable();
- stop_emulating();
- /*
- * Record new context early in case frstor causes an IRQ13.
- */
- PCPU_SET(fpcurthread, curthread);
- pcb = PCPU_GET(curpcb);
-
- if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) {
- /*
- * This is the first time this thread has used the FPU or
- * the PCB doesn't contain a clean FPU state. Explicitly
- * initialize the FPU and load the default control word.
- */
- fninit();
- control = __INITIAL_NPXCW__;
- fldcw(&control);
- pcb->pcb_flags |= PCB_NPXINITDONE;
- } else {
- /*
- * The following frstor may cause an IRQ13 when the state
- * being restored has a pending error. The error will
- * appear to have been triggered by the current (npx) user
- * instruction even when that instruction is a no-wait
- * instruction that should not trigger an error (e.g.,
- * fnclex). On at least one 486 system all of the no-wait
- * instructions are broken the same as frstor, so our
- * treatment does not amplify the breakage. On at least
- * one 386/Cyrix 387 system, fnclex works correctly while
- * frstor and fnsave are broken, so our treatment breaks
- * fnclex if it is the first FPU instruction after a context
- * switch.
- */
- fpurstor(&pcb->pcb_save);
- }
- intr_restore(s);
-
- return (1);
-}
-
-/*
- * Wrapper for fnsave instruction, partly to handle hardware bugs. When npx
- * exceptions are reported via IRQ13, spurious IRQ13's may be triggered by
- * no-wait npx instructions. See the Intel application note AP-578 for
- * details. This doesn't cause any additional complications here. IRQ13's
- * are inherently asynchronous unless the CPU is frozen to deliver them --
- * one that started in userland may be delivered many instructions later,
- * after the process has entered the kernel. It may even be delivered after
- * the fnsave here completes. A spurious IRQ13 for the fnsave is handled in
- * the same way as a very-late-arriving non-spurious IRQ13 from user mode:
- * it is normally ignored at first because we set fpcurthread to NULL; it is
- * normally retriggered in npxdna() after return to user mode.
- *
- * npxsave() must be called with interrupts disabled, so that it clears
- * fpcurthread atomically with saving the state. We require callers to do the
- * disabling, since most callers need to disable interrupts anyway to call
- * npxsave() atomically with checking fpcurthread.
- *
- * A previous version of npxsave() went to great lengths to excecute fnsave
- * with interrupts enabled in case executing it froze the CPU. This case
- * can't happen, at least for Intel CPU/NPX's. Spurious IRQ13's don't imply
- * spurious freezes.
- */
-void
-npxsave(addr)
- union savefpu *addr;
-{
-
- stop_emulating();
- fpusave(addr);
-
- start_emulating();
- PCPU_SET(fpcurthread, NULL);
- queue_multicall0(__HYPERVISOR_fpu_taskswitch);
-}
-
-/*
- * This should be called with interrupts disabled and only when the owning
- * FPU thread is non-null.
- */
-void
-npxdrop()
-{
- struct thread *td;
-
- td = PCPU_GET(fpcurthread);
- PCPU_SET(fpcurthread, NULL);
- td->td_pcb->pcb_flags &= ~PCB_NPXINITDONE;
- start_emulating();
-}
-
-/*
- * Get the state of the FPU without dropping ownership (if possible).
- * It returns the FPU ownership status.
- */
-int
-npxgetregs(td, addr)
- struct thread *td;
- union savefpu *addr;
-{
- register_t s;
-
- if (!npx_exists)
- return (_MC_FPOWNED_NONE);
-
- if ((td->td_pcb->pcb_flags & PCB_NPXINITDONE) == 0) {
- if (npx_cleanstate_ready)
- bcopy(&npx_cleanstate, addr, sizeof(npx_cleanstate));
- else
- bzero(addr, sizeof(*addr));
- return (_MC_FPOWNED_NONE);
- }
- s = intr_disable();
- if (td == PCPU_GET(fpcurthread)) {
- fpusave(addr);
-#ifdef CPU_ENABLE_SSE
- if (!cpu_fxsr)
-#endif
- /*
- * fnsave initializes the FPU and destroys whatever
- * context it contains. Make sure the FPU owner
- * starts with a clean state next time.
- */
- npxdrop();
- intr_restore(s);
- return (_MC_FPOWNED_FPU);
- } else {
- intr_restore(s);
- bcopy(&td->td_pcb->pcb_save, addr, sizeof(*addr));
- return (_MC_FPOWNED_PCB);
- }
-}
-
-/*
- * Set the state of the FPU.
- */
-void
-npxsetregs(td, addr)
- struct thread *td;
- union savefpu *addr;
-{
- register_t s;
-
- if (!npx_exists)
- return;
-
- s = intr_disable();
- if (td == PCPU_GET(fpcurthread)) {
- fpurstor(addr);
- intr_restore(s);
- } else {
- intr_restore(s);
- bcopy(addr, &td->td_pcb->pcb_save, sizeof(*addr));
- }
- curthread->td_pcb->pcb_flags |= PCB_NPXINITDONE;
-}
-
-static void
-fpusave(addr)
- union savefpu *addr;
-{
-
-#ifdef CPU_ENABLE_SSE
- if (cpu_fxsr)
- fxsave(addr);
- else
-#endif
- fnsave(addr);
-}
-
-static void
-fpurstor(addr)
- union savefpu *addr;
-{
-
-#ifdef CPU_ENABLE_SSE
- if (cpu_fxsr)
- fxrstor(addr);
- else
-#endif
- frstor(addr);
-}
-
-#ifdef I586_CPU_XXX
-static long
-timezero(funcname, func)
- const char *funcname;
- void (*func)(void *buf, size_t len);
-
-{
- void *buf;
-#define BUFSIZE 1048576
- long usec;
- struct timeval finish, start;
-
- buf = malloc(BUFSIZE, M_TEMP, M_NOWAIT);
- if (buf == NULL)
- return (BUFSIZE);
- microtime(&start);
- (*func)(buf, BUFSIZE);
- microtime(&finish);
- usec = 1000000 * (finish.tv_sec - start.tv_sec) +
- finish.tv_usec - start.tv_usec;
- if (usec <= 0)
- usec = 1;
- if (bootverbose)
- printf("%s bandwidth = %u kBps\n", funcname,
- (u_int32_t)(((BUFSIZE >> 10) * 1000000) / usec));
- free(buf, M_TEMP);
- return (usec);
-}
-#endif /* I586_CPU */
-
-static device_method_t npx_methods[] = {
- /* Device interface */
- DEVMETHOD(device_identify, npx_identify),
- DEVMETHOD(device_probe, npx_probe),
- DEVMETHOD(device_attach, npx_attach),
- DEVMETHOD(device_detach, bus_generic_detach),
- DEVMETHOD(device_shutdown, bus_generic_shutdown),
- DEVMETHOD(device_suspend, bus_generic_suspend),
- DEVMETHOD(device_resume, bus_generic_resume),
-
- { 0, 0 }
-};
-
-static driver_t npx_driver = {
- "npx",
- npx_methods,
- 1, /* no softc */
-};
-
-static devclass_t npx_devclass;
-DRIVER_MODULE(npx, nexus, npx_driver, npx_devclass, 0, 0);
-
-#ifdef DEV_ISA
-/*
- * We prefer to attach to the root nexus so that the usual case (exception 16)
- * doesn't describe the processor as being `on isa'.
- */
-DRIVER_MODULE(npx, nexus, npx_driver, npx_devclass, 0, 0);
-
-/*
- * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI.
- */
-static struct isa_pnp_id npxisa_ids[] = {
- { 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */
- { 0 }
-};
-
-static int
-npxisa_probe(device_t dev)
-{
- int result;
- if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, npxisa_ids))
<= 0) {
- device_quiet(dev);
- }
- return(result);
-}
-
-static int
-npxisa_attach(device_t dev)
-{
- return (0);
-}
-
-static device_method_t npxisa_methods[] = {
- /* Device interface */
- DEVMETHOD(device_probe, npxisa_probe),
- DEVMETHOD(device_attach, npxisa_attach),
- DEVMETHOD(device_detach, bus_generic_detach),
- DEVMETHOD(device_shutdown, bus_generic_shutdown),
- DEVMETHOD(device_suspend, bus_generic_suspend),
- DEVMETHOD(device_resume, bus_generic_resume),
-
- { 0, 0 }
-};
-
-static driver_t npxisa_driver = {
- "npxisa",
- npxisa_methods,
- 1, /* no softc */
-};
-
-static devclass_t npxisa_devclass;
-
-DRIVER_MODULE(npxisa, isa, npxisa_driver, npxisa_devclass, 0, 0);
-#ifndef PC98
-DRIVER_MODULE(npxisa, acpi, npxisa_driver, npxisa_devclass, 0, 0);
-#endif
-#endif /* DEV_ISA */
diff -r 64cd054aa143 -r 0255f48b757f
freebsd-5.3-xen-sparse/i386-xen/xen/netfront/xn_netfront.c
--- a/freebsd-5.3-xen-sparse/i386-xen/xen/netfront/xn_netfront.c Sun Dec
4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,1434 +0,0 @@
-/*
- *
- * Copyright (c) 2004 Kip Macy
- * All rights reserved.
- *
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "opt_nfsroot.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/sockio.h>
-#include <sys/mbuf.h>
-#include <sys/malloc.h>
-#include <sys/kernel.h>
-#include <sys/socket.h>
-#include <sys/queue.h>
-
-#include <net/if.h>
-#include <net/if_arp.h>
-#include <net/ethernet.h>
-#include <net/if_dl.h>
-#include <net/if_media.h>
-
-#include <net/bpf.h>
-
-#include <net/if_types.h>
-#include <net/if_vlan_var.h>
-
-#include <netinet/in_systm.h>
-#include <netinet/in.h>
-#include <netinet/ip.h>
-
-#include <vm/vm.h>
-#include <vm/pmap.h>
-
-#include <machine/clock.h> /* for DELAY */
-#include <machine/bus_memio.h>
-#include <machine/bus.h>
-#include <machine/resource.h>
-#include <machine/frame.h>
-
-
-#include <sys/bus.h>
-#include <sys/rman.h>
-
-#include <machine/intr_machdep.h>
-
-#include <machine/xen-os.h>
-#include <machine/hypervisor.h>
-#include <machine/hypervisor-ifs.h>
-#include <machine/xen_intr.h>
-#include <machine/evtchn.h>
-#include <machine/ctrl_if.h>
-
-struct xn_softc;
-static void xn_txeof(struct xn_softc *);
-static void xn_rxeof(struct xn_softc *);
-static void xn_alloc_rx_buffers(struct xn_softc *);
-
-static void xn_tick_locked(struct xn_softc *);
-static void xn_tick(void *);
-
-static void xn_intr(void *);
-static void xn_start_locked(struct ifnet *);
-static void xn_start(struct ifnet *);
-static int xn_ioctl(struct ifnet *, u_long, caddr_t);
-static void xn_ifinit_locked(struct xn_softc *);
-static void xn_ifinit(void *);
-static void xn_stop(struct xn_softc *);
-#ifdef notyet
-static void xn_watchdog(struct ifnet *);
-#endif
-/* Xenolinux helper functions */
-static void network_connect(struct xn_softc *, netif_fe_interface_status_t *);
-static void create_netdev(int handle, struct xn_softc **);
-static void netif_ctrlif_rx(ctrl_msg_t *,unsigned long);
-
-static void xn_free_rx_ring(struct xn_softc *);
-
-static void xn_free_tx_ring(struct xn_softc *);
-
-
-
-/* XXX: This isn't supported in FreeBSD, so ignore it for now. */
-#define TASK_UNINTERRUPTIBLE 0
-#define INVALID_P2M_ENTRY (~0UL)
-
-/*
- * If the backend driver is pipelining transmit requests then we can be very
- * aggressive in avoiding new-packet notifications -- only need to send a
- * notification if there are no outstanding unreceived responses.
- * If the backend may be buffering our transmit buffers for any reason then we
- * are rather more conservative.
- */
-#ifdef CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER
-#define TX_TEST_IDX resp_prod /* aggressive: any outstanding responses? */
-#else
-#define TX_TEST_IDX req_cons /* conservative: not seen all our requests? */
-#endif
-
-/*
- * Mbuf pointers. We need these to keep track of the virtual addresses
- * of our mbuf chains since we can only convert from virtual to physical,
- * not the other way around. The size must track the free index arrays.
- */
-struct xn_chain_data {
- struct mbuf *xn_tx_chain[NETIF_TX_RING_SIZE+1];
- struct mbuf *xn_rx_chain[NETIF_RX_RING_SIZE+1];
-};
-
-struct xn_softc {
- struct arpcom arpcom; /* interface info */
- device_t xn_dev;
- SLIST_ENTRY(xn_softc) xn_links;
- struct mtx xn_mtx;
- void *xn_intrhand;
- struct resource *xn_res;
- u_int8_t xn_ifno; /* interface number */
- struct xn_chain_data xn_cdata; /* mbufs */
-
- netif_tx_interface_t *xn_tx_if;
- netif_rx_interface_t *xn_rx_if;
-
- int xn_if_flags;
- int xn_txcnt;
- int xn_rxbufcnt;
- struct callout xn_stat_ch;
- unsigned int xn_irq;
- unsigned int xn_evtchn;
-
-
- /* What is the status of our connection to the remote backend? */
-#define BEST_CLOSED 0
-#define BEST_DISCONNECTED 1
-#define BEST_CONNECTED 2
- unsigned int xn_backend_state;
-
- /* Is this interface open or closed (down or up)? */
-#define UST_CLOSED 0
-#define UST_OPEN 1
- unsigned int xn_user_state;
-
- /* Receive-ring batched refills. */
-#define RX_MIN_TARGET 64 /* XXX: larger than linux. was causing packet
- * loss at the default of 8.
- */
-#define RX_MAX_TARGET NETIF_RX_RING_SIZE
- int xn_rx_target; /* number to allocate */
- struct mbuf *xn_rx_batch; /* head of the batch queue */
- struct mbuf *xn_rx_batchtail;
- int xn_rx_batchlen; /* how many queued */
-
- int xn_rx_resp_cons;
- int xn_tx_resp_cons;
- unsigned short xn_rx_free_idxs[NETIF_RX_RING_SIZE+1];
- unsigned short xn_tx_free_idxs[NETIF_RX_RING_SIZE+1];
-};
-
-static unsigned long xn_rx_pfns[NETIF_RX_RING_SIZE];
-static multicall_entry_t xn_rx_mcl[NETIF_RX_RING_SIZE+1];
-static mmu_update_t xn_rx_mmu[NETIF_RX_RING_SIZE];
-
-static SLIST_HEAD(, xn_softc) xn_dev_list =
- SLIST_HEAD_INITIALIZER(xn_dev_list);
-
-#define XN_LOCK_INIT(_sc, _name) \
- mtx_init(&(_sc)->xn_mtx, _name, MTX_NETWORK_LOCK, MTX_DEF)
-#define XN_LOCK(_sc) mtx_lock(&(_sc)->xn_mtx)
-#define XN_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->xn_mtx, MA_OWNED)
-#define XN_UNLOCK(_sc) mtx_unlock(&(_sc)->xn_mtx)
-#define XN_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->xn_mtx)
-
-/* Access macros for acquiring freeing slots in xn_free_{tx,rx}_idxs[]. */
-#define ADD_ID_TO_FREELIST(_list, _id) \
- (_list)[(_id)] = (_list)[0]; \
- (_list)[0] = (_id);
-#define GET_ID_FROM_FREELIST(_list) \
- ({ unsigned short _id = (_list)[0]; \
- (_list)[0] = (_list)[_id]; \
- (unsigned short)_id; })
-#define FREELIST_EMPTY(_list, _maxid) \
- ((_list)[0] == (_maxid+1))
-
-static char *status_name[] = {
- [NETIF_INTERFACE_STATUS_CLOSED] = "closed",
- [NETIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
- [NETIF_INTERFACE_STATUS_CONNECTED] = "connected",
- [NETIF_INTERFACE_STATUS_CHANGED] = "changed",
-};
-
-static char *be_state_name[] = {
- [BEST_CLOSED] = "closed",
- [BEST_DISCONNECTED] = "disconnected",
- [BEST_CONNECTED] = "connected",
-};
-
-#define IPRINTK(fmt, args...) \
- printk("[XEN] " fmt, ##args)
-#define WPRINTK(fmt, args...) \
- printk("[XEN] " fmt, ##args)
-
-static struct xn_softc *
-find_sc_by_handle(unsigned int handle)
-{
- struct xn_softc *sc;
- SLIST_FOREACH(sc, &xn_dev_list, xn_links)
- {
- if ( sc->xn_ifno == handle )
- return sc;
- }
- return NULL;
-}
-
-/** Network interface info. */
-struct netif_ctrl {
- /** Number of interfaces. */
- int interface_n;
- /** Number of connected interfaces. */
- int connected_n;
- /** Error code. */
- int err;
- int up;
-};
-
-static struct netif_ctrl netctrl;
-
-static void
-netctrl_init(void)
-{
- /*
- * netctrl is already in bss, why are we setting it?
- */
- memset(&netctrl, 0, sizeof(netctrl));
- netctrl.up = NETIF_DRIVER_STATUS_DOWN;
-}
-
-/** Get or set a network interface error.
- */
-static int
-netctrl_err(int err)
-{
- if ( (err < 0) && !netctrl.err )
- netctrl.err = err;
- return netctrl.err;
-}
-
-/** Test if all network interfaces are connected.
- *
- * @return 1 if all connected, 0 if not, negative error code otherwise
- */
-static int
-netctrl_connected(void)
-{
- int ok;
- XENPRINTF("err %d up %d\n", netctrl.err, netctrl.up);
- if (netctrl.err)
- ok = netctrl.err;
- else if (netctrl.up == NETIF_DRIVER_STATUS_UP)
- ok = (netctrl.connected_n == netctrl.interface_n);
- else
- ok = 0;
-
- return ok;
-}
-
-/** Count the connected network interfaces.
- *
- * @return connected count
- */
-static int
-netctrl_connected_count(void)
-{
-
- struct xn_softc *sc;
- unsigned int connected;
-
- connected = 0;
-
- SLIST_FOREACH(sc, &xn_dev_list, xn_links)
- {
- if ( sc->xn_backend_state == BEST_CONNECTED )
- connected++;
- }
-
- netctrl.connected_n = connected;
- XENPRINTF("> connected_n=%d interface_n=%d\n",
- netctrl.connected_n, netctrl.interface_n);
- return connected;
-}
-
-static __inline struct mbuf*
-makembuf (struct mbuf *buf)
-{
- struct mbuf *m = NULL;
-
- MGETHDR (m, M_DONTWAIT, MT_DATA);
-
- if (! m)
- return 0;
-
- M_MOVE_PKTHDR(m, buf);
-
- MCLGET (m, M_DONTWAIT);
-
- m->m_pkthdr.len = buf->m_pkthdr.len;
- m->m_len = buf->m_len;
- m_copydata(buf, 0, buf->m_pkthdr.len, mtod(m,caddr_t) );
- m->m_ext.ext_args = (vm_paddr_t *)vtophys(mtod(m,caddr_t));
-
- return m;
-}
-
-
-
-static void
-xn_free_rx_ring(struct xn_softc *sc)
-{
-#if 0
- int i;
-
- for (i = 0; i < NETIF_RX_RING_SIZE; i++) {
- if (sc->xn_cdata.xn_rx_chain[MASK_NETIF_RX_IDX(i)] != NULL) {
- m_freem(sc->xn_cdata.xn_rx_chain[MASK_NETIF_RX_IDX(i)]);
- sc->xn_cdata.xn_rx_chain[MASK_NETIF_RX_IDX(i)] = NULL;
- }
- }
-
- sc->xn_rx_resp_cons = 0;
- sc->xn_rx_if->req_prod = 0;
- sc->xn_rx_if->event = sc->xn_rx_resp_cons ;
-#endif
-}
-
-static void
-xn_free_tx_ring(struct xn_softc *sc)
-{
-#if 0
- int i;
-
- for (i = 0; i < NETIF_TX_RING_SIZE; i++) {
- if (sc->xn_cdata.xn_tx_chain[MASK_NETIF_TX_IDX(i)] != NULL) {
- m_freem(sc->xn_cdata.xn_tx_chain[MASK_NETIF_TX_IDX(i)]);
- sc->xn_cdata.xn_tx_chain[MASK_NETIF_TX_IDX(i)] = NULL;
- }
- }
-
- return;
-#endif
-}
-
-static void
-xn_alloc_rx_buffers(struct xn_softc *sc)
-{
- unsigned short id;
- struct mbuf *m_new, *next;
- int i, batch_target;
- NETIF_RING_IDX req_prod = sc->xn_rx_if->req_prod;
-
- if (unlikely(sc->xn_backend_state != BEST_CONNECTED) )
- return;
-
- /*
- * Allocate skbuffs greedily, even though we batch updates to the
- * receive ring. This creates a less bursty demand on the memory allocator,
- * so should reduce the chance of failed allocation requests both for
- * ourself and for other kernel subsystems.
- */
- batch_target = sc->xn_rx_target - (req_prod - sc->xn_rx_resp_cons);
- for ( i = sc->xn_rx_batchlen; i < batch_target; i++, sc->xn_rx_batchlen++)
{
- MGETHDR(m_new, M_DONTWAIT, MT_DATA);
- if (m_new == NULL)
- break;
-
- MCLGET(m_new, M_DONTWAIT);
- if (!(m_new->m_flags & M_EXT)) {
- m_freem(m_new);
- break;
- }
- m_new->m_len = m_new->m_pkthdr.len = MCLBYTES;
-
- /* queue the mbufs allocated */
- if (!sc->xn_rx_batch)
- sc->xn_rx_batch = m_new;
-
- if (sc->xn_rx_batchtail)
- sc->xn_rx_batchtail->m_next = m_new;
- sc->xn_rx_batchtail = m_new;
- }
-
- /* Is the batch large enough to be worthwhile? */
- if ( i < (sc->xn_rx_target/2) )
- return;
-
- for (i = 0, m_new = sc->xn_rx_batch; m_new;
- i++, sc->xn_rx_batchlen--, m_new = next) {
-
- next = m_new->m_next;
- m_new->m_next = NULL;
-
- m_new->m_ext.ext_args = (vm_paddr_t *)vtophys(m_new->m_ext.ext_buf);
-
- id = GET_ID_FROM_FREELIST(sc->xn_rx_free_idxs);
- KASSERT(id != 0, ("alloc_rx_buffers: found free receive index of 0\n"));
- sc->xn_cdata.xn_rx_chain[MASK_NETIF_RX_IDX(id)] = m_new;
-
- sc->xn_rx_if->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.id = id;
-
- xn_rx_pfns[i] = vtomach(mtod(m_new,vm_offset_t)) >> PAGE_SHIFT;
-
- /* Remove this page from pseudo phys map before passing back to Xen. */
- xen_phys_machine[((unsigned long)m_new->m_ext.ext_args >> PAGE_SHIFT)]
- = INVALID_P2M_ENTRY;
-
- xn_rx_mcl[i].op = __HYPERVISOR_update_va_mapping;
- xn_rx_mcl[i].args[0] = (unsigned long)mtod(m_new,vm_offset_t);
- xn_rx_mcl[i].args[1] = 0;
- xn_rx_mcl[i].args[2] = 0;
-
- }
-
- KASSERT(i, ("no mbufs processed")); /* should have returned earlier
*/
- KASSERT(sc->xn_rx_batchlen == 0, ("not all mbufs processed"));
- sc->xn_rx_batch = sc->xn_rx_batchtail = NULL;
-
- /*
- * We may have allocated buffers which have entries outstanding
- in the page * update queue -- make sure we flush those first! */
- PT_UPDATES_FLUSH();
-
- /* After all PTEs have been zapped we blow away stale TLB entries. */
- xn_rx_mcl[i-1].args[2] = UVMF_TLB_FLUSH|UVMF_LOCAL;
-
- /* Give away a batch of pages. */
- xn_rx_mcl[i].op = __HYPERVISOR_dom_mem_op;
- xn_rx_mcl[i].args[0] = MEMOP_decrease_reservation;
- xn_rx_mcl[i].args[1] = (unsigned long)xn_rx_pfns;
- xn_rx_mcl[i].args[2] = (unsigned long)i;
- xn_rx_mcl[i].args[3] = 0;
- xn_rx_mcl[i].args[4] = DOMID_SELF;
-
- /* Zap PTEs and give away pages in one big multicall. */
- (void)HYPERVISOR_multicall(xn_rx_mcl, i+1);
-
- /* Check return status of HYPERVISOR_dom_mem_op(). */
- if (unlikely(xn_rx_mcl[i].result != i))
- panic("Unable to reduce memory reservation\n");
-
- /* Above is a suitable barrier to ensure backend will see requests. */
- sc->xn_rx_if->req_prod = req_prod + i;
-
- /* Adjust our floating fill target if we risked running out of buffers. */
- if ( ((req_prod - sc->xn_rx_if->resp_prod) < (sc->xn_rx_target / 4)) &&
- ((sc->xn_rx_target *= 2) > RX_MAX_TARGET) )
- sc->xn_rx_target = RX_MAX_TARGET;
-}
-
-static void
-xn_rxeof(struct xn_softc *sc)
-{
- struct ifnet *ifp;
- netif_rx_response_t *rx;
- NETIF_RING_IDX i, rp;
- mmu_update_t *mmu = xn_rx_mmu;
- multicall_entry_t *mcl = xn_rx_mcl;
- struct mbuf *tail_mbuf = NULL, *head_mbuf = NULL, *m, *next;
-
- XN_LOCK_ASSERT(sc);
- if (sc->xn_backend_state != BEST_CONNECTED)
- return;
-
- ifp = &sc->arpcom.ac_if;
-
- rp = sc->xn_rx_if->resp_prod;
- rmb(); /* Ensure we see queued responses up to 'rp'. */
-
- for (i = sc->xn_rx_resp_cons; i != rp; i++) {
-
- rx = &sc->xn_rx_if->ring[MASK_NETIF_RX_IDX(i)].resp;
- KASSERT(rx->id != 0, ("xn_rxeof: found free receive index of 0\n"));
-
- /*
- * An error here is very odd. Usually indicates a backend bug,
- * low-memory condition, or that we didn't have reservation headroom.
- * Whatever - print an error and queue the id again straight away.
- */
- if (unlikely(rx->status <= 0)) {
- printk("bad buffer on RX ring!(%d)\n", rx->status);
- sc->xn_rx_if->ring[MASK_NETIF_RX_IDX(sc->xn_rx_if->req_prod)].req.id
- = rx->id;
- wmb();
- sc->xn_rx_if->req_prod++;
- continue;
- }
-
- m = (struct mbuf *)
- sc->xn_cdata.xn_rx_chain[MASK_NETIF_RX_IDX(rx->id)];
- if (m->m_next)
- panic("mbuf is already part of a valid mbuf chain");
- ADD_ID_TO_FREELIST(sc->xn_rx_free_idxs, rx->id);
-
- m->m_data += (rx->addr & PAGE_MASK);
- m->m_pkthdr.len = m->m_len = rx->status;
- m->m_pkthdr.rcvif = ifp;
-
- /* Remap the page. */
- mmu->ptr = (rx->addr & ~PAGE_MASK) | MMU_MACHPHYS_UPDATE;
- mmu->val = (unsigned long)m->m_ext.ext_args >> PAGE_SHIFT;
- mmu++;
- mcl->op = __HYPERVISOR_update_va_mapping;
- mcl->args[0] = (unsigned long)m->m_data;
- mcl->args[1] = (rx->addr & ~PAGE_MASK) | PG_KERNEL;
- mcl->args[2] = 0;
- mcl++;
-
- xen_phys_machine[((unsigned long)m->m_ext.ext_args >> PAGE_SHIFT)] =
- (rx->addr >> PAGE_SHIFT);
-
- if (unlikely(!head_mbuf))
- head_mbuf = m;
-
- if (tail_mbuf)
- tail_mbuf->m_next = m;
- tail_mbuf = m;
-
- sc->xn_cdata.xn_rx_chain[MASK_NETIF_RX_IDX(rx->id)] = NULL;
- sc->xn_rxbufcnt++;
- }
-
- /* Do all the remapping work, and M->P updates, in one big hypercall. */
- if (likely((mcl - xn_rx_mcl) != 0)) {
- mcl->op = __HYPERVISOR_mmu_update;
- mcl->args[0] = (unsigned long)xn_rx_mmu;
- mcl->args[1] = mmu - xn_rx_mmu;
- mcl->args[2] = 0;
- mcl->args[3] = DOMID_SELF;
- mcl++;
- (void)HYPERVISOR_multicall(xn_rx_mcl, mcl - xn_rx_mcl);
- }
-
-
- /*
- * Process all the mbufs after the remapping is complete.
- * Break the mbuf chain first though.
- */
- for (m = head_mbuf; m; m = next) {
- next = m->m_next;
- m->m_next = NULL;
-
- ifp->if_ipackets++;
-
- XN_UNLOCK(sc);
-
- /* Pass it up. */
- (*ifp->if_input)(ifp, m);
- XN_LOCK(sc);
- }
-
- sc->xn_rx_resp_cons = i;
-
- /* If we get a callback with very few responses, reduce fill target. */
- /* NB. Note exponential increase, linear decrease. */
- if (((sc->xn_rx_if->req_prod - sc->xn_rx_if->resp_prod) >
- ((3*sc->xn_rx_target) / 4)) && (--sc->xn_rx_target < RX_MIN_TARGET))
- sc->xn_rx_target = RX_MIN_TARGET;
-
- xn_alloc_rx_buffers(sc);
-
- sc->xn_rx_if->event = i + 1;
-}
-
-static void
-xn_txeof(struct xn_softc *sc)
-{
- NETIF_RING_IDX i, prod;
- unsigned short id;
- struct ifnet *ifp;
- struct mbuf *m;
-
- XN_LOCK_ASSERT(sc);
-
- if (sc->xn_backend_state != BEST_CONNECTED)
- return;
-
- ifp = &sc->arpcom.ac_if;
- ifp->if_timer = 0;
-
- do {
- prod = sc->xn_tx_if->resp_prod;
-
- for (i = sc->xn_tx_resp_cons; i != prod; i++) {
- id = sc->xn_tx_if->ring[MASK_NETIF_TX_IDX(i)].resp.id;
- m = sc->xn_cdata.xn_tx_chain[MASK_NETIF_TX_IDX(id)];
-
- KASSERT(m != NULL, ("mbuf not found in xn_tx_chain"));
- M_ASSERTVALID(m);
-
- m_freem(m);
- sc->xn_cdata.xn_tx_chain[MASK_NETIF_TX_IDX(id)] = NULL;
- ADD_ID_TO_FREELIST(sc->xn_tx_free_idxs, id);
- sc->xn_txcnt--;
- }
- sc->xn_tx_resp_cons = prod;
-
- /*
- * Set a new event, then check for race with update of tx_cons. Note
- * that it is essential to schedule a callback, no matter how few
- * buffers are pending. Even if there is space in the transmit ring,
- * higher layers may be blocked because too much data is outstanding:
- * in such cases notification from Xen is likely to be the only kick
- * that we'll get.
- */
- sc->xn_tx_if->event =
- prod + ((sc->xn_tx_if->req_prod - prod) >> 1) + 1;
-
- mb();
-
- } while (prod != sc->xn_tx_if->resp_prod);
-}
-
-static void
-xn_intr(void *xsc)
-{
- struct xn_softc *sc = xsc;
- struct ifnet *ifp = &sc->arpcom.ac_if;
-
- XN_LOCK(sc);
-
- /* sometimes we seem to lose packets. stay in the interrupt handler while
- * there is stuff to process: continually recheck the response producer.
- */
- do {
- xn_txeof(sc);
-
- if (sc->xn_rx_resp_cons != sc->xn_rx_if->resp_prod &&
- sc->xn_user_state == UST_OPEN)
- xn_rxeof(sc);
-
- if (ifp->if_flags & IFF_RUNNING && ifp->if_snd.ifq_head != NULL)
- xn_start_locked(ifp);
- } while (sc->xn_rx_resp_cons != sc->xn_rx_if->resp_prod &&
- sc->xn_user_state == UST_OPEN);
-
- XN_UNLOCK(sc);
- return;
-}
-
-static void
-xn_tick_locked(struct xn_softc *sc)
-{
- XN_LOCK_ASSERT(sc);
- callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc);
-
- /* XXX placeholder for printing debug information */
-
-}
-
-
-static void
-xn_tick(void *xsc)
-{
- struct xn_softc *sc;
-
- sc = xsc;
- XN_LOCK(sc);
- xn_tick_locked(sc);
- XN_UNLOCK(sc);
-
-}
-static void
-xn_start_locked(struct ifnet *ifp)
-{
- unsigned short id;
- struct mbuf *m_head, *new_m;
- struct xn_softc *sc = ifp->if_softc;
- netif_tx_request_t *tx;
- NETIF_RING_IDX i, start;
-
- if (sc->xn_backend_state != BEST_CONNECTED)
- return;
-
- for (i = start = sc->xn_tx_if->req_prod; TRUE; i++, sc->xn_txcnt++) {
-
- IF_DEQUEUE(&ifp->if_snd, m_head);
- if (m_head == NULL)
- break;
-
- if (FREELIST_EMPTY(sc->xn_tx_free_idxs, NETIF_TX_RING_SIZE)) {
- IF_PREPEND(&ifp->if_snd, m_head);
- ifp->if_flags |= IFF_OACTIVE;
- break;
- }
-
- i = sc->xn_tx_if->req_prod;
-
- id = GET_ID_FROM_FREELIST(sc->xn_tx_free_idxs);
-
- /*
- * Start packing the mbufs in this chain into
- * the fragment pointers. Stop when we run out
- * of fragments or hit the end of the mbuf chain.
- */
- new_m = makembuf(m_head);
- tx = &(sc->xn_tx_if->ring[MASK_NETIF_TX_IDX(i)].req);
- tx->id = id;
- tx->size = new_m->m_pkthdr.len;
- new_m->m_next = NULL;
- new_m->m_nextpkt = NULL;
-
- m_freem(m_head);
- tx->addr = vtomach(mtod(new_m, vm_offset_t));
-
- sc->xn_cdata.xn_tx_chain[MASK_NETIF_TX_IDX(id)] = new_m;
- BPF_MTAP(ifp, new_m);
- }
-
- sc->xn_tx_if->req_prod = i;
- xn_txeof(sc);
-
- /* Only notify Xen if we really have to. */
- if (sc->xn_tx_if->TX_TEST_IDX == start)
- notify_via_evtchn(sc->xn_evtchn);
- return;
-}
-
-static void
-xn_start(struct ifnet *ifp)
-{
- struct xn_softc *sc;
- sc = ifp->if_softc;
- XN_LOCK(sc);
- xn_start_locked(ifp);
- XN_UNLOCK(sc);
-}
-
-
-
-/* equivalent of network_open() in Linux */
-static void
-xn_ifinit_locked(struct xn_softc *sc)
-{
- struct ifnet *ifp;
-
- XN_LOCK_ASSERT(sc);
-
- ifp = &sc->arpcom.ac_if;
-
- if (ifp->if_flags & IFF_RUNNING)
- return;
-
- xn_stop(sc);
-
- sc->xn_user_state = UST_OPEN;
-
- xn_alloc_rx_buffers(sc);
- sc->xn_rx_if->event = sc->xn_rx_resp_cons + 1;
-
- ifp->if_flags |= IFF_RUNNING;
- ifp->if_flags &= ~IFF_OACTIVE;
-
- callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc);
-
-}
-
-
-static void
-xn_ifinit(void *xsc)
-{
- struct xn_softc *sc = xsc;
-
- XN_LOCK(sc);
- xn_ifinit_locked(sc);
- XN_UNLOCK(sc);
-
-}
-
-
-static int
-xn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
-{
- struct xn_softc *sc = ifp->if_softc;
- struct ifreq *ifr = (struct ifreq *) data;
- int mask, error = 0;
- switch(cmd) {
- case SIOCSIFMTU:
- /* XXX can we alter the MTU on a VN ?*/
-#ifdef notyet
- if (ifr->ifr_mtu > XN_JUMBO_MTU)
- error = EINVAL;
- else
-#endif
- {
- ifp->if_mtu = ifr->ifr_mtu;
- ifp->if_flags &= ~IFF_RUNNING;
- xn_ifinit(sc);
- }
- break;
- case SIOCSIFFLAGS:
- XN_LOCK(sc);
- if (ifp->if_flags & IFF_UP) {
- /*
- * If only the state of the PROMISC flag changed,
- * then just use the 'set promisc mode' command
- * instead of reinitializing the entire NIC. Doing
- * a full re-init means reloading the firmware and
- * waiting for it to start up, which may take a
- * second or two.
- */
-#ifdef notyet
- /* No promiscuous mode with Xen */
- if (ifp->if_flags & IFF_RUNNING &&
- ifp->if_flags & IFF_PROMISC &&
- !(sc->xn_if_flags & IFF_PROMISC)) {
- XN_SETBIT(sc, XN_RX_MODE,
- XN_RXMODE_RX_PROMISC);
- } else if (ifp->if_flags & IFF_RUNNING &&
- !(ifp->if_flags & IFF_PROMISC) &&
- sc->xn_if_flags & IFF_PROMISC) {
- XN_CLRBIT(sc, XN_RX_MODE,
- XN_RXMODE_RX_PROMISC);
- } else
-#endif
- xn_ifinit_locked(sc);
- } else {
- if (ifp->if_flags & IFF_RUNNING) {
- xn_stop(sc);
- }
- }
- sc->xn_if_flags = ifp->if_flags;
- XN_UNLOCK(sc);
- error = 0;
- break;
- case SIOCSIFCAP:
- mask = ifr->ifr_reqcap ^ ifp->if_capenable;
- if (mask & IFCAP_HWCSUM) {
- if (IFCAP_HWCSUM & ifp->if_capenable)
- ifp->if_capenable &= ~IFCAP_HWCSUM;
- else
- ifp->if_capenable |= IFCAP_HWCSUM;
- }
- error = 0;
- break;
- case SIOCADDMULTI:
- case SIOCDELMULTI:
-#ifdef notyet
- if (ifp->if_flags & IFF_RUNNING) {
- XN_LOCK(sc);
- xn_setmulti(sc);
- XN_UNLOCK(sc);
- error = 0;
- }
-#endif
- /* FALLTHROUGH */
- case SIOCSIFMEDIA:
- case SIOCGIFMEDIA:
- error = EINVAL;
- break;
- default:
- error = ether_ioctl(ifp, cmd, data);
- }
-
- return (error);
-}
-
-static void
-xn_stop(struct xn_softc *sc)
-{
- struct ifnet *ifp;
-
- XN_LOCK_ASSERT(sc);
-
- ifp = &sc->arpcom.ac_if;
-
- callout_stop(&sc->xn_stat_ch);
-
- xn_free_rx_ring(sc);
- xn_free_tx_ring(sc);
-
- ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
-}
-
-/* START of Xenolinux helper functions adapted to FreeBSD */
-static void
-network_connect(struct xn_softc *sc, netif_fe_interface_status_t *status)
-{
- struct ifnet *ifp;
- int i, requeue_idx;
- netif_tx_request_t *tx;
-
- XN_LOCK(sc);
-
- ifp = &sc->arpcom.ac_if;
- /* first time through, setup the ifp info */
- if (ifp->if_softc == NULL) {
- ifp->if_softc = sc;
- if_initname(ifp, "xn", sc->xn_ifno);
- ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX;
- ifp->if_ioctl = xn_ioctl;
- ifp->if_output = ether_output;
- ifp->if_start = xn_start;
-#ifdef notyet
- ifp->if_watchdog = xn_watchdog;
-#endif
- ifp->if_init = xn_ifinit;
- ifp->if_mtu = ETHERMTU;
- ifp->if_snd.ifq_maxlen = NETIF_TX_RING_SIZE - 1;
-
-#ifdef notyet
- ifp->if_hwassist = XN_CSUM_FEATURES;
- ifp->if_capabilities = IFCAP_HWCSUM;
- ifp->if_capenable = ifp->if_capabilities;
-#endif
-
- ether_ifattach(ifp, sc->arpcom.ac_enaddr);
- callout_init(&sc->xn_stat_ch, CALLOUT_MPSAFE);
- }
-
- /* Recovery procedure: */
-
- /* Step 1: Reinitialise variables. */
- sc->xn_rx_resp_cons = sc->xn_tx_resp_cons = 0;
- sc->xn_rxbufcnt = sc->xn_txcnt = 0;
- sc->xn_rx_if->event = sc->xn_tx_if->event = 1;
-
- /* Step 2: Rebuild the RX and TX ring contents.
- * NB. We could just free the queued TX packets now but we hope
- * that sending them out might do some good. We have to rebuild
- * the RX ring because some of our pages are currently flipped out
- * so we can't just free the RX skbs.
- */
-
- /* Rebuild the TX buffer freelist and the TX ring itself.
- * NB. This reorders packets. We could keep more private state
- * to avoid this but maybe it doesn't matter so much given the
- * interface has been down.
- */
- for ( requeue_idx = 0, i = 1; i <= NETIF_TX_RING_SIZE; i++ )
- {
- if (sc->xn_cdata.xn_tx_chain[i] != NULL)
- {
- struct mbuf *m = sc->xn_cdata.xn_tx_chain[i];
-
- tx = &sc->xn_tx_if->ring[requeue_idx++].req;
-
- tx->id = i;
- tx->addr = vtomach(mtod(m, vm_offset_t));
- tx->size = m->m_pkthdr.len;
- sc->xn_txcnt++;
- }
- }
- wmb();
- sc->xn_tx_if->req_prod = requeue_idx;
-
- /* Rebuild the RX buffer freelist and the RX ring itself. */
- for ( requeue_idx = 0, i = 1; i <= NETIF_RX_RING_SIZE; i++ )
- if (sc->xn_cdata.xn_rx_chain[i] != NULL)
- sc->xn_rx_if->ring[requeue_idx++].req.id = i;
- wmb();
- sc->xn_rx_if->req_prod = requeue_idx;
-
- printk("[XEN] Netfront recovered tx=%d rxfree=%d\n",
- sc->xn_tx_if->req_prod,sc->xn_rx_if->req_prod);
-
-
- /* Step 3: All public and private state should now be sane. Get
- * ready to start sending and receiving packets and give the driver
- * domain a kick because we've probably just requeued some
- * packets.
- */
- sc->xn_backend_state = BEST_CONNECTED;
- wmb();
- notify_via_evtchn(status->evtchn);
- xn_txeof(sc);
-
- XN_UNLOCK(sc);
-}
-
-
-static void
-vif_show(struct xn_softc *sc)
-{
-#if DEBUG
- if (sc) {
- IPRINTK("<vif handle=%u %s(%s) evtchn=%u irq=%u tx=%p rx=%p>\n",
- sc->xn_ifno,
- be_state_name[sc->xn_backend_state],
- sc->xn_user_state ? "open" : "closed",
- sc->xn_evtchn,
- sc->xn_irq,
- sc->xn_tx_if,
- sc->xn_rx_if);
- } else {
- IPRINTK("<vif NULL>\n");
- }
-#endif
-}
-
-/* Send a connect message to xend to tell it to bring up the interface. */
-static void
-send_interface_connect(struct xn_softc *sc)
-{
- ctrl_msg_t cmsg = {
- .type = CMSG_NETIF_FE,
- .subtype = CMSG_NETIF_FE_INTERFACE_CONNECT,
- .length = sizeof(netif_fe_interface_connect_t),
- };
- netif_fe_interface_connect_t *msg = (void*)cmsg.msg;
-
- vif_show(sc);
- msg->handle = sc->xn_ifno;
- msg->tx_shmem_frame = (vtomach(sc->xn_tx_if) >> PAGE_SHIFT);
- msg->rx_shmem_frame = (vtomach(sc->xn_rx_if) >> PAGE_SHIFT);
-
- ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-}
-
-/* Send a driver status notification to the domain controller. */
-static int
-send_driver_status(int ok)
-{
- int err = 0;
- ctrl_msg_t cmsg = {
- .type = CMSG_NETIF_FE,
- .subtype = CMSG_NETIF_FE_DRIVER_STATUS,
- .length = sizeof(netif_fe_driver_status_t),
- };
- netif_fe_driver_status_t *msg = (void*)cmsg.msg;
-
- msg->status = (ok ? NETIF_DRIVER_STATUS_UP : NETIF_DRIVER_STATUS_DOWN);
- err = ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
- return err;
-}
-
-/* Stop network device and free tx/rx queues and irq.
- */
-static void
-vif_release(struct xn_softc *sc)
-{
- /* Stop old i/f to prevent errors whilst we rebuild the state. */
- XN_LOCK(sc);
- /* sc->xn_backend_state = BEST_DISCONNECTED; */
- XN_UNLOCK(sc);
-
- /* Free resources. */
- if(sc->xn_tx_if != NULL) {
- unbind_evtchn_from_irq(sc->xn_evtchn);
- free(sc->xn_tx_if, M_DEVBUF);
- free(sc->xn_rx_if, M_DEVBUF);
- sc->xn_irq = 0;
- sc->xn_evtchn = 0;
- sc->xn_tx_if = NULL;
- sc->xn_rx_if = NULL;
- }
-}
-
-/* Release vif resources and close it down completely.
- */
-static void
-vif_close(struct xn_softc *sc)
-{
- vif_show(sc);
- WPRINTK("Unexpected netif-CLOSED message in state %s\n",
- be_state_name[sc->xn_backend_state]);
- vif_release(sc);
- sc->xn_backend_state = BEST_CLOSED;
- /* todo: take dev down and free. */
- vif_show(sc);
-}
-
-/* Move the vif into disconnected state.
- * Allocates tx/rx pages.
- * Sends connect message to xend.
- */
-static void
-vif_disconnect(struct xn_softc *sc)
-{
- if (sc->xn_tx_if) free(sc->xn_tx_if, M_DEVBUF);
- if (sc->xn_rx_if) free(sc->xn_rx_if, M_DEVBUF);
-
- // Before this sc->xn_tx_if and sc->xn_rx_if had better be null.
- sc->xn_tx_if = (netif_tx_interface_t *)malloc(PAGE_SIZE,M_DEVBUF,M_WAITOK);
- sc->xn_rx_if = (netif_rx_interface_t *)malloc(PAGE_SIZE,M_DEVBUF,M_WAITOK);
- memset(sc->xn_tx_if, 0, PAGE_SIZE);
- memset(sc->xn_rx_if, 0, PAGE_SIZE);
- sc->xn_backend_state = BEST_DISCONNECTED;
- send_interface_connect(sc);
- vif_show(sc);
-}
-
-/* Begin interface recovery.
- *
- * NB. Whilst we're recovering, we turn the carrier state off. We
- * take measures to ensure that this device isn't used for
- * anything. We also stop the queue for this device. Various
- * different approaches (e.g. continuing to buffer packets) have
- * been tested but don't appear to improve the overall impact on
- * TCP connections.
- *
- * TODO: (MAW) Change the Xend<->Guest protocol so that a recovery
- * is initiated by a special "RESET" message - disconnect could
- * just mean we're not allowed to use this interface any more.
- */
-static void
-vif_reset(struct xn_softc *sc)
-{
- IPRINTK("Attempting to reconnect network interface: handle=%u\n",
- sc->xn_ifno);
- vif_release(sc);
- vif_disconnect(sc);
- vif_show(sc);
-}
-
-/* Move the vif into connected state.
- * Sets the mac and event channel from the message.
- * Binds the irq to the event channel.
- */
-static void
-vif_connect(
- struct xn_softc *sc, netif_fe_interface_status_t *status)
-{
- memcpy(sc->arpcom.ac_enaddr, status->mac, ETHER_ADDR_LEN);
- network_connect(sc, status);
-
- sc->xn_evtchn = status->evtchn;
- sc->xn_irq = bind_evtchn_to_irq(sc->xn_evtchn);
-
- (void)intr_add_handler("xn", sc->xn_irq, (driver_intr_t *)xn_intr, sc,
- INTR_TYPE_NET | INTR_MPSAFE, &sc->xn_intrhand);
- netctrl_connected_count();
- /* vif_wake(dev); Not needed for FreeBSD */
- vif_show(sc);
-}
-
-/** Create a network device.
- * @param handle device handle
- */
-static void
-create_netdev(int handle, struct xn_softc **sc)
-{
- int i;
-
- *sc = (struct xn_softc *)malloc(sizeof(**sc), M_DEVBUF, M_WAITOK);
- memset(*sc, 0, sizeof(struct xn_softc));
-
- (*sc)->xn_backend_state = BEST_CLOSED;
- (*sc)->xn_user_state = UST_CLOSED;
- (*sc)->xn_ifno = handle;
-
- XN_LOCK_INIT(*sc, "xnetif");
- (*sc)->xn_rx_target = RX_MIN_TARGET;
-
- /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
- for ( i = 0; i <= NETIF_TX_RING_SIZE; i++ )
- (*sc)->xn_tx_free_idxs[i] = (i+1);
- for ( i = 0; i <= NETIF_RX_RING_SIZE; i++ )
- (*sc)->xn_rx_free_idxs[i] = (i+1);
-
- SLIST_INSERT_HEAD(&xn_dev_list, *sc, xn_links);
-}
-
-/* Get the target interface for a status message.
- * Creates the interface when it makes sense.
- * The returned interface may be null when there is no error.
- *
- * @param status status message
- * @param sc return parameter for interface state
- * @return 0 on success, error code otherwise
- */
-static int
-target_vif(netif_fe_interface_status_t *status, struct xn_softc **sc)
-{
- int err = 0;
-
- XENPRINTF("> handle=%d\n", status->handle);
- if ( status->handle < 0 )
- {
- err = -EINVAL;
- goto exit;
- }
-
- if ( (*sc = find_sc_by_handle(status->handle)) != NULL )
- goto exit;
-
- if ( status->status == NETIF_INTERFACE_STATUS_CLOSED )
- goto exit;
- if ( status->status == NETIF_INTERFACE_STATUS_CHANGED )
- goto exit;
-
- /* It's a new interface in a good state - create it. */
- XENPRINTF("> create device...\n");
- create_netdev(status->handle, sc);
- netctrl.interface_n++;
-
-exit:
- return err;
-}
-
-/* Handle an interface status message. */
-static void
-netif_interface_status(netif_fe_interface_status_t *status)
-{
- int err = 0;
- struct xn_softc *sc = NULL;
-
- XENPRINTF("> status=%s handle=%d\n",
- status_name[status->status], status->handle);
-
- if ( (err = target_vif(status, &sc)) != 0 )
- {
- WPRINTK("Invalid netif: handle=%u\n", status->handle);
- return;
- }
-
- if ( sc == NULL )
- {
- XENPRINTF("> no vif\n");
- return;
- }
-
- vif_show(sc);
-
- switch ( status->status )
- {
- case NETIF_INTERFACE_STATUS_CLOSED:
- switch ( sc->xn_backend_state )
- {
- case BEST_CLOSED:
- case BEST_DISCONNECTED:
- case BEST_CONNECTED:
- vif_close(sc);
- break;
- }
- break;
-
- case NETIF_INTERFACE_STATUS_DISCONNECTED:
- switch ( sc->xn_backend_state )
- {
- case BEST_CLOSED:
- vif_disconnect(sc);
- break;
- case BEST_DISCONNECTED:
- case BEST_CONNECTED:
- vif_reset(sc);
- break;
- }
- break;
-
- case NETIF_INTERFACE_STATUS_CONNECTED:
- switch ( sc->xn_backend_state )
- {
- case BEST_CLOSED:
- WPRINTK("Unexpected netif status %s in state %s\n",
- status_name[status->status],
- be_state_name[sc->xn_backend_state]);
- vif_disconnect(sc);
- vif_connect(sc, status);
- break;
- case BEST_DISCONNECTED:
- vif_connect(sc, status);
- break;
- }
- break;
-
- case NETIF_INTERFACE_STATUS_CHANGED:
- /*
- * The domain controller is notifying us that a device has been
- * added or removed.
- */
- break;
-
- default:
- WPRINTK("Invalid netif status code %d\n", status->status);
- break;
- }
- vif_show(sc);
-}
-
-/*
- * Initialize the network control interface.
- */
-static void
-netif_driver_status(netif_fe_driver_status_t *status)
-{
- XENPRINTF("> status=%d\n", status->status);
- netctrl.up = status->status;
- //netctrl.interface_n = status->max_handle;
- //netctrl.connected_n = 0;
- netctrl_connected_count();
-}
-
-/* Receive handler for control messages. */
-static void
-netif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
-{
- switch ( msg->subtype )
- {
- case CMSG_NETIF_FE_INTERFACE_STATUS:
- if ( msg->length != sizeof(netif_fe_interface_status_t) )
- goto error;
- netif_interface_status((netif_fe_interface_status_t *)
- &msg->msg[0]);
- break;
-
- case CMSG_NETIF_FE_DRIVER_STATUS:
- if ( msg->length != sizeof(netif_fe_driver_status_t) )
- goto error;
- netif_driver_status((netif_fe_driver_status_t *)
- &msg->msg[0]);
- break;
-
- error:
- default:
- msg->length = 0;
- break;
- }
-
- ctrl_if_send_response(msg);
-}
-
-#if 1
-/* Wait for all interfaces to be connected.
- *
- * This works OK, but we'd like to use the probing mode (see below).
- */
-static int probe_interfaces(void)
-{
- int err = 0, conn = 0;
- int wait_i, wait_n = 100;
- for ( wait_i = 0; wait_i < wait_n; wait_i++)
- {
- XENPRINTF("> wait_i=%d\n", wait_i);
- conn = netctrl_connected();
- if(conn) break;
- tsleep(&xn_dev_list, PWAIT | PCATCH, "netif", hz);
- }
-
- XENPRINTF("> wait finished...\n");
- if ( conn <= 0 )
- {
- err = netctrl_err(-ENETDOWN);
- WPRINTK("Failed to connect all virtual interfaces: err=%d\n", err);
- }
-
- XENPRINTF("< err=%d\n", err);
-
- return err;
-}
-#else
-/* Probe for interfaces until no more are found.
- *
- * This is the mode we'd like to use, but at the moment it panics the kernel.
-*/
-static int
-probe_interfaces(void)
-{
- int err = 0;
- int wait_i, wait_n = 100;
- ctrl_msg_t cmsg = {
- .type = CMSG_NETIF_FE,
- .subtype = CMSG_NETIF_FE_INTERFACE_STATUS,
- .length = sizeof(netif_fe_interface_status_t),
- };
- netif_fe_interface_status_t msg = {};
- ctrl_msg_t rmsg = {};
- netif_fe_interface_status_t *reply = (void*)rmsg.msg;
- int state = TASK_UNINTERRUPTIBLE;
- uint32_t query = -1;
-
-
- netctrl.interface_n = 0;
- for ( wait_i = 0; wait_i < wait_n; wait_i++ )
- {
- XENPRINTF("> wait_i=%d query=%d\n", wait_i, query);
- msg.handle = query;
- memcpy(cmsg.msg, &msg, sizeof(msg));
- XENPRINTF("> set_current_state...\n");
- set_current_state(state);
- XENPRINTF("> rmsg=%p msg=%p, reply=%p\n", &rmsg, rmsg.msg, reply);
- XENPRINTF("> sending...\n");
- err = ctrl_if_send_message_and_get_response(&cmsg, &rmsg, state);
- XENPRINTF("> err=%d\n", err);
- if(err) goto exit;
- XENPRINTF("> rmsg=%p msg=%p, reply=%p\n", &rmsg, rmsg.msg, reply);
- if((int)reply->handle < 0){
- // No more interfaces.
- break;
- }
- query = -reply->handle - 2;
- XENPRINTF(">netif_interface_status ...\n");
- netif_interface_status(reply);
- }
-
- exit:
- if ( err )
- {
- err = netctrl_err(-ENETDOWN);
- WPRINTK("Connecting virtual network interfaces failed: err=%d\n", err);
- }
-
- XENPRINTF("< err=%d\n", err);
- return err;
-}
-
-#endif
-
-static void
-xn_init(void *unused)
-{
-
- int err = 0;
-
- netctrl_init();
- (void)ctrl_if_register_receiver(CMSG_NETIF_FE, netif_ctrlif_rx,
- CALLBACK_IN_BLOCKING_CONTEXT);
-
- send_driver_status(1);
- err = probe_interfaces();
-
- if (err)
- ctrl_if_unregister_receiver(CMSG_NETIF_FE, netif_ctrlif_rx);
-}
-
-SYSINIT(xndev, SI_SUB_PSEUDO, SI_ORDER_ANY, xn_init, NULL)
diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/kern/kern_fork.c
--- a/freebsd-5.3-xen-sparse/kern/kern_fork.c Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,846 +0,0 @@
-/*
- * Copyright (c) 1982, 1986, 1989, 1991, 1993
- * The Regents of the University of California. All rights reserved.
- * (c) UNIX System Laboratories, Inc.
- * All or some portions of this file are derived from material licensed
- * to the University of California by American Telephone and Telegraph
- * Co. or Unix System Laboratories, Inc. and are reproduced herein with
- * the permission of UNIX System Laboratories, Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)kern_fork.c 8.6 (Berkeley) 4/8/94
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/kern/kern_fork.c,v 1.234.2.4 2004/09/18 04:11:35
julian Exp $");
-
-#include "opt_ktrace.h"
-#include "opt_mac.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/sysproto.h>
-#include <sys/eventhandler.h>
-#include <sys/filedesc.h>
-#include <sys/kernel.h>
-#include <sys/kthread.h>
-#include <sys/sysctl.h>
-#include <sys/lock.h>
-#include <sys/malloc.h>
-#include <sys/mutex.h>
-#include <sys/proc.h>
-#include <sys/pioctl.h>
-#include <sys/resourcevar.h>
-#include <sys/sched.h>
-#include <sys/syscall.h>
-#include <sys/vmmeter.h>
-#include <sys/vnode.h>
-#include <sys/acct.h>
-#include <sys/mac.h>
-#include <sys/ktr.h>
-#include <sys/ktrace.h>
-#include <sys/unistd.h>
-#include <sys/sx.h>
-
-#include <vm/vm.h>
-#include <vm/pmap.h>
-#include <vm/vm_map.h>
-#include <vm/vm_extern.h>
-#include <vm/uma.h>
-
-#include <sys/user.h>
-#include <machine/critical.h>
-
-#ifndef _SYS_SYSPROTO_H_
-struct fork_args {
- int dummy;
-};
-#endif
-
-static int forksleep; /* Place for fork1() to sleep on. */
-
-/*
- * MPSAFE
- */
-/* ARGSUSED */
-int
-fork(td, uap)
- struct thread *td;
- struct fork_args *uap;
-{
- int error;
- struct proc *p2;
-
- error = fork1(td, RFFDG | RFPROC, 0, &p2);
- if (error == 0) {
- td->td_retval[0] = p2->p_pid;
- td->td_retval[1] = 0;
- }
- return (error);
-}
-
-/*
- * MPSAFE
- */
-/* ARGSUSED */
-int
-vfork(td, uap)
- struct thread *td;
- struct vfork_args *uap;
-{
- int error;
- struct proc *p2;
-
- error = fork1(td, RFFDG | RFPROC /* | RFPPWAIT | RFMEM */, 0, &p2);
- if (error == 0) {
- td->td_retval[0] = p2->p_pid;
- td->td_retval[1] = 0;
- }
- return (error);
-}
-
-/*
- * MPSAFE
- */
-int
-rfork(td, uap)
- struct thread *td;
- struct rfork_args *uap;
-{
- struct proc *p2;
- int error;
-
- /* Don't allow kernel-only flags. */
- if ((uap->flags & RFKERNELONLY) != 0)
- return (EINVAL);
-
- error = fork1(td, uap->flags, 0, &p2);
- if (error == 0) {
- td->td_retval[0] = p2 ? p2->p_pid : 0;
- td->td_retval[1] = 0;
- }
- return (error);
-}
-
-int nprocs = 1; /* process 0 */
-int lastpid = 0;
-SYSCTL_INT(_kern, OID_AUTO, lastpid, CTLFLAG_RD, &lastpid, 0,
- "Last used PID");
-
-/*
- * Random component to lastpid generation. We mix in a random factor to make
- * it a little harder to predict. We sanity check the modulus value to avoid
- * doing it in critical paths. Don't let it be too small or we pointlessly
- * waste randomness entropy, and don't let it be impossibly large. Using a
- * modulus that is too big causes a LOT more process table scans and slows
- * down fork processing as the pidchecked caching is defeated.
- */
-static int randompid = 0;
-
-static int
-sysctl_kern_randompid(SYSCTL_HANDLER_ARGS)
-{
- int error, pid;
-
- error = sysctl_wire_old_buffer(req, sizeof(int));
- if (error != 0)
- return(error);
- sx_xlock(&allproc_lock);
- pid = randompid;
- error = sysctl_handle_int(oidp, &pid, 0, req);
- if (error == 0 && req->newptr != NULL) {
- if (pid < 0 || pid > PID_MAX - 100) /* out of range */
- pid = PID_MAX - 100;
- else if (pid < 2) /* NOP */
- pid = 0;
- else if (pid < 100) /* Make it reasonable */
- pid = 100;
- randompid = pid;
- }
- sx_xunlock(&allproc_lock);
- return (error);
-}
-
-SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW,
- 0, 0, sysctl_kern_randompid, "I", "Random PID modulus");
-
-int
-fork1(td, flags, pages, procp)
- struct thread *td;
- int flags;
- int pages;
- struct proc **procp;
-{
- struct proc *p1, *p2, *pptr;
- uid_t uid;
- struct proc *newproc;
- int ok, trypid;
- static int curfail, pidchecked = 0;
- static struct timeval lastfail;
- struct filedesc *fd;
- struct filedesc_to_leader *fdtol;
- struct thread *td2;
- struct ksegrp *kg2;
- struct sigacts *newsigacts;
- int error;
-
- /* Can't copy and clear. */
- if ((flags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG))
- return (EINVAL);
-
- p1 = td->td_proc;
-
- /*
- * Here we don't create a new process, but we divorce
- * certain parts of a process from itself.
- */
- if ((flags & RFPROC) == 0) {
- mtx_lock(&Giant);
- vm_forkproc(td, NULL, NULL, flags);
- mtx_unlock(&Giant);
-
- /*
- * Close all file descriptors.
- */
- if (flags & RFCFDG) {
- struct filedesc *fdtmp;
- FILEDESC_LOCK(td->td_proc->p_fd);
- fdtmp = fdinit(td->td_proc->p_fd);
- FILEDESC_UNLOCK(td->td_proc->p_fd);
- fdfree(td);
- p1->p_fd = fdtmp;
- }
-
- /*
- * Unshare file descriptors (from parent).
- */
- if (flags & RFFDG) {
- FILEDESC_LOCK(p1->p_fd);
- if (p1->p_fd->fd_refcnt > 1) {
- struct filedesc *newfd;
-
- newfd = fdcopy(td->td_proc->p_fd);
- FILEDESC_UNLOCK(p1->p_fd);
- fdfree(td);
- p1->p_fd = newfd;
- } else
- FILEDESC_UNLOCK(p1->p_fd);
- }
- *procp = NULL;
- return (0);
- }
-
- /*
- * Note 1:1 allows for forking with one thread coming out on the
- * other side with the expectation that the process is about to
- * exec.
- */
- if (p1->p_flag & P_HADTHREADS) {
- /*
- * Idle the other threads for a second.
- * Since the user space is copied, it must remain stable.
- * In addition, all threads (from the user perspective)
- * need to either be suspended or in the kernel,
- * where they will try restart in the parent and will
- * be aborted in the child.
- */
- PROC_LOCK(p1);
- if (thread_single(SINGLE_NO_EXIT)) {
- /* Abort. Someone else is single threading before us. */
- PROC_UNLOCK(p1);
- return (ERESTART);
- }
- PROC_UNLOCK(p1);
- /*
- * All other activity in this process
- * is now suspended at the user boundary,
- * (or other safe places if we think of any).
- */
- }
-
- /* Allocate new proc. */
- newproc = uma_zalloc(proc_zone, M_WAITOK);
-#ifdef MAC
- mac_init_proc(newproc);
-#endif
- knlist_init(&newproc->p_klist, &newproc->p_mtx);
-
- /* We have to lock the process tree while we look for a pid. */
- sx_slock(&proctree_lock);
-
- /*
- * Although process entries are dynamically created, we still keep
- * a global limit on the maximum number we will create. Don't allow
- * a nonprivileged user to use the last ten processes; don't let root
- * exceed the limit. The variable nprocs is the current number of
- * processes, maxproc is the limit.
- */
- sx_xlock(&allproc_lock);
- uid = td->td_ucred->cr_ruid;
- if ((nprocs >= maxproc - 10 &&
- suser_cred(td->td_ucred, SUSER_RUID) != 0) ||
- nprocs >= maxproc) {
- error = EAGAIN;
- goto fail;
- }
-
- /*
- * Increment the count of procs running with this uid. Don't allow
- * a nonprivileged user to exceed their current limit.
- */
- PROC_LOCK(p1);
- ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1,
- (uid != 0) ? lim_cur(p1, RLIMIT_NPROC) : 0);
- PROC_UNLOCK(p1);
- if (!ok) {
- error = EAGAIN;
- goto fail;
- }
-
- /*
- * Increment the nprocs resource before blocking can occur. There
- * are hard-limits as to the number of processes that can run.
- */
- nprocs++;
-
- /*
- * Find an unused process ID. We remember a range of unused IDs
- * ready to use (from lastpid+1 through pidchecked-1).
- *
- * If RFHIGHPID is set (used during system boot), do not allocate
- * low-numbered pids.
- */
- trypid = lastpid + 1;
- if (flags & RFHIGHPID) {
- if (trypid < 10)
- trypid = 10;
- } else {
- if (randompid)
- trypid += arc4random() % randompid;
- }
-retry:
- /*
- * If the process ID prototype has wrapped around,
- * restart somewhat above 0, as the low-numbered procs
- * tend to include daemons that don't exit.
- */
- if (trypid >= PID_MAX) {
- trypid = trypid % PID_MAX;
- if (trypid < 100)
- trypid += 100;
- pidchecked = 0;
- }
- if (trypid >= pidchecked) {
- int doingzomb = 0;
-
- pidchecked = PID_MAX;
- /*
- * Scan the active and zombie procs to check whether this pid
- * is in use. Remember the lowest pid that's greater
- * than trypid, so we can avoid checking for a while.
- */
- p2 = LIST_FIRST(&allproc);
-again:
- for (; p2 != NULL; p2 = LIST_NEXT(p2, p_list)) {
- PROC_LOCK(p2);
- while (p2->p_pid == trypid ||
- (p2->p_pgrp != NULL &&
- (p2->p_pgrp->pg_id == trypid ||
- (p2->p_session != NULL &&
- p2->p_session->s_sid == trypid)))) {
- trypid++;
- if (trypid >= pidchecked) {
- PROC_UNLOCK(p2);
- goto retry;
- }
- }
- if (p2->p_pid > trypid && pidchecked > p2->p_pid)
- pidchecked = p2->p_pid;
- if (p2->p_pgrp != NULL) {
- if (p2->p_pgrp->pg_id > trypid &&
- pidchecked > p2->p_pgrp->pg_id)
- pidchecked = p2->p_pgrp->pg_id;
- if (p2->p_session != NULL &&
- p2->p_session->s_sid > trypid &&
- pidchecked > p2->p_session->s_sid)
- pidchecked = p2->p_session->s_sid;
- }
- PROC_UNLOCK(p2);
- }
- if (!doingzomb) {
- doingzomb = 1;
- p2 = LIST_FIRST(&zombproc);
- goto again;
- }
- }
- sx_sunlock(&proctree_lock);
-
- /*
- * RFHIGHPID does not mess with the lastpid counter during boot.
- */
- if (flags & RFHIGHPID)
- pidchecked = 0;
- else
- lastpid = trypid;
-
- p2 = newproc;
- p2->p_state = PRS_NEW; /* protect against others */
- p2->p_pid = trypid;
- LIST_INSERT_HEAD(&allproc, p2, p_list);
- LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
- sx_xunlock(&allproc_lock);
-
- /*
- * Malloc things while we don't hold any locks.
- */
- if (flags & RFSIGSHARE)
- newsigacts = NULL;
- else
- newsigacts = sigacts_alloc();
-
- /*
- * Copy filedesc.
- */
- if (flags & RFCFDG) {
- FILEDESC_LOCK(td->td_proc->p_fd);
- fd = fdinit(td->td_proc->p_fd);
- FILEDESC_UNLOCK(td->td_proc->p_fd);
- fdtol = NULL;
- } else if (flags & RFFDG) {
- FILEDESC_LOCK(p1->p_fd);
- fd = fdcopy(td->td_proc->p_fd);
- FILEDESC_UNLOCK(p1->p_fd);
- fdtol = NULL;
- } else {
- fd = fdshare(p1->p_fd);
- if (p1->p_fdtol == NULL)
- p1->p_fdtol =
- filedesc_to_leader_alloc(NULL,
- NULL,
- p1->p_leader);
- if ((flags & RFTHREAD) != 0) {
- /*
- * Shared file descriptor table and
- * shared process leaders.
- */
- fdtol = p1->p_fdtol;
- FILEDESC_LOCK(p1->p_fd);
- fdtol->fdl_refcount++;
- FILEDESC_UNLOCK(p1->p_fd);
- } else {
- /*
- * Shared file descriptor table, and
- * different process leaders
- */
- fdtol = filedesc_to_leader_alloc(p1->p_fdtol,
- p1->p_fd,
- p2);
- }
- }
- /*
- * Make a proc table entry for the new process.
- * Start by zeroing the section of proc that is zero-initialized,
- * then copy the section that is copied directly from the parent.
- */
- td2 = FIRST_THREAD_IN_PROC(p2);
- kg2 = FIRST_KSEGRP_IN_PROC(p2);
-
- /* Allocate and switch to an alternate kstack if specified. */
- if (pages != 0)
- vm_thread_new_altkstack(td2, pages);
-
- PROC_LOCK(p2);
- PROC_LOCK(p1);
-
-#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start))
-
- bzero(&p2->p_startzero,
- (unsigned) RANGEOF(struct proc, p_startzero, p_endzero));
- bzero(&td2->td_startzero,
- (unsigned) RANGEOF(struct thread, td_startzero, td_endzero));
- bzero(&kg2->kg_startzero,
- (unsigned) RANGEOF(struct ksegrp, kg_startzero, kg_endzero));
-
- bcopy(&p1->p_startcopy, &p2->p_startcopy,
- (unsigned) RANGEOF(struct proc, p_startcopy, p_endcopy));
- bcopy(&td->td_startcopy, &td2->td_startcopy,
- (unsigned) RANGEOF(struct thread, td_startcopy, td_endcopy));
- bcopy(&td->td_ksegrp->kg_startcopy, &kg2->kg_startcopy,
- (unsigned) RANGEOF(struct ksegrp, kg_startcopy, kg_endcopy));
-#undef RANGEOF
-
- td2->td_sigstk = td->td_sigstk;
-
- /*
- * Duplicate sub-structures as needed.
- * Increase reference counts on shared objects.
- * The p_stats substruct is set in vm_forkproc.
- */
- p2->p_flag = 0;
- if (p1->p_flag & P_PROFIL)
- startprofclock(p2);
- mtx_lock_spin(&sched_lock);
- p2->p_sflag = PS_INMEM;
- /*
- * Allow the scheduler to adjust the priority of the child and
- * parent while we hold the sched_lock.
- */
- sched_fork(td, td2);
-
- mtx_unlock_spin(&sched_lock);
- p2->p_ucred = crhold(td->td_ucred);
- td2->td_ucred = crhold(p2->p_ucred); /* XXXKSE */
-
- pargs_hold(p2->p_args);
-
- if (flags & RFSIGSHARE) {
- p2->p_sigacts = sigacts_hold(p1->p_sigacts);
- } else {
- sigacts_copy(newsigacts, p1->p_sigacts);
- p2->p_sigacts = newsigacts;
- }
- if (flags & RFLINUXTHPN)
- p2->p_sigparent = SIGUSR1;
- else
- p2->p_sigparent = SIGCHLD;
-
- p2->p_textvp = p1->p_textvp;
- p2->p_fd = fd;
- p2->p_fdtol = fdtol;
-
- /*
- * p_limit is copy-on-write. Bump its refcount.
- */
- p2->p_limit = lim_hold(p1->p_limit);
- PROC_UNLOCK(p1);
- PROC_UNLOCK(p2);
-
- /* Bump references to the text vnode (for procfs) */
- if (p2->p_textvp)
- vref(p2->p_textvp);
-
- /*
- * Set up linkage for kernel based threading.
- */
- if ((flags & RFTHREAD) != 0) {
- mtx_lock(&ppeers_lock);
- p2->p_peers = p1->p_peers;
- p1->p_peers = p2;
- p2->p_leader = p1->p_leader;
- mtx_unlock(&ppeers_lock);
- PROC_LOCK(p1->p_leader);
- if ((p1->p_leader->p_flag & P_WEXIT) != 0) {
- PROC_UNLOCK(p1->p_leader);
- /*
- * The task leader is exiting, so process p1 is
- * going to be killed shortly. Since p1 obviously
- * isn't dead yet, we know that the leader is either
- * sending SIGKILL's to all the processes in this
- * task or is sleeping waiting for all the peers to
- * exit. We let p1 complete the fork, but we need
- * to go ahead and kill the new process p2 since
- * the task leader may not get a chance to send
- * SIGKILL to it. We leave it on the list so that
- * the task leader will wait for this new process
- * to commit suicide.
- */
- PROC_LOCK(p2);
- psignal(p2, SIGKILL);
- PROC_UNLOCK(p2);
- } else
- PROC_UNLOCK(p1->p_leader);
- } else {
- p2->p_peers = NULL;
- p2->p_leader = p2;
- }
-
- sx_xlock(&proctree_lock);
- PGRP_LOCK(p1->p_pgrp);
- PROC_LOCK(p2);
- PROC_LOCK(p1);
-
- /*
- * Preserve some more flags in subprocess. P_PROFIL has already
- * been preserved.
- */
- p2->p_flag |= p1->p_flag & P_SUGID;
- td2->td_pflags |= td->td_pflags & TDP_ALTSTACK;
- SESS_LOCK(p1->p_session);
- if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
- p2->p_flag |= P_CONTROLT;
- SESS_UNLOCK(p1->p_session);
- if (flags & RFPPWAIT)
- p2->p_flag |= P_PPWAIT;
-
- p2->p_pgrp = p1->p_pgrp;
- LIST_INSERT_AFTER(p1, p2, p_pglist);
- PGRP_UNLOCK(p1->p_pgrp);
- LIST_INIT(&p2->p_children);
-
- callout_init(&p2->p_itcallout, CALLOUT_MPSAFE);
-
-#ifdef KTRACE
- /*
- * Copy traceflag and tracefile if enabled.
- */
- mtx_lock(&ktrace_mtx);
- KASSERT(p2->p_tracevp == NULL, ("new process has a ktrace vnode"));
- if (p1->p_traceflag & KTRFAC_INHERIT) {
- p2->p_traceflag = p1->p_traceflag;
- if ((p2->p_tracevp = p1->p_tracevp) != NULL) {
- VREF(p2->p_tracevp);
- KASSERT(p1->p_tracecred != NULL,
- ("ktrace vnode with no cred"));
- p2->p_tracecred = crhold(p1->p_tracecred);
- }
- }
- mtx_unlock(&ktrace_mtx);
-#endif
-
- /*
- * If PF_FORK is set, the child process inherits the
- * procfs ioctl flags from its parent.
- */
- if (p1->p_pfsflags & PF_FORK) {
- p2->p_stops = p1->p_stops;
- p2->p_pfsflags = p1->p_pfsflags;
- }
-
- /*
- * This begins the section where we must prevent the parent
- * from being swapped.
- */
- _PHOLD(p1);
- PROC_UNLOCK(p1);
-
- /*
- * Attach the new process to its parent.
- *
- * If RFNOWAIT is set, the newly created process becomes a child
- * of init. This effectively disassociates the child from the
- * parent.
- */
- if (flags & RFNOWAIT)
- pptr = initproc;
- else
- pptr = p1;
- p2->p_pptr = pptr;
- LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling);
- sx_xunlock(&proctree_lock);
-
- /* Inform accounting that we have forked. */
- p2->p_acflag = AFORK;
- PROC_UNLOCK(p2);
-
- /*
- * Finish creating the child process. It will return via a different
- * execution path later. (ie: directly into user mode)
- */
- mtx_lock(&Giant);
- vm_forkproc(td, p2, td2, flags);
-
- if (flags == (RFFDG | RFPROC)) {
- cnt.v_forks++;
- cnt.v_forkpages += p2->p_vmspace->vm_dsize +
- p2->p_vmspace->vm_ssize;
- } else if (flags == (RFFDG | RFPROC | RFPPWAIT | RFMEM)) {
- cnt.v_vforks++;
- cnt.v_vforkpages += p2->p_vmspace->vm_dsize +
- p2->p_vmspace->vm_ssize;
- } else if (p1 == &proc0) {
- cnt.v_kthreads++;
- cnt.v_kthreadpages += p2->p_vmspace->vm_dsize +
- p2->p_vmspace->vm_ssize;
- } else {
- cnt.v_rforks++;
- cnt.v_rforkpages += p2->p_vmspace->vm_dsize +
- p2->p_vmspace->vm_ssize;
- }
- mtx_unlock(&Giant);
-
- /*
- * Both processes are set up, now check if any loadable modules want
- * to adjust anything.
- * What if they have an error? XXX
- */
- EVENTHANDLER_INVOKE(process_fork, p1, p2, flags);
-
- /*
- * Set the child start time and mark the process as being complete.
- */
- microuptime(&p2->p_stats->p_start);
- mtx_lock_spin(&sched_lock);
- p2->p_state = PRS_NORMAL;
-
- /*
- * If RFSTOPPED not requested, make child runnable and add to
- * run queue.
- */
- if ((flags & RFSTOPPED) == 0) {
- TD_SET_CAN_RUN(td2);
- setrunqueue(td2, SRQ_BORING);
- }
- mtx_unlock_spin(&sched_lock);
-
- /*
- * Now can be swapped.
- */
- PROC_LOCK(p1);
- _PRELE(p1);
-
- /*
- * Tell any interested parties about the new process.
- */
- KNOTE_LOCKED(&p1->p_klist, NOTE_FORK | p2->p_pid);
-
- PROC_UNLOCK(p1);
-
- /*
- * Preserve synchronization semantics of vfork. If waiting for
- * child to exec or exit, set P_PPWAIT on child, and sleep on our
- * proc (in case of exit).
- */
- PROC_LOCK(p2);
- while (p2->p_flag & P_PPWAIT)
- msleep(p1, &p2->p_mtx, PWAIT, "ppwait", 0);
- PROC_UNLOCK(p2);
-
- /*
- * If other threads are waiting, let them continue now.
- */
- if (p1->p_flag & P_HADTHREADS) {
- PROC_LOCK(p1);
- thread_single_end();
- PROC_UNLOCK(p1);
- }
-
- /*
- * Return child proc pointer to parent.
- */
- *procp = p2;
- return (0);
-fail:
- sx_sunlock(&proctree_lock);
- if (ppsratecheck(&lastfail, &curfail, 1))
- printf("maxproc limit exceeded by uid %i, please see tuning(7)
and login.conf(5).\n",
- uid);
- sx_xunlock(&allproc_lock);
-#ifdef MAC
- mac_destroy_proc(newproc);
-#endif
- uma_zfree(proc_zone, newproc);
- if (p1->p_flag & P_HADTHREADS) {
- PROC_LOCK(p1);
- thread_single_end();
- PROC_UNLOCK(p1);
- }
- tsleep(&forksleep, PUSER, "fork", hz / 2);
- return (error);
-}
-
-/*
- * Handle the return of a child process from fork1(). This function
- * is called from the MD fork_trampoline() entry point.
- */
-void
-fork_exit(callout, arg, frame)
- void (*callout)(void *, struct trapframe *);
- void *arg;
- struct trapframe *frame;
-{
- struct proc *p;
- struct thread *td;
-
- /*
- * Finish setting up thread glue so that it begins execution in a
- * non-nested critical section with sched_lock held but not recursed.
- */
- td = curthread;
- p = td->td_proc;
- td->td_oncpu = PCPU_GET(cpuid);
- KASSERT(p->p_state == PRS_NORMAL, ("executing process is still new"));
-
- sched_lock.mtx_lock = (uintptr_t)td;
- mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED);
- cpu_critical_fork_exit();
- CTR4(KTR_PROC, "fork_exit: new thread %p (kse %p, pid %d, %s)",
- td, td->td_sched, p->p_pid, p->p_comm);
-
- /*
- * Processes normally resume in mi_switch() after being
- * cpu_switch()'ed to, but when children start up they arrive here
- * instead, so we must do much the same things as mi_switch() would.
- */
-
- if ((td = PCPU_GET(deadthread))) {
- PCPU_SET(deadthread, NULL);
- thread_stash(td);
- }
- td = curthread;
- mtx_unlock_spin(&sched_lock);
-
- /*
- * cpu_set_fork_handler intercepts this function call to
- * have this call a non-return function to stay in kernel mode.
- * initproc has its own fork handler, but it does return.
- */
- KASSERT(callout != NULL, ("NULL callout in fork_exit"));
- callout(arg, frame);
-
- /*
- * Check if a kernel thread misbehaved and returned from its main
- * function.
- */
- PROC_LOCK(p);
- if (p->p_flag & P_KTHREAD) {
- PROC_UNLOCK(p);
- printf("Kernel thread \"%s\" (pid %d) exited prematurely.\n",
- p->p_comm, p->p_pid);
- kthread_exit(0);
- }
- PROC_UNLOCK(p);
- mtx_assert(&Giant, MA_NOTOWNED);
-}
-
-/*
- * Simplified back end of syscall(), used when returning from fork()
- * directly into user mode. Giant is not held on entry, and must not
- * be held on return. This function is passed in to fork_exit() as the
- * first parameter and is called when returning to a new userland process.
- */
-void
-fork_return(td, frame)
- struct thread *td;
- struct trapframe *frame;
-{
-
- userret(td, frame, 0);
-#ifdef KTRACE
- if (KTRPOINT(td, KTR_SYSRET))
- ktrsysret(SYS_fork, 0, 0);
-#endif
- mtx_assert(&Giant, MA_NOTOWNED);
-}
diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/kern/kern_shutdown.c
--- a/freebsd-5.3-xen-sparse/kern/kern_shutdown.c Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,635 +0,0 @@
-/*-
- * Copyright (c) 1986, 1988, 1991, 1993
- * The Regents of the University of California. All rights reserved.
- * (c) UNIX System Laboratories, Inc.
- * All or some portions of this file are derived from material licensed
- * to the University of California by American Telephone and Telegraph
- * Co. or Unix System Laboratories, Inc. and are reproduced herein with
- * the permission of UNIX System Laboratories, Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)kern_shutdown.c 8.3 (Berkeley) 1/21/94
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/kern/kern_shutdown.c,v 1.163.2.2 2004/09/10
00:04:17 scottl Exp $");
-
-#include "opt_kdb.h"
-#include "opt_hw_wdog.h"
-#include "opt_mac.h"
-#include "opt_panic.h"
-#include "opt_show_busybufs.h"
-#include "opt_sched.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/bio.h>
-#include <sys/buf.h>
-#include <sys/conf.h>
-#include <sys/cons.h>
-#include <sys/eventhandler.h>
-#include <sys/kdb.h>
-#include <sys/kernel.h>
-#include <sys/kthread.h>
-#include <sys/mac.h>
-#include <sys/malloc.h>
-#include <sys/mount.h>
-#include <sys/proc.h>
-#include <sys/reboot.h>
-#include <sys/resourcevar.h>
-#include <sys/smp.h> /* smp_active */
-#include <sys/sysctl.h>
-#include <sys/sysproto.h>
-#include <sys/vnode.h>
-
-#include <machine/cpu.h>
-#include <machine/pcb.h>
-#include <machine/smp.h>
-
-#include <sys/signalvar.h>
-
-#ifndef PANIC_REBOOT_WAIT_TIME
-#define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
-#endif
-
-/*
- * Note that stdarg.h and the ANSI style va_start macro is used for both
- * ANSI and traditional C compilers.
- */
-#include <machine/stdarg.h>
-
-#ifdef KDB
-#ifdef KDB_UNATTENDED
-int debugger_on_panic = 0;
-#else
-int debugger_on_panic = 1;
-#endif
-SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic, CTLFLAG_RW,
- &debugger_on_panic, 0, "Run debugger on kernel panic");
-
-#ifdef KDB_TRACE
-int trace_on_panic = 1;
-#else
-int trace_on_panic = 0;
-#endif
-SYSCTL_INT(_debug, OID_AUTO, trace_on_panic, CTLFLAG_RW,
- &trace_on_panic, 0, "Print stack trace on kernel panic");
-#endif /* KDB */
-
-int sync_on_panic = 0;
-SYSCTL_INT(_kern, OID_AUTO, sync_on_panic, CTLFLAG_RW,
- &sync_on_panic, 0, "Do a sync before rebooting from a panic");
-
-SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW, 0, "Shutdown environment");
-
-#ifdef HW_WDOG
-/*
- * If there is a hardware watchdog, point this at the function needed to
- * hold it off.
- * It's needed when the kernel needs to do some lengthy operations.
- * e.g. in wd.c when dumping core.. It's most annoying to have
- * your precious core-dump only half written because the wdog kicked in.
- */
-watchdog_tickle_fn wdog_tickler = NULL;
-#endif /* HW_WDOG */
-
-/*
- * Variable panicstr contains argument to first call to panic; used as flag
- * to indicate that the kernel has already called panic.
- */
-const char *panicstr;
-
-int dumping; /* system is dumping */
-static struct dumperinfo dumper; /* our selected dumper */
-
-/* Context information for dump-debuggers. */
-static struct pcb dumppcb; /* Registers. */
-static lwpid_t dumptid; /* Thread ID. */
-
-static void boot(int) __dead2;
-static void poweroff_wait(void *, int);
-static void shutdown_halt(void *junk, int howto);
-static void shutdown_panic(void *junk, int howto);
-static void shutdown_reset(void *junk, int howto);
-
-/* register various local shutdown events */
-static void
-shutdown_conf(void *unused)
-{
-
- EVENTHANDLER_REGISTER(shutdown_final, poweroff_wait, NULL,
- SHUTDOWN_PRI_FIRST);
- EVENTHANDLER_REGISTER(shutdown_final, shutdown_halt, NULL,
- SHUTDOWN_PRI_LAST + 100);
- EVENTHANDLER_REGISTER(shutdown_final, shutdown_panic, NULL,
- SHUTDOWN_PRI_LAST + 100);
- EVENTHANDLER_REGISTER(shutdown_final, shutdown_reset, NULL,
- SHUTDOWN_PRI_LAST + 200);
-}
-
-SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL)
-
-/*
- * The system call that results in a reboot
- *
- * MPSAFE
- */
-/* ARGSUSED */
-int
-reboot(struct thread *td, struct reboot_args *uap)
-{
- int error;
-
- error = 0;
-#ifdef MAC
- error = mac_check_system_reboot(td->td_ucred, uap->opt);
-#endif
- if (error == 0)
- error = suser(td);
- if (error == 0) {
- mtx_lock(&Giant);
- boot(uap->opt);
- mtx_unlock(&Giant);
- }
- return (error);
-}
-
-/*
- * Called by events that want to shut down.. e.g <CTL><ALT><DEL> on a PC
- */
-static int shutdown_howto = 0;
-
-void
-shutdown_nice(int howto)
-{
-
- shutdown_howto = howto;
-
- /* Send a signal to init(8) and have it shutdown the world */
- if (initproc != NULL) {
- PROC_LOCK(initproc);
- psignal(initproc, SIGINT);
- PROC_UNLOCK(initproc);
- } else {
- /* No init(8) running, so simply reboot */
- boot(RB_NOSYNC);
- }
- return;
-}
-static int waittime = -1;
-
-static void
-print_uptime(void)
-{
- int f;
- struct timespec ts;
-
- getnanouptime(&ts);
- printf("Uptime: ");
- f = 0;
- if (ts.tv_sec >= 86400) {
- printf("%ldd", (long)ts.tv_sec / 86400);
- ts.tv_sec %= 86400;
- f = 1;
- }
- if (f || ts.tv_sec >= 3600) {
- printf("%ldh", (long)ts.tv_sec / 3600);
- ts.tv_sec %= 3600;
- f = 1;
- }
- if (f || ts.tv_sec >= 60) {
- printf("%ldm", (long)ts.tv_sec / 60);
- ts.tv_sec %= 60;
- f = 1;
- }
- printf("%lds\n", (long)ts.tv_sec);
-}
-
-static void
-doadump(void)
-{
-
- /*
- * Sometimes people have to call this from the kernel debugger.
- * (if 'panic' can not dump)
- * Give them a clue as to why they can't dump.
- */
- if (dumper.dumper == NULL) {
- printf("Cannot dump. No dump device defined.\n");
- return;
- }
-
- savectx(&dumppcb);
- dumptid = curthread->td_tid;
- dumping++;
- dumpsys(&dumper);
-}
-
-/*
- * Go through the rigmarole of shutting down..
- * this used to be in machdep.c but I'll be dammned if I could see
- * anything machine dependant in it.
- */
-static void
-boot(int howto)
-{
- static int first_buf_printf = 1;
-
- /* collect extra flags that shutdown_nice might have set */
- howto |= shutdown_howto;
-
- /* We are out of the debugger now. */
- kdb_active = 0;
-
-#ifdef SMP
- if (smp_active)
- printf("boot() called on cpu#%d\n", PCPU_GET(cpuid));
-#endif
- /*
- * Do any callouts that should be done BEFORE syncing the filesystems.
- */
- EVENTHANDLER_INVOKE(shutdown_pre_sync, howto);
-
- /*
- * Now sync filesystems
- */
- if (!cold && (howto & RB_NOSYNC) == 0 && waittime < 0) {
- register struct buf *bp;
- int iter, nbusy, pbusy;
-#ifndef PREEMPTION
- int subiter;
-#endif
-
- waittime = 0;
-
- sync(&thread0, NULL);
-
- /*
- * With soft updates, some buffers that are
- * written will be remarked as dirty until other
- * buffers are written.
- */
- for (iter = pbusy = 0; iter < 20; iter++) {
- nbusy = 0;
- for (bp = &buf[nbuf]; --bp >= buf; ) {
- if ((bp->b_flags & B_INVAL) == 0 &&
- BUF_REFCNT(bp) > 0) {
- nbusy++;
- } else if ((bp->b_flags & (B_DELWRI | B_INVAL))
- == B_DELWRI) {
- /* bawrite(bp);*/
- nbusy++;
- }
- }
- if (nbusy == 0) {
- if (first_buf_printf)
- printf("No buffers busy after final
sync");
- break;
- }
- if (first_buf_printf) {
- printf("Syncing disks, buffers remaining... ");
- first_buf_printf = 0;
- }
- printf("%d ", nbusy);
- if (nbusy < pbusy)
- iter = 0;
- pbusy = nbusy;
- sync(&thread0, NULL);
-
-#ifdef PREEMPTION
- /*
- * Drop Giant and spin for a while to allow
- * interrupt threads to run.
- */
- DROP_GIANT();
- DELAY(50000 * iter);
- PICKUP_GIANT();
-#else
- /*
- * Drop Giant and context switch several times to
- * allow interrupt threads to run.
- */
- DROP_GIANT();
- for (subiter = 0; subiter < 50 * iter; subiter++) {
- mtx_lock_spin(&sched_lock);
- mi_switch(SW_VOL, NULL);
- mtx_unlock_spin(&sched_lock);
- DELAY(1000);
- }
- PICKUP_GIANT();
-#endif
- }
- printf("\n");
- /*
- * Count only busy local buffers to prevent forcing
- * a fsck if we're just a client of a wedged NFS server
- */
- nbusy = 0;
- for (bp = &buf[nbuf]; --bp >= buf; ) {
- if (((bp->b_flags&B_INVAL) == 0 && BUF_REFCNT(bp)) ||
- ((bp->b_flags & (B_DELWRI|B_INVAL)) == B_DELWRI)) {
- if (bp->b_dev == NULL) {
- TAILQ_REMOVE(&mountlist,
- bp->b_vp->v_mount, mnt_list);
- continue;
- }
- nbusy++;
-#if defined(SHOW_BUSYBUFS) || defined(DIAGNOSTIC)
- printf(
- "%d: dev:%s, flags:%0x, blkno:%ld, lblkno:%ld\n",
- nbusy, devtoname(bp->b_dev),
- bp->b_flags, (long)bp->b_blkno,
- (long)bp->b_lblkno);
-#endif
- }
- }
- if (nbusy) {
- /*
- * Failed to sync all blocks. Indicate this and don't
- * unmount filesystems (thus forcing an fsck on reboot).
- */
- printf("Giving up on %d buffers\n", nbusy);
- DELAY(5000000); /* 5 seconds */
- } else {
- if (!first_buf_printf)
- printf("Final sync complete\n");
- /*
- * Unmount filesystems
- */
- if (panicstr == 0)
- vfs_unmountall();
- }
- DELAY(100000); /* wait for console output to finish */
- }
-
- print_uptime();
-
- /*
- * Ok, now do things that assume all filesystem activity has
- * been completed.
- */
- EVENTHANDLER_INVOKE(shutdown_post_sync, howto);
- splhigh();
- if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold && !dumping)
- doadump();
-
- /* Now that we're going to really halt the system... */
- EVENTHANDLER_INVOKE(shutdown_final, howto);
-
- for(;;) ; /* safety against shutdown_reset not working */
- /* NOTREACHED */
-}
-
-/*
- * If the shutdown was a clean halt, behave accordingly.
- */
-static void
-shutdown_halt(void *junk, int howto)
-{
-
- if (howto & RB_HALT) {
- printf("\n");
- printf("The operating system has halted.\n");
- printf("Please press any key to reboot.\n\n");
- switch (cngetc()) {
- case -1: /* No console, just die */
- cpu_halt();
- /* NOTREACHED */
- default:
- howto &= ~RB_HALT;
- break;
- }
- }
-}
-
-/*
- * Check to see if the system paniced, pause and then reboot
- * according to the specified delay.
- */
-static void
-shutdown_panic(void *junk, int howto)
-{
- int loop;
-
- if (howto & RB_DUMP) {
- if (PANIC_REBOOT_WAIT_TIME != 0) {
- if (PANIC_REBOOT_WAIT_TIME != -1) {
- printf("Automatic reboot in %d seconds - "
- "press a key on the console to abort\n",
- PANIC_REBOOT_WAIT_TIME);
- for (loop = PANIC_REBOOT_WAIT_TIME * 10;
- loop > 0; --loop) {
- DELAY(1000 * 100); /* 1/10th second */
- /* Did user type a key? */
- if (cncheckc() != -1)
- break;
- }
- if (!loop)
- return;
- }
- } else { /* zero time specified - reboot NOW */
- return;
- }
- printf("--> Press a key on the console to reboot,\n");
- printf("--> or switch off the system now.\n");
- cngetc();
- }
-}
-
-/*
- * Everything done, now reset
- */
-static void
-shutdown_reset(void *junk, int howto)
-{
-
- printf("Rebooting...\n");
- DELAY(1000000); /* wait 1 sec for printf's to complete and be read */
- /* cpu_boot(howto); */ /* doesn't do anything at the moment */
- cpu_reset();
- /* NOTREACHED */ /* assuming reset worked */
-}
-
-#ifdef SMP
-static u_int panic_cpu = NOCPU;
-#endif
-
-/*
- * Panic is called on unresolvable fatal errors. It prints "panic: mesg",
- * and then reboots. If we are called twice, then we avoid trying to sync
- * the disks as this often leads to recursive panics.
- *
- * MPSAFE
- */
-void
-panic(const char *fmt, ...)
-{
- struct thread *td = curthread;
- int bootopt, newpanic;
- va_list ap;
- static char buf[256];
-
-#ifdef SMP
- /*
- * We don't want multiple CPU's to panic at the same time, so we
- * use panic_cpu as a simple spinlock. We have to keep checking
- * panic_cpu if we are spinning in case the panic on the first
- * CPU is canceled.
- */
- if (panic_cpu != PCPU_GET(cpuid))
- while (atomic_cmpset_int(&panic_cpu, NOCPU,
- PCPU_GET(cpuid)) == 0)
- while (panic_cpu != NOCPU)
- ; /* nothing */
-#endif
-
- bootopt = RB_AUTOBOOT | RB_DUMP;
- newpanic = 0;
- if (panicstr)
- bootopt |= RB_NOSYNC;
- else {
- panicstr = fmt;
- newpanic = 1;
- }
-
- va_start(ap, fmt);
- if (newpanic) {
- (void)vsnprintf(buf, sizeof(buf), fmt, ap);
- panicstr = buf;
- printf("panic: %s\n", buf);
- } else {
- printf("panic: ");
- vprintf(fmt, ap);
- printf("\n");
- }
- va_end(ap);
-#ifdef SMP
- printf("cpuid = %d\n", PCPU_GET(cpuid));
-#endif
-
-#ifdef KDB
- if (newpanic && trace_on_panic)
- kdb_backtrace();
- if (debugger_on_panic)
- kdb_enter("panic");
-#ifdef RESTARTABLE_PANICS
- /* See if the user aborted the panic, in which case we continue. */
- if (panicstr == NULL) {
-#ifdef SMP
- atomic_store_rel_int(&panic_cpu, NOCPU);
-#endif
- return;
- }
-#endif
-#endif
- mtx_lock_spin(&sched_lock);
- td->td_flags |= TDF_INPANIC;
- mtx_unlock_spin(&sched_lock);
- if (!sync_on_panic)
- bootopt |= RB_NOSYNC;
-#ifdef XEN
- HYPERVISOR_crash();
-#else
- boot(bootopt);
-#endif
-}
-
-/*
- * Support for poweroff delay.
- */
-#ifndef POWEROFF_DELAY
-# define POWEROFF_DELAY 5000
-#endif
-static int poweroff_delay = POWEROFF_DELAY;
-
-SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW,
- &poweroff_delay, 0, "");
-
-static void
-poweroff_wait(void *junk, int howto)
-{
-
- if (!(howto & RB_POWEROFF) || poweroff_delay <= 0)
- return;
- DELAY(poweroff_delay * 1000);
-}
-
-/*
- * Some system processes (e.g. syncer) need to be stopped at appropriate
- * points in their main loops prior to a system shutdown, so that they
- * won't interfere with the shutdown process (e.g. by holding a disk buf
- * to cause sync to fail). For each of these system processes, register
- * shutdown_kproc() as a handler for one of shutdown events.
- */
-static int kproc_shutdown_wait = 60;
-SYSCTL_INT(_kern_shutdown, OID_AUTO, kproc_shutdown_wait, CTLFLAG_RW,
- &kproc_shutdown_wait, 0, "");
-
-void
-kproc_shutdown(void *arg, int howto)
-{
- struct proc *p;
- char procname[MAXCOMLEN + 1];
- int error;
-
- if (panicstr)
- return;
-
- p = (struct proc *)arg;
- strlcpy(procname, p->p_comm, sizeof(procname));
- printf("Waiting (max %d seconds) for system process `%s' to stop...",
- kproc_shutdown_wait, procname);
- error = kthread_suspend(p, kproc_shutdown_wait * hz);
-
- if (error == EWOULDBLOCK)
- printf("timed out\n");
- else
- printf("done\n");
-}
-
-/* Registration of dumpers */
-int
-set_dumper(struct dumperinfo *di)
-{
-
- if (di == NULL) {
- bzero(&dumper, sizeof dumper);
- return (0);
- }
- if (dumper.dumper != NULL)
- return (EBUSY);
- dumper = *di;
- return (0);
-}
-
-#if defined(__powerpc__)
-void
-dumpsys(struct dumperinfo *di __unused)
-{
-
- printf("Kernel dumps not implemented on this architecture\n");
-}
-#endif
diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/mkbuildtree
--- a/freebsd-5.3-xen-sparse/mkbuildtree Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,119 +0,0 @@
-#!/bin/bash
-
-# mkbuildtree <build tree>
-#
-# Creates symbolic links in <build tree> for the sparse tree
-# in the current directory.
-
-# Script to determine the relative path between two directories.
-# Copyright (c) D. J. Hawkey Jr. 2002
-# Fixed for Xen project by K. Fraser in 2003.
-abs_to_rel ()
-{
- local CWD SRCPATH
-
- if [ "$1" != "/" -a "${1##*[^/]}" = "/" ]; then
- SRCPATH=${1%?}
- else
- SRCPATH=$1
- fi
- if [ "$2" != "/" -a "${2##*[^/]}" = "/" ]; then
- DESTPATH=${2%?}
- else
- DESTPATH=$2
- fi
-
- CWD=$PWD
- [ "${1%%[^/]*}" != "/" ] && cd $1 && SRCPATH=$PWD
- [ "${2%%[^/]*}" != "/" ] && cd $2 && DESTPATH=$PWD
- [ "$CWD" != "$PWD" ] && cd $CWD
-
- BASEPATH=$SRCPATH
-
- [ "$SRCPATH" = "$DESTPATH" ] && DESTPATH="." && return
- [ "$SRCPATH" = "/" ] && DESTPATH=${DESTPATH#?} && return
-
- while [ "$BASEPATH/" != "${DESTPATH%${DESTPATH#$BASEPATH/}}" ]; do
- BASEPATH=${BASEPATH%/*}
- done
-
- SRCPATH=${SRCPATH#$BASEPATH}
- DESTPATH=${DESTPATH#$BASEPATH}
- DESTPATH=${DESTPATH#?}
- while [ -n "$SRCPATH" ]; do
- SRCPATH=${SRCPATH%/*}
- DESTPATH="../$DESTPATH"
- done
-
- [ -z "$BASEPATH" ] && BASEPATH="/"
- [ "${DESTPATH##*[^/]}" = "/" ] && DESTPATH=${DESTPATH%?}
-}
-
-# relative_lndir <target_dir>
-# Creates a tree of symlinks in the current working directory that mirror
-# real files in <target_dir>. <target_dir> should be relative to the current
-# working directory. Symlinks in <target_dir> are ignored. Source-control files
-# are ignored.
-relative_lndir ()
-{
- local SYMLINK_DIR REAL_DIR pref i j
- SYMLINK_DIR=$PWD
- REAL_DIR=$1
- (
- cd $REAL_DIR
- for i in `find . -type d | grep -v SCCS`; do
- [ -d $SYMLINK_DIR/$i ] || mkdir -p $SYMLINK_DIR/$i
- (
- cd $i
- pref=`echo $i | sed -e 's#/[^/]*#../#g' -e 's#^\.##'`
- for j in `find . -type f -o -type l -maxdepth 1`; do
- ln -sf ${pref}${REAL_DIR}/$i/$j ${SYMLINK_DIR}/$i/$j
- done
- )
- done
- )
-}
-
-[ "$1" == "" ] && { echo "Syntax: $0 <linux tree to xenify>"; exit 1; }
-
-# Get absolute path to the destination directory
-pushd . >/dev/null
-cd ${1}
-AD=$PWD
-popd >/dev/null
-
-# Get absolute path to the source directory
-AS=`pwd`
-
-# Get name of sparse directory
-SDN=$(basename $AS)
-
-# Get path to source, relative to destination
-abs_to_rel ${AD} ${AS}
-RS=$DESTPATH
-
-# Remove old copies of files and directories at the destination
-for i in `find sys -type f -o -type l` ; do rm -f ${AD}/${i#./} ; done
-
-# We now work from the destination directory
-cd ${AD}
-
-# Remove old symlinks
-find sys -type l | while read f
-do
- case $(readlink $f) in
- */$SDN/*)
- rm -f $f
- ;;
- esac
-done
-
-if [ -f ${AD}/BUILDING ]; then
- # Create symlinks of files and directories which exist in the sparse source
- (cd sys && relative_lndir ../${RS}/sys)
-else
- # Create symlinks of files and directories which exist in the sparse source
- relative_lndir ${RS}
- rm -f mkbuildtree
-fi
-
diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/xenfbsd_kernel_build
--- a/freebsd-5.3-xen-sparse/xenfbsd_kernel_build Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,7 +0,0 @@
-#!/bin/csh -f
-cd i386-xen/conf
-config XENCONF
-cd ../compile/XENCONF
-make kernel-clean
-ln -s ../../include/xen-public/io/ring.h
-make kernel-depend; make -j4 kernel
diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/Makefile
--- a/netbsd-2.0-xen-sparse/Makefile Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,25 +0,0 @@
-#
-#
-#
-
-.PHONY: clean config install netbsd
-
-TOPDIR ?= $(shell pwd)
-NETBSD_RELEASE ?= $(patsubst netbsd-%-xen%,%,$(notdir $(TOPDIR)))
-NETBSD_VER ?= $(patsubst netbsd-%-xen%,%,$(notdir $(TOPDIR)))
-NETBSD_KERNEL ?= XEN
-
-clean:
- @mkdir -p compile/$(NETBSD_KERNEL)
- cd compile/$(NETBSD_KERNEL) && TOPDIR=$(TOPDIR)
NETBSD_VER=$(NETBSD_VER) ../../nbmake-xen cleandir
-
-config:
- @mkdir -p compile/$(NETBSD_KERNEL)
- cd compile/$(NETBSD_KERNEL) && TOPDIR=$(TOPDIR)
NETBSD_VER=$(NETBSD_VER) ../../nbconfig-xen $(NETBSD_KERNEL)
-
-netbsd:
- cd compile/$(NETBSD_KERNEL) && TOPDIR=$(TOPDIR)
NETBSD_VER=$(NETBSD_VER) ../../nbmake-xen dependall
-
-install:
- @mkdir -p $(dir $(INSTALL_PATH)/$(INSTALL_NAME))
- install -c compile/$(NETBSD_KERNEL)/netbsd
$(INSTALL_PATH)/$(INSTALL_NAME)
diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/mkbuildtree
--- a/netbsd-2.0-xen-sparse/mkbuildtree Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,128 +0,0 @@
-#!/bin/bash
-
-# mkbuildtree <build tree>
-#
-# Creates symbolic links in <build tree> for the sparse tree
-# in the current directory.
-
-# Script to determine the relative path between two directories.
-# Copyright (c) D. J. Hawkey Jr. 2002
-# Fixed for Xen project by K. Fraser in 2003.
-abs_to_rel ()
-{
- local CWD SRCPATH
-
- if [ "$1" != "/" -a "${1##*[^/]}" = "/" ]; then
- SRCPATH=${1%?}
- else
- SRCPATH=$1
- fi
- if [ "$2" != "/" -a "${2##*[^/]}" = "/" ]; then
- DESTPATH=${2%?}
- else
- DESTPATH=$2
- fi
-
- CWD=$PWD
- [ "${1%%[^/]*}" != "/" ] && cd $1 && SRCPATH=$PWD
- [ "${2%%[^/]*}" != "/" ] && cd $2 && DESTPATH=$PWD
- [ "$CWD" != "$PWD" ] && cd $CWD
-
- BASEPATH=$SRCPATH
-
- [ "$SRCPATH" = "$DESTPATH" ] && DESTPATH="." && return
- [ "$SRCPATH" = "/" ] && DESTPATH=${DESTPATH#?} && return
-
- while [ "$BASEPATH/" != "${DESTPATH%${DESTPATH#$BASEPATH/}}" ]; do
- BASEPATH=${BASEPATH%/*}
- done
-
- SRCPATH=${SRCPATH#$BASEPATH}
- DESTPATH=${DESTPATH#$BASEPATH}
- DESTPATH=${DESTPATH#?}
- while [ -n "$SRCPATH" ]; do
- SRCPATH=${SRCPATH%/*}
- DESTPATH="../$DESTPATH"
- done
-
- [ -z "$BASEPATH" ] && BASEPATH="/"
- [ "${DESTPATH##*[^/]}" = "/" ] && DESTPATH=${DESTPATH%?}
-}
-
-# relative_lndir <target_dir>
-# Creates a tree of symlinks in the current working directory that mirror
-# real files in <target_dir>. <target_dir> should be relative to the current
-# working directory. Symlinks in <target_dir> are ignored. Source-control files
-# are ignored.
-relative_lndir ()
-{
- local SYMLINK_DIR REAL_DIR pref i j
- SYMLINK_DIR=$PWD
- REAL_DIR=$1
- (
- cd $REAL_DIR
- for i in `find . -type d | grep -v SCCS`; do
- [ -d $SYMLINK_DIR/$i ] || mkdir -p $SYMLINK_DIR/$i
- (
- cd $i
- pref=`echo $i | sed -e 's#/[^/]*#../#g' -e 's#^\.##'`
- for j in `find . -type f -o -type l -maxdepth 1`; do
- ln -sf ${pref}${REAL_DIR}/$i/$j ${SYMLINK_DIR}/$i/$j
- done
- )
- done
- )
-}
-
-[ "$1" == "" ] && { echo "Syntax: $0 <linux tree to xenify>"; exit 1; }
-
-# Get absolute path to the destination directory
-pushd . >/dev/null
-cd ${1}
-AD=$PWD
-popd >/dev/null
-
-# Get absolute path to the source directory
-AS=`pwd`
-
-# Get name of sparse directory
-SDN=$(basename $AS)
-
-# Get path to source, relative to destination
-abs_to_rel ${AD} ${AS}
-RS=$DESTPATH
-
-# Remove old copies of files and directories at the destination
-for i in `find sys -type f -o -type l` ; do rm -f ${AD}/${i#./} ; done
-
-# We now work from the destination directory
-cd ${AD}
-
-# Remove old symlinks
-find sys -type l | while read f
-do
- case $(readlink $f) in
- */$SDN/*)
- rm -f $f
- ;;
- esac
-done
-
-if [ -f ${AD}/BUILDING ]; then
- # Create symlinks of files and directories which exist in the sparse source
- (cd sys && relative_lndir ../${RS}/sys)
-else
- # Create symlinks of files and directories which exist in the sparse source
- relative_lndir ${RS}
- rm -f mkbuildtree
-fi
-
-# Create links to the shared definitions of the Xen interface
-rm -rf ${AD}/sys/arch/xen/include/xen-public
-mkdir ${AD}/sys/arch/xen/include/xen-public
-cd ${AD}/sys/arch/xen/include/xen-public
-relative_lndir ../../../../../${RS}/../xen/include/public
-
-# Remove files which don't exist anymore
-rm -rf ${AD}/sys/arch/xen/xen/events.c
-rm -rf ${AD}/sys/arch/xen/include/events.h
diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/nbconfig-xen
--- a/netbsd-2.0-xen-sparse/nbconfig-xen Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,27 +0,0 @@
-#! /bin/sh
-#
-
-: ${HOS:=$(uname -s | tr /A-Z/ /a-z/)}
-: ${HARCH:=$(uname -m 2>/dev/null || echo i386)}
-: ${NETBSD_RELEASE:=$(basename $(cd $(dirname $0) && pwd) | sed
's/netbsd-\([0-9]\+\.[0-9]\+\).*/\1/')}
-: ${NETBSD_VERSION:=$(basename $(cd $(dirname $0) && pwd) | sed
's/netbsd-\([0-9]\+\.[0-9]\+.*\)-xen.*/\1/')}
-: ${TOPDIR:=$(cd $(dirname $0) && pwd | sed
's/\(netbsd-[0-9]\+\.[0-9]\+.*-xen[^/]*\)/\1/')}
-
-case "$HARCH" in
-i586|i686)
- HARCH=i386
- ;;
-esac
-
-TOOLDIR="$TOPDIR/../netbsd-${NETBSD_RELEASE}-tools/$HOS-$HARCH"; export TOOLDIR
-
-CONF="$1"
-case "$1" in
- /*)
- CONF="$1"
- ;;
- *)
- CONF="$TOPDIR"/sys/arch/xen/conf/"$1"
- ;;
-esac
-exec "${TOOLDIR}/bin/nbconfig" -b $(pwd) -s "$TOPDIR"/sys "$CONF"
diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/nbmake-xen
--- a/netbsd-2.0-xen-sparse/nbmake-xen Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,33 +0,0 @@
-#! /bin/sh
-# Set proper variables to allow easy "make" building of a NetBSD subtree.
-# Generated from: $NetBSD: build.sh,v 1.126 2004/02/04 11:23:40 lukem Exp $
-#
-
-: ${HOS:=$(uname -s | tr /A-Z/ /a-z/)}
-: ${HARCH:=$(uname -m 2>/dev/null || echo i386)}
-: ${NETBSD_RELEASE:=$(basename $(cd $(dirname $0) && pwd) | sed
's/netbsd-\([0-9]\+\.[0-9]\+\).*/\1/')}
-: ${NETBSD_VERSION:=$(basename $(cd $(dirname $0) && pwd) | sed
's/netbsd-\([0-9]\+\.[0-9]\+.*\)-xen.*/\1/')}
-: ${TOPDIR:=$(cd $(dirname $0) && pwd | sed
's/\(netbsd-[0-9]\+\.[0-9]\+.*-xen[^/]*\)/\1/')}
-
-case "$HARCH" in
-i586|i686)
- HARCH=i386
- ;;
-esac
-
-NETBSDSRCDIR="$TOPDIR"; export NETBSDSRCDIR
-DESTDIR="$TOPDIR/root"; export DESTDIR
-unset MAKEOBJDIRPREFIX
-MAKEOBJDIR='${.CURDIR:C,^'"$TOPDIR,$TOPDIR/obj,}"; export MAKEOBJDIR
-RELEASEDIR="$TOPDIR/release"; export RELEASEDIR
-MKUNPRIVED='yes'; export MKUNPRIVED
-MAKEVERBOSE='1'; export MAKEVERBOSE
-LC_ALL='C'; export LC_ALL
-TOOLDIR="$TOPDIR/../netbsd-${NETBSD_RELEASE}-tools/$HOS-$HARCH"; export TOOLDIR
-MACHINE='i386'; export MACHINE
-MACHINE_ARCH='i386'; export MACHINE_ARCH
-MAKEFLAGS="-de -m $TOPDIR/share/mk MKOBJDIRS=yes"; export MAKEFLAGS
-BUILDID="${NETBSD_RELEASE}"; export BUILDID
-USETOOLS=yes; export USETOOLS
-
-exec "${TOOLDIR}/bin/nbmake" ${1+"$@"}
diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/sys/arch/xen/conf/XEN
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/conf/XEN Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,175 +0,0 @@
-# $NetBSD: XEN,v 1.1.2.2 2004/07/15 20:19:34 he Exp $
-
-include "arch/xen/conf/std.xen"
-
-options INCLUDE_CONFIG_FILE # embed config file in kernel binary
-
-#options UVMHIST
-#options UVMHIST_PRINT
-#options SYSCALL_DEBUG
-
-maxusers 32 # estimated number of users
-
-#
-options XEN
-#options DOM0OPS
-
-#options I586_CPU
-options I686_CPU
-
-#options VM86 # virtual 8086 emulation
-#options USER_LDT # user-settable LDT; used by WINE
-
-#options MTRR # memory-type range register syscall support
-
-#options CONSDEVNAME="\"xencons\""
-#options CONS_OVERRIDE
-
-options INSECURE # disable kernel security levels - X
needs this
-
-options RTC_OFFSET=0 # hardware clock is this many mins. west of GMT
-options NTP # NTP phase/frequency locked loop
-
-options KTRACE # system call tracing via ktrace(1)
-#options SYSTRACE # system call vetting via systrace(1)
-
-options SYSVMSG # System V-like message queues
-options SYSVSEM # System V-like semaphores
-#options SEMMNI=10 # number of semaphore identifiers
-#options SEMMNS=60 # number of semaphores in system
-#options SEMUME=10 # max number of undo entries per process
-#options SEMMNU=30 # number of undo structures in system
-options SYSVSHM # System V-like memory sharing
-#options SHMMAXPGS=2048 # 2048 pages is the default
-options P1003_1B_SEMAPHORE # p1003.1b semaphore support
-
-options LKM # loadable kernel modules
-
-options USERCONF # userconf(4) support
-options SYSCTL_INCLUDE_DESCR # Include sysctl descriptions in kernel
-
-# Diagnostic/debugging support options
-options DIAGNOSTIC # expensive kernel consistency checks
-options DEBUG # expensive debugging checks/support
-options KMEMSTATS # kernel memory statistics (vmstat -m)
-options DDB # in-kernel debugger
-options DDB_ONPANIC=1 # see also sysctl(8): `ddb.onpanic'
-options DDB_HISTORY_SIZE=512 # enable history editing in DDB
-#options KGDB # remote debugger
-#options KGDB_DEVNAME="\"com\"",KGDB_DEVADDR=0x2f8,KGDB_DEVRATE=57600
-makeoptions DEBUG="-g" # compile full symbol table
-
-#options COMPAT_14 # NetBSD 1.4
-#options COMPAT_15 # NetBSD 1.5
-options COMPAT_16 # NetBSD 1.6
-
-##options COMPAT_LINUX # binary compatibility with Linux
-#options COMPAT_FREEBSD # binary compatibility with FreeBSD
-#options COMPAT_MACH # binary compatibility with Mach binaries
-#options COMPAT_DARWIN # binary compatibility with Darwin binaries
-#options EXEC_MACHO # exec MACH-O binaries
-#options COMPAT_PECOFF # kernel support to run Win32 apps
-
-file-system FFS # UFS
-file-system EXT2FS # second extended file system (linux)
-#file-system LFS # log-structured file system
-#file-system MFS # memory file system
-file-system NFS # Network File System client
-#file-system NTFS # Windows/NT file system (experimental)
-#file-system CD9660 # ISO 9660 + Rock Ridge file system
-#file-system MSDOSFS # MS-DOS file system
-file-system FDESC # /dev/fd
-file-system KERNFS # /kern
-file-system NULLFS # loopback file system
-#file-system OVERLAY # overlay file system
-#file-system PORTAL # portal filesystem (still experimental)
-file-system PROCFS # /proc
-#file-system UMAPFS # NULLFS + uid and gid remapping
-#file-system UNION # union file system
-#file-system SMBFS # experimental - CIFS; also needs nsmb (below)
-
-#options QUOTA # UFS quotas
-#options SOFTDEP # FFS soft updates support.
-#options NFSSERVER # Network File System server
-
-options GATEWAY # packet forwarding
-options INET # IP + ICMP + TCP + UDP
-options INET6 # IPV6
-options IPSEC # IP security
-options IPSEC_ESP # IP security (encryption part; define w/IPSEC)
-options MROUTING # IP multicast routing
-options PFIL_HOOKS # pfil(9) packet filter hooks
-options IPFILTER_LOG # ipmon(8) log support
-
-options NFS_BOOT_DHCP,NFS_BOOT_BOOTPARAM,NFS_BOOT_BOOTSTATIC
-#options NFS_BOOTSTATIC_MYIP="\"169.254.1.2\""
-#options NFS_BOOTSTATIC_GWIP="\"169.254.1.1\""
-#options NFS_BOOTSTATIC_MASK="\"255.255.255.0\""
-#options NFS_BOOTSTATIC_SERVADDR="\"169.254.1.1\""
-#options NFS_BOOTSTATIC_SERVER="\"server:/path/to/root\""
-
-options WSEMUL_VT100 # VT100 / VT220 emulation
-options WS_KERNEL_FG=WSCOL_GREEN
-options WSDISPLAY_COMPAT_PCVT # emulate some ioctls
-options WSDISPLAY_COMPAT_SYSCONS # emulate some ioctls
-options WSDISPLAY_COMPAT_USL # VT handling
-options WSDISPLAY_COMPAT_RAWKBD # can get raw scancodes
-options WSDISPLAY_DEFAULTSCREENS=4
-options PCDISPLAY_SOFTCURSOR
-
-config netbsd root on ? type ?
-#config netbsd root on wd0a type ffs
-#config netbsd root on xennet0 type nfs
-
-mainbus0 at root
-
-cpu* at mainbus?
-
-hypervisor* at mainbus? # Xen hypervisor
-
-npx0 at hypervisor? # x86 math coprocessor
-
-xencons* at hypervisor? # Xen virtual console
-xennet* at hypervisor? # Xen virtual network interface
-
-xbd* at hypervisor? # Xen virtual block device
-wd* at hypervisor? # Xen vbd (wd identity)
-sd* at hypervisor? # Xen vbd (sd identity)
-cd* at hypervisor? # Xen vbd (cd identity)
-
-#xenkbc* at hypervisor? # Xen Keyboard/Mouse Interface
-#pckbd* at xenkbc? # Keyboard
-#vga* at hypervisor? # Xen VGA display
-#pms* at xenkbc? # PS/2 Mouse for wsmouse
-
-#wskbd* at pckbd? console ?
-#wsdisplay* at vga? console ?
-#wsmouse* at pms? mux 0
-
-
-include "arch/xen/conf/GENERIC.local"
-
-
-pseudo-device ccd 4 # concatenated/striped disk devices
-#pseudo-device cgd 4 # cryptographic disk devices
-#pseudo-device md 1 # memory disk device (ramdisk)
-pseudo-device vnd 4 # disk-like interface to files
-
-pseudo-device bpfilter 8 # Berkeley packet filter
-pseudo-device ipfilter # IP filter (firewall) and NAT
-pseudo-device loop # network loopback
-#pseudo-device tun 2 # network tunneling over tty
-#pseudo-device gre 2 # generic L3 over IP tunnel
-#pseudo-device gif 4 # IPv[46] over IPv[46] tunnel (RFC1933)
-#pseudo-device faith 1 # IPv[46] tcp relay translation i/f
-#pseudo-device stf 1 # 6to4 IPv6 over IPv4 encapsulation
-#pseudo-device vlan # IEEE 802.1q encapsulation
-#pseudo-device bridge # simple inter-network bridging
-
-pseudo-device pty # pseudo-terminals
-pseudo-device rnd # /dev/random and in-kernel generator
-pseudo-device clockctl # user control of clock subsystem
-
-pseudo-device wsmux # mouse & keyboard multiplexor
-pseudo-device wsfont
-pseudo-device ksyms # /dev/ksyms
diff -r 64cd054aa143 -r 0255f48b757f
netbsd-2.0-xen-sparse/sys/arch/xen/conf/files.xen
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/conf/files.xen Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,232 +0,0 @@
-# $NetBSD: files.xen,v 1.3.2.1 2004/05/22 15:59:02 he Exp $
-# NetBSD: files.x86,v 1.10 2003/10/08 17:30:00 bouyer Exp
-# NetBSD: files.i386,v 1.254 2004/03/25 23:32:10 jmc Exp
-
-maxpartitions 8
-
-maxusers 2 16 128
-
-# Processor type options.
-defflag opt_cputype.h I686_CPU
-
-# delay before cpu_reset() for reboot.
-defparam CPURESET_DELAY
-
-# No unmapped page below kernel stack
-defflag NOREDZONE
-
-# Beep on halt
-defflag opt_beep.h BEEP_ONHALT
-defparam opt_beep.h BEEP_ONHALT_COUNT
-defparam opt_beep.h BEEP_ONHALT_PITCH BEEP_ONHALT_PERIOD
-
-file arch/xen/i386/autoconf.c
-file arch/i386/i386/db_dbgreg.S ddb | kstack_check_dr0
-file arch/i386/i386/db_disasm.c ddb
-file arch/i386/i386/db_interface.c ddb
-file arch/i386/i386/db_memrw.c ddb | kgdb
-file arch/i386/i386/db_trace.c ddb
-file kern/subr_disk_mbr.c disk
-file arch/xen/i386/gdt.c
-file arch/xen/i386/hypervisor_machdep.c
-file arch/i386/i386/in_cksum.S inet | inet6
-file arch/i386/i386/ipkdb_glue.c ipkdb
-file arch/i386/i386/kgdb_machdep.c kgdb
-file arch/xen/i386/machdep.c
-file arch/xen/i386/identcpu.c
-file arch/i386/i386/math_emulate.c math_emulate
-file arch/i386/i386/mem.c
-file kern/kern_microtime.c i586_cpu | i686_cpu
-file arch/i386/i386/mtrr_k6.c mtrr
-file netns/ns_cksum.c ns
-file arch/xen/i386/pmap.c
-file arch/i386/i386/process_machdep.c
-file arch/i386/i386/procfs_machdep.c procfs
-file arch/xen/i386/sys_machdep.c
-file arch/i386/i386/syscall.c
-file arch/xen/i386/trap.c
-file arch/i386/i386/vm_machdep.c
-file arch/xen/i386/xen_machdep.c
-
-file arch/xen/xen/xen_debug.c
-
-file arch/xen/xen/clock.c
-file arch/xen/xen/evtchn.c
-file arch/xen/xen/ctrl_if.c
-
-file dev/cons.c
-
-file arch/i386/i386/mptramp.S multiprocessor
-file arch/i386/i386/ipifuncs.c multiprocessor
-
-file arch/i386/i386/pmc.c perfctrs
-
-file crypto/des/arch/i386/des_enc.S des
-file crypto/des/arch/i386/des_cbc.S des
-
-file crypto/blowfish/arch/i386/bf_enc.S blowfish
-file crypto/blowfish/arch/i386/bf_cbc.S blowfish & !i386_cpu
-
-#
-# Machine-independent SCSI drivers
-#
-
-#xxx include "dev/scsipi/files.scsipi"
-
-#
-# Machine-independent ATA drivers
-#
-
-#xxx include "dev/ata/files.ata"
-
-# Memory Disk for install floppy
-file dev/md_root.c memory_disk_hooks
-
-#
-define mainbus { [apid = -1] }
-
-file arch/x86/x86/bus_dma.c
-file arch/xen/x86/bus_space.c
-file arch/x86/x86/cacheinfo.c
-file arch/xen/x86/consinit.c
-file arch/xen/x86/intr.c
-file arch/x86/x86/ipi.c multiprocessor
-file arch/x86/x86/lock_machdep.c lockdebug
-file arch/x86/x86/softintr.c
-
-include "arch/xen/conf/files.compat"
-
-#
-# System bus types
-#
-
-device mainbus: mainbus
-attach mainbus at root
-file arch/xen/i386/mainbus.c mainbus
-
-# Xen hypervisor
-device hypervisor { }
-attach hypervisor at mainbus
-file arch/xen/xen/hypervisor.c hypervisor needs-flag
-
-# Numeric Processing Extension; Math Co-processor
-device npx
-file arch/xen/i386/npx.c npx needs-flag
-
-attach npx at hypervisor with npx_hv
-file arch/xen/i386/npx_hv.c npx_hv
-
-# Xen console support
-device xencons: tty
-attach xencons at hypervisor
-file arch/xen/xen/xencons.c xencons needs-flag
-
-include "dev/wscons/files.wscons"
-include "dev/wsfont/files.wsfont"
-
-include "dev/pckbport/files.pckbport"
-
-# CPUS
-
-define cpu { [apid = -1] }
-device cpu
-attach cpu at mainbus
-file arch/xen/i386/cpu.c cpu
-
-#
-# Compatibility modules
-#
-
-# VM86 mode
-file arch/i386/i386/vm86.c vm86
-
-# VM86 in kernel
-file arch/i386/i386/kvm86.c kvm86
-file arch/i386/i386/kvm86call.S kvm86
-
-# Binary compatibility with previous NetBSD releases (COMPAT_XX)
-file arch/i386/i386/compat_13_machdep.c compat_13 | compat_aout
-file arch/i386/i386/compat_16_machdep.c compat_16 | compat_ibcs2
-
-# SVR4 binary compatibility (COMPAT_SVR4)
-include "compat/svr4/files.svr4"
-file arch/i386/i386/svr4_machdep.c compat_svr4
-file arch/i386/i386/svr4_sigcode.S compat_svr4
-file arch/i386/i386/svr4_syscall.c compat_svr4
-
-# MACH binary compatibility (COMPAT_MACH)
-include "compat/mach/files.mach"
-file arch/i386/i386/mach_machdep.c compat_mach | compat_darwin
-file arch/i386/i386/mach_sigcode.S compat_mach | compat_darwin
-file arch/i386/i386/mach_syscall.c compat_mach | compat_darwin
-file arch/i386/i386/macho_machdep.c exec_macho
-
-# DARWIN binary compatibility (COMPAT_DARWIN)
-include "compat/darwin/files.darwin"
-file arch/i386/i386/darwin_machdep.c compat_darwin
-
-# iBCS-2 binary compatibility (COMPAT_IBCS2)
-include "compat/ibcs2/files.ibcs2"
-file arch/i386/i386/ibcs2_machdep.c compat_ibcs2
-file arch/i386/i386/ibcs2_sigcode.S compat_ibcs2
-file arch/i386/i386/ibcs2_syscall.c compat_ibcs2
-
-# Linux binary compatibility (COMPAT_LINUX)
-include "compat/linux/files.linux"
-include "compat/linux/arch/i386/files.linux_i386"
-file arch/i386/i386/linux_sigcode.S compat_linux
-file arch/i386/i386/linux_syscall.c compat_linux
-file arch/i386/i386/linux_trap.c compat_linux
-
-# FreeBSD binary compatibility (COMPAT_FREEBSD)
-include "compat/freebsd/files.freebsd"
-file arch/i386/i386/freebsd_machdep.c compat_freebsd
-file arch/i386/i386/freebsd_sigcode.S compat_freebsd
-file arch/i386/i386/freebsd_syscall.c compat_freebsd
-
-# a.out binary compatibility (COMPAT_AOUT)
-include "compat/aout/files.aout"
-
-# Win32 binary compatibility (COMPAT_PECOFF)
-include "compat/pecoff/files.pecoff"
-
-# OSS audio driver compatibility
-include "compat/ossaudio/files.ossaudio"
-
-# Xen devices
-
-# Network driver
-device xennet: arp, ether, ifnet
-attach xennet at hypervisor
-file arch/xen/xen/if_xennet.c xennet needs-flag
-
-# Block device driver and wd/sd/cd identities
-device xbd: disk
-attach xbd at hypervisor
-file arch/xen/xen/xbd.c xbd | wd | sd | cd needs-flag
-
-device wd: disk
-attach wd at hypervisor
-
-device sd: disk
-attach sd at hypervisor
-
-device cd: disk
-attach cd at hypervisor
-
-# Keyboard
-device xenkbc: pckbport
-attach xenkbc at hypervisor
-file arch/xen/xen/xenkbc.c xenkbc needs-flag
-
-# Generic VGA
-attach vga at hypervisor with vga_xen
-file arch/xen/xen/vga_xen.c vga_xen needs-flag
-
-# Domain-0 operations
-defflag opt_xen.h DOM0OPS
-file arch/xen/xen/machmem.c dom0ops
-file arch/xen/xen/privcmd.c dom0ops
-file arch/xen/xen/vfr.c dom0ops
-
-include "arch/xen/conf/majors.i386"
diff -r 64cd054aa143 -r 0255f48b757f
netbsd-2.0-xen-sparse/sys/arch/xen/i386/autoconf.c
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/i386/autoconf.c Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,630 +0,0 @@
-/* $NetBSD: autoconf.c,v 1.1.2.1 2004/05/22 15:57:33 he Exp $ */
-/* NetBSD: autoconf.c,v 1.75 2003/12/30 12:33:22 pk Exp */
-
-/*-
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * William Jolitz.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)autoconf.c 7.1 (Berkeley) 5/9/91
- */
-
-/*
- * Setup the system to run on the current machine.
- *
- * Configure() is called at boot time and initializes the vba
- * device tables and the memory controller monitoring. Available
- * devices are determined (from possibilities mentioned in ioconf.c),
- * and the drivers are initialized.
- */
-
-#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: autoconf.c,v 1.1.2.1 2004/05/22 15:57:33 he Exp
$");
-
-#include "opt_compat_oldboot.h"
-#include "opt_multiprocessor.h"
-#include "opt_nfs_boot.h"
-#include "xennet.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/buf.h>
-#include <sys/disklabel.h>
-#include <sys/conf.h>
-#ifdef COMPAT_OLDBOOT
-#include <sys/reboot.h>
-#endif
-#include <sys/device.h>
-#include <sys/malloc.h>
-#include <sys/vnode.h>
-#include <sys/fcntl.h>
-#include <sys/dkio.h>
-#include <sys/proc.h>
-#include <sys/user.h>
-
-#ifdef NFS_BOOT_BOOTSTATIC
-#include <net/if.h>
-#include <net/if_ether.h>
-#include <netinet/in.h>
-#include <nfs/rpcv2.h>
-#include <nfs/nfsproto.h>
-#include <nfs/nfs.h>
-#include <nfs/nfsmount.h>
-#include <nfs/nfsdiskless.h>
-#include <machine/if_xennetvar.h>
-#endif
-
-#include <machine/pte.h>
-#include <machine/cpu.h>
-#include <machine/gdt.h>
-#include <machine/pcb.h>
-#include <machine/bootinfo.h>
-
-#include "ioapic.h"
-#include "lapic.h"
-
-#if NIOAPIC > 0
-#include <machine/i82093var.h>
-#endif
-
-#if NLAPIC > 0
-#include <machine/i82489var.h>
-#endif
-
-static int match_harddisk(struct device *, struct btinfo_bootdisk *);
-static void matchbiosdisks(void);
-static void findroot(void);
-static int is_valid_disk(struct device *);
-
-extern struct disklist *i386_alldisks;
-extern int i386_ndisks;
-
-#include "bios32.h"
-#if NBIOS32 > 0
-#include <machine/bios32.h>
-#endif
-
-#include "opt_pcibios.h"
-#ifdef PCIBIOS
-#include <dev/pci/pcireg.h>
-#include <dev/pci/pcivar.h>
-#include <i386/pci/pcibios.h>
-#endif
-
-#include "opt_kvm86.h"
-#ifdef KVM86
-#include <machine/kvm86.h>
-#endif
-
-#include "opt_xen.h"
-
-struct device *booted_device;
-int booted_partition;
-
-/*
- * Determine i/o configuration for a machine.
- */
-void
-cpu_configure(void)
-{
-
- startrtclock();
-
-#if NBIOS32 > 0
- bios32_init();
-#endif
-#ifdef PCIBIOS
- pcibios_init();
-#endif
-
- /* kvm86 needs a TSS */
- i386_proc0_tss_ldt_init();
-#ifdef KVM86
- kvm86_init();
-#endif
-
- if (config_rootfound("mainbus", NULL) == NULL)
- panic("configure: mainbus not configured");
-
-#ifdef INTRDEBUG
- intr_printconfig();
-#endif
-
-#if NIOAPIC > 0
- lapic_set_lvt();
- ioapic_enable();
-#endif
- /* resync cr0 after FPU configuration */
- lwp0.l_addr->u_pcb.pcb_cr0 = rcr0();
-#ifdef MULTIPROCESSOR
- /* propagate this to the idle pcb's. */
- cpu_init_idle_pcbs();
-#endif
-
- spl0();
-#if NLAPIC > 0
- lapic_tpr = 0;
-#endif
-}
-
-void
-cpu_rootconf(void)
-{
- findroot();
- matchbiosdisks();
-
- printf("boot device: %s\n",
- booted_device ? booted_device->dv_xname : "<unknown>");
-
- setroot(booted_device, booted_partition);
-}
-
-/*
- * XXX ugly bit of code. But, this is the only safe time that the
- * match between BIOS disks and native disks can be done.
- */
-static void
-matchbiosdisks(void)
-{
- struct btinfo_biosgeom *big;
- struct bi_biosgeom_entry *be;
- struct device *dv;
- int i, ck, error, m, n;
- struct vnode *tv;
- char mbr[DEV_BSIZE];
- int dklist_size;
- int bmajor;
-
- big = lookup_bootinfo(BTINFO_BIOSGEOM);
-
- if (big == NULL)
- return;
-
- /*
- * First, count all native disks
- */
- for (dv = alldevs.tqh_first; dv != NULL; dv = dv->dv_list.tqe_next)
- if (is_valid_disk(dv))
- i386_ndisks++;
-
- if (i386_ndisks == 0)
- return;
-
- dklist_size = sizeof (struct disklist) + (i386_ndisks - 1) *
- sizeof (struct nativedisk_info);
-
- /* XXX M_TEMP is wrong */
- i386_alldisks = malloc(dklist_size, M_TEMP, M_NOWAIT);
- if (i386_alldisks == NULL)
- return;
-
- memset(i386_alldisks, 0, dklist_size);
-
- i386_alldisks->dl_nnativedisks = i386_ndisks;
- i386_alldisks->dl_nbiosdisks = big->num;
- for (i = 0; i < big->num; i++) {
- i386_alldisks->dl_biosdisks[i].bi_dev = big->disk[i].dev;
- i386_alldisks->dl_biosdisks[i].bi_sec = big->disk[i].sec;
- i386_alldisks->dl_biosdisks[i].bi_head = big->disk[i].head;
- i386_alldisks->dl_biosdisks[i].bi_cyl = big->disk[i].cyl;
- i386_alldisks->dl_biosdisks[i].bi_lbasecs = big->disk[i].totsec;
- i386_alldisks->dl_biosdisks[i].bi_flags = big->disk[i].flags;
-#ifdef GEOM_DEBUG
-#ifdef NOTYET
- printf("disk %x: flags %x, interface %x, device %llx\n",
- big->disk[i].dev, big->disk[i].flags,
- big->disk[i].interface_path, big->disk[i].device_path);
-#endif
-#endif
- }
-
- /*
- * XXX code duplication from findroot()
- */
- n = -1;
- for (dv = alldevs.tqh_first; dv != NULL; dv = dv->dv_list.tqe_next) {
- if (dv->dv_class != DV_DISK)
- continue;
-#ifdef GEOM_DEBUG
- printf("matchbiosdisks: trying to match (%s) %s\n",
- dv->dv_xname, dv->dv_cfdata->cf_name);
-#endif
- if (is_valid_disk(dv)) {
- n++;
- sprintf(i386_alldisks->dl_nativedisks[n].ni_devname,
- "%s%d", dv->dv_cfdata->cf_name,
- dv->dv_unit);
-
- bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
- if (bmajor == -1)
- return;
-
- if (bdevvp(MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART),
- &tv))
- panic("matchbiosdisks: can't alloc vnode");
-
- error = VOP_OPEN(tv, FREAD, NOCRED, 0);
- if (error) {
- vput(tv);
- continue;
- }
- error = vn_rdwr(UIO_READ, tv, mbr, DEV_BSIZE, 0,
- UIO_SYSSPACE, 0, NOCRED, NULL, 0);
- VOP_CLOSE(tv, FREAD, NOCRED, 0);
- if (error) {
-#ifdef GEOM_DEBUG
- printf("matchbiosdisks: %s: MBR read failure\n",
- dv->dv_xname);
-#endif
- continue;
- }
-
- for (ck = i = 0; i < DEV_BSIZE; i++)
- ck += mbr[i];
- for (m = i = 0; i < big->num; i++) {
- be = &big->disk[i];
-#ifdef GEOM_DEBUG
- printf("match %s with %d ", dv->dv_xname, i);
- printf("dev ck %x bios ck %x\n", ck, be->cksum);
-#endif
- if (be->flags & BI_GEOM_INVALID)
- continue;
- if (be->cksum == ck &&
- !memcmp(&mbr[MBR_PART_OFFSET], be->dosparts,
- MBR_PART_COUNT *
- sizeof (struct mbr_partition))) {
-#ifdef GEOM_DEBUG
- printf("matched bios disk %x with %s\n",
- be->dev, dv->dv_xname);
-#endif
- i386_alldisks->dl_nativedisks[n].
- ni_biosmatches[m++] = i;
- }
- }
- i386_alldisks->dl_nativedisks[n].ni_nmatches = m;
- vput(tv);
- }
- }
-}
-
-#ifdef COMPAT_OLDBOOT
-u_long bootdev = 0; /* should be dev_t, but not until 32 bits */
-#endif
-
-/*
- * helper function for "findroot()":
- * return nonzero if disk device matches bootinfo
- */
-static int
-match_harddisk(struct device *dv, struct btinfo_bootdisk *bid)
-{
- struct vnode *tmpvn;
- int error;
- struct disklabel label;
- int found = 0;
- int bmajor;
-
- /*
- * A disklabel is required here. The
- * bootblocks don't refuse to boot from
- * a disk without a label, but this is
- * normally not wanted.
- */
- if (bid->labelsector == -1)
- return(0);
-
- /*
- * lookup major number for disk block device
- */
- bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
- if (bmajor == -1)
- return(0); /* XXX panic() ??? */
-
- /*
- * Fake a temporary vnode for the disk, open
- * it, and read the disklabel for comparison.
- */
- if (bdevvp(MAKEDISKDEV(bmajor, dv->dv_unit, bid->partition), &tmpvn))
- panic("findroot can't alloc vnode");
- error = VOP_OPEN(tmpvn, FREAD, NOCRED, 0);
- if (error) {
-#ifndef DEBUG
- /*
- * Ignore errors caused by missing
- * device, partition or medium.
- */
- if (error != ENXIO && error != ENODEV)
-#endif
- printf("findroot: can't open dev %s%c (%d)\n",
- dv->dv_xname, 'a' + bid->partition, error);
- vput(tmpvn);
- return(0);
- }
- error = VOP_IOCTL(tmpvn, DIOCGDINFO, &label, FREAD, NOCRED, 0);
- if (error) {
- /*
- * XXX can't happen - open() would
- * have errored out (or faked up one)
- */
- printf("can't get label for dev %s%c (%d)\n",
- dv->dv_xname, 'a' + bid->partition, error);
- goto closeout;
- }
-
- /* compare with our data */
- if (label.d_type == bid->label.type &&
- label.d_checksum == bid->label.checksum &&
- !strncmp(label.d_packname, bid->label.packname, 16))
- found = 1;
-
-closeout:
- VOP_CLOSE(tmpvn, FREAD, NOCRED, 0);
- vput(tmpvn);
- return(found);
-}
-
-/*
- * Attempt to find the device from which we were booted.
- * If we can do so, and not instructed not to do so,
- * change rootdev to correspond to the load device.
- */
-void
-findroot(void)
-{
- struct btinfo_bootdisk *bid;
- struct device *dv;
- union xen_cmdline_parseinfo xcp;
-#ifdef COMPAT_OLDBOOT
- int i, majdev, unit, part;
- char buf[32];
-#endif
-
- if (booted_device)
- return;
-
- if (lookup_bootinfo(BTINFO_NETIF)) {
- /*
- * We got netboot interface information, but
- * "device_register()" couldn't match it to a configured
- * device. Bootdisk information cannot be present at the
- * same time, so give up.
- */
- printf("findroot: netboot interface not found\n");
- return;
- }
-
- bid = lookup_bootinfo(BTINFO_BOOTDISK);
- if (bid) {
- /*
- * Scan all disk devices for ones that match the passed data.
- * Don't break if one is found, to get possible multiple
- * matches - for problem tracking. Use the first match anyway
- * because lower device numbers are more likely to be the
- * boot device.
- */
- for (dv = alldevs.tqh_first; dv != NULL;
- dv = dv->dv_list.tqe_next) {
- if (dv->dv_class != DV_DISK)
- continue;
-
- if (!strcmp(dv->dv_cfdata->cf_name, "fd")) {
- /*
- * Assume the configured unit number matches
- * the BIOS device number. (This is the old
- * behaviour.) Needs some ideas how to handle
- * BIOS's "swap floppy drive" options.
- */
- if ((bid->biosdev & 0x80) ||
- dv->dv_unit != bid->biosdev)
- continue;
-
- goto found;
- }
-
- if (is_valid_disk(dv)) {
- /*
- * Don't trust BIOS device numbers, try
- * to match the information passed by the
- * bootloader instead.
- */
- if ((bid->biosdev & 0x80) == 0 ||
- !match_harddisk(dv, bid))
- continue;
-
- goto found;
- }
-
- /* no "fd", "wd", "sd", "ld", "ed" */
- continue;
-
-found:
- if (booted_device) {
- printf("warning: double match for boot "
- "device (%s, %s)\n",
- booted_device->dv_xname, dv->dv_xname);
- continue;
- }
- booted_device = dv;
- booted_partition = bid->partition;
- }
-
- if (booted_device)
- return;
- }
-
- xen_parse_cmdline(XEN_PARSE_BOOTDEV, &xcp);
-
- for (dv = alldevs.tqh_first; dv != NULL; dv = dv->dv_list.tqe_next) {
- if (is_valid_disk(dv) == 0)
- continue;
-
- if (xcp.xcp_bootdev[0] == 0) {
- booted_device = dv;
- break;
- }
-
- if (strncmp(xcp.xcp_bootdev, dv->dv_xname,
- strlen(dv->dv_xname)))
- continue;
-
- if (strlen(xcp.xcp_bootdev) > strlen(dv->dv_xname)) {
- booted_partition = toupper(
- xcp.xcp_bootdev[strlen(dv->dv_xname)]) - 'A';
- }
-
- booted_device = dv;
- break;
- }
-
- if (booted_device)
- return;
-
-#ifdef COMPAT_OLDBOOT
-#if 0
- printf("howto %x bootdev %x ", boothowto, bootdev);
-#endif
-
- if ((bootdev & B_MAGICMASK) != (u_long)B_DEVMAGIC)
- return;
-
- majdev = (bootdev >> B_TYPESHIFT) & B_TYPEMASK;
- name = devsw_blk2name(majdev);
- if (name == NULL)
- return;
-
- part = (bootdev >> B_PARTITIONSHIFT) & B_PARTITIONMASK;
- unit = (bootdev >> B_UNITSHIFT) & B_UNITMASK;
-
- sprintf(buf, "%s%d", name, unit);
- for (dv = alldevs.tqh_first; dv != NULL; dv = dv->dv_list.tqe_next) {
- if (strcmp(buf, dv->dv_xname) == 0) {
- booted_device = dv;
- booted_partition = part;
- return;
- }
- }
-#endif
-}
-
-#include "pci.h"
-
-#include <dev/isa/isavar.h>
-#if NPCI > 0
-#include <dev/pci/pcivar.h>
-#endif
-
-void
-device_register(struct device *dev, void *aux)
-{
- /*
- * Handle network interfaces here, the attachment information is
- * not available driver independantly later.
- * For disks, there is nothing useful available at attach time.
- */
-#if NXENNET > 0
- if (dev->dv_class == DV_IFNET) {
- union xen_cmdline_parseinfo xcp;
-
- xen_parse_cmdline(XEN_PARSE_BOOTDEV, &xcp);
- if (strncmp(xcp.xcp_bootdev, dev->dv_xname, 16) == 0) {
-#ifdef NFS_BOOT_BOOTSTATIC
- nfs_bootstatic_callback = xennet_bootstatic_callback;
-#endif
- goto found;
- }
- }
-#endif
- if (dev->dv_class == DV_IFNET) {
- struct btinfo_netif *bin = lookup_bootinfo(BTINFO_NETIF);
- if (bin == NULL)
- return;
-
- /*
- * We don't check the driver name against the device name
- * passed by the boot ROM. The ROM should stay usable
- * if the driver gets obsoleted.
- * The physical attachment information (checked below)
- * must be sufficient to identify the device.
- */
-
- if (bin->bus == BI_BUS_ISA &&
- !strcmp(dev->dv_parent->dv_cfdata->cf_name, "isa")) {
- struct isa_attach_args *iaa = aux;
-
- /* compare IO base address */
- /* XXXJRT what about multiple I/O addrs? */
- if (iaa->ia_nio > 0 &&
- bin->addr.iobase == iaa->ia_io[0].ir_addr)
- goto found;
- }
-#if NPCI > 0
- if (bin->bus == BI_BUS_PCI &&
- !strcmp(dev->dv_parent->dv_cfdata->cf_name, "pci")) {
- struct pci_attach_args *paa = aux;
- int b, d, f;
-
- /*
- * Calculate BIOS representation of:
- *
- * <bus,device,function>
- *
- * and compare.
- */
- pci_decompose_tag(paa->pa_pc, paa->pa_tag, &b, &d, &f);
- if (bin->addr.tag == ((b << 8) | (d << 3) | f))
- goto found;
- }
-#endif
- }
- return;
-
-found:
- if (booted_device) {
- /* XXX should be a "panic()" */
- printf("warning: double match for boot device (%s, %s)\n",
- booted_device->dv_xname, dev->dv_xname);
- return;
- }
- booted_device = dev;
-}
-
-static int
-is_valid_disk(struct device *dv)
-{
- const char *name;
-
- if (dv->dv_class != DV_DISK)
- return (0);
-
- name = dv->dv_cfdata->cf_name;
-
- return (strcmp(name, "sd") == 0 || strcmp(name, "wd") == 0 ||
- strcmp(name, "ld") == 0 || strcmp(name, "ed") == 0 ||
- strcmp(name, "xbd") == 0);
-}
diff -r 64cd054aa143 -r 0255f48b757f
netbsd-2.0-xen-sparse/sys/arch/xen/i386/gdt.c
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/i386/gdt.c Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,408 +0,0 @@
-/* $NetBSD: gdt.c,v 1.1 2004/03/11 21:44:08 cl Exp $ */
-/* NetBSD: gdt.c,v 1.32 2004/02/13 11:36:13 wiz Exp */
-
-/*-
- * Copyright (c) 1996, 1997 The NetBSD Foundation, Inc.
- * All rights reserved.
- *
- * This code is derived from software contributed to The NetBSD Foundation
- * by John T. Kohl and Charles M. Hannum.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the NetBSD
- * Foundation, Inc. and its contributors.
- * 4. Neither the name of The NetBSD Foundation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: gdt.c,v 1.1 2004/03/11 21:44:08 cl Exp $");
-
-#include "opt_multiprocessor.h"
-#include "opt_xen.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/proc.h>
-#include <sys/lock.h>
-#include <sys/user.h>
-
-#include <uvm/uvm.h>
-
-#include <machine/gdt.h>
-
-int gdt_size[2]; /* total number of GDT entries */
-int gdt_count[2]; /* number of GDT entries in use */
-int gdt_next[2]; /* next available slot for sweeping */
-int gdt_free[2]; /* next free slot; terminated with GNULL_SEL */
-
-struct lock gdt_lock_store;
-
-static __inline void gdt_lock(void);
-static __inline void gdt_unlock(void);
-void gdt_init(void);
-void gdt_grow(int);
-int gdt_get_slot(void);
-int gdt_get_slot1(int);
-void gdt_put_slot(int);
-void gdt_put_slot1(int, int);
-
-/*
- * Lock and unlock the GDT, to avoid races in case gdt_{ge,pu}t_slot() sleep
- * waiting for memory.
- *
- * Note that the locking done here is not sufficient for multiprocessor
- * systems. A freshly allocated slot will still be of type SDT_SYSNULL for
- * some time after the GDT is unlocked, so gdt_compact() could attempt to
- * reclaim it.
- */
-static __inline void
-gdt_lock()
-{
-
- (void) lockmgr(&gdt_lock_store, LK_EXCLUSIVE, NULL);
-}
-
-static __inline void
-gdt_unlock()
-{
-
- (void) lockmgr(&gdt_lock_store, LK_RELEASE, NULL);
-}
-
-void
-setgdt(int sel, void *base, size_t limit,
- int type, int dpl, int def32, int gran)
-{
- struct segment_descriptor sd;
- CPU_INFO_ITERATOR cii;
- struct cpu_info *ci;
-
- if (type == SDT_SYS386TSS) {
- /* printk("XXX TSS descriptor not supported in GDT\n"); */
- return;
- }
-
- setsegment(&sd, base, limit, type, dpl, def32, gran);
- for (CPU_INFO_FOREACH(cii, ci)) {
- if (ci->ci_gdt != NULL) {
-#ifndef XEN
- ci->ci_gdt[sel].sd = sd;
-#else
- xen_update_descriptor(&ci->ci_gdt[sel],
- (union descriptor *)&sd);
-#endif
- }
- }
-}
-
-/*
- * Initialize the GDT subsystem. Called from autoconf().
- */
-void
-gdt_init()
-{
- size_t max_len, min_len;
- union descriptor *old_gdt;
- struct vm_page *pg;
- vaddr_t va;
- struct cpu_info *ci = &cpu_info_primary;
-
- lockinit(&gdt_lock_store, PZERO, "gdtlck", 0, 0);
-
- max_len = MAXGDTSIZ * sizeof(gdt[0]);
- min_len = MINGDTSIZ * sizeof(gdt[0]);
-
- gdt_size[0] = MINGDTSIZ;
- gdt_count[0] = NGDT;
- gdt_next[0] = NGDT;
- gdt_free[0] = GNULL_SEL;
-
- gdt_size[1] = 0;
- gdt_count[1] = MAXGDTSIZ;
- gdt_next[1] = MAXGDTSIZ;
- gdt_free[1] = GNULL_SEL;
-
- old_gdt = gdt;
- gdt = (union descriptor *)uvm_km_valloc(kernel_map, max_len + max_len);
- for (va = (vaddr_t)gdt; va < (vaddr_t)gdt + min_len; va += PAGE_SIZE) {
- pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
- if (pg == NULL) {
- panic("gdt_init: no pages");
- }
- pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg),
- VM_PROT_READ | VM_PROT_WRITE);
- }
- memcpy(gdt, old_gdt, NGDT * sizeof(gdt[0]));
- ci->ci_gdt = gdt;
- setsegment(&ci->ci_gdt[GCPU_SEL].sd, ci, sizeof(struct cpu_info)-1,
- SDT_MEMRWA, SEL_KPL, 1, 1);
-
- gdt_init_cpu(ci);
-}
-
-/*
- * Allocate shadow GDT for a slave CPU.
- */
-void
-gdt_alloc_cpu(struct cpu_info *ci)
-{
- int max_len = MAXGDTSIZ * sizeof(gdt[0]);
- int min_len = MINGDTSIZ * sizeof(gdt[0]);
- struct vm_page *pg;
- vaddr_t va;
-
- ci->ci_gdt = (union descriptor *)uvm_km_valloc(kernel_map, max_len);
- for (va = (vaddr_t)ci->ci_gdt; va < (vaddr_t)ci->ci_gdt + min_len;
- va += PAGE_SIZE) {
- while ((pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO))
- == NULL) {
- uvm_wait("gdt_alloc_cpu");
- }
- pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg),
- VM_PROT_READ | VM_PROT_WRITE);
- }
- memset(ci->ci_gdt, 0, min_len);
- memcpy(ci->ci_gdt, gdt, gdt_count[0] * sizeof(gdt[0]));
- setsegment(&ci->ci_gdt[GCPU_SEL].sd, ci, sizeof(struct cpu_info)-1,
- SDT_MEMRWA, SEL_KPL, 1, 1);
-}
-
-
-/*
- * Load appropriate gdt descriptor; we better be running on *ci
- * (for the most part, this is how a CPU knows who it is).
- */
-void
-gdt_init_cpu(struct cpu_info *ci)
-{
-#ifndef XEN
- struct region_descriptor region;
- size_t max_len;
-
- max_len = MAXGDTSIZ * sizeof(gdt[0]);
- setregion(®ion, ci->ci_gdt, max_len - 1);
- lgdt(®ion);
-#else
- size_t len = gdt_size[0] * sizeof(gdt[0]);
- unsigned long frames[len >> PAGE_SHIFT];
- vaddr_t va;
- pt_entry_t *ptp;
- pt_entry_t *maptp;
- int f;
-
- for (va = (vaddr_t)ci->ci_gdt, f = 0;
- va < (vaddr_t)ci->ci_gdt + len;
- va += PAGE_SIZE, f++) {
- KASSERT(va >= VM_MIN_KERNEL_ADDRESS);
- ptp = kvtopte(va);
- frames[f] = *ptp >> PAGE_SHIFT;
- maptp = (pt_entry_t *)vtomach((vaddr_t)ptp);
- PTE_CLEARBITS(ptp, maptp, PG_RW);
- }
- PTE_UPDATES_FLUSH();
- /* printk("loading gdt %x, %d entries, %d pages", */
- /* frames[0] << PAGE_SHIFT, gdt_size[0], len >> PAGE_SHIFT); */
- if (HYPERVISOR_set_gdt(frames, gdt_size[0]))
- panic("HYPERVISOR_set_gdt failed!\n");
- lgdt_finish();
-#endif
-}
-
-#ifdef MULTIPROCESSOR
-
-void
-gdt_reload_cpu(struct cpu_info *ci)
-{
- struct region_descriptor region;
- size_t max_len;
-
- max_len = MAXGDTSIZ * sizeof(gdt[0]);
- setregion(®ion, ci->ci_gdt, max_len - 1);
- lgdt(®ion);
-}
-#endif
-
-
-/*
- * Grow the GDT.
- */
-void
-gdt_grow(int which)
-{
- size_t old_len, new_len, max_len;
- CPU_INFO_ITERATOR cii;
- struct cpu_info *ci;
- struct vm_page *pg;
- vaddr_t va;
-
- old_len = gdt_size[which] * sizeof(gdt[0]);
- gdt_size[which] <<= 1;
- new_len = old_len << 1;
-
- if (which != 0) {
- max_len = MAXGDTSIZ * sizeof(gdt[0]);
- if (old_len == 0) {
- gdt_size[which] = MINGDTSIZ;
- new_len = gdt_size[which] * sizeof(gdt[0]);
- }
- for (va = (vaddr_t)(cpu_info_primary.ci_gdt) + old_len +
max_len;
- va < (vaddr_t)(cpu_info_primary.ci_gdt) + new_len +
max_len;
- va += PAGE_SIZE) {
- while ((pg = uvm_pagealloc(NULL, 0, NULL,
UVM_PGA_ZERO)) ==
- NULL) {
- uvm_wait("gdt_grow");
- }
- pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg),
- VM_PROT_READ | VM_PROT_WRITE);
- }
- return;
- }
-
- for (CPU_INFO_FOREACH(cii, ci)) {
- for (va = (vaddr_t)(ci->ci_gdt) + old_len;
- va < (vaddr_t)(ci->ci_gdt) + new_len;
- va += PAGE_SIZE) {
- while ((pg = uvm_pagealloc(NULL, 0, NULL,
UVM_PGA_ZERO)) ==
- NULL) {
- uvm_wait("gdt_grow");
- }
- pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg),
- VM_PROT_READ | VM_PROT_WRITE);
- }
- }
-}
-
-/*
- * Allocate a GDT slot as follows:
- * 1) If there are entries on the free list, use those.
- * 2) If there are fewer than gdt_size entries in use, there are free slots
- * near the end that we can sweep through.
- * 3) As a last resort, we increase the size of the GDT, and sweep through
- * the new slots.
- */
-int
-gdt_get_slot()
-{
- return gdt_get_slot1(0);
-}
-
-int
-gdt_get_slot1(int which)
-{
- size_t offset;
- int slot;
-
- gdt_lock();
-
- if (gdt_free[which] != GNULL_SEL) {
- slot = gdt_free[which];
- gdt_free[which] = gdt[slot].gd.gd_selector;
- } else {
- offset = which * MAXGDTSIZ * sizeof(gdt[0]);
- if (gdt_next[which] != gdt_count[which] + offset)
- panic("gdt_get_slot botch 1");
- if (gdt_next[which] - offset >= gdt_size[which]) {
- if (gdt_size[which] >= MAXGDTSIZ)
- panic("gdt_get_slot botch 2");
- gdt_grow(which);
- }
- slot = gdt_next[which]++;
- }
-
- gdt_count[which]++;
- gdt_unlock();
- return (slot);
-}
-
-/*
- * Deallocate a GDT slot, putting it on the free list.
- */
-void
-gdt_put_slot(int slot)
-{
- gdt_put_slot1(slot, 0);
-}
-
-void
-gdt_put_slot1(int slot, int which)
-{
-
- gdt_lock();
- gdt_count[which]--;
-
- gdt[slot].gd.gd_type = SDT_SYSNULL;
- gdt[slot].gd.gd_selector = gdt_free[which];
- gdt_free[which] = slot;
-
- gdt_unlock();
-}
-
-int
-tss_alloc(struct pcb *pcb)
-{
- int slot;
-
- slot = gdt_get_slot();
- setgdt(slot, &pcb->pcb_tss, sizeof(struct pcb) - 1,
- SDT_SYS386TSS, SEL_KPL, 0, 0);
- return GSEL(slot, SEL_KPL);
-}
-
-void
-tss_free(int sel)
-{
-
- gdt_put_slot(IDXSEL(sel));
-}
-
-/*
- * Caller must have pmap locked for both of these functions.
- */
-void
-ldt_alloc(struct pmap *pmap, union descriptor *ldt, size_t len)
-{
- int slot;
-
- slot = gdt_get_slot1(1);
-#ifndef XEN
- setgdt(slot, ldt, len - 1, SDT_SYSLDT, SEL_KPL, 0, 0);
-#else
- cpu_info_primary.ci_gdt[slot].ld.ld_base = (uint32_t)ldt;
- cpu_info_primary.ci_gdt[slot].ld.ld_entries =
- len / sizeof(union descriptor);
-#endif
- pmap->pm_ldt_sel = GSEL(slot, SEL_KPL);
-}
-
-void
-ldt_free(struct pmap *pmap)
-{
- int slot;
-
- slot = IDXSEL(pmap->pm_ldt_sel);
-
- gdt_put_slot1(slot, 1);
-}
diff -r 64cd054aa143 -r 0255f48b757f
netbsd-2.0-xen-sparse/sys/arch/xen/i386/hypervisor_machdep.c
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/i386/hypervisor_machdep.c Sun Dec
4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,229 +0,0 @@
-/* $NetBSD: hypervisor_machdep.c,v 1.2.2.2 2004/06/17 09:23:13 tron Exp $
*/
-
-/*
- *
- * Copyright (c) 2004 Christian Limpach.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Christian Limpach.
- * 4. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/******************************************************************************
- * hypervisor.c
- *
- * Communication to/from hypervisor.
- *
- * Copyright (c) 2002-2004, K A Fraser
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-
-#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: hypervisor_machdep.c,v 1.2.2.2 2004/06/17 09:23:13
tron Exp $");
-
-#include <sys/cdefs.h>
-#include <sys/param.h>
-#include <sys/systm.h>
-
-#include <machine/xen.h>
-#include <machine/hypervisor.h>
-#include <machine/evtchn.h>
-
-/*
- * Force a proper event-channel callback from Xen after clearing the
- * callback mask. We do this in a very simple manner, by making a call
- * down into Xen. The pending flag will be checked by Xen on return.
- */
-void
-hypervisor_force_callback(void)
-{
-
- (void)HYPERVISOR_xen_version(0);
-}
-
-int stipending(void);
-int
-stipending()
-{
- uint32_t l1;
- unsigned long l2;
- unsigned int l1i, l2i, port;
- int irq;
- shared_info_t *s = HYPERVISOR_shared_info;
- struct cpu_info *ci;
- int ret;
-
- ret = 0;
- ci = curcpu();
-
-#if 0
- if (HYPERVISOR_shared_info->events)
- printf("stipending events %08lx mask %08lx ilevel %d\n",
- HYPERVISOR_shared_info->events,
- HYPERVISOR_shared_info->events_mask, ci->ci_ilevel);
-#endif
-
- /*
- * we're only called after STIC, so we know that we'll have to
- * STI at the end
- */
- cli();
- while (s->vcpu_data[0].evtchn_upcall_pending) {
- s->vcpu_data[0].evtchn_upcall_pending = 0;
- /* NB. No need for a barrier here -- XCHG is a barrier
- * on x86. */
- l1 = x86_atomic_xchg(&s->evtchn_pending_sel, 0);
- while ((l1i = ffs(l1)) != 0) {
- l1i--;
- l1 &= ~(1 << l1i);
-
- l2 = s->evtchn_pending[l1i] & ~s->evtchn_mask[l1i];
- while ((l2i = ffs(l2)) != 0) {
- l2i--;
- l2 &= ~(1 << l2i);
-
- port = (l1i << 5) + l2i;
- if ((irq = evtchn_to_irq[port]) != -1) {
- hypervisor_acknowledge_irq(irq);
- ci->ci_ipending |= (1 << irq);
- if (ret == 0 && ci->ci_ilevel <
- ci->ci_isources[irq]->is_maxlevel)
- ret = 1;
- }
-#if 0 /* XXXcl dev/evtchn */
- else
- evtchn_device_upcall(port);
-#endif
- }
- }
- }
- sti();
-
-#if 0
- if (ci->ci_ipending & 0x1)
- printf("stipending events %08lx mask %08lx ilevel %d ipending
%08x\n",
- HYPERVISOR_shared_info->events,
- HYPERVISOR_shared_info->events_mask, ci->ci_ilevel,
- ci->ci_ipending);
-#endif
-
- return (ret);
-}
-
-void do_hypervisor_callback(struct intrframe *regs)
-{
- uint32_t l1;
- unsigned long l2;
- unsigned int l1i, l2i, port;
- int irq;
- shared_info_t *s = HYPERVISOR_shared_info;
- struct cpu_info *ci;
- int level;
-
- ci = curcpu();
- level = ci->ci_ilevel;
-
- while (s->vcpu_data[0].evtchn_upcall_pending) {
- s->vcpu_data[0].evtchn_upcall_pending = 0;
- /* NB. No need for a barrier here -- XCHG is a barrier
- * on x86. */
- l1 = x86_atomic_xchg(&s->evtchn_pending_sel, 0);
- while ((l1i = ffs(l1)) != 0) {
- l1i--;
- l1 &= ~(1 << l1i);
-
- l2 = s->evtchn_pending[l1i] & ~s->evtchn_mask[l1i];
- while ((l2i = ffs(l2)) != 0) {
- l2i--;
- l2 &= ~(1 << l2i);
-
- port = (l1i << 5) + l2i;
- if ((irq = evtchn_to_irq[port]) != -1)
- do_event(irq, regs);
-#if 0 /* XXXcl dev/evtchn */
- else
- evtchn_device_upcall(port);
-#endif
- }
- }
- }
-
-#ifdef DIAGNOSTIC
- if (level != ci->ci_ilevel)
- printf("hypervisor done %08x level %d/%d ipending %08x\n",
- HYPERVISOR_shared_info->evtchn_pending_sel, level,
- ci->ci_ilevel, ci->ci_ipending);
-#endif
-}
-
-void hypervisor_unmask_event(unsigned int ev)
-{
- shared_info_t *s = HYPERVISOR_shared_info;
-
- x86_atomic_clear_bit(&s->evtchn_mask[0], ev);
- /*
- * The following is basically the equivalent of
- * 'hw_resend_irq'. Just like a real IO-APIC we 'lose the
- * interrupt edge' if the channel is masked.
- */
- if (x86_atomic_test_bit(&s->evtchn_pending[0], ev) &&
- !x86_atomic_test_and_set_bit(&s->evtchn_pending_sel, ev>>5)) {
- s->vcpu_data[0].evtchn_upcall_pending = 1;
- if (!s->vcpu_data[0].evtchn_upcall_mask)
- hypervisor_force_callback();
- }
-}
-
-void hypervisor_mask_event(unsigned int ev)
-{
- shared_info_t *s = HYPERVISOR_shared_info;
-
- x86_atomic_set_bit(&s->evtchn_mask[0], ev);
-}
-
-void hypervisor_clear_event(unsigned int ev)
-{
- shared_info_t *s = HYPERVISOR_shared_info;
-
- x86_atomic_clear_bit(&s->evtchn_pending[0], ev);
-}
diff -r 64cd054aa143 -r 0255f48b757f
netbsd-2.0-xen-sparse/sys/arch/xen/i386/locore.S
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/i386/locore.S Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,1998 +0,0 @@
-/* $NetBSD: locore.S,v 1.2.2.1 2004/05/22 15:59:48 he Exp $ */
-/* NetBSD: locore.S,v 1.26 2004/04/12 13:17:46 yamt Exp */
-
-/*-
- * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
- * All rights reserved.
- *
- * This code is derived from software contributed to The NetBSD Foundation
- * by Charles M. Hannum.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the NetBSD
- * Foundation, Inc. and its contributors.
- * 4. Neither the name of The NetBSD Foundation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*-
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * William Jolitz.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)locore.s 7.3 (Berkeley) 5/13/91
- */
-
-#include "opt_compat_netbsd.h"
-#include "opt_compat_oldboot.h"
-#include "opt_cputype.h"
-#include "opt_ddb.h"
-#include "opt_ipkdb.h"
-#include "opt_lockdebug.h"
-#include "opt_multiprocessor.h"
-#include "opt_realmem.h"
-#include "opt_user_ldt.h"
-#include "opt_vm86.h"
-#include "opt_xen.h"
-
-#include "npx.h"
-#include "assym.h"
-#include "apm.h"
-#include "lapic.h"
-#include "ioapic.h"
-#include "ksyms.h"
-
-#include <sys/errno.h>
-#include <sys/syscall.h>
-
-#include <machine/cputypes.h>
-#include <machine/param.h>
-#include <machine/pte.h>
-#include <machine/segments.h>
-#include <machine/specialreg.h>
-#include <machine/trap.h>
-#include <machine/bootinfo.h>
-
-#if NLAPIC > 0
-#include <machine/i82489reg.h>
-#endif
-
-/* LINTSTUB: include <sys/types.h> */
-/* LINTSTUB: include <machine/cpu.h> */
-/* LINTSTUB: include <sys/systm.h> */
-
-#include <machine/asm.h>
-
-#if defined(MULTIPROCESSOR)
-
-#define SET_CURLWP(lwp,cpu) \
- movl CPUVAR(SELF),cpu ; \
- movl lwp,CPUVAR(CURLWP) ; \
- movl cpu,L_CPU(lwp)
-
-#else
-
-#define SET_CURLWP(lwp,tcpu) movl lwp,CPUVAR(CURLWP)
-#define GET_CURLWP(reg) movl CPUVAR(CURLWP),reg
-
-#endif
-
-#define GET_CURPCB(reg) movl CPUVAR(CURPCB),reg
-#define SET_CURPCB(reg) movl reg,CPUVAR(CURPCB)
-
-#define CLEAR_RESCHED(reg) movl reg,CPUVAR(RESCHED)
-
-/* XXX temporary kluge; these should not be here */
-/* Get definitions for IOM_BEGIN, IOM_END, and IOM_SIZE */
-#include <dev/isa/isareg.h>
-
-
-/* Disallow old names for REALBASEMEM */
-#ifdef BIOSBASEMEM
-#error BIOSBASEMEM option deprecated; use REALBASEMEM only if memory size
reported by latest boot block is incorrect
-#endif
-
-/* Disallow old names for REALEXTMEM */
-#ifdef EXTMEM_SIZE
-#error EXTMEM_SIZE option deprecated; use REALEXTMEM only if memory size
reported by latest boot block is incorrect
-#endif
-#ifdef BIOSEXTMEM
-#error BIOSEXTMEM option deprecated; use REALEXTMEM only if memory size
reported by latest boot block is incorrect
-#endif
-
-#include <machine/frameasm.h>
-
-
-#ifdef MULTIPROCESSOR
-#include <machine/i82489reg.h>
-#endif
-
-/*
- * PTmap is recursive pagemap at top of virtual address space.
- * Within PTmap, the page directory can be found (third indirection).
- *
- * XXX 4 == sizeof pde
- */
- .set _C_LABEL(PTmap),(PDSLOT_PTE << PDSHIFT)
- .set _C_LABEL(PTD),(_C_LABEL(PTmap) + PDSLOT_PTE * PAGE_SIZE)
- .set _C_LABEL(PTDpde),(_C_LABEL(PTD) + PDSLOT_PTE * 4)
-
-/*
- * APTmap, APTD is the alternate recursive pagemap.
- * It's used when modifying another process's page tables.
- *
- * XXX 4 == sizeof pde
- */
- .set _C_LABEL(APTmap),(PDSLOT_APTE << PDSHIFT)
- .set _C_LABEL(APTD),(_C_LABEL(APTmap) + PDSLOT_APTE * PAGE_SIZE)
- .set _C_LABEL(APTDpde),(_C_LABEL(PTD) + PDSLOT_APTE * 4)
-
-
-/*
- * Xen guest identifier and loader selection
- */
-.section __xen_guest
- .ascii "GUEST_OS=netbsd,GUEST_VER=2.0,XEN_VER=3.0"
- .ascii ",LOADER=generic"
-#if (NKSYMS || defined(DDB) || defined(LKM)) && !defined(SYMTAB_SPACE)
- .ascii ",BSD_SYMTAB"
-#endif
- .byte 0
-
-
-/*
- * Initialization
- */
- .data
-
- .globl _C_LABEL(cpu)
- .globl _C_LABEL(boothowto)
- .globl _C_LABEL(bootinfo),_C_LABEL(atdevbase)
-#ifdef COMPAT_OLDBOOT
- .globl _C_LABEL(bootdev)
-#endif
- .globl _C_LABEL(proc0paddr),_C_LABEL(PTDpaddr)
- .globl _C_LABEL(biosbasemem),_C_LABEL(biosextmem)
- .globl _C_LABEL(gdt)
-#ifdef I586_CPU
- .globl _C_LABEL(idt)
-#endif
- .globl _C_LABEL(lapic_tpr)
-
-#if NLAPIC > 0
-#ifdef __ELF__
- .align PAGE_SIZE
-#else
- .align 12
-#endif
- .globl _C_LABEL(local_apic), _C_LABEL(lapic_id)
-_C_LABEL(local_apic):
- .space LAPIC_ID
-_C_LABEL(lapic_id):
- .long 0x00000000
- .space LAPIC_TPRI-(LAPIC_ID+4)
-_C_LABEL(lapic_tpr):
- .space LAPIC_PPRI-LAPIC_TPRI
-_C_LABEL(lapic_ppr):
- .space LAPIC_ISR-LAPIC_PPRI
-_C_LABEL(lapic_isr):
- .space PAGE_SIZE-LAPIC_ISR
-#else
-_C_LABEL(lapic_tpr):
- .long 0
-#endif
-
-
-_C_LABEL(cpu): .long 0 # are we 386, 386sx, or 486,
- # or Pentium, or..
-_C_LABEL(atdevbase): .long 0 # location of start of iomem in virtual
-_C_LABEL(proc0paddr): .long 0
-_C_LABEL(PTDpaddr): .long 0 # paddr of PTD, for libkvm
-#ifndef REALBASEMEM
-_C_LABEL(biosbasemem): .long 0 # base memory reported by BIOS
-#else
-_C_LABEL(biosbasemem): .long REALBASEMEM
-#endif
-#ifndef REALEXTMEM
-_C_LABEL(biosextmem): .long 0 # extended memory reported by BIOS
-#else
-_C_LABEL(biosextmem): .long REALEXTMEM
-#endif
-
-#include <machine/xen.h>
-#define __HYPERVISOR_yield 8
-#define __SCHEDOP_yield 0
-
- .space 512
-tmpstk:
- .long tmpstk, __KERNEL_DS
-
-
-#define _RELOC(x) ((x))
-#define RELOC(x) _RELOC(_C_LABEL(x))
-
- .text
- .globl _C_LABEL(kernel_text)
- .set _C_LABEL(kernel_text),KERNTEXTOFF
-
- .globl start
-start:
- cld
-
- lss tmpstk,%esp # bootstrap stack end location
-
- movl %esi,%ebx # save start_info pointer
-
- /* Clear BSS first so that there are no surprises... */
- xorl %eax,%eax
- movl $RELOC(__bss_start),%edi
- movl $RELOC(_end),%ecx
- subl %edi,%ecx
- rep stosb
-
- movl %ebx,RELOC(avail_start)
-
- /* Copy the necessary stuff from start_info structure. */
- /* We need to copy shared_info early, so that sti/cli work */
- movl %ebx,%esi
- movl $RELOC(start_info_union),%edi
- movl $128,%ecx
- rep movsl
-
- /* (howto, [bootdev], bootinfo, basemem, extmem). */
- xorl %eax,%eax
- movl %eax,RELOC(boothowto)
-#ifdef COMPAT_OLDBOOT
- movl %eax,RELOC(bootdev)
-#endif
- movl $0x20000,%eax
- movl %eax,RELOC(boothowto)
-
- /* First, reset the PSL. */
- pushl $PSL_MBO
- popfl
-
- /* Clear segment registers; always null in proc0. */
- xorl %eax,%eax
- movw %ax,%fs
- movw %ax,%gs
- decl %eax
- movl %eax,RELOC(cpu_info_primary)+CPU_INFO_LEVEL
-
- xorl %eax,%eax
- cpuid
- movl %eax,RELOC(cpu_info_primary)+CPU_INFO_LEVEL
-
-/*
- * Virtual address space of kernel:
- *
- * text | data | bss | [syms] | page dir | proc0 kstack
- * 0 1 2 3
- */
-#define PROC0PDIR ((0) * PAGE_SIZE)
-#define PROC0STACK ((1) * PAGE_SIZE)
-#define SYSMAP ((1+UPAGES) * PAGE_SIZE)
-#define TABLESIZE ((1+UPAGES) * PAGE_SIZE) /* + nkpde * PAGE_SIZE
*/
-
- /* Find end of kernel image. */
- movl RELOC(avail_start),%edi
- /* Calculate where to start the bootstrap tables. */
- movl %edi,%esi
-
- /*
- * Calculate the size of the kernel page table directory, and
- * how many entries it will have.
- */
- movl RELOC(nkpde),%ecx # get nkpde
- cmpl $NKPTP_MIN,%ecx # larger than min?
- jge 1f
- movl $NKPTP_MIN,%ecx # set at min
- jmp 2f
-1: cmpl $NKPTP_MAX,%ecx # larger than max?
- jle 2f
- movl $NKPTP_MAX,%ecx
-2:
-
- /* Clear memory for bootstrap tables. */
- shll $PGSHIFT,%ecx
- addl $TABLESIZE,%ecx
- addl %esi,%ecx # end of tables
- movl %ecx,RELOC(gdt)
- addl $PAGE_SIZE,%ecx
- movl %ecx,RELOC(avail_start)
- subl %edi,%ecx # size of tables
- shrl $2,%ecx
- xorl %eax,%eax
- cld
- rep
- stosl
-
-/*
- * fillkpt
- * eax = pte (page frame | control | status)
- * ebx = page table address
- * ecx = number of pages to map
- */
-#define fillkpt \
-1: movl %eax,(%ebx) ; \
- addl $PAGE_SIZE,%eax ; /* increment physical address */ \
- addl $4,%ebx ; /* next pte */ \
- loop 1b ;
-
-/*
- * Build initial page tables.
- */
- /* Calculate end of text segment, rounded to a page. */
- leal (RELOC(etext)+PGOFSET),%edx
- andl $~PGOFSET,%edx
-
- /* Skip over the first 1MB. */
- movl $KERNTEXTOFF,%eax
- movl %eax,%ecx
- subl $KERNBASE_LOCORE,%ecx
- shrl $PGSHIFT,%ecx
- leal (SYSMAP)(%esi,%ecx,4),%ebx
-
- /* Map the kernel text read-only. */
- movl %edx,%ecx
- subl %eax,%ecx
- shrl $PGSHIFT,%ecx
- orl $(PG_V|PG_KR),%eax
- fillkpt
-
- /* Map the data, BSS, and bootstrap tables read-write. */
- movl RELOC(avail_start),%ecx
- # end of tables
- subl %edx,%ecx # subtract end of text
- shrl $PGSHIFT,%ecx
- leal (PG_V|PG_KW)(%edx),%eax
- fillkpt
-
- movl $0xffffffff,(%ebx)
- addl $4,%ebx
-
-/*
- * Construct a page table directory.
- */
- /* Map kernel PDEs. */
- movl RELOC(nkpde),%ecx # for this many pde s,
- leal (PROC0PDIR+PDSLOT_KERN*4)(%esi),%ebx # kernel pde offset
- leal (SYSMAP+PG_V|PG_KW)(%esi),%eax # pte for KPT in proc 0,
- fillkpt
-
- /* Install a PDE recursively mapping page directory as a page table! */
- leal (PROC0PDIR+PG_V/*|PG_KW*/)(%esi),%eax # pte for ptd
- movl %eax,(PROC0PDIR+PDSLOT_PTE*4)(%esi) # recursive PD slot
-
- /* Save phys. addr of PTD, for libkvm. */
- movl %esi,RELOC(PTDpaddr)
-
- call xpmap_init
-
- /* cr0 is 0x8005003b */
-
- /* Relocate atdevbase. */
- movl _C_LABEL(avail_start),%edx
- movl %edx,_C_LABEL(HYPERVISOR_shared_info)
- addl $PAGE_SIZE,%edx # shared_inf
- movl %edx,_C_LABEL(atdevbase)
-
- /* Set up bootstrap stack. */
- leal (PROC0STACK)(%esi),%eax
- movl %eax,_C_LABEL(proc0paddr)
- leal (USPACE-FRAMESIZE)(%eax),%esp
- subl $KERNBASE_LOCORE,%esi
- movl %esi,PCB_CR3(%eax) # pcb->pcb_cr3
- xorl %ebp,%ebp # mark end of frames
-
- movl _C_LABEL(atdevbase),%eax
- pushl %eax
- call _C_LABEL(init386) # wire 386 chip for unix operation
- addl $4,%esp
-
-#ifdef SAFARI_FIFO_HACK
- movb $5,%al
- movw $0x37b,%dx
- outb %al,%dx
- movw $0x37f,%dx
- inb %dx,%al
- movb %al,%cl
-
- orb $1,%cl
-
- movb $5,%al
- movw $0x37b,%dx
- outb %al,%dx
- movw $0x37f,%dx
- movb %cl,%al
- outb %al,%dx
-#endif /* SAFARI_FIFO_HACK */
-
- call _C_LABEL(main)
-
-/*
- * void proc_trampoline(void);
- * This is a trampoline function pushed onto the stack of a newly created
- * process in order to do some additional setup. The trampoline is entered by
- * cpu_switch()ing to the process, so we abuse the callee-saved registers used
- * by cpu_switch() to store the information about the stub to call.
- * NOTE: This function does not have a normal calling sequence!
- */
-/* LINTSTUB: Func: void proc_trampoline(void) */
-NENTRY(proc_trampoline)
-#ifdef MULTIPROCESSOR
- call _C_LABEL(proc_trampoline_mp)
-#endif
- movl $IPL_NONE,CPUVAR(ILEVEL)
- pushl %ebx
- call *%esi
- addl $4,%esp
- DO_DEFERRED_SWITCH(%eax)
- INTRFASTEXIT
- /* NOTREACHED */
-
-/*****************************************************************************/
-#ifdef COMPAT_16
-/*
- * Signal trampoline; copied to top of user stack.
- */
-/* LINTSTUB: Var: char sigcode[1], esigcode[1]; */
-NENTRY(sigcode)
- /*
- * Handler has returned here as if we called it. The sigcontext
- * is on the stack after the 3 args "we" pushed.
- */
- leal 12(%esp),%eax # get pointer to sigcontext
- movl %eax,4(%esp) # put it in the argument slot
- # fake return address already there
- movl $SYS_compat_16___sigreturn14,%eax
- int $0x80 # enter kernel with args on stack
- movl $SYS_exit,%eax
- int $0x80 # exit if sigreturn fails
- .globl _C_LABEL(esigcode)
-_C_LABEL(esigcode):
-#endif
-
-/*****************************************************************************/
-
-/*
- * The following primitives are used to fill and copy regions of memory.
- */
-
-/*
- * XXX No section 9 man page for fillw.
- * fillw seems to be very sparsely used (only in pccons it seems.)
- * One wonders if it couldn't be done without.
- * -- Perry Metzger, May 7, 2001
- */
-/*
- * void fillw(short pattern, void *addr, size_t len);
- * Write len copies of pattern at addr.
- */
-/* LINTSTUB: Func: void fillw(short pattern, void *addr, size_t len) */
-ENTRY(fillw)
- pushl %edi
- movl 8(%esp),%eax
- movl 12(%esp),%edi
- movw %ax,%cx
- rorl $16,%eax
- movw %cx,%ax
- cld
- movl 16(%esp),%ecx
- shrl %ecx # do longwords
- rep
- stosl
- movl 16(%esp),%ecx
- andl $1,%ecx # do remainder
- rep
- stosw
- popl %edi
- ret
-
-/*
- * int kcopy(const void *from, void *to, size_t len);
- * Copy len bytes, abort on fault.
- */
-/* LINTSTUB: Func: int kcopy(const void *from, void *to, size_t len) */
-ENTRY(kcopy)
- pushl %esi
- pushl %edi
- GET_CURPCB(%eax) # load curpcb into eax and set on-fault
- pushl PCB_ONFAULT(%eax)
- movl $_C_LABEL(kcopy_fault), PCB_ONFAULT(%eax)
-
- movl 16(%esp),%esi
- movl 20(%esp),%edi
- movl 24(%esp),%ecx
- movl %edi,%eax
- subl %esi,%eax
- cmpl %ecx,%eax # overlapping?
- jb 1f
- cld # nope, copy forward
- shrl $2,%ecx # copy by 32-bit words
- rep
- movsl
- movl 24(%esp),%ecx
- andl $3,%ecx # any bytes left?
- rep
- movsb
-
- GET_CURPCB(%edx) # XXX save curpcb?
- popl PCB_ONFAULT(%edx)
- popl %edi
- popl %esi
- xorl %eax,%eax
- ret
-
- ALIGN_TEXT
-1: addl %ecx,%edi # copy backward
- addl %ecx,%esi
- std
- andl $3,%ecx # any fractional bytes?
- decl %edi
- decl %esi
- rep
- movsb
- movl 24(%esp),%ecx # copy remainder by 32-bit words
- shrl $2,%ecx
- subl $3,%esi
- subl $3,%edi
- rep
- movsl
- cld
-
- GET_CURPCB(%edx)
- popl PCB_ONFAULT(%edx)
- popl %edi
- popl %esi
- xorl %eax,%eax
- ret
-
-/*****************************************************************************/
-
-/*
- * The following primitives are used to copy data in and out of the user's
- * address space.
- */
-
-/*
- * Default to the lowest-common-denominator. We will improve it
- * later.
- */
-#if defined(I386_CPU)
-#define DEFAULT_COPYOUT _C_LABEL(i386_copyout)
-#define DEFAULT_COPYIN _C_LABEL(i386_copyin)
-#elif defined(I486_CPU)
-#define DEFAULT_COPYOUT _C_LABEL(i486_copyout)
-#define DEFAULT_COPYIN _C_LABEL(i386_copyin)
-#elif defined(I586_CPU)
-#define DEFAULT_COPYOUT _C_LABEL(i486_copyout) /* XXX */
-#define DEFAULT_COPYIN _C_LABEL(i386_copyin) /* XXX */
-#elif defined(I686_CPU)
-#define DEFAULT_COPYOUT _C_LABEL(i486_copyout) /* XXX */
-#define DEFAULT_COPYIN _C_LABEL(i386_copyin) /* XXX */
-#endif
-
- .data
-
- .globl _C_LABEL(copyout_func)
-_C_LABEL(copyout_func):
- .long DEFAULT_COPYOUT
-
- .globl _C_LABEL(copyin_func)
-_C_LABEL(copyin_func):
- .long DEFAULT_COPYIN
-
- .text
-
-/*
- * int copyout(const void *from, void *to, size_t len);
- * Copy len bytes into the user's address space.
- * see copyout(9)
- */
-/* LINTSTUB: Func: int copyout(const void *kaddr, void *uaddr, size_t len) */
-ENTRY(copyout)
- DO_DEFERRED_SWITCH(%eax)
- jmp *_C_LABEL(copyout_func)
-
-#if defined(I386_CPU)
-/* LINTSTUB: Func: int i386_copyout(const void *kaddr, void *uaddr, size_t
len) */
-ENTRY(i386_copyout)
- pushl %esi
- pushl %edi
- pushl $0
-
- movl 16(%esp),%esi
- movl 20(%esp),%edi
- movl 24(%esp),%eax
-
- /*
- * We check that the end of the destination buffer is not past the end
- * of the user's address space. If it's not, then we only need to
- * check that each page is writable. The 486 will do this for us; the
- * 386 will not. (We assume that pages in user space that are not
- * writable by the user are not writable by the kernel either.)
- */
- movl %edi,%edx
- addl %eax,%edx
- jc _C_LABEL(copy_efault)
- cmpl $VM_MAXUSER_ADDRESS,%edx
- ja _C_LABEL(copy_efault)
-
- testl %eax,%eax # anything to do?
- jz 3f
-
- /*
- * We have to check each PTE for (write) permission, since the CPU
- * doesn't do it for us.
- */
-
- /* Compute number of pages. */
- movl %edi,%ecx
- andl $PGOFSET,%ecx
- addl %eax,%ecx
- decl %ecx
- shrl $PGSHIFT,%ecx
-
- /* Compute PTE offset for start address. */
- shrl $PGSHIFT,%edi
-
- GET_CURPCB(%edx)
- movl $2f,PCB_ONFAULT(%edx)
-
-1: /* Check PTE for each page. */
- testb $PG_RW,_C_LABEL(PTmap)(,%edi,4)
- jz 2f
-
-4: incl %edi
- decl %ecx
- jns 1b
-
- movl 20(%esp),%edi
- movl 24(%esp),%eax
- jmp 3f
-
-2: /* Simulate a trap. */
- pushl %ecx
- movl %edi,%eax
- shll $PGSHIFT,%eax
- pushl %eax
- call _C_LABEL(trapwrite) # trapwrite(addr)
- addl $4,%esp # pop argument
- popl %ecx
- testl %eax,%eax # if not ok, return EFAULT
- jz 4b
- jmp _C_LABEL(copy_efault)
-
-3: GET_CURPCB(%edx)
- movl $_C_LABEL(copy_fault),PCB_ONFAULT(%edx)
-
- /* bcopy(%esi, %edi, %eax); */
- cld
- movl %eax,%ecx
- shrl $2,%ecx
- rep
- movsl
- movl %eax,%ecx
- andl $3,%ecx
- rep
- movsb
-
- popl PCB_ONFAULT(%edx)
- popl %edi
- popl %esi
- xorl %eax,%eax
- ret
-#endif /* I386_CPU */
-
-#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
-/* LINTSTUB: Func: int i486_copyout(const void *kaddr, void *uaddr, size_t
len) */
-ENTRY(i486_copyout)
- pushl %esi
- pushl %edi
- pushl $0
-
- movl 16(%esp),%esi
- movl 20(%esp),%edi
- movl 24(%esp),%eax
-
- /*
- * We check that the end of the destination buffer is not past the end
- * of the user's address space.
- */
- movl %edi,%edx
- addl %eax,%edx
- jc _C_LABEL(copy_efault)
- cmpl $VM_MAXUSER_ADDRESS,%edx
- ja _C_LABEL(copy_efault)
-
- GET_CURPCB(%edx)
- movl $_C_LABEL(copy_fault),PCB_ONFAULT(%edx)
-
- /* bcopy(%esi, %edi, %eax); */
- cld
- movl %eax,%ecx
- shrl $2,%ecx
- rep
- movsl
- movl %eax,%ecx
- andl $3,%ecx
- rep
- movsb
-
- popl PCB_ONFAULT(%edx)
- popl %edi
- popl %esi
- xorl %eax,%eax
- ret
-#endif /* I486_CPU || I586_CPU || I686_CPU */
-
-/*
- * int copyin(const void *from, void *to, size_t len);
- * Copy len bytes from the user's address space.
- * see copyin(9)
- */
-/* LINTSTUB: Func: int copyin(const void *uaddr, void *kaddr, size_t len) */
-ENTRY(copyin)
- DO_DEFERRED_SWITCH(%eax)
- jmp *_C_LABEL(copyin_func)
-
-#if defined(I386_CPU) || defined(I486_CPU) || defined(I586_CPU) || \
- defined(I686_CPU)
-/* LINTSTUB: Func: int i386_copyin(const void *uaddr, void *kaddr, size_t len)
*/
-ENTRY(i386_copyin)
- pushl %esi
- pushl %edi
- GET_CURPCB(%eax)
- pushl $0
- movl $_C_LABEL(copy_fault),PCB_ONFAULT(%eax)
-
- movl 16(%esp),%esi
- movl 20(%esp),%edi
- movl 24(%esp),%eax
-
- /*
- * We check that the end of the destination buffer is not past the end
- * of the user's address space. If it's not, then we only need to
- * check that each page is readable, and the CPU will do that for us.
- */
- movl %esi,%edx
- addl %eax,%edx
- jc _C_LABEL(copy_efault)
- cmpl $VM_MAXUSER_ADDRESS,%edx
- ja _C_LABEL(copy_efault)
-
- /* bcopy(%esi, %edi, %eax); */
- cld
- movl %eax,%ecx
- shrl $2,%ecx
- rep
- movsl
- movl %eax,%ecx
- andl $3,%ecx
- rep
- movsb
-
- GET_CURPCB(%edx)
- popl PCB_ONFAULT(%edx)
- popl %edi
- popl %esi
- xorl %eax,%eax
- ret
-#endif /* I386_CPU || I486_CPU || I586_CPU || I686_CPU */
-
-/* LINTSTUB: Ignore */
-NENTRY(copy_efault)
- movl $EFAULT,%eax
-
-/*
- * kcopy_fault is used by kcopy and copy_fault is used by copyin/out.
- *
- * they're distinguished for lazy pmap switching. see trap().
- */
-/* LINTSTUB: Ignore */
-NENTRY(kcopy_fault)
- GET_CURPCB(%edx)
- popl PCB_ONFAULT(%edx)
- popl %edi
- popl %esi
- ret
-
-/* LINTSTUB: Ignore */
-NENTRY(copy_fault)
- GET_CURPCB(%edx)
- popl PCB_ONFAULT(%edx)
- popl %edi
- popl %esi
- ret
-
-/*
- * int copyoutstr(const void *from, void *to, size_t maxlen, size_t
*lencopied);
- * Copy a NUL-terminated string, at most maxlen characters long, into the
- * user's address space. Return the number of characters copied (including the
- * NUL) in *lencopied. If the string is too long, return ENAMETOOLONG; else
- * return 0 or EFAULT.
- * see copyoutstr(9)
- */
-/* LINTSTUB: Func: int copyoutstr(const void *kaddr, void *uaddr, size_t len,
size_t *done) */
-ENTRY(copyoutstr)
- pushl %esi
- pushl %edi
-
- DO_DEFERRED_SWITCH(%eax)
-
- movl 12(%esp),%esi # esi = from
- movl 16(%esp),%edi # edi = to
- movl 20(%esp),%edx # edx = maxlen
-
-#if defined(I386_CPU)
-#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
- cmpl $CPUCLASS_386,_C_LABEL(cpu_class)
- jne 5f
-#endif /* I486_CPU || I586_CPU || I686_CPU */
-
- /* Compute number of bytes in first page. */
- movl %edi,%eax
- andl $PGOFSET,%eax
- movl $PAGE_SIZE,%ecx
- subl %eax,%ecx # ecx = PAGE_SIZE - (src % PAGE_SIZE)
-
- GET_CURPCB(%eax)
- movl $6f,PCB_ONFAULT(%eax)
-
-1: /*
- * Once per page, check that we are still within the bounds of user
- * space, and check for a write fault.
- */
- cmpl $VM_MAXUSER_ADDRESS,%edi
- jae _C_LABEL(copystr_efault)
-
- /* Compute PTE offset. */
- movl %edi,%eax
- shrl $PGSHIFT,%eax # calculate pte address
-
- testb $PG_RW,_C_LABEL(PTmap)(,%eax,4)
- jnz 2f
-
-6: /* Simulate a trap. */
- pushl %edx
- pushl %edi
- call _C_LABEL(trapwrite) # trapwrite(addr)
- addl $4,%esp # clear argument from stack
- popl %edx
- testl %eax,%eax
- jnz _C_LABEL(copystr_efault)
-
-2: /* Copy up to end of this page. */
- subl %ecx,%edx # predecrement total count
- jnc 3f
- addl %edx,%ecx # ecx += (edx - ecx) = edx
- xorl %edx,%edx
-
-3: decl %ecx
- js 4f
- lodsb
- stosb
- testb %al,%al
- jnz 3b
-
- /* Success -- 0 byte reached. */
- addl %ecx,%edx # add back residual for this page
- xorl %eax,%eax
- jmp copystr_return
-
-4: /* Go to next page, if any. */
- movl $PAGE_SIZE,%ecx
- testl %edx,%edx
- jnz 1b
-
- /* edx is zero -- return ENAMETOOLONG. */
- movl $ENAMETOOLONG,%eax
- jmp copystr_return
-#endif /* I386_CPU */
-
-#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
-5: GET_CURPCB(%eax)
- movl $_C_LABEL(copystr_fault),PCB_ONFAULT(%eax)
- /*
- * Get min(%edx, VM_MAXUSER_ADDRESS-%edi).
- */
- movl $VM_MAXUSER_ADDRESS,%eax
- subl %edi,%eax
- cmpl %edx,%eax
- jae 1f
- movl %eax,%edx
- movl %eax,20(%esp)
-
-1: incl %edx
- cld
-
-1: decl %edx
- jz 2f
- lodsb
- stosb
- testb %al,%al
- jnz 1b
-
- /* Success -- 0 byte reached. */
- decl %edx
- xorl %eax,%eax
- jmp copystr_return
-
-2: /* edx is zero -- return EFAULT or ENAMETOOLONG. */
- cmpl $VM_MAXUSER_ADDRESS,%edi
- jae _C_LABEL(copystr_efault)
- movl $ENAMETOOLONG,%eax
- jmp copystr_return
-#endif /* I486_CPU || I586_CPU || I686_CPU */
-
-/*
- * int copyinstr(const void *from, void *to, size_t maxlen, size_t *lencopied);
- * Copy a NUL-terminated string, at most maxlen characters long, from the
- * user's address space. Return the number of characters copied (including the
- * NUL) in *lencopied. If the string is too long, return ENAMETOOLONG; else
- * return 0 or EFAULT.
- * see copyinstr(9)
- */
-/* LINTSTUB: Func: int copyinstr(const void *uaddr, void *kaddr, size_t len,
size_t *done) */
-ENTRY(copyinstr)
- pushl %esi
- pushl %edi
-
- DO_DEFERRED_SWITCH(%eax)
-
- GET_CURPCB(%ecx)
- movl $_C_LABEL(copystr_fault),PCB_ONFAULT(%ecx)
-
- movl 12(%esp),%esi # %esi = from
- movl 16(%esp),%edi # %edi = to
- movl 20(%esp),%edx # %edx = maxlen
-
- /*
- * Get min(%edx, VM_MAXUSER_ADDRESS-%esi).
- */
- movl $VM_MAXUSER_ADDRESS,%eax
- subl %esi,%eax
- cmpl %edx,%eax
- jae 1f
- movl %eax,%edx
- movl %eax,20(%esp)
-
-1: incl %edx
- cld
-
-1: decl %edx
- jz 2f
- lodsb
- stosb
- testb %al,%al
- jnz 1b
-
- /* Success -- 0 byte reached. */
- decl %edx
- xorl %eax,%eax
- jmp copystr_return
-
-2: /* edx is zero -- return EFAULT or ENAMETOOLONG. */
- cmpl $VM_MAXUSER_ADDRESS,%esi
- jae _C_LABEL(copystr_efault)
- movl $ENAMETOOLONG,%eax
- jmp copystr_return
-
-/* LINTSTUB: Ignore */
-NENTRY(copystr_efault)
- movl $EFAULT,%eax
-
-/* LINTSTUB: Ignore */
-NENTRY(copystr_fault)
-copystr_return:
- /* Set *lencopied and return %eax. */
- GET_CURPCB(%ecx)
- movl $0,PCB_ONFAULT(%ecx)
- movl 20(%esp),%ecx
- subl %edx,%ecx
- movl 24(%esp),%edx
- testl %edx,%edx
- jz 8f
- movl %ecx,(%edx)
-
-8: popl %edi
- popl %esi
- ret
-
-/*
- * int copystr(const void *from, void *to, size_t maxlen, size_t *lencopied);
- * Copy a NUL-terminated string, at most maxlen characters long. Return the
- * number of characters copied (including the NUL) in *lencopied. If the
- * string is too long, return ENAMETOOLONG; else return 0.
- * see copystr(9)
- */
-/* LINTSTUB: Func: int copystr(const void *kfaddr, void *kdaddr, size_t len,
size_t *done) */
-ENTRY(copystr)
- pushl %esi
- pushl %edi
-
- movl 12(%esp),%esi # esi = from
- movl 16(%esp),%edi # edi = to
- movl 20(%esp),%edx # edx = maxlen
- incl %edx
- cld
-
-1: decl %edx
- jz 4f
- lodsb
- stosb
- testb %al,%al
- jnz 1b
-
- /* Success -- 0 byte reached. */
- decl %edx
- xorl %eax,%eax
- jmp 6f
-
-4: /* edx is zero -- return ENAMETOOLONG. */
- movl $ENAMETOOLONG,%eax
-
-6: /* Set *lencopied and return %eax. */
- movl 20(%esp),%ecx
- subl %edx,%ecx
- movl 24(%esp),%edx
- testl %edx,%edx
- jz 7f
- movl %ecx,(%edx)
-
-7: popl %edi
- popl %esi
- ret
-
-/*
- * long fuword(const void *uaddr);
- * Fetch an int from the user's address space.
- * see fuword(9)
- */
-/* LINTSTUB: Func: long fuword(const void *base) */
-ENTRY(fuword)
- DO_DEFERRED_SWITCH(%eax)
- movl 4(%esp),%edx
- cmpl $VM_MAXUSER_ADDRESS-4,%edx
- ja _C_LABEL(fusuaddrfault)
- GET_CURPCB(%ecx)
- movl $_C_LABEL(fusufault),PCB_ONFAULT(%ecx)
- movl (%edx),%eax
- movl $0,PCB_ONFAULT(%ecx)
- ret
-
-/*
- * int fusword(const void *uaddr);
- * Fetch a short from the user's address space.
- * see fusword(9)
- */
-/* LINTSTUB: Func: int fusword(const void *base) */
-ENTRY(fusword)
- DO_DEFERRED_SWITCH(%eax)
- movl 4(%esp),%edx
- cmpl $VM_MAXUSER_ADDRESS-2,%edx
- ja _C_LABEL(fusuaddrfault)
- GET_CURPCB(%ecx)
- movl $_C_LABEL(fusufault),PCB_ONFAULT(%ecx)
- movzwl (%edx),%eax
- movl $0,PCB_ONFAULT(%ecx)
- ret
-
-/*
- * int fuswintr(const void *uaddr);
- * Fetch a short from the user's address space. Can be called during an
- * interrupt.
- * see fuswintr(9)
- */
-/* LINTSTUB: Func: int fuswintr(const void *base) */
-ENTRY(fuswintr)
- cmpl $TLBSTATE_VALID, CPUVAR(TLBSTATE)
- jnz _C_LABEL(fusuaddrfault)
- movl 4(%esp),%edx
- cmpl $VM_MAXUSER_ADDRESS-2,%edx
- ja _C_LABEL(fusuaddrfault)
- movl CPUVAR(CURLWP),%ecx
- movl L_ADDR(%ecx),%ecx
- movl $_C_LABEL(fusubail),PCB_ONFAULT(%ecx)
- movzwl (%edx),%eax
- movl $0,PCB_ONFAULT(%ecx)
- ret
-
-/*
- * int fubyte(const void *uaddr);
- * Fetch a byte from the user's address space.
- * see fubyte(9)
- */
-/* LINTSTUB: Func: int fubyte(const void *base) */
-ENTRY(fubyte)
- DO_DEFERRED_SWITCH(%eax)
- movl 4(%esp),%edx
- cmpl $VM_MAXUSER_ADDRESS-1,%edx
- ja _C_LABEL(fusuaddrfault)
- GET_CURPCB(%ecx)
- movl $_C_LABEL(fusufault),PCB_ONFAULT(%ecx)
- movzbl (%edx),%eax
- movl $0,PCB_ONFAULT(%ecx)
- ret
-
-/*
- * Handle faults from [fs]u*(). Clean up and return -1.
- */
-/* LINTSTUB: Ignore */
-NENTRY(fusufault)
- movl $0,PCB_ONFAULT(%ecx)
- movl $-1,%eax
- ret
-
-/*
- * Handle faults from [fs]u*(). Clean up and return -1. This differs from
- * fusufault() in that trap() will recognize it and return immediately rather
- * than trying to page fault.
- */
-/* LINTSTUB: Ignore */
-NENTRY(fusubail)
- movl $0,PCB_ONFAULT(%ecx)
- movl $-1,%eax
- ret
-
-/*
- * Handle earlier faults from [fs]u*(), due to our of range addresses.
- */
-/* LINTSTUB: Ignore */
-NENTRY(fusuaddrfault)
- movl $-1,%eax
- ret
-
-/*
- * int suword(void *uaddr, long x);
- * Store an int in the user's address space.
- * see suword(9)
- */
-/* LINTSTUB: Func: int suword(void *base, long c) */
-ENTRY(suword)
- DO_DEFERRED_SWITCH(%eax)
- movl 4(%esp),%edx
- cmpl $VM_MAXUSER_ADDRESS-4,%edx
- ja _C_LABEL(fusuaddrfault)
-
-#if defined(I386_CPU)
-#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
- cmpl $CPUCLASS_386,_C_LABEL(cpu_class)
- jne 2f
-#endif /* I486_CPU || I586_CPU || I686_CPU */
-
- GET_CURPCB(%eax)
- movl $3f,PCB_ONFAULT(%eax)
-
- movl %edx,%eax
- shrl $PGSHIFT,%eax # calculate pte address
- testb $PG_RW,_C_LABEL(PTmap)(,%eax,4)
- jnz 1f
-
-3: /* Simulate a trap. */
- pushl %edx
- pushl %edx
- call _C_LABEL(trapwrite) # trapwrite(addr)
- addl $4,%esp # clear parameter from the stack
- popl %edx
- GET_CURPCB(%ecx)
- testl %eax,%eax
- jnz _C_LABEL(fusufault)
-
-1: /* XXX also need to check the following 3 bytes for validity! */
-#endif
-
-2: GET_CURPCB(%ecx)
- movl $_C_LABEL(fusufault),PCB_ONFAULT(%ecx)
-
- movl 8(%esp),%eax
- movl %eax,(%edx)
- xorl %eax,%eax
- movl %eax,PCB_ONFAULT(%ecx)
- ret
-
-/*
- * int susword(void *uaddr, short x);
- * Store a short in the user's address space.
- * see susword(9)
- */
-/* LINTSTUB: Func: int susword(void *base, short c) */
-ENTRY(susword)
- DO_DEFERRED_SWITCH(%eax)
- movl 4(%esp),%edx
- cmpl $VM_MAXUSER_ADDRESS-2,%edx
- ja _C_LABEL(fusuaddrfault)
-
-#if defined(I386_CPU)
-#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
- cmpl $CPUCLASS_386,_C_LABEL(cpu_class)
- jne 2f
-#endif /* I486_CPU || I586_CPU || I686_CPU */
-
- GET_CURPCB(%eax)
- movl $3f,PCB_ONFAULT(%eax)
-
- movl %edx,%eax
- shrl $PGSHIFT,%eax # calculate pte address
- testb $PG_RW,_C_LABEL(PTmap)(,%eax,4)
- jnz 1f
-
-3: /* Simulate a trap. */
- pushl %edx
- pushl %edx
- call _C_LABEL(trapwrite) # trapwrite(addr)
- addl $4,%esp # clear parameter from the stack
- popl %edx
- GET_CURPCB(%ecx)
- testl %eax,%eax
- jnz _C_LABEL(fusufault)
-
-1: /* XXX also need to check the following byte for validity! */
-#endif
-
-2: GET_CURPCB(%ecx)
- movl $_C_LABEL(fusufault),PCB_ONFAULT(%ecx)
-
- movl 8(%esp),%eax
- movw %ax,(%edx)
- xorl %eax,%eax
- movl %eax,PCB_ONFAULT(%ecx)
- ret
-
-/*
- * int suswintr(void *uaddr, short x);
- * Store a short in the user's address space. Can be called during an
- * interrupt.
- * see suswintr(9)
- */
-/* LINTSTUB: Func: int suswintr(void *base, short c) */
-ENTRY(suswintr)
- cmpl $TLBSTATE_VALID, CPUVAR(TLBSTATE)
- jnz _C_LABEL(fusuaddrfault)
- movl 4(%esp),%edx
- cmpl $VM_MAXUSER_ADDRESS-2,%edx
- ja _C_LABEL(fusuaddrfault)
- movl CPUVAR(CURLWP),%ecx
- movl L_ADDR(%ecx),%ecx
- movl $_C_LABEL(fusubail),PCB_ONFAULT(%ecx)
-
-#if defined(I386_CPU)
-#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
- cmpl $CPUCLASS_386,_C_LABEL(cpu_class)
- jne 2f
-#endif /* I486_CPU || I586_CPU || I686_CPU */
-
- movl %edx,%eax
- shrl $PGSHIFT,%eax # calculate pte address
- testb $PG_RW,_C_LABEL(PTmap)(,%eax,4)
- jnz 1f
-
- /* Simulate a trap. */
- jmp _C_LABEL(fusubail)
-
-1: /* XXX also need to check the following byte for validity! */
-#endif
-
-2: movl 8(%esp),%eax
- movw %ax,(%edx)
- xorl %eax,%eax
- movl %eax,PCB_ONFAULT(%ecx)
- ret
-
-/*
- * int subyte(void *uaddr, char x);
- * Store a byte in the user's address space.
- * see subyte(9)
- */
-/* LINTSTUB: Func: int subyte(void *base, int c) */
-ENTRY(subyte)
- DO_DEFERRED_SWITCH(%eax)
- movl 4(%esp),%edx
- cmpl $VM_MAXUSER_ADDRESS-1,%edx
- ja _C_LABEL(fusuaddrfault)
-
-#if defined(I386_CPU)
-#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
- cmpl $CPUCLASS_386,_C_LABEL(cpu_class)
- jne 2f
-#endif /* I486_CPU || I586_CPU || I686_CPU */
-
- GET_CURPCB(%eax)
- movl $3f,PCB_ONFAULT(%eax)
-
- movl %edx,%eax
- shrl $PGSHIFT,%eax # calculate pte address
- testb $PG_RW,_C_LABEL(PTmap)(,%eax,4)
- jnz 1f
-
-3: /* Simulate a trap. */
- pushl %edx
- pushl %edx
- call _C_LABEL(trapwrite) # trapwrite(addr)
- addl $4,%esp # clear parameter from the stack
- popl %edx
- GET_CURPCB(%ecx)
- testl %eax,%eax
- jnz _C_LABEL(fusufault)
-
-1:
-#endif
-
-2: GET_CURPCB(%ecx)
- movl $_C_LABEL(fusufault),PCB_ONFAULT(%ecx)
-
- movb 8(%esp),%al
- movb %al,(%edx)
- xorl %eax,%eax
- movl %eax,PCB_ONFAULT(%ecx)
- ret
-
-/*****************************************************************************/
-
-/*
- * The following is i386-specific nonsense.
- */
-
-/*
- * void lgdt_finish(void);
- * Finish load a new GDT pointer (do any necessary cleanup).
- * XXX It's somewhat questionable whether reloading all the segment registers
- * is necessary, since the actual descriptor data is not changed except by
- * process creation and exit, both of which clean up via task switches. OTOH,
- * this only happens at run time when the GDT is resized.
- */
-/* LINTSTUB: Func: void lgdt_finish(void) */
-NENTRY(lgdt_finish)
- movl $GSEL(GDATA_SEL, SEL_KPL),%eax
- movw %ax,%ds
- movw %ax,%es
- movw %ax,%gs
- movw %ax,%ss
- movl $GSEL(GCPU_SEL, SEL_KPL),%eax
- movw %ax,%fs
- /* Reload code selector by doing intersegment return. */
- popl %eax
- pushl $GSEL(GCODE_SEL, SEL_KPL)
- pushl %eax
- lret
-
-/*****************************************************************************/
-
-/*
- * These functions are primarily used by DDB.
- */
-
-/* LINTSTUB: Func: int setjmp (label_t *l) */
-ENTRY(setjmp)
- movl 4(%esp),%eax
- movl %ebx,(%eax) # save ebx
- movl %esp,4(%eax) # save esp
- movl %ebp,8(%eax) # save ebp
- movl %esi,12(%eax) # save esi
- movl %edi,16(%eax) # save edi
- movl (%esp),%edx # get rta
- movl %edx,20(%eax) # save eip
- xorl %eax,%eax # return (0);
- ret
-
-/* LINTSTUB: Func: void longjmp (label_t *l) */
-ENTRY(longjmp)
- movl 4(%esp),%eax
- movl (%eax),%ebx # restore ebx
- movl 4(%eax),%esp # restore esp
- movl 8(%eax),%ebp # restore ebp
- movl 12(%eax),%esi # restore esi
- movl 16(%eax),%edi # restore edi
- movl 20(%eax),%edx # get rta
- movl %edx,(%esp) # put in return frame
- xorl %eax,%eax # return (1);
- incl %eax
- ret
-
-/*****************************************************************************/
-
- .globl _C_LABEL(sched_whichqs),_C_LABEL(sched_qs)
- .globl _C_LABEL(uvmexp),_C_LABEL(panic)
-
-#ifdef DIAGNOSTIC
-NENTRY(switch_error)
- pushl $1f
-3: call _C_LABEL(panic)
- /* NOTREACHED */
-1: .asciz "cpu_switch"
-#endif /* DIAGNOSTIC */
-
-/*
- * void cpu_switch(struct lwp *)
- * Find a runnable process and switch to it. Wait if necessary. If the new
- * process is the same as the old one, we short-circuit the context save and
- * restore.
- *
- * Note that the stack frame layout is known to "struct switchframe"
- * in <machine/frame.h> and to the code in cpu_fork() which initializes
- * it for a new lwp.
- */
-ENTRY(cpu_switch)
- pushl %ebx
- pushl %esi
- pushl %edi
-
-#ifdef DEBUG
- cmpl $IPL_SCHED,CPUVAR(ILEVEL)
- jae 1f
- pushl $2f
- call _C_LABEL(panic)
- /* NOTREACHED */
-2: .asciz "not splsched() in cpu_switch!"
-1:
-#endif /* DEBUG */
-
- movl 16(%esp),%esi # current
-
- /*
- * Clear curlwp so that we don't accumulate system time while idle.
- * This also insures that schedcpu() will move the old lwp to
- * the correct queue if it happens to get called from the spllower()
- * below and changes the priority. (See corresponding comment in
- * userret()).
- */
- movl $0,CPUVAR(CURLWP)
- /*
- * First phase: find new lwp.
- *
- * Registers:
- * %eax - queue head, scratch, then zero
- * %ebx - queue number
- * %ecx - cached value of whichqs
- * %edx - next lwp in queue
- * %esi - old lwp
- * %edi - new lwp
- */
-
- /* Look for new lwp. */
- CLI(%ecx) # splhigh doesn't do a cli
- movl _C_LABEL(sched_whichqs),%ecx
- bsfl %ecx,%ebx # find a full q
- jnz switch_dequeue
-
- /*
- * idling: save old context.
- *
- * Registers:
- * %eax, %ecx - scratch
- * %esi - old lwp, then old pcb
- * %edi - idle pcb
- */
-
- pushl %esi
- call _C_LABEL(pmap_deactivate2) # pmap_deactivate(oldproc)
- addl $4,%esp
-
- movl L_ADDR(%esi),%esi
-
- /* Save stack pointers. */
- movl %esp,PCB_ESP(%esi)
- movl %ebp,PCB_EBP(%esi)
-
- /* Find idle PCB for this CPU */
-#ifndef MULTIPROCESSOR
- movl $_C_LABEL(lwp0),%ebx
- movl L_ADDR(%ebx),%edi
- movl L_MD_TSS_SEL(%ebx),%edx
-#else
- movl CPUVAR(IDLE_PCB),%edi
- movl CPUVAR(IDLE_TSS_SEL),%edx
-#endif
- movl $0,CPUVAR(CURLWP) /* In case we fault... */
-
- /* Restore the idle context (avoid interrupts) */
- CLI(%ecx)
-
- /* Restore stack pointers. */
- movl PCB_ESP(%edi),%esp
- movl PCB_EBP(%edi),%ebp
-
- pushl %edi
- call _C_LABEL(i386_switch_context)
- addl $4,%esp
-
- /* Record new pcb. */
- SET_CURPCB(%edi)
-
- xorl %esi,%esi
- STI(%eax)
-idle_unlock:
-#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
- call _C_LABEL(sched_unlock_idle)
-#endif
- /* Interrupts are okay again. */
- pushl $IPL_NONE # spl0()
- call _C_LABEL(Xspllower) # process pending interrupts
- addl $4,%esp
- jmp idle_start
-idle_zero:
- STIC(%eax)
- jz 4f
- call _C_LABEL(stipending)
- testl %eax,%eax
- jz 4f
- pushl $IPL_NONE
- call _C_LABEL(Xspllower)
- addl $4,%esp
- jmp idle_start
-4:
- call _C_LABEL(uvm_pageidlezero)
- CLI(%eax)
- cmpl $0,_C_LABEL(sched_whichqs)
- jnz idle_exit
-idle_loop:
- /* Try to zero some pages. */
- movl _C_LABEL(uvm)+UVM_PAGE_IDLE_ZERO,%ecx
- testl %ecx,%ecx
- jnz idle_zero
- call _C_LABEL(idle_block)
- cmpl $0,_C_LABEL(sched_whichqs)
- jnz idle_exit
- STIC(%eax)
- jz 4f
- call _C_LABEL(stipending)
- testl %eax,%eax
- jz 4f
- pushl $IPL_NONE
- call _C_LABEL(Xspllower)
- addl $4,%esp
- jmp idle_start
-4:
- movl $__HYPERVISOR_yield,%eax
- movl $__SCHEDOP_yield,%ebx
- TRAP_INSTR
-NENTRY(mpidle)
-idle_start:
- CLI(%eax)
- cmpl $0,_C_LABEL(sched_whichqs)
- jz idle_loop
-idle_exit:
- movl $IPL_HIGH,CPUVAR(ILEVEL) # splhigh
- STI(%eax)
-#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
- call _C_LABEL(sched_lock_idle)
-#endif
- movl _C_LABEL(sched_whichqs),%ecx
- bsfl %ecx,%ebx
- jz idle_unlock
-
-#ifdef XENDEBUG_LOW
- pushl %ecx
- call _C_LABEL(xen_dbg1)
- xorl %ecx,%ecx
- movl %ecx,_C_LABEL(xen_once)
- popl %ecx
-#endif
-switch_dequeue:
- /*
- * we're running at splhigh(), but it's otherwise okay to take
- * interrupts here.
- */
- STI(%edi)
- leal _C_LABEL(sched_qs)(,%ebx,8),%eax # select q
-
- movl L_FORW(%eax),%edi # unlink from front of process q
-#ifdef DIAGNOSTIC
- cmpl %edi,%eax # linked to self (i.e. nothing queued)?
- je _C_LABEL(switch_error) # not possible
-#endif /* DIAGNOSTIC */
- movl L_FORW(%edi),%edx
- movl %edx,L_FORW(%eax)
- movl %eax,L_BACK(%edx)
-
- cmpl %edx,%eax # q empty?
- jne 3f
-
- btrl %ebx,%ecx # yes, clear to indicate empty
- movl %ecx,_C_LABEL(sched_whichqs) # update q status
-
-3: /* We just did it. */
- xorl %eax,%eax
- CLEAR_RESCHED(%eax)
-
-switch_resume:
-#ifdef DIAGNOSTIC
- cmpl %eax,L_WCHAN(%edi) # Waiting for something?
- jne _C_LABEL(switch_error) # Yes; shouldn't be queued.
- cmpb $LSRUN,L_STAT(%edi) # In run state?
- jne _C_LABEL(switch_error) # No; shouldn't be queued.
-#endif /* DIAGNOSTIC */
-
- /* Isolate lwp. XXX Is this necessary? */
- movl %eax,L_BACK(%edi)
-
- /* Record new lwp. */
- movb $LSONPROC,L_STAT(%edi) # l->l_stat = LSONPROC
- SET_CURLWP(%edi,%ecx)
-
- /* Skip context switch if same lwp. */
- xorl %ebx,%ebx
- cmpl %edi,%esi
- je switch_return
-
- /* If old lwp exited, don't bother. */
- testl %esi,%esi
- jz switch_exited
-
- /*
- * Second phase: save old context.
- *
- * Registers:
- * %eax, %ecx - scratch
- * %esi - old lwp, then old pcb
- * %edi - new lwp
- */
-
- pushl %esi
- call _C_LABEL(pmap_deactivate2) # pmap_deactivate(oldproc)
- addl $4,%esp
-
- movl L_ADDR(%esi),%esi
-
- /* Save stack pointers. */
- movl %esp,PCB_ESP(%esi)
- movl %ebp,PCB_EBP(%esi)
-
-switch_exited:
- /*
- * Third phase: restore saved context.
- *
- * Registers:
- * %eax, %ebx, %ecx, %edx - scratch
- * %esi - new pcb
- * %edi - new lwp
- */
-
- /* No interrupts while loading new state. */
- CLI(%eax)
- movl L_ADDR(%edi),%esi
-
- /* Restore stack pointers. */
- movl PCB_ESP(%esi),%esp
- movl PCB_EBP(%esi),%ebp
-
-#if 0
- /* Don't bother with the rest if switching to a system process. */
- testl $P_SYSTEM,L_FLAG(%edi); XXX NJWLWP lwp's don't have P_SYSTEM!
- jnz switch_restored ; XXX skip stack_switch+pmap_activate
-#endif
-
- pushl %edi
- call _C_LABEL(pmap_activate) # pmap_activate(p)
- addl $4,%esp
-
- pushl %esi
- call _C_LABEL(i386_switch_context)
- addl $4,%esp
-
- /* Record new pcb. */
- SET_CURPCB(%esi)
-
- /* Interrupts are okay again. */
- STI(%edi)
-
-/*
- * Check for restartable atomic sequences (RAS)
- */
- movl CPUVAR(CURLWP),%edi
- movl L_PROC(%edi),%esi
- cmpl $0,P_RASLIST(%esi)
- jne 2f
-1:
- movl $1,%ebx
-
-switch_return:
-#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
- call _C_LABEL(sched_unlock_idle)
-#endif
- pushl $IPL_NONE # spl0()
- call _C_LABEL(Xspllower) # process pending interrupts
- addl $4,%esp
- movl $IPL_HIGH,CPUVAR(ILEVEL) # splhigh()
-
- movl %ebx,%eax
-
- popl %edi
- popl %esi
- popl %ebx
- ret
-
-2: # check RAS list
- movl L_MD_REGS(%edi),%ebx
- movl TF_EIP(%ebx),%eax
- pushl %eax
- pushl %esi
- call _C_LABEL(ras_lookup)
- addl $8,%esp
- cmpl $-1,%eax
- je 1b
- movl %eax,TF_EIP(%ebx)
- jmp 1b
-
-/*
- * void cpu_switchto(struct lwp *current, struct lwp *next)
- * Switch to the specified next LWP.
- */
-ENTRY(cpu_switchto)
- pushl %ebx
- pushl %esi
- pushl %edi
-
-#ifdef DEBUG
- cmpl $IPL_SCHED,CPUVAR(ILEVEL)
- jae 1f
- pushl $2f
- call _C_LABEL(panic)
- /* NOTREACHED */
-2: .asciz "not splsched() in cpu_switchto!"
-1:
-#endif /* DEBUG */
-
- movl 16(%esp),%esi # current
- movl 20(%esp),%edi # next
-
- /*
- * Clear curlwp so that we don't accumulate system time while idle.
- * This also insures that schedcpu() will move the old process to
- * the correct queue if it happens to get called from the spllower()
- * below and changes the priority. (See corresponding comment in
- * usrret()).
- *
- * XXX Is this necessary? We know we won't go idle.
- */
- movl $0,CPUVAR(CURLWP)
-
- /*
- * We're running at splhigh(), but it's otherwise okay to take
- * interrupts here.
- */
- STI(%eax)
-
- /* Jump into the middle of cpu_switch */
- xorl %eax,%eax
- jmp switch_resume
-
-/*
- * void cpu_exit(struct lwp *l)
- * Switch to the appropriate idle context (lwp0's if uniprocessor; the CPU's
- * if multiprocessor) and deallocate the address space and kernel stack for p.
- * Then jump into cpu_switch(), as if we were in the idle proc all along.
- */
-#ifndef MULTIPROCESSOR
- .globl _C_LABEL(lwp0)
-#endif
- .globl _C_LABEL(uvmspace_free),_C_LABEL(kernel_map)
- .globl _C_LABEL(uvm_km_free),_C_LABEL(tss_free)
-/* LINTSTUB: Func: void cpu_exit(struct lwp *l) */
-ENTRY(cpu_exit)
- movl 4(%esp),%edi # old process
-#ifndef MULTIPROCESSOR
- movl $_C_LABEL(lwp0),%ebx
- movl L_ADDR(%ebx),%esi
- movl L_MD_TSS_SEL(%ebx),%edx
-#else
- movl CPUVAR(IDLE_PCB),%esi
- movl CPUVAR(IDLE_TSS_SEL),%edx
-#endif
- /* In case we fault... */
- movl $0,CPUVAR(CURLWP)
-
- /* Restore the idle context. */
- CLI(%eax)
-
- /* Restore stack pointers. */
- movl PCB_ESP(%esi),%esp
- movl PCB_EBP(%esi),%ebp
-
- pushl %esi
- call _C_LABEL(i386_switch_context)
- addl $4,%esp
-
- /* Record new pcb. */
- SET_CURPCB(%esi)
-
- /* Interrupts are okay again. */
- STI(%eax)
-
- /*
- * Schedule the dead LWP's stack to be freed.
- */
- pushl %edi
- call _C_LABEL(lwp_exit2)
- addl $4,%esp
-
- /* Jump into cpu_switch() with the right state. */
- xorl %esi,%esi
- movl %esi,CPUVAR(CURLWP)
- jmp idle_start
-
-/*
- * void savectx(struct pcb *pcb);
- * Update pcb, saving current processor state.
- */
-/* LINTSTUB: Func: void savectx(struct pcb *pcb) */
-ENTRY(savectx)
- movl 4(%esp),%edx # edx = p->p_addr
-
- /* Save stack pointers. */
- movl %esp,PCB_ESP(%edx)
- movl %ebp,PCB_EBP(%edx)
-
- ret
-
-/*
- * Old call gate entry for syscall
- */
-/* LINTSTUB: Var: char Xosyscall[1]; */
-IDTVEC(osyscall)
- /* Set eflags in trap frame. */
- pushfl
- popl 8(%esp)
- pushl $7 # size of instruction for restart
- jmp syscall1
-
-/*
- * Trap gate entry for syscall
- */
-/* LINTSTUB: Var: char Xsyscall[1]; */
-IDTVEC(syscall)
- pushl $2 # size of instruction for restart
-syscall1:
- pushl $T_ASTFLT # trap # for doing ASTs
- INTRENTRY
-
-#ifdef DIAGNOSTIC
- cmpl $0, CPUVAR(WANT_PMAPLOAD)
- jz 1f
- pushl $6f
- call _C_LABEL(printf)
- addl $4, %esp
-1:
- movl CPUVAR(ILEVEL),%ebx
- testl %ebx,%ebx
- jz 1f
- pushl $5f
- call _C_LABEL(printf)
- addl $4,%esp
-#ifdef DDB
- int $3
-#endif
-1:
-#endif /* DIAGNOSTIC */
- movl CPUVAR(CURLWP),%edx
- movl %esp,L_MD_REGS(%edx) # save pointer to frame
- movl L_PROC(%edx),%edx
- pushl %esp
- call *P_MD_SYSCALL(%edx) # get pointer to syscall() function
- addl $4,%esp
-syscall_checkast:
- /* Check for ASTs on exit to user mode. */
- CLI(%eax)
- CHECK_ASTPENDING(%eax)
- je 1f
- /* Always returning to user mode here. */
- CLEAR_ASTPENDING(%eax)
- STI(%eax)
- /* Pushed T_ASTFLT into tf_trapno on entry. */
- pushl %esp
- call _C_LABEL(trap)
- addl $4,%esp
- jmp syscall_checkast
-1: STI(%eax)
- CHECK_DEFERRED_SWITCH(%eax)
- jnz 9f
-#ifndef DIAGNOSTIC
- INTRFASTEXIT
-#else /* DIAGNOSTIC */
- cmpl $IPL_NONE,CPUVAR(ILEVEL)
- jne 3f
- INTRFASTEXIT
-3: pushl $4f
- call _C_LABEL(printf)
- addl $4,%esp
-#ifdef DDB
- int $3
-#endif /* DDB */
- movl $IPL_NONE,CPUVAR(ILEVEL)
- jmp 2b
-4: .asciz "WARNING: SPL NOT LOWERED ON SYSCALL EXIT\n"
-5: .asciz "WARNING: SPL NOT ZERO ON SYSCALL ENTRY\n"
-6: .asciz "WARNING: WANT PMAPLOAD ON SYSCALL ENTRY\n"
-#endif /* DIAGNOSTIC */
-9: call _C_LABEL(pmap_load)
- jmp syscall_checkast /* re-check ASTs */
-
-#if NNPX > 0
-/*
- * Special interrupt handlers. Someday intr0-intr15 will be used to count
- * interrupts. We'll still need a special exception 16 handler. The busy
- * latch stuff in probintr() can be moved to npxprobe().
- */
-
-/* LINTSTUB: Func: void probeintr(void) */
-NENTRY(probeintr)
- ss
- incl _C_LABEL(npx_intrs_while_probing)
- pushl %eax
- movb $0x20,%al # EOI (asm in strings loses cpp features)
- outb %al,$0xa0 # IO_ICU2
- outb %al,$0x20 # IO_ICU1
- movb $0,%al
- outb %al,$0xf0 # clear BUSY# latch
- popl %eax
- iret
-
-/* LINTSTUB: Func: void probetrap(void) */
-NENTRY(probetrap)
- ss
- incl _C_LABEL(npx_traps_while_probing)
- fnclex
- iret
-
-/* LINTSTUB: Func: int npx586bug1(int a, int b) */
-NENTRY(npx586bug1)
- fildl 4(%esp) # x
- fildl 8(%esp) # y
- fld %st(1)
- fdiv %st(1),%st # x/y
- fmulp %st,%st(1) # (x/y)*y
- fsubrp %st,%st(1) # x-(x/y)*y
- pushl $0
- fistpl (%esp)
- popl %eax
- ret
-#endif /* NNPX > 0 */
diff -r 64cd054aa143 -r 0255f48b757f
netbsd-2.0-xen-sparse/sys/arch/xen/i386/machdep.c
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/i386/machdep.c Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,2567 +0,0 @@
-/* $NetBSD: machdep.c,v 1.2.2.1 2004/05/22 15:58:02 he Exp $ */
-/* NetBSD: machdep.c,v 1.552 2004/03/24 15:34:49 atatat Exp */
-
-/*-
- * Copyright (c) 1996, 1997, 1998, 2000 The NetBSD Foundation, Inc.
- * All rights reserved.
- *
- * This code is derived from software contributed to The NetBSD Foundation
- * by Charles M. Hannum and by Jason R. Thorpe of the Numerical Aerospace
- * Simulation Facility, NASA Ames Research Center.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the NetBSD
- * Foundation, Inc. and its contributors.
- * 4. Neither the name of The NetBSD Foundation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*-
- * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * William Jolitz.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)machdep.c 7.4 (Berkeley) 6/3/91
- */
-
-#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.2.2.1 2004/05/22 15:58:02 he Exp $");
-
-#include "opt_beep.h"
-#include "opt_compat_ibcs2.h"
-#include "opt_compat_mach.h" /* need to get the right segment def */
-#include "opt_compat_netbsd.h"
-#include "opt_compat_svr4.h"
-#include "opt_cpureset_delay.h"
-#include "opt_cputype.h"
-#include "opt_ddb.h"
-#include "opt_ipkdb.h"
-#include "opt_kgdb.h"
-#include "opt_mtrr.h"
-#include "opt_multiprocessor.h"
-#include "opt_realmem.h"
-#include "opt_user_ldt.h"
-#include "opt_vm86.h"
-#include "opt_xen.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/signal.h>
-#include <sys/signalvar.h>
-#include <sys/kernel.h>
-#include <sys/proc.h>
-#include <sys/user.h>
-#include <sys/exec.h>
-#include <sys/buf.h>
-#include <sys/reboot.h>
-#include <sys/conf.h>
-#include <sys/file.h>
-#include <sys/malloc.h>
-#include <sys/mbuf.h>
-#include <sys/msgbuf.h>
-#include <sys/mount.h>
-#include <sys/vnode.h>
-#include <sys/extent.h>
-#include <sys/syscallargs.h>
-#include <sys/core.h>
-#include <sys/kcore.h>
-#include <sys/ucontext.h>
-#include <machine/kcore.h>
-#include <sys/ras.h>
-#include <sys/sa.h>
-#include <sys/savar.h>
-#include <sys/ksyms.h>
-
-#ifdef IPKDB
-#include <ipkdb/ipkdb.h>
-#endif
-
-#ifdef KGDB
-#include <sys/kgdb.h>
-#endif
-
-#include <dev/cons.h>
-
-#include <uvm/uvm_extern.h>
-#include <uvm/uvm_page.h>
-
-#include <sys/sysctl.h>
-
-#include <machine/cpu.h>
-#include <machine/cpufunc.h>
-#include <machine/cpuvar.h>
-#include <machine/gdt.h>
-#include <machine/pio.h>
-#include <machine/psl.h>
-#include <machine/reg.h>
-#include <machine/specialreg.h>
-#include <machine/bootinfo.h>
-#include <machine/mtrr.h>
-#include <machine/evtchn.h>
-
-#include <dev/isa/isareg.h>
-#include <machine/isa_machdep.h>
-#include <dev/ic/i8042reg.h>
-
-#ifdef DDB
-#include <machine/db_machdep.h>
-#include <ddb/db_extern.h>
-#endif
-
-#ifdef VM86
-#include <machine/vm86.h>
-#endif
-
-#include "acpi.h"
-#include "apm.h"
-#include "bioscall.h"
-
-#if NBIOSCALL > 0
-#include <machine/bioscall.h>
-#endif
-
-#if NACPI > 0
-#include <dev/acpi/acpivar.h>
-#define ACPI_MACHDEP_PRIVATE
-#include <machine/acpi_machdep.h>
-#endif
-
-#if NAPM > 0
-#include <machine/apmvar.h>
-#endif
-
-#include "isa.h"
-#include "isadma.h"
-#include "npx.h"
-#include "ksyms.h"
-
-#include "mca.h"
-#if NMCA > 0
-#include <machine/mca_machdep.h> /* for mca_busprobe() */
-#endif
-
-#ifdef MULTIPROCESSOR /* XXX */
-#include <machine/mpbiosvar.h> /* XXX */
-#endif /* XXX */
-
-#include <machine/xen.h>
-#include <machine/hypervisor.h>
-
-#if defined(DDB) || defined(KGDB)
-#include <ddb/db_interface.h>
-#include <ddb/db_output.h>
-
-void ddb_trap_hook(int);
-#endif
-
-/* #define XENDEBUG */
-/* #define XENDEBUG_LOW */
-
-#ifdef XENDEBUG
-extern void printk(char *, ...);
-#define XENPRINTF(x) printf x
-#define XENPRINTK(x) printk x
-#else
-#define XENPRINTF(x)
-#define XENPRINTK(x)
-#endif
-#define PRINTK(x) printf x
-
-#ifdef XENDEBUG_LOW
-void xen_dbglow_init(void);
-#endif
-
-#ifndef BEEP_ONHALT_COUNT
-#define BEEP_ONHALT_COUNT 3
-#endif
-#ifndef BEEP_ONHALT_PITCH
-#define BEEP_ONHALT_PITCH 1500
-#endif
-#ifndef BEEP_ONHALT_PERIOD
-#define BEEP_ONHALT_PERIOD 250
-#endif
-
-/* the following is used externally (sysctl_hw) */
-char machine[] = "i386"; /* CPU "architecture" */
-char machine_arch[] = "i386"; /* machine == machine_arch */
-
-char bootinfo[BOOTINFO_MAXSIZE];
-
-struct bi_devmatch *i386_alldisks = NULL;
-int i386_ndisks = 0;
-
-#ifdef CPURESET_DELAY
-int cpureset_delay = CPURESET_DELAY;
-#else
-int cpureset_delay = 2000; /* default to 2s */
-#endif
-
-#ifdef MTRR
-struct mtrr_funcs *mtrr_funcs;
-#endif
-
-#ifdef COMPAT_NOMID
-static int exec_nomid(struct proc *, struct exec_package *);
-#endif
-
-int physmem;
-int dumpmem_low;
-int dumpmem_high;
-unsigned int cpu_feature;
-int cpu_class;
-int i386_fpu_present;
-int i386_fpu_exception;
-int i386_fpu_fdivbug;
-
-int i386_use_fxsave;
-int i386_has_sse;
-int i386_has_sse2;
-
-int tmx86_has_longrun;
-
-vaddr_t msgbuf_vaddr;
-paddr_t msgbuf_paddr;
-
-vaddr_t idt_vaddr;
-paddr_t idt_paddr;
-
-#ifdef I586_CPU
-vaddr_t pentium_idt_vaddr;
-#endif
-
-struct vm_map *exec_map = NULL;
-struct vm_map *mb_map = NULL;
-struct vm_map *phys_map = NULL;
-
-extern paddr_t avail_start, avail_end;
-extern paddr_t pmap_pa_start, pmap_pa_end;
-
-#ifdef ISA_CLOCK
-void (*delay_func)(int) = i8254_delay;
-void (*microtime_func)(struct timeval *) = i8254_microtime;
-void (*initclock_func)(void) = i8254_initclocks;
-#else
-void (*delay_func)(int) = xen_delay;
-void (*microtime_func)(struct timeval *) = xen_microtime;
-void (*initclock_func)(void) = xen_initclocks;
-#endif
-
-void hypervisor_callback(void);
-void failsafe_callback(void);
-
-/*
- * Size of memory segments, before any memory is stolen.
- */
-phys_ram_seg_t mem_clusters[VM_PHYSSEG_MAX];
-int mem_cluster_cnt;
-
-int cpu_dump(void);
-int cpu_dumpsize(void);
-u_long cpu_dump_mempagecnt(void);
-void dumpsys(void);
-void init386(paddr_t);
-void initgdt(void);
-
-#if !defined(REALBASEMEM) && !defined(REALEXTMEM)
-void add_mem_cluster(u_int64_t, u_int64_t, u_int32_t);
-#endif /* !defnied(REALBASEMEM) && !defined(REALEXTMEM) */
-
-extern int time_adjusted;
-
-/*
- * Machine-dependent startup code
- */
-void
-cpu_startup()
-{
- int x;
- vaddr_t minaddr, maxaddr;
- char pbuf[9];
-
- /*
- * Initialize error message buffer (et end of core).
- */
- msgbuf_vaddr = uvm_km_valloc(kernel_map, x86_round_page(MSGBUFSIZE));
- if (msgbuf_vaddr == 0)
- panic("failed to valloc msgbuf_vaddr");
-
- /* msgbuf_paddr was init'd in pmap */
- for (x = 0; x < btoc(MSGBUFSIZE); x++)
- pmap_kenter_pa((vaddr_t)msgbuf_vaddr + x * PAGE_SIZE,
- msgbuf_paddr + x * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE);
- pmap_update(pmap_kernel());
-
- initmsgbuf((caddr_t)msgbuf_vaddr, round_page(MSGBUFSIZE));
-
- printf("%s", version);
-
-#ifdef TRAPLOG
- /*
- * Enable recording of branch from/to in MSR's
- */
- wrmsr(MSR_DEBUGCTLMSR, 0x1);
-#endif
-
- format_bytes(pbuf, sizeof(pbuf), ptoa(physmem));
- printf("total memory = %s\n", pbuf);
-
- minaddr = 0;
-
- /*
- * Allocate a submap for exec arguments. This map effectively
- * limits the number of processes exec'ing at any time.
- */
- exec_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
- 16*NCARGS, VM_MAP_PAGEABLE, FALSE, NULL);
-
- /*
- * Allocate a submap for physio
- */
- phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
- VM_PHYS_SIZE, 0, FALSE, NULL);
-
- /*
- * Finally, allocate mbuf cluster submap.
- */
- mb_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
- nmbclusters * mclbytes, VM_MAP_INTRSAFE, FALSE, NULL);
-
- format_bytes(pbuf, sizeof(pbuf), ptoa(uvmexp.free));
- printf("avail memory = %s\n", pbuf);
-
- /* Safe for i/o port / memory space allocation to use malloc now. */
- x86_bus_space_mallocok();
-}
-
-/*
- * Set up proc0's TSS and LDT.
- */
-void
-i386_proc0_tss_ldt_init()
-{
- struct pcb *pcb;
- int x;
-
- gdt_init();
-
- cpu_info_primary.ci_curpcb = pcb = &lwp0.l_addr->u_pcb;
-
- pcb->pcb_tss.tss_ioopt =
- ((caddr_t)pcb->pcb_iomap - (caddr_t)&pcb->pcb_tss) << 16
- | SEL_KPL; /* i/o pl */
-
- for (x = 0; x < sizeof(pcb->pcb_iomap) / 4; x++)
- pcb->pcb_iomap[x] = 0xffffffff;
-
- pcb->pcb_ldt_sel = pmap_kernel()->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL);
- pcb->pcb_cr0 = rcr0();
- pcb->pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
- pcb->pcb_tss.tss_esp0 = (int)lwp0.l_addr + USPACE - 16;
- lwp0.l_md.md_regs = (struct trapframe *)pcb->pcb_tss.tss_esp0 - 1;
- lwp0.l_md.md_tss_sel = tss_alloc(pcb);
-
-#ifndef XEN
- ltr(lwp0.l_md.md_tss_sel);
- lldt(pcb->pcb_ldt_sel);
-#else
- HYPERVISOR_fpu_taskswitch(1);
- XENPRINTF(("lwp tss sp %p ss %04x/%04x\n",
- (void *)pcb->pcb_tss.tss_esp0,
- pcb->pcb_tss.tss_ss0, IDXSEL(pcb->pcb_tss.tss_ss0)));
- HYPERVISOR_stack_switch(pcb->pcb_tss.tss_ss0, pcb->pcb_tss.tss_esp0);
-#endif
-}
-
-/*
- * Set up TSS and LDT for a new PCB.
- */
-
-void
-i386_init_pcb_tss_ldt(struct cpu_info *ci)
-{
- int x;
- struct pcb *pcb = ci->ci_idle_pcb;
-
- pcb->pcb_tss.tss_ioopt =
- ((caddr_t)pcb->pcb_iomap - (caddr_t)&pcb->pcb_tss) << 16
- | SEL_KPL; /* i/o pl */
- for (x = 0; x < sizeof(pcb->pcb_iomap) / 4; x++)
- pcb->pcb_iomap[x] = 0xffffffff;
-
- pcb->pcb_ldt_sel = pmap_kernel()->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL);
- pcb->pcb_cr0 = rcr0();
-
- ci->ci_idle_tss_sel = tss_alloc(pcb);
-}
-
-/*
- * Switch context:
- * - honor CR0_TS in saved CR0 and request DNA exception on FPU use
- * - switch stack pointer for user->kernel transition
- */
-void
-i386_switch_context(struct pcb *new)
-{
- dom0_op_t op;
- struct cpu_info *ci;
-
- ci = curcpu();
- if (ci->ci_fpused) {
- HYPERVISOR_fpu_taskswitch(1);
- ci->ci_fpused = 0;
- }
-
- HYPERVISOR_stack_switch(new->pcb_tss.tss_ss0, new->pcb_tss.tss_esp0);
-
- if (xen_start_info.flags & SIF_PRIVILEGED) {
- op.cmd = DOM0_IOPL;
- op.u.iopl.domain = DOMID_SELF;
- op.u.iopl.iopl = new->pcb_tss.tss_ioopt & SEL_RPL; /* i/o pl */
- HYPERVISOR_dom0_op(&op);
- }
-}
-
-/*
- * sysctl helper routine for machdep.tm* nodes.
- */
-static int
-sysctl_machdep_tm_longrun(SYSCTLFN_ARGS)
-{
- struct sysctlnode node;
- int io, error;
-
- if (!tmx86_has_longrun)
- return (EOPNOTSUPP);
-
- node = *rnode;
- node.sysctl_data = &io;
-
- switch (rnode->sysctl_num) {
- case CPU_TMLR_MODE:
- io = (int)(crusoe_longrun = tmx86_get_longrun_mode());
- break;
- case CPU_TMLR_FREQUENCY:
- tmx86_get_longrun_status_all();
- io = crusoe_frequency;
- break;
- case CPU_TMLR_VOLTAGE:
- tmx86_get_longrun_status_all();
- io = crusoe_voltage;
- break;
- case CPU_TMLR_PERCENTAGE:
- tmx86_get_longrun_status_all();
- io = crusoe_percentage;
- break;
- default:
- return (EOPNOTSUPP);
- }
-
- error = sysctl_lookup(SYSCTLFN_CALL(&node));
- if (error || newp == NULL)
- return (error);
-
- if (rnode->sysctl_num == CPU_TMLR_MODE) {
- if (tmx86_set_longrun_mode(io))
- crusoe_longrun = (u_int)io;
- else
- return (EINVAL);
- }
-
- return (0);
-}
-
-/*
- * sysctl helper routine for machdep.booted_kernel
- */
-static int
-sysctl_machdep_booted_kernel(SYSCTLFN_ARGS)
-{
- struct btinfo_bootpath *bibp;
- struct sysctlnode node;
-
- bibp = lookup_bootinfo(BTINFO_BOOTPATH);
- if(!bibp)
- return(ENOENT); /* ??? */
-
- node = *rnode;
- node.sysctl_data = bibp->bootpath;
- node.sysctl_size = sizeof(bibp->bootpath);
- return (sysctl_lookup(SYSCTLFN_CALL(&node)));
-}
-
-/*
- * sysctl helper routine for machdep.diskinfo
- */
-static int
-sysctl_machdep_diskinfo(SYSCTLFN_ARGS)
-{
- struct sysctlnode node;
-
- node = *rnode;
- node.sysctl_data = i386_alldisks;
- node.sysctl_size = sizeof(struct disklist) +
- (i386_ndisks - 1) * sizeof(struct nativedisk_info);
- return (sysctl_lookup(SYSCTLFN_CALL(&node)));
-}
-
-/*
- * machine dependent system variables.
- */
-SYSCTL_SETUP(sysctl_machdep_setup, "sysctl machdep subtree setup")
-{
-
- sysctl_createv(clog, 0, NULL, NULL,
- CTLFLAG_PERMANENT,
- CTLTYPE_NODE, "machdep", NULL,
- NULL, 0, NULL, 0,
- CTL_MACHDEP, CTL_EOL);
-
- sysctl_createv(clog, 0, NULL, NULL,
- CTLFLAG_PERMANENT,
- CTLTYPE_STRUCT, "console_device", NULL,
- sysctl_consdev, 0, NULL, sizeof(dev_t),
- CTL_MACHDEP, CPU_CONSDEV, CTL_EOL);
- sysctl_createv(clog, 0, NULL, NULL,
- CTLFLAG_PERMANENT,
- CTLTYPE_INT, "biosbasemem", NULL,
- NULL, 0, &biosbasemem, 0,
- CTL_MACHDEP, CPU_BIOSBASEMEM, CTL_EOL);
- sysctl_createv(clog, 0, NULL, NULL,
- CTLFLAG_PERMANENT,
- CTLTYPE_INT, "biosextmem", NULL,
- NULL, 0, &biosextmem, 0,
- CTL_MACHDEP, CPU_BIOSEXTMEM, CTL_EOL);
- sysctl_createv(clog, 0, NULL, NULL,
- CTLFLAG_PERMANENT,
- CTLTYPE_INT, "nkpde", NULL,
- NULL, 0, &nkpde, 0,
- CTL_MACHDEP, CPU_NKPDE, CTL_EOL);
- sysctl_createv(clog, 0, NULL, NULL,
- CTLFLAG_PERMANENT,
- CTLTYPE_STRING, "booted_kernel", NULL,
- sysctl_machdep_booted_kernel, 0, NULL, 0,
- CTL_MACHDEP, CPU_BOOTED_KERNEL, CTL_EOL);
- sysctl_createv(clog, 0, NULL, NULL,
- CTLFLAG_PERMANENT,
- CTLTYPE_STRUCT, "diskinfo", NULL,
- sysctl_machdep_diskinfo, 0, NULL, 0,
- CTL_MACHDEP, CPU_DISKINFO, CTL_EOL);
- sysctl_createv(clog, 0, NULL, NULL,
- CTLFLAG_PERMANENT,
- CTLTYPE_INT, "fpu_present", NULL,
- NULL, 0, &i386_fpu_present, 0,
- CTL_MACHDEP, CPU_FPU_PRESENT, CTL_EOL);
- sysctl_createv(clog, 0, NULL, NULL,
- CTLFLAG_PERMANENT,
- CTLTYPE_INT, "osfxsr", NULL,
- NULL, 0, &i386_use_fxsave, 0,
- CTL_MACHDEP, CPU_OSFXSR, CTL_EOL);
- sysctl_createv(clog, 0, NULL, NULL,
- CTLFLAG_PERMANENT,
- CTLTYPE_INT, "sse", NULL,
- NULL, 0, &i386_has_sse, 0,
- CTL_MACHDEP, CPU_SSE, CTL_EOL);
- sysctl_createv(clog, 0, NULL, NULL,
- CTLFLAG_PERMANENT,
- CTLTYPE_INT, "sse2", NULL,
- NULL, 0, &i386_has_sse2, 0,
- CTL_MACHDEP, CPU_SSE2, CTL_EOL);
- sysctl_createv(clog, 0, NULL, NULL,
- CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
- CTLTYPE_INT, "tm_longrun_mode", NULL,
- sysctl_machdep_tm_longrun, 0, NULL, 0,
- CTL_MACHDEP, CPU_TMLR_MODE, CTL_EOL);
- sysctl_createv(clog, 0, NULL, NULL,
- CTLFLAG_PERMANENT,
- CTLTYPE_INT, "tm_longrun_frequency", NULL,
- sysctl_machdep_tm_longrun, 0, NULL, 0,
- CTL_MACHDEP, CPU_TMLR_FREQUENCY, CTL_EOL);
- sysctl_createv(clog, 0, NULL, NULL,
- CTLFLAG_PERMANENT,
- CTLTYPE_INT, "tm_longrun_voltage", NULL,
- sysctl_machdep_tm_longrun, 0, NULL, 0,
- CTL_MACHDEP, CPU_TMLR_VOLTAGE, CTL_EOL);
- sysctl_createv(clog, 0, NULL, NULL,
- CTLFLAG_PERMANENT,
- CTLTYPE_INT, "tm_longrun_percentage", NULL,
- sysctl_machdep_tm_longrun, 0, NULL, 0,
- CTL_MACHDEP, CPU_TMLR_PERCENTAGE, CTL_EOL);
-}
-
-void *
-getframe(struct lwp *l, int sig, int *onstack)
-{
- struct proc *p = l->l_proc;
- struct sigctx *ctx = &p->p_sigctx;
- struct trapframe *tf = l->l_md.md_regs;
-
- /* Do we need to jump onto the signal stack? */
- *onstack = (ctx->ps_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0
- && (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
- if (*onstack)
- return (char *)ctx->ps_sigstk.ss_sp + ctx->ps_sigstk.ss_size;
-#ifdef VM86
- if (tf->tf_eflags & PSL_VM)
- return (void *)(tf->tf_esp + (tf->tf_ss << 4));
- else
-#endif
- return (void *)tf->tf_esp;
-}
-
-/*
- * Build context to run handler in. We invoke the handler
- * directly, only returning via the trampoline. Note the
- * trampoline version numbers are coordinated with machine-
- * dependent code in libc.
- */
-void
-buildcontext(struct lwp *l, int sel, void *catcher, void *fp)
-{
- struct trapframe *tf = l->l_md.md_regs;
-
- tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
- tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
- tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
- tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
- tf->tf_eip = (int)catcher;
- tf->tf_cs = GSEL(sel, SEL_UPL);
- tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
- tf->tf_esp = (int)fp;
- tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
-}
-
-static void
-sendsig_siginfo(const ksiginfo_t *ksi, const sigset_t *mask)
-{
- struct lwp *l = curlwp;
- struct proc *p = l->l_proc;
- struct pmap *pmap = vm_map_pmap(&p->p_vmspace->vm_map);
- int sel = pmap->pm_hiexec > I386_MAX_EXE_ADDR ?
- GUCODEBIG_SEL : GUCODE_SEL;
- struct sigacts *ps = p->p_sigacts;
- int onstack;
- int sig = ksi->ksi_signo;
- struct sigframe_siginfo *fp = getframe(l, sig, &onstack), frame;
- sig_t catcher = SIGACTION(p, sig).sa_handler;
- struct trapframe *tf = l->l_md.md_regs;
-
- fp--;
-
- /* Build stack frame for signal trampoline. */
- switch (ps->sa_sigdesc[sig].sd_vers) {
- case 0: /* handled by sendsig_sigcontext */
- case 1: /* handled by sendsig_sigcontext */
- default: /* unknown version */
- printf("nsendsig: bad version %d\n",
- ps->sa_sigdesc[sig].sd_vers);
- sigexit(l, SIGILL);
- case 2:
- break;
- }
-
- frame.sf_ra = (int)ps->sa_sigdesc[sig].sd_tramp;
- frame.sf_signum = sig;
- frame.sf_sip = &fp->sf_si;
- frame.sf_ucp = &fp->sf_uc;
- frame.sf_si._info = ksi->ksi_info;
- frame.sf_uc.uc_flags = _UC_SIGMASK|_UC_VM;
- frame.sf_uc.uc_sigmask = *mask;
- frame.sf_uc.uc_link = NULL;
- frame.sf_uc.uc_flags |= (p->p_sigctx.ps_sigstk.ss_flags & SS_ONSTACK)
- ? _UC_SETSTACK : _UC_CLRSTACK;
- memset(&frame.sf_uc.uc_stack, 0, sizeof(frame.sf_uc.uc_stack));
- cpu_getmcontext(l, &frame.sf_uc.uc_mcontext, &frame.sf_uc.uc_flags);
-
- if (tf->tf_eflags & PSL_VM)
- (*p->p_emul->e_syscall_intern)(p);
-
- if (copyout(&frame, fp, sizeof(frame)) != 0) {
- /*
- * Process has trashed its stack; give it an illegal
- * instruction to halt it in its tracks.
- */
- sigexit(l, SIGILL);
- /* NOTREACHED */
- }
-
- buildcontext(l, sel, catcher, fp);
-
- /* Remember that we're now on the signal stack. */
- if (onstack)
- p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
-}
-
-void
-sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
-{
-#ifdef COMPAT_16
- if (curproc->p_sigacts->sa_sigdesc[ksi->ksi_signo].sd_vers < 2)
- sendsig_sigcontext(ksi, mask);
- else
-#endif
- sendsig_siginfo(ksi, mask);
-}
-
-void
-cpu_upcall(struct lwp *l, int type, int nevents, int ninterrupted, void *sas,
- void *ap, void *sp, sa_upcall_t upcall)
-{
- struct pmap *pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map);
- struct saframe *sf, frame;
- struct trapframe *tf;
-
- tf = l->l_md.md_regs;
-
- /* Finally, copy out the rest of the frame. */
- frame.sa_type = type;
- frame.sa_sas = sas;
- frame.sa_events = nevents;
- frame.sa_interrupted = ninterrupted;
- frame.sa_arg = ap;
- frame.sa_ra = 0;
-
- sf = (struct saframe *)sp - 1;
- if (copyout(&frame, sf, sizeof(frame)) != 0) {
- /* Copying onto the stack didn't work. Die. */
- sigexit(l, SIGILL);
- /* NOTREACHED */
- }
-
- tf->tf_eip = (int) upcall;
- tf->tf_esp = (int) sf;
- tf->tf_ebp = 0; /* indicate call-frame-top to debuggers */
- tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
- tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
- tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
- tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
- tf->tf_cs = pmap->pm_hiexec > I386_MAX_EXE_ADDR ?
- GSEL(GUCODEBIG_SEL, SEL_UPL) : GSEL(GUCODE_SEL, SEL_UPL);
- tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
- tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
-}
-
-int waittime = -1;
-struct pcb dumppcb;
-
-void
-cpu_reboot(int howto, char *bootstr)
-{
-
- if (cold) {
- howto |= RB_HALT;
- goto haltsys;
- }
-
- boothowto = howto;
- if ((howto & RB_NOSYNC) == 0 && waittime < 0) {
- waittime = 0;
- vfs_shutdown();
- /*
- * If we've been adjusting the clock, the todr
- * will be out of synch; adjust it now.
- */
- if (time_adjusted != 0)
- resettodr();
- }
-
- /* Disable interrupts. */
- splhigh();
-
- /* Do a dump if requested. */
- if ((howto & (RB_DUMP | RB_HALT)) == RB_DUMP)
- dumpsys();
-
-haltsys:
- doshutdownhooks();
-
-#ifdef MULTIPROCESSOR
- x86_broadcast_ipi(X86_IPI_HALT);
-#endif
-
- if ((howto & RB_POWERDOWN) == RB_POWERDOWN) {
-#if NACPI > 0
- if (acpi_softc != NULL) {
- delay(500000);
- acpi_enter_sleep_state(acpi_softc, ACPI_STATE_S5);
- printf("WARNING: ACPI powerdown failed!\n");
- }
-#endif
-#if NAPM > 0 && !defined(APM_NO_POWEROFF)
- /* turn off, if we can. But try to turn disk off and
- * wait a bit first--some disk drives are slow to clean up
- * and users have reported disk corruption.
- */
- delay(500000);
- apm_set_powstate(APM_DEV_DISK(0xff), APM_SYS_OFF);
- delay(500000);
- apm_set_powstate(APM_DEV_ALLDEVS, APM_SYS_OFF);
- printf("WARNING: APM powerdown failed!\n");
- /*
- * RB_POWERDOWN implies RB_HALT... fall into it...
- */
-#endif
- HYPERVISOR_shutdown();
- }
-
- if (howto & RB_HALT) {
- printf("\n");
- printf("The operating system has halted.\n");
-
- /* XXX cngetc() below doesn't work, shutdown machine for now */
- HYPERVISOR_shutdown();
-
- printf("Please press any key to reboot.\n\n");
-
-#ifdef BEEP_ONHALT
- {
- int c;
- for (c = BEEP_ONHALT_COUNT; c > 0; c--) {
- sysbeep(BEEP_ONHALT_PITCH,
- BEEP_ONHALT_PERIOD * hz / 1000);
- delay(BEEP_ONHALT_PERIOD * 1000);
- sysbeep(0, BEEP_ONHALT_PERIOD * hz / 1000);
- delay(BEEP_ONHALT_PERIOD * 1000);
- }
- }
-#endif
-
- cnpollc(1); /* for proper keyboard command handling */
- if (cngetc() == 0) {
- /* no console attached, so just hlt */
- for(;;) {
- __asm __volatile("hlt");
- }
- }
- cnpollc(0);
- }
-
- printf("rebooting...\n");
- if (cpureset_delay > 0)
- delay(cpureset_delay * 1000);
- cpu_reset();
- for(;;) ;
- /*NOTREACHED*/
-}
-
-/*
- * These variables are needed by /sbin/savecore
- */
-u_int32_t dumpmag = 0x8fca0101; /* magic number */
-int dumpsize = 0; /* pages */
-long dumplo = 0; /* blocks */
-
-/*
- * cpu_dumpsize: calculate size of machine-dependent kernel core dump headers.
- */
-int
-cpu_dumpsize()
-{
- int size;
-
- size = ALIGN(sizeof(kcore_seg_t)) + ALIGN(sizeof(cpu_kcore_hdr_t)) +
- ALIGN(mem_cluster_cnt * sizeof(phys_ram_seg_t));
- if (roundup(size, dbtob(1)) != dbtob(1))
- return (-1);
-
- return (1);
-}
-
-/*
- * cpu_dump_mempagecnt: calculate the size of RAM (in pages) to be dumped.
- */
-u_long
-cpu_dump_mempagecnt()
-{
- u_long i, n;
-
- n = 0;
- for (i = 0; i < mem_cluster_cnt; i++)
- n += atop(mem_clusters[i].size);
- return (n);
-}
-
-/*
- * cpu_dump: dump the machine-dependent kernel core dump headers.
- */
-int
-cpu_dump()
-{
- int (*dump)(dev_t, daddr_t, caddr_t, size_t);
- char buf[dbtob(1)];
- kcore_seg_t *segp;
- cpu_kcore_hdr_t *cpuhdrp;
- phys_ram_seg_t *memsegp;
- const struct bdevsw *bdev;
- int i;
-
- bdev = bdevsw_lookup(dumpdev);
- if (bdev == NULL)
- return (ENXIO);
- dump = bdev->d_dump;
-
- memset(buf, 0, sizeof buf);
- segp = (kcore_seg_t *)buf;
- cpuhdrp = (cpu_kcore_hdr_t *)&buf[ALIGN(sizeof(*segp))];
- memsegp = (phys_ram_seg_t *)&buf[ ALIGN(sizeof(*segp)) +
- ALIGN(sizeof(*cpuhdrp))];
-
- /*
- * Generate a segment header.
- */
- CORE_SETMAGIC(*segp, KCORE_MAGIC, MID_MACHINE, CORE_CPU);
- segp->c_size = dbtob(1) - ALIGN(sizeof(*segp));
-
- /*
- * Add the machine-dependent header info.
- */
- cpuhdrp->ptdpaddr = PTDpaddr;
- cpuhdrp->nmemsegs = mem_cluster_cnt;
-
- /*
- * Fill in the memory segment descriptors.
- */
- for (i = 0; i < mem_cluster_cnt; i++) {
- memsegp[i].start = mem_clusters[i].start;
- memsegp[i].size = mem_clusters[i].size;
- }
-
- return (dump(dumpdev, dumplo, (caddr_t)buf, dbtob(1)));
-}
-
-/*
- * This is called by main to set dumplo and dumpsize.
- * Dumps always skip the first PAGE_SIZE of disk space
- * in case there might be a disk label stored there.
- * If there is extra space, put dump at the end to
- * reduce the chance that swapping trashes it.
- */
-void
-cpu_dumpconf()
-{
- const struct bdevsw *bdev;
- int nblks, dumpblks; /* size of dump area */
-
- if (dumpdev == NODEV)
- goto bad;
- bdev = bdevsw_lookup(dumpdev);
- if (bdev == NULL)
- panic("dumpconf: bad dumpdev=0x%x", dumpdev);
- if (bdev->d_psize == NULL)
- goto bad;
- nblks = (*bdev->d_psize)(dumpdev);
- if (nblks <= ctod(1))
- goto bad;
-
- dumpblks = cpu_dumpsize();
- if (dumpblks < 0)
- goto bad;
- dumpblks += ctod(cpu_dump_mempagecnt());
-
- /* If dump won't fit (incl. room for possible label), punt. */
- if (dumpblks > (nblks - ctod(1)))
- goto bad;
-
- /* Put dump at end of partition */
- dumplo = nblks - dumpblks;
-
- /* dumpsize is in page units, and doesn't include headers. */
- dumpsize = cpu_dump_mempagecnt();
- return;
-
- bad:
- dumpsize = 0;
-}
-
-/*
- * Doadump comes here after turning off memory management and
- * getting on the dump stack, either when called above, or by
- * the auto-restart code.
- */
-#define BYTES_PER_DUMP PAGE_SIZE /* must be a multiple of pagesize XXX small
*/
-static vaddr_t dumpspace;
-
-vaddr_t
-reserve_dumppages(vaddr_t p)
-{
-
- dumpspace = p;
- return (p + BYTES_PER_DUMP);
-}
-
-void
-dumpsys()
-{
- u_long totalbytesleft, bytes, i, n, memseg;
- u_long maddr;
- int psize;
- daddr_t blkno;
- const struct bdevsw *bdev;
- int (*dump)(dev_t, daddr_t, caddr_t, size_t);
- int error;
-
- /* Save registers. */
- savectx(&dumppcb);
-
- if (dumpdev == NODEV)
- return;
-
- bdev = bdevsw_lookup(dumpdev);
- if (bdev == NULL || bdev->d_psize == NULL)
- return;
-
- /*
- * For dumps during autoconfiguration,
- * if dump device has already configured...
- */
- if (dumpsize == 0)
- cpu_dumpconf();
- if (dumplo <= 0 || dumpsize == 0) {
- printf("\ndump to dev %u,%u not possible\n", major(dumpdev),
- minor(dumpdev));
- return;
- }
- printf("\ndumping to dev %u,%u offset %ld\n", major(dumpdev),
- minor(dumpdev), dumplo);
-
- psize = (*bdev->d_psize)(dumpdev);
- printf("dump ");
- if (psize == -1) {
- printf("area unavailable\n");
- return;
- }
-
-#if 0 /* XXX this doesn't work. grr. */
- /* toss any characters present prior to dump */
- while (sget() != NULL); /*syscons and pccons differ */
-#endif
-
- if ((error = cpu_dump()) != 0)
- goto err;
-
- totalbytesleft = ptoa(cpu_dump_mempagecnt());
- blkno = dumplo + cpu_dumpsize();
- dump = bdev->d_dump;
- error = 0;
-
- for (memseg = 0; memseg < mem_cluster_cnt; memseg++) {
- maddr = mem_clusters[memseg].start;
- bytes = mem_clusters[memseg].size;
-
- for (i = 0; i < bytes; i += n, totalbytesleft -= n) {
- /* Print out how many MBs we have left to go. */
- if ((totalbytesleft % (1024*1024)) == 0)
- printf("%ld ", totalbytesleft / (1024 * 1024));
-
- /* Limit size for next transfer. */
- n = bytes - i;
- if (n > BYTES_PER_DUMP)
- n = BYTES_PER_DUMP;
-
- (void) pmap_map(dumpspace, maddr, maddr + n,
- VM_PROT_READ);
-
- error = (*dump)(dumpdev, blkno, (caddr_t)dumpspace, n);
- if (error)
- goto err;
- maddr += n;
- blkno += btodb(n); /* XXX? */
-
-#if 0 /* XXX this doesn't work. grr. */
- /* operator aborting dump? */
- if (sget() != NULL) {
- error = EINTR;
- break;
- }
-#endif
- }
- }
-
- err:
- switch (error) {
-
- case ENXIO:
- printf("device bad\n");
- break;
-
- case EFAULT:
- printf("device not ready\n");
- break;
-
- case EINVAL:
- printf("area improper\n");
- break;
-
- case EIO:
- printf("i/o error\n");
- break;
-
- case EINTR:
- printf("aborted from console\n");
- break;
-
- case 0:
- printf("succeeded\n");
- break;
-
- default:
- printf("error %d\n", error);
- break;
- }
- printf("\n\n");
- delay(5000000); /* 5 seconds */
-}
-
-/*
- * Clear registers on exec
- */
-void
-setregs(struct lwp *l, struct exec_package *pack, u_long stack)
-{
- struct pmap *pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map);
- struct pcb *pcb = &l->l_addr->u_pcb;
- struct trapframe *tf;
-
-#if NNPX > 0
- /* If we were using the FPU, forget about it. */
- if (l->l_addr->u_pcb.pcb_fpcpu != NULL)
- npxsave_lwp(l, 0);
-#endif
-
-#ifdef USER_LDT
- pmap_ldt_cleanup(l);
-#endif
-
- l->l_md.md_flags &= ~MDL_USEDFPU;
- if (i386_use_fxsave) {
- pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __NetBSD_NPXCW__;
- pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
- } else
- pcb->pcb_savefpu.sv_87.sv_env.en_cw = __NetBSD_NPXCW__;
-
- tf = l->l_md.md_regs;
- tf->tf_gs = LSEL(LUDATA_SEL, SEL_UPL);
- tf->tf_fs = LSEL(LUDATA_SEL, SEL_UPL);
- tf->tf_es = LSEL(LUDATA_SEL, SEL_UPL);
- tf->tf_ds = LSEL(LUDATA_SEL, SEL_UPL);
- tf->tf_edi = 0;
- tf->tf_esi = 0;
- tf->tf_ebp = 0;
- tf->tf_ebx = (int)l->l_proc->p_psstr;
- tf->tf_edx = 0;
- tf->tf_ecx = 0;
- tf->tf_eax = 0;
- tf->tf_eip = pack->ep_entry;
- tf->tf_cs = pmap->pm_hiexec > I386_MAX_EXE_ADDR ?
- LSEL(LUCODEBIG_SEL, SEL_UPL) : LSEL(LUCODE_SEL, SEL_UPL);
- tf->tf_eflags = PSL_USERSET;
- tf->tf_esp = stack;
- tf->tf_ss = LSEL(LUDATA_SEL, SEL_UPL);
-}
-
-/*
- * Initialize segments and descriptor tables
- */
-
-union descriptor *gdt, *ldt;
-struct gate_descriptor *idt;
-char idt_allocmap[NIDT];
-struct simplelock idt_lock = SIMPLELOCK_INITIALIZER;
-#ifdef I586_CPU
-union descriptor *pentium_idt;
-#endif
-extern struct user *proc0paddr;
-
-void
-setgate(struct gate_descriptor *gd, void *func, int args, int type, int dpl,
- int sel)
-{
-
- gd->gd_looffset = (int)func;
- gd->gd_selector = sel;
- gd->gd_stkcpy = args;
- gd->gd_xx = 0;
- gd->gd_type = type;
- gd->gd_dpl = dpl;
- gd->gd_p = 1;
- gd->gd_hioffset = (int)func >> 16;
-}
-
-void
-unsetgate(struct gate_descriptor *gd)
-{
- gd->gd_p = 0;
- gd->gd_hioffset = 0;
- gd->gd_looffset = 0;
- gd->gd_selector = 0;
- gd->gd_xx = 0;
- gd->gd_stkcpy = 0;
- gd->gd_type = 0;
- gd->gd_dpl = 0;
-}
-
-
-void
-setregion(struct region_descriptor *rd, void *base, size_t limit)
-{
-
- rd->rd_limit = (int)limit;
- rd->rd_base = (int)base;
-}
-
-void
-setsegment(struct segment_descriptor *sd, void *base, size_t limit, int type,
- int dpl, int def32, int gran)
-{
-
- sd->sd_lolimit = (int)limit;
- sd->sd_lobase = (int)base;
- sd->sd_type = type;
- sd->sd_dpl = dpl;
- sd->sd_p = 1;
- sd->sd_hilimit = (int)limit >> 16;
- sd->sd_xx = 0;
- sd->sd_def32 = def32;
- sd->sd_gran = gran;
- sd->sd_hibase = (int)base >> 24;
-}
-
-#define IDTVEC(name) __CONCAT(X, name)
-typedef void (vector)(void);
-extern vector IDTVEC(syscall);
-extern vector IDTVEC(osyscall);
-extern vector *IDTVEC(exceptions)[];
-#ifdef COMPAT_SVR4
-extern vector IDTVEC(svr4_fasttrap);
-#endif /* COMPAT_SVR4 */
-#ifdef COMPAT_MACH
-extern vector IDTVEC(mach_trap);
-#endif
-#define MAX_XEN_IDT 128
-trap_info_t xen_idt[MAX_XEN_IDT];
-int xen_idt_idx;
-
-#define KBTOB(x) ((size_t)(x) * 1024UL)
-
-void cpu_init_idt()
-{
- struct region_descriptor region;
-
- panic("cpu_init_idt");
-#ifdef I586_CPU
- setregion(®ion, pentium_idt, NIDT * sizeof(idt[0]) - 1);
-#else
- setregion(®ion, idt, NIDT * sizeof(idt[0]) - 1);
-#endif
- lidt(®ion);
-}
-
-#if !defined(REALBASEMEM) && !defined(REALEXTMEM)
-void
-add_mem_cluster(u_int64_t seg_start, u_int64_t seg_end, u_int32_t type)
-{
- extern struct extent *iomem_ex;
- int i;
-
- if (seg_end > 0x100000000ULL) {
- printf("WARNING: skipping large "
- "memory map entry: "
- "0x%qx/0x%qx/0x%x\n",
- seg_start,
- (seg_end - seg_start),
- type);
- return;
- }
-
- /*
- * XXX Chop the last page off the size so that
- * XXX it can fit in avail_end.
- */
- if (seg_end == 0x100000000ULL)
- seg_end -= PAGE_SIZE;
-
- if (seg_end <= seg_start)
- return;
-
- for (i = 0; i < mem_cluster_cnt; i++) {
- if ((mem_clusters[i].start == round_page(seg_start))
- && (mem_clusters[i].size
- == trunc_page(seg_end) - mem_clusters[i].start)) {
-#ifdef DEBUG_MEMLOAD
- printf("WARNING: skipping duplicate segment entry\n");
-#endif
- return;
- }
- }
-
- /*
- * Allocate the physical addresses used by RAM
- * from the iomem extent map. This is done before
- * the addresses are page rounded just to make
- * sure we get them all.
- */
- if (extent_alloc_region(iomem_ex, seg_start,
- seg_end - seg_start, EX_NOWAIT)) {
- /* XXX What should we do? */
- printf("WARNING: CAN'T ALLOCATE "
- "MEMORY SEGMENT "
- "(0x%qx/0x%qx/0x%x) FROM "
- "IOMEM EXTENT MAP!\n",
- seg_start, seg_end - seg_start, type);
- return;
- }
-
- /*
- * If it's not free memory, skip it.
- */
- if (type != BIM_Memory)
- return;
-
- /* XXX XXX XXX */
- if (mem_cluster_cnt >= VM_PHYSSEG_MAX)
- panic("init386: too many memory segments");
-
- seg_start = round_page(seg_start);
- seg_end = trunc_page(seg_end);
-
- if (seg_start == seg_end)
- return;
-
- mem_clusters[mem_cluster_cnt].start = seg_start;
- mem_clusters[mem_cluster_cnt].size =
- seg_end - seg_start;
-
- if (avail_end < seg_end)
- avail_end = seg_end;
- physmem += atop(mem_clusters[mem_cluster_cnt].size);
- mem_cluster_cnt++;
-}
-#endif /* !defined(REALBASEMEM) && !defined(REALEXTMEM) */
-
-void
-initgdt()
-{
-#if !defined(XEN)
- struct region_descriptor region;
-#else
- paddr_t frames[16];
-#endif
-
-#if !defined(XEN)
- gdt = tgdt;
- memset(gdt, 0, NGDT*sizeof(*gdt));
-#endif
- /* make gdt gates and memory segments */
- setsegment(&gdt[GCODE_SEL].sd, 0, 0xfc3ff, SDT_MEMERA, SEL_KPL, 1, 1);
- setsegment(&gdt[GDATA_SEL].sd, 0, 0xfc3ff, SDT_MEMRWA, SEL_KPL, 1, 1);
- setsegment(&gdt[GUCODE_SEL].sd, 0, x86_btop(I386_MAX_EXE_ADDR) - 1,
- SDT_MEMERA, SEL_UPL, 1, 1);
- setsegment(&gdt[GUCODEBIG_SEL].sd, 0, x86_btop(VM_MAXUSER_ADDRESS) - 1,
- SDT_MEMERA, SEL_UPL, 1, 1);
- setsegment(&gdt[GUDATA_SEL].sd, 0, x86_btop(VM_MAXUSER_ADDRESS) - 1,
- SDT_MEMRWA, SEL_UPL, 1, 1);
-#ifdef COMPAT_MACH
- setgate(&gdt[GMACHCALLS_SEL].gd, &IDTVEC(mach_trap), 1,
- SDT_SYS386CGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
-#endif
-#if NBIOSCALL > 0
- /* bios trampoline GDT entries */
- setsegment(&gdt[GBIOSCODE_SEL].sd, 0, 0xfc3ff, SDT_MEMERA, SEL_KPL, 0,
- 0);
- setsegment(&gdt[GBIOSDATA_SEL].sd, 0, 0xfc3ff, SDT_MEMRWA, SEL_KPL, 0,
- 0);
-#endif
- setsegment(&gdt[GCPU_SEL].sd, &cpu_info_primary,
- sizeof(struct cpu_info)-1, SDT_MEMRWA, SEL_KPL, 1, 1);
-
-#if !defined(XEN)
- setregion(®ion, gdt, NGDT * sizeof(gdt[0]) - 1);
- lgdt(®ion);
-#else
- frames[0] = xpmap_ptom((uint32_t)gdt - KERNBASE) >> PAGE_SHIFT;
- /* pmap_kremove((vaddr_t)gdt, PAGE_SIZE); */
- pmap_kenter_pa((vaddr_t)gdt, (uint32_t)gdt - KERNBASE,
- VM_PROT_READ);
- XENPRINTK(("loading gdt %lx, %d entries\n", frames[0] << PAGE_SHIFT,
- NGDT));
- if (HYPERVISOR_set_gdt(frames, NGDT))
- panic("HYPERVISOR_set_gdt failed!\n");
- lgdt_finish();
-#endif
-}
-
-void
-init386(paddr_t first_avail)
-{
-#if !defined(XEN)
- union descriptor *tgdt;
-#endif
- extern void consinit(void);
-#if !defined(XEN)
- extern struct extent *iomem_ex;
-#if !defined(REALBASEMEM) && !defined(REALEXTMEM)
- struct btinfo_memmap *bim;
-#endif
- struct region_descriptor region;
-#endif
- int x;
-#if !defined(XEN)
- int first16q;
- u_int64_t seg_start, seg_end;
- u_int64_t seg_start1, seg_end1;
-#endif
- paddr_t realmode_reserved_start;
- psize_t realmode_reserved_size;
- int needs_earlier_install_pte0;
-#if NBIOSCALL > 0
- extern int biostramp_image_size;
- extern u_char biostramp_image[];
-#endif
-
- XENPRINTK(("HYPERVISOR_shared_info %p\n", HYPERVISOR_shared_info));
-#ifdef XENDEBUG_LOW
- xen_dbglow_init();
-#endif
-
- cpu_probe_features(&cpu_info_primary);
- cpu_feature = cpu_info_primary.ci_feature_flags;
-
- /* not on Xen... */
- cpu_feature &= ~(CPUID_PGE|CPUID_PSE|CPUID_MTRR|CPUID_FXSR);
-
- lwp0.l_addr = proc0paddr;
- cpu_info_primary.ci_curpcb = &lwp0.l_addr->u_pcb;
-
- XENPRINTK(("proc0paddr %p pcb %p first_avail %p\n",
- proc0paddr, cpu_info_primary.ci_curpcb, (void *)first_avail));
- XENPRINTK(("ptdpaddr %p atdevbase %p\n", (void *)PTDpaddr,
- (void *)atdevbase));
-
- x86_bus_space_init();
- consinit(); /* XXX SHOULD NOT BE DONE HERE */
- /*
- * Initailize PAGE_SIZE-dependent variables.
- */
- uvm_setpagesize();
-
- /*
- * Saving SSE registers won't work if the save area isn't
- * 16-byte aligned.
- */
- if (offsetof(struct user, u_pcb.pcb_savefpu) & 0xf)
- panic("init386: pcb_savefpu not 16-byte aligned");
-
- /*
- * Start with 2 color bins -- this is just a guess to get us
- * started. We'll recolor when we determine the largest cache
- * sizes on the system.
- */
- uvmexp.ncolors = 2;
-
-#if !defined(XEN)
- /*
- * BIOS leaves data in physical page 0
- * Even if it didn't, our VM system doesn't like using zero as a
- * physical page number.
- * We may also need pages in low memory (one each) for secondary CPU
- * startup, for BIOS calls, and for ACPI, plus a page table page to map
- * them into the first few pages of the kernel's pmap.
- */
- avail_start = PAGE_SIZE;
-#else
- /* Make sure the end of the space used by the kernel is rounded. */
- first_avail = round_page(first_avail);
- avail_start = first_avail - KERNBASE;
- avail_end = ptoa(xen_start_info.nr_pages) +
- (KERNTEXTOFF - KERNBASE_LOCORE);
- pmap_pa_start = (KERNTEXTOFF - KERNBASE_LOCORE);
- pmap_pa_end = avail_end;
- mem_clusters[0].start = avail_start;
- mem_clusters[0].size = avail_end - avail_start;
- mem_cluster_cnt++;
- physmem += atop(mem_clusters[0].size);
-#endif
-
- /*
- * reserve memory for real-mode call
- */
- needs_earlier_install_pte0 = 0;
- realmode_reserved_start = 0;
- realmode_reserved_size = 0;
-#if NBIOSCALL > 0
- /* save us a page for trampoline code */
- realmode_reserved_size += PAGE_SIZE;
- needs_earlier_install_pte0 = 1;
-#endif
-#ifdef MULTIPROCESSOR /* XXX */
-#if !defined(XEN)
- KASSERT(avail_start == PAGE_SIZE); /* XXX */
-#endif
- if (realmode_reserved_size < MP_TRAMPOLINE) /* XXX */
- realmode_reserved_size = MP_TRAMPOLINE; /* XXX */
- needs_earlier_install_pte0 = 1; /* XXX */
-#endif /* XXX */
-#if NACPI > 0
- /* trampoline code for wake handler */
- realmode_reserved_size += ptoa(acpi_md_get_npages_of_wakecode()+1);
- needs_earlier_install_pte0 = 1;
-#endif
- if (needs_earlier_install_pte0) {
- /* page table for directory entry 0 */
- realmode_reserved_size += PAGE_SIZE;
- }
- if (realmode_reserved_size>0) {
- realmode_reserved_start = avail_start;
- avail_start += realmode_reserved_size;
- }
-
-#ifdef DEBUG_MEMLOAD
- printf("mem_cluster_count: %d\n", mem_cluster_cnt);
-#endif
-
- /*
- * Call pmap initialization to make new kernel address space.
- * We must do this before loading pages into the VM system.
- */
- pmap_bootstrap((vaddr_t)atdevbase + IOM_SIZE);
-
-#if !defined(XEN)
-#if !defined(REALBASEMEM) && !defined(REALEXTMEM)
- /*
- * Check to see if we have a memory map from the BIOS (passed
- * to us by the boot program.
- */
- bim = lookup_bootinfo(BTINFO_MEMMAP);
- if (bim != NULL && bim->num > 0) {
-#ifdef DEBUG_MEMLOAD
- printf("BIOS MEMORY MAP (%d ENTRIES):\n", bim->num);
-#endif
- for (x = 0; x < bim->num; x++) {
-#ifdef DEBUG_MEMLOAD
- printf(" addr 0x%qx size 0x%qx type 0x%x\n",
- bim->entry[x].addr,
- bim->entry[x].size,
- bim->entry[x].type);
-#endif
-
- /*
- * If the segment is not memory, skip it.
- */
- switch (bim->entry[x].type) {
- case BIM_Memory:
- case BIM_ACPI:
- case BIM_NVS:
- break;
- default:
- continue;
- }
-
- /*
- * Sanity check the entry.
- * XXX Need to handle uint64_t in extent code
- * XXX and 64-bit physical addresses in i386
- * XXX port.
- */
- seg_start = bim->entry[x].addr;
- seg_end = bim->entry[x].addr + bim->entry[x].size;
-
- /*
- * Avoid Compatibility Holes.
- * XXX Holes within memory space that allow access
- * XXX to be directed to the PC-compatible frame buffer
- * XXX (0xa0000-0xbffff),to adapter ROM space
- * XXX (0xc0000-0xdffff), and to system BIOS space
- * XXX (0xe0000-0xfffff).
- * XXX Some laptop(for example,Toshiba Satellite2550X)
- * XXX report this area and occurred problems,
- * XXX so we avoid this area.
- */
- if (seg_start < 0x100000 && seg_end > 0xa0000) {
- printf("WARNING: memory map entry overlaps "
- "with ``Compatibility Holes'': "
- "0x%qx/0x%qx/0x%x\n", seg_start,
- seg_end - seg_start, bim->entry[x].type);
- add_mem_cluster(seg_start, 0xa0000,
- bim->entry[x].type);
- add_mem_cluster(0x100000, seg_end,
- bim->entry[x].type);
- } else
- add_mem_cluster(seg_start, seg_end,
- bim->entry[x].type);
- }
- }
-#endif /* ! REALBASEMEM && ! REALEXTMEM */
- /*
- * If the loop above didn't find any valid segment, fall back to
- * former code.
- */
- if (mem_cluster_cnt == 0) {
- /*
- * Allocate the physical addresses used by RAM from the iomem
- * extent map. This is done before the addresses are
- * page rounded just to make sure we get them all.
- */
- if (extent_alloc_region(iomem_ex, 0, KBTOB(biosbasemem),
- EX_NOWAIT)) {
- /* XXX What should we do? */
- printf("WARNING: CAN'T ALLOCATE BASE MEMORY FROM "
- "IOMEM EXTENT MAP!\n");
- }
- mem_clusters[0].start = 0;
- mem_clusters[0].size = trunc_page(KBTOB(biosbasemem));
- physmem += atop(mem_clusters[0].size);
- if (extent_alloc_region(iomem_ex, IOM_END, KBTOB(biosextmem),
- EX_NOWAIT)) {
- /* XXX What should we do? */
- printf("WARNING: CAN'T ALLOCATE EXTENDED MEMORY FROM "
- "IOMEM EXTENT MAP!\n");
- }
-#if NISADMA > 0
- /*
- * Some motherboards/BIOSes remap the 384K of RAM that would
- * normally be covered by the ISA hole to the end of memory
- * so that it can be used. However, on a 16M system, this
- * would cause bounce buffers to be allocated and used.
- * This is not desirable behaviour, as more than 384K of
- * bounce buffers might be allocated. As a work-around,
- * we round memory down to the nearest 1M boundary if
- * we're using any isadma devices and the remapped memory
- * is what puts us over 16M.
- */
- if (biosextmem > (15*1024) && biosextmem < (16*1024)) {
- char pbuf[9];
-
- format_bytes(pbuf, sizeof(pbuf),
- biosextmem - (15*1024));
- printf("Warning: ignoring %s of remapped memory\n",
- pbuf);
- biosextmem = (15*1024);
- }
-#endif
- mem_clusters[1].start = IOM_END;
- mem_clusters[1].size = trunc_page(KBTOB(biosextmem));
- physmem += atop(mem_clusters[1].size);
-
- mem_cluster_cnt = 2;
-
- avail_end = IOM_END + trunc_page(KBTOB(biosextmem));
- }
- /*
- * If we have 16M of RAM or less, just put it all on
- * the default free list. Otherwise, put the first
- * 16M of RAM on a lower priority free list (so that
- * all of the ISA DMA'able memory won't be eaten up
- * first-off).
- */
- if (avail_end <= (16 * 1024 * 1024))
- first16q = VM_FREELIST_DEFAULT;
- else
- first16q = VM_FREELIST_FIRST16;
-
- /* Make sure the end of the space used by the kernel is rounded. */
- first_avail = round_page(first_avail);
-#endif
-
- XENPRINTK(("load the memory cluster %p(%d) - %p(%ld)\n",
- (void *)avail_start, (int)atop(avail_start),
- (void *)avail_end, (int)atop(avail_end)));
- uvm_page_physload(atop(avail_start), atop(avail_end),
- atop(avail_start), atop(avail_end),
- VM_FREELIST_DEFAULT);
-
-#if !defined(XEN)
-
- /*
- * Now, load the memory clusters (which have already been
- * rounded and truncated) into the VM system.
- *
- * NOTE: WE ASSUME THAT MEMORY STARTS AT 0 AND THAT THE KERNEL
- * IS LOADED AT IOM_END (1M).
- */
- for (x = 0; x < mem_cluster_cnt; x++) {
- seg_start = mem_clusters[x].start;
- seg_end = mem_clusters[x].start + mem_clusters[x].size;
- seg_start1 = 0;
- seg_end1 = 0;
-
- /*
- * Skip memory before our available starting point.
- */
- if (seg_end <= avail_start)
- continue;
-
- if (avail_start >= seg_start && avail_start < seg_end) {
- if (seg_start != 0)
- panic("init386: memory doesn't start at 0");
- seg_start = avail_start;
- if (seg_start == seg_end)
- continue;
- }
-
- /*
- * If this segment contains the kernel, split it
- * in two, around the kernel.
- */
- if (seg_start <= IOM_END && first_avail <= seg_end) {
- seg_start1 = first_avail;
- seg_end1 = seg_end;
- seg_end = IOM_END;
- }
-
- /* First hunk */
- if (seg_start != seg_end) {
- if (seg_start < (16 * 1024 * 1024) &&
- first16q != VM_FREELIST_DEFAULT) {
- u_int64_t tmp;
-
- if (seg_end > (16 * 1024 * 1024))
- tmp = (16 * 1024 * 1024);
- else
- tmp = seg_end;
-
- if (tmp != seg_start) {
-#ifdef DEBUG_MEMLOAD
- printf("loading 0x%qx-0x%qx "
- "(0x%lx-0x%lx)\n",
- seg_start, tmp,
- atop(seg_start), atop(tmp));
-#endif
- uvm_page_physload(atop(seg_start),
- atop(tmp), atop(seg_start),
- atop(tmp), first16q);
- }
- seg_start = tmp;
- }
-
- if (seg_start != seg_end) {
-#ifdef DEBUG_MEMLOAD
- printf("loading 0x%qx-0x%qx (0x%lx-0x%lx)\n",
- seg_start, seg_end,
- atop(seg_start), atop(seg_end));
-#endif
- uvm_page_physload(atop(seg_start),
- atop(seg_end), atop(seg_start),
- atop(seg_end), VM_FREELIST_DEFAULT);
- }
- }
-
- /* Second hunk */
- if (seg_start1 != seg_end1) {
- if (seg_start1 < (16 * 1024 * 1024) &&
- first16q != VM_FREELIST_DEFAULT) {
- u_int64_t tmp;
-
- if (seg_end1 > (16 * 1024 * 1024))
- tmp = (16 * 1024 * 1024);
- else
- tmp = seg_end1;
-
- if (tmp != seg_start1) {
-#ifdef DEBUG_MEMLOAD
- printf("loading 0x%qx-0x%qx "
- "(0x%lx-0x%lx)\n",
- seg_start1, tmp,
- atop(seg_start1), atop(tmp));
-#endif
- uvm_page_physload(atop(seg_start1),
- atop(tmp), atop(seg_start1),
- atop(tmp), first16q);
- }
- seg_start1 = tmp;
- }
-
- if (seg_start1 != seg_end1) {
-#ifdef DEBUG_MEMLOAD
- printf("loading 0x%qx-0x%qx (0x%lx-0x%lx)\n",
- seg_start1, seg_end1,
- atop(seg_start1), atop(seg_end1));
-#endif
- uvm_page_physload(atop(seg_start1),
- atop(seg_end1), atop(seg_start1),
- atop(seg_end1), VM_FREELIST_DEFAULT);
- }
- }
- }
-#endif
-
- /*
- * Steal memory for the message buffer (at end of core).
- */
- {
- struct vm_physseg *vps;
- psize_t sz = round_page(MSGBUFSIZE);
- psize_t reqsz = sz;
-
- for (x = 0; x < vm_nphysseg; x++) {
- vps = &vm_physmem[x];
- if (ptoa(vps->avail_end) == avail_end)
- goto found;
- }
- panic("init386: can't find end of memory");
-
- found:
- /* Shrink so it'll fit in the last segment. */
- if ((vps->avail_end - vps->avail_start) < atop(sz))
- sz = ptoa(vps->avail_end - vps->avail_start);
-
- vps->avail_end -= atop(sz);
- vps->end -= atop(sz);
- msgbuf_paddr = ptoa(vps->avail_end);
-
- /* Remove the last segment if it now has no pages. */
- if (vps->start == vps->end) {
- for (vm_nphysseg--; x < vm_nphysseg; x++)
- vm_physmem[x] = vm_physmem[x + 1];
- }
-
- /* Now find where the new avail_end is. */
- for (avail_end = 0, x = 0; x < vm_nphysseg; x++)
- if (vm_physmem[x].avail_end > avail_end)
- avail_end = vm_physmem[x].avail_end;
- avail_end = ptoa(avail_end);
-
- /* Warn if the message buffer had to be shrunk. */
- if (sz != reqsz)
- printf("WARNING: %ld bytes not available for msgbuf "
- "in last cluster (%ld used)\n", reqsz, sz);
- }
-
- /*
- * install PT page for the first 4M if needed.
- */
- if (needs_earlier_install_pte0) {
- paddr_t paddr;
-#ifdef DIAGNOSTIC
- if (realmode_reserved_size < PAGE_SIZE) {
- panic("cannot steal memory for first 4M PT page.");
- }
-#endif
- paddr=realmode_reserved_start+realmode_reserved_size-PAGE_SIZE;
- pmap_enter(pmap_kernel(), (vaddr_t)vtopte(0), paddr,
- VM_PROT_READ|VM_PROT_WRITE,
- PMAP_WIRED|VM_PROT_READ|VM_PROT_WRITE);
- pmap_update(pmap_kernel());
- /* make sure it is clean before using */
- memset(vtopte(0), 0, PAGE_SIZE);
- realmode_reserved_size -= PAGE_SIZE;
- }
-
-#if NBIOSCALL > 0
- /*
- * this should be caught at kernel build time, but put it here
- * in case someone tries to fake it out...
- */
-#ifdef DIAGNOSTIC
- if (realmode_reserved_start > BIOSTRAMP_BASE ||
- (realmode_reserved_start+realmode_reserved_size) < (BIOSTRAMP_BASE+
- PAGE_SIZE)) {
- panic("cannot steal memory for PT page of bioscall.");
- }
- if (biostramp_image_size > PAGE_SIZE)
- panic("biostramp_image_size too big: %x vs. %x",
- biostramp_image_size, PAGE_SIZE);
-#endif
- pmap_kenter_pa((vaddr_t)BIOSTRAMP_BASE, /* virtual */
- (paddr_t)BIOSTRAMP_BASE, /* physical */
- VM_PROT_ALL); /* protection */
- pmap_update(pmap_kernel());
- memcpy((caddr_t)BIOSTRAMP_BASE, biostramp_image, biostramp_image_size);
-#ifdef DEBUG_BIOSCALL
- printf("biostramp installed @ %x\n", BIOSTRAMP_BASE);
-#endif
- realmode_reserved_size -= PAGE_SIZE;
- realmode_reserved_start += PAGE_SIZE;
-#endif
-
-#if NACPI > 0
- /*
- * Steal memory for the acpi wake code
- */
- {
- paddr_t paddr, p;
- psize_t sz;
- int npg;
-
- paddr = realmode_reserved_start;
- npg = acpi_md_get_npages_of_wakecode();
- sz = ptoa(npg);
-#ifdef DIAGNOSTIC
- if (realmode_reserved_size < sz) {
- panic("cannot steal memory for ACPI wake code.");
- }
-#endif
-
- /* identical mapping */
- p = paddr;
- for (x=0; x<npg; x++) {
- printf("kenter: 0x%08X\n", (unsigned)p);
- pmap_kenter_pa((vaddr_t)p, p, VM_PROT_ALL);
- p += PAGE_SIZE;
- }
- pmap_update(pmap_kernel());
-
- acpi_md_install_wakecode(paddr);
-
- realmode_reserved_size -= sz;
- realmode_reserved_start += sz;
- }
-#endif
-
- pmap_enter(pmap_kernel(), idt_vaddr, idt_paddr,
- VM_PROT_READ|VM_PROT_WRITE, PMAP_WIRED|VM_PROT_READ|VM_PROT_WRITE);
- pmap_update(pmap_kernel());
- memset((void *)idt_vaddr, 0, PAGE_SIZE);
-
-#if !defined(XEN)
- idt = (struct gate_descriptor *)idt_vaddr;
-#ifdef I586_CPU
- pmap_enter(pmap_kernel(), pentium_idt_vaddr, idt_paddr,
- VM_PROT_READ, PMAP_WIRED|VM_PROT_READ);
- pentium_idt = (union descriptor *)pentium_idt_vaddr;
-#endif
-#endif
- pmap_update(pmap_kernel());
-
- initgdt();
-
- HYPERVISOR_set_callbacks(
- GSEL(GCODE_SEL, SEL_KPL), (unsigned long)hypervisor_callback,
- GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback);
-
-#if !defined(XEN)
- tgdt = gdt;
- gdt = (union descriptor *)
- ((char *)idt + NIDT * sizeof (struct gate_descriptor));
- ldt = gdt + NGDT;
-
- memcpy(gdt, tgdt, NGDT*sizeof(*gdt));
-
- setsegment(&gdt[GLDT_SEL].sd, ldt, NLDT * sizeof(ldt[0]) - 1,
- SDT_SYSLDT, SEL_KPL, 0, 0);
-#else
- ldt = (union descriptor *)idt_vaddr;
-#endif
-
- /* make ldt gates and memory segments */
- setgate(&ldt[LSYS5CALLS_SEL].gd, &IDTVEC(osyscall), 1,
- SDT_SYS386CGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
-
- ldt[LUCODE_SEL] = gdt[GUCODE_SEL];
- ldt[LUCODEBIG_SEL] = gdt[GUCODEBIG_SEL];
- ldt[LUDATA_SEL] = gdt[GUDATA_SEL];
- ldt[LSOL26CALLS_SEL] = ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL];
-
-#if !defined(XEN)
- /* exceptions */
- for (x = 0; x < 32; x++) {
- setgate(&idt[x], IDTVEC(exceptions)[x], 0, SDT_SYS386TGT,
- (x == 3 || x == 4) ? SEL_UPL : SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
- idt_allocmap[x] = 1;
- }
-
- /* new-style interrupt gate for syscalls */
- setgate(&idt[128], &IDTVEC(syscall), 0, SDT_SYS386TGT, SEL_UPL,
- GSEL(GCODE_SEL, SEL_KPL));
- idt_allocmap[128] = 1;
-#ifdef COMPAT_SVR4
- setgate(&idt[0xd2], &IDTVEC(svr4_fasttrap), 0, SDT_SYS386TGT,
- SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
- idt_allocmap[0xd2] = 1;
-#endif /* COMPAT_SVR4 */
-#endif
-
- memset(xen_idt, 0, sizeof(trap_info_t) * MAX_XEN_IDT);
- xen_idt_idx = 0;
- for (x = 0; x < 32; x++) {
- KASSERT(xen_idt_idx < MAX_XEN_IDT);
- xen_idt[xen_idt_idx].vector = x;
- xen_idt[xen_idt_idx].flags =
- (x == 3 || x == 4) ? SEL_UPL : SEL_XEN;
- xen_idt[xen_idt_idx].cs = GSEL(GCODE_SEL, SEL_KPL);
- xen_idt[xen_idt_idx].address =
- (uint32_t)IDTVEC(exceptions)[x];
- xen_idt_idx++;
- }
- KASSERT(xen_idt_idx < MAX_XEN_IDT);
- xen_idt[xen_idt_idx].vector = 128;
- xen_idt[xen_idt_idx].flags = SEL_UPL;
- xen_idt[xen_idt_idx].cs = GSEL(GCODE_SEL, SEL_KPL);
- xen_idt[xen_idt_idx].address = (uint32_t)&IDTVEC(syscall);
- xen_idt_idx++;
-#ifdef COMPAT_SVR4
- KASSERT(xen_idt_idx < MAX_XEN_IDT);
- xen_idt[xen_idt_idx].vector = 0xd2;
- xen_idt[xen_idt_idx].flags = SEL_UPL;
- xen_idt[xen_idt_idx].cs = GSEL(GCODE_SEL, SEL_KPL);
- xen_idt[xen_idt_idx].address = (uint32_t)&IDTVEC(svr4_fasttrap);
- xen_idt_idx++;
-#endif /* COMPAT_SVR4 */
-
-#if !defined(XEN)
- setregion(®ion, gdt, NGDT * sizeof(gdt[0]) - 1);
- lgdt(®ion);
-#else
- lldt(GSEL(GLDT_SEL, SEL_KPL));
-#endif
-
-#if !defined(XEN)
- cpu_init_idt();
-#else
- db_trap_callback = ddb_trap_hook;
-
- XENPRINTF(("HYPERVISOR_set_trap_table %p\n", xen_idt));
- if (HYPERVISOR_set_trap_table(xen_idt))
- panic("HYPERVISOR_set_trap_table %p failed\n", xen_idt);
-#endif
-
-#if NKSYMS || defined(DDB) || defined(LKM)
- {
- extern int end;
- struct btinfo_symtab *symtab;
-
-#ifdef DDB
- db_machine_init();
-#endif
-
- symtab = lookup_bootinfo(BTINFO_SYMTAB);
-
- if (symtab) {
- symtab->ssym += KERNBASE;
- symtab->esym += KERNBASE;
- ksyms_init(symtab->nsym, (int *)symtab->ssym,
- (int *)symtab->esym);
- }
- else
- ksyms_init(*(int *)&end, ((int *)&end) + 1,
- xen_start_info.mod_start ?
- (void *)xen_start_info.mod_start :
- (void *)xen_start_info.mfn_list);
- }
-#endif
-#ifdef DDB
- if (boothowto & RB_KDB)
- Debugger();
-#endif
-#ifdef IPKDB
- ipkdb_init();
- if (boothowto & RB_KDB)
- ipkdb_connect(0);
-#endif
-#ifdef KGDB
- kgdb_port_init();
- if (boothowto & RB_KDB) {
- kgdb_debug_init = 1;
- kgdb_connect(1);
- }
-#endif
-
-#if NMCA > 0
- /* check for MCA bus, needed to be done before ISA stuff - if
- * MCA is detected, ISA needs to use level triggered interrupts
- * by default */
- mca_busprobe();
-#endif
-
-#if defined(XEN)
- events_default_setup();
-#else
- intr_default_setup();
-#endif
-
- /* Initialize software interrupts. */
- softintr_init();
-
- splraise(IPL_IPI);
- enable_intr();
-
- if (physmem < btoc(2 * 1024 * 1024)) {
- printf("warning: too little memory available; "
- "have %lu bytes, want %lu bytes\n"
- "running in degraded mode\n"
- "press a key to confirm\n\n",
- ptoa(physmem), 2*1024*1024UL);
- cngetc();
- }
-
-#ifdef __HAVE_CPU_MAXPROC
- /* Make sure maxproc is sane */
- if (maxproc > cpu_maxproc())
- maxproc = cpu_maxproc();
-#endif
-}
-
-#ifdef COMPAT_NOMID
-static int
-exec_nomid(struct proc *p, struct exec_package *epp)
-{
- int error;
- u_long midmag, magic;
- u_short mid;
- struct exec *execp = epp->ep_hdr;
-
- /* check on validity of epp->ep_hdr performed by exec_out_makecmds */
-
- midmag = ntohl(execp->a_midmag);
- mid = (midmag >> 16) & 0xffff;
- magic = midmag & 0xffff;
-
- if (magic == 0) {
- magic = (execp->a_midmag & 0xffff);
- mid = MID_ZERO;
- }
-
- midmag = mid << 16 | magic;
-
- switch (midmag) {
- case (MID_ZERO << 16) | ZMAGIC:
- /*
- * 386BSD's ZMAGIC format:
- */
- error = exec_aout_prep_oldzmagic(p, epp);
- break;
-
- case (MID_ZERO << 16) | QMAGIC:
- /*
- * BSDI's QMAGIC format:
- * same as new ZMAGIC format, but with different magic number
- */
- error = exec_aout_prep_zmagic(p, epp);
- break;
-
- case (MID_ZERO << 16) | NMAGIC:
- /*
- * BSDI's NMAGIC format:
- * same as NMAGIC format, but with different magic number
- * and with text starting at 0.
- */
- error = exec_aout_prep_oldnmagic(p, epp);
- break;
-
- case (MID_ZERO << 16) | OMAGIC:
- /*
- * BSDI's OMAGIC format:
- * same as OMAGIC format, but with different magic number
- * and with text starting at 0.
- */
- error = exec_aout_prep_oldomagic(p, epp);
- break;
-
- default:
- error = ENOEXEC;
- }
-
- return error;
-}
-#endif
-
-/*
- * cpu_exec_aout_makecmds():
- * CPU-dependent a.out format hook for execve().
- *
- * Determine of the given exec package refers to something which we
- * understand and, if so, set up the vmcmds for it.
- *
- * On the i386, old (386bsd) ZMAGIC binaries and BSDI QMAGIC binaries
- * if COMPAT_NOMID is given as a kernel option.
- */
-int
-cpu_exec_aout_makecmds(struct proc *p, struct exec_package *epp)
-{
- int error = ENOEXEC;
-
-#ifdef COMPAT_NOMID
- if ((error = exec_nomid(p, epp)) == 0)
- return error;
-#endif /* ! COMPAT_NOMID */
-
- return error;
-}
-
-void *
-lookup_bootinfo(int type)
-{
- struct btinfo_common *help;
- int n = *(int*)bootinfo;
- help = (struct btinfo_common *)(bootinfo + sizeof(int));
- while(n--) {
- if(help->type == type)
- return(help);
- help = (struct btinfo_common *)((char*)help + help->len);
- }
- return(0);
-}
-
-#include <dev/ic/mc146818reg.h> /* for NVRAM POST */
-#include <i386/isa/nvram.h> /* for NVRAM POST */
-
-void
-cpu_reset()
-{
-
- disable_intr();
-
-#if 0
- /*
- * Ensure the NVRAM reset byte contains something vaguely sane.
- */
-
- outb(IO_RTC, NVRAM_RESET);
- outb(IO_RTC+1, NVRAM_RESET_RST);
-
- /*
- * The keyboard controller has 4 random output pins, one of which is
- * connected to the RESET pin on the CPU in many PCs. We tell the
- * keyboard controller to pulse this line a couple of times.
- */
- outb(IO_KBD + KBCMDP, KBC_PULSE0);
- delay(100000);
- outb(IO_KBD + KBCMDP, KBC_PULSE0);
- delay(100000);
-#endif
-
- HYPERVISOR_reboot();
-
- for (;;);
-}
-
-void
-cpu_getmcontext(struct lwp *l, mcontext_t *mcp, unsigned int *flags)
-{
- const struct trapframe *tf = l->l_md.md_regs;
- __greg_t *gr = mcp->__gregs;
- __greg_t ras_eip;
-
- /* Save register context. */
-#ifdef VM86
- if (tf->tf_eflags & PSL_VM) {
- gr[_REG_GS] = tf->tf_vm86_gs;
- gr[_REG_FS] = tf->tf_vm86_fs;
- gr[_REG_ES] = tf->tf_vm86_es;
- gr[_REG_DS] = tf->tf_vm86_ds;
- gr[_REG_EFL] = get_vflags(l);
- } else
-#endif
- {
- gr[_REG_GS] = tf->tf_gs;
- gr[_REG_FS] = tf->tf_fs;
- gr[_REG_ES] = tf->tf_es;
- gr[_REG_DS] = tf->tf_ds;
- gr[_REG_EFL] = tf->tf_eflags;
- }
- gr[_REG_EDI] = tf->tf_edi;
- gr[_REG_ESI] = tf->tf_esi;
- gr[_REG_EBP] = tf->tf_ebp;
- gr[_REG_EBX] = tf->tf_ebx;
- gr[_REG_EDX] = tf->tf_edx;
- gr[_REG_ECX] = tf->tf_ecx;
- gr[_REG_EAX] = tf->tf_eax;
- gr[_REG_EIP] = tf->tf_eip;
- gr[_REG_CS] = tf->tf_cs;
- gr[_REG_ESP] = tf->tf_esp;
- gr[_REG_UESP] = tf->tf_esp;
- gr[_REG_SS] = tf->tf_ss;
- gr[_REG_TRAPNO] = tf->tf_trapno;
- gr[_REG_ERR] = tf->tf_err;
-
- if ((ras_eip = (__greg_t)ras_lookup(l->l_proc,
- (caddr_t) gr[_REG_EIP])) != -1)
- gr[_REG_EIP] = ras_eip;
-
- *flags |= _UC_CPU;
-
- /* Save floating point register context, if any. */
- if ((l->l_md.md_flags & MDL_USEDFPU) != 0) {
-#if NNPX > 0
- /*
- * If this process is the current FP owner, dump its
- * context to the PCB first.
- * XXX npxsave() also clears the FPU state; depending on the
- * XXX application this might be a penalty.
- */
- if (l->l_addr->u_pcb.pcb_fpcpu) {
- npxsave_lwp(l, 1);
- }
-#endif
- if (i386_use_fxsave) {
-
memcpy(&mcp->__fpregs.__fp_reg_set.__fp_xmm_state.__fp_xmm,
- &l->l_addr->u_pcb.pcb_savefpu.sv_xmm,
- sizeof
(mcp->__fpregs.__fp_reg_set.__fp_xmm_state.__fp_xmm));
- *flags |= _UC_FXSAVE;
- } else {
-
memcpy(&mcp->__fpregs.__fp_reg_set.__fpchip_state.__fp_state,
- &l->l_addr->u_pcb.pcb_savefpu.sv_87,
- sizeof
(mcp->__fpregs.__fp_reg_set.__fpchip_state.__fp_state));
- }
-#if 0
- /* Apparently nothing ever touches this. */
- ucp->mcp.mc_fp.fp_emcsts = l->l_addr->u_pcb.pcb_saveemc;
-#endif
- *flags |= _UC_FPU;
- }
-}
-
-int
-cpu_setmcontext(struct lwp *l, const mcontext_t *mcp, unsigned int flags)
-{
- struct trapframe *tf = l->l_md.md_regs;
- __greg_t *gr = mcp->__gregs;
-
- /* Restore register context, if any. */
- if ((flags & _UC_CPU) != 0) {
-#ifdef VM86
- if (gr[_REG_EFL] & PSL_VM) {
- tf->tf_vm86_gs = gr[_REG_GS];
- tf->tf_vm86_fs = gr[_REG_FS];
- tf->tf_vm86_es = gr[_REG_ES];
- tf->tf_vm86_ds = gr[_REG_DS];
- set_vflags(l, gr[_REG_EFL]);
- if (flags & _UC_VM) {
- void syscall_vm86(struct trapframe *);
- l->l_proc->p_md.md_syscall = syscall_vm86;
- }
- } else
-#endif
- {
- /*
- * Check for security violations. If we're returning
- * to protected mode, the CPU will validate the segment
- * registers automatically and generate a trap on
- * violations. We handle the trap, rather than doing
- * all of the checking here.
- */
- if (((gr[_REG_EFL] ^ tf->tf_eflags) & PSL_USERSTATIC) ||
- !USERMODE(gr[_REG_CS], gr[_REG_EFL])) {
- printf("cpu_setmcontext error: uc EFL: 0x%08x"
- " tf EFL: 0x%08x uc CS: 0x%x\n",
- gr[_REG_EFL], tf->tf_eflags, gr[_REG_CS]);
- return (EINVAL);
- }
- tf->tf_gs = gr[_REG_GS];
- tf->tf_fs = gr[_REG_FS];
- tf->tf_es = gr[_REG_ES];
- tf->tf_ds = gr[_REG_DS];
- /* Only change the user-alterable part of eflags */
- tf->tf_eflags &= ~PSL_USER;
- tf->tf_eflags |= (gr[_REG_EFL] & PSL_USER);
- }
- tf->tf_edi = gr[_REG_EDI];
- tf->tf_esi = gr[_REG_ESI];
- tf->tf_ebp = gr[_REG_EBP];
- tf->tf_ebx = gr[_REG_EBX];
- tf->tf_edx = gr[_REG_EDX];
- tf->tf_ecx = gr[_REG_ECX];
- tf->tf_eax = gr[_REG_EAX];
- tf->tf_eip = gr[_REG_EIP];
- tf->tf_cs = gr[_REG_CS];
- tf->tf_esp = gr[_REG_UESP];
- tf->tf_ss = gr[_REG_SS];
- }
-
- /* Restore floating point register context, if any. */
- if ((flags & _UC_FPU) != 0) {
-#if NNPX > 0
- /*
- * If we were using the FPU, forget that we were.
- */
- if (l->l_addr->u_pcb.pcb_fpcpu != NULL)
- npxsave_lwp(l, 0);
-#endif
- if (flags & _UC_FXSAVE) {
- if (i386_use_fxsave) {
- memcpy(
- &l->l_addr->u_pcb.pcb_savefpu.sv_xmm,
-
&mcp->__fpregs.__fp_reg_set.__fp_xmm_state.__fp_xmm,
- sizeof
(&l->l_addr->u_pcb.pcb_savefpu.sv_xmm));
- } else {
- /* This is a weird corner case */
- process_xmm_to_s87((struct savexmm *)
-
&mcp->__fpregs.__fp_reg_set.__fp_xmm_state.__fp_xmm,
- &l->l_addr->u_pcb.pcb_savefpu.sv_87);
- }
- } else {
- if (i386_use_fxsave) {
- process_s87_to_xmm((struct save87 *)
-
&mcp->__fpregs.__fp_reg_set.__fpchip_state.__fp_state,
- &l->l_addr->u_pcb.pcb_savefpu.sv_xmm);
- } else {
- memcpy(&l->l_addr->u_pcb.pcb_savefpu.sv_87,
-
&mcp->__fpregs.__fp_reg_set.__fpchip_state.__fp_state,
- sizeof
(l->l_addr->u_pcb.pcb_savefpu.sv_87));
- }
- }
- /* If not set already. */
- l->l_md.md_flags |= MDL_USEDFPU;
-#if 0
- /* Apparently unused. */
- l->l_addr->u_pcb.pcb_saveemc = mcp->mc_fp.fp_emcsts;
-#endif
- }
- if (flags & _UC_SETSTACK)
- l->l_proc->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
- if (flags & _UC_CLRSTACK)
- l->l_proc->p_sigctx.ps_sigstk.ss_flags &= ~SS_ONSTACK;
- return (0);
-}
-
-void
-cpu_initclocks()
-{
- (*initclock_func)();
-}
-
-#ifdef MULTIPROCESSOR
-void
-need_resched(struct cpu_info *ci)
-{
-
- if (ci->ci_want_resched)
- return;
-
- ci->ci_want_resched = 1;
- if ((ci)->ci_curlwp != NULL)
- aston((ci)->ci_curlwp->l_proc);
- else if (ci != curcpu())
- x86_send_ipi(ci, 0);
-}
-#endif
-
-/*
- * Allocate an IDT vector slot within the given range.
- * XXX needs locking to avoid MP allocation races.
- */
-
-int
-idt_vec_alloc(int low, int high)
-{
- int vec;
-
- simple_lock(&idt_lock);
- for (vec = low; vec <= high; vec++) {
- if (idt_allocmap[vec] == 0) {
- idt_allocmap[vec] = 1;
- simple_unlock(&idt_lock);
- return vec;
- }
- }
- simple_unlock(&idt_lock);
- return 0;
-}
-
-void
-idt_vec_set(int vec, void (*function)(void))
-{
- /*
- * Vector should be allocated, so no locking needed.
- */
- KASSERT(idt_allocmap[vec] == 1);
- setgate(&idt[vec], function, 0, SDT_SYS386IGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
-}
-
-void
-idt_vec_free(int vec)
-{
- simple_lock(&idt_lock);
- unsetgate(&idt[vec]);
- idt_allocmap[vec] = 0;
- simple_unlock(&idt_lock);
-}
-
-/*
- * Number of processes is limited by number of available GDT slots.
- */
-int
-cpu_maxproc(void)
-{
-#ifdef USER_LDT
- return ((MAXGDTSIZ - NGDT) / 2);
-#else
- return (MAXGDTSIZ - NGDT);
-#endif
-}
-
-#if defined(DDB) || defined(KGDB)
-
-/*
- * Callback to output a backtrace when entering ddb.
- */
-void
-ddb_trap_hook(int where)
-{
- static int once = 0;
- db_addr_t db_dot;
-
- if (once != 0 || where != 1)
- return;
- once = 1;
-
- if (curlwp != NULL) {
- db_printf("Stopped");
- if (curproc == NULL)
- db_printf("; curlwp = %p,"
- " curproc is NULL at\t", curlwp);
- else
- db_printf(" in pid %d.%d (%s) at\t",
- curproc->p_pid, curlwp->l_lid,
- curproc->p_comm);
- } else
- db_printf("Stopped at\t");
- db_dot = PC_REGS(DDB_REGS);
- db_print_loc_and_inst(db_dot);
-
- db_stack_trace_print((db_expr_t) db_dot, FALSE, 65535,
- "", db_printf);
-#ifdef DEBUG
- db_show_regs((db_expr_t) db_dot, FALSE, 65535, "");
-#endif
-}
-
-#endif /* DDB || KGDB */
diff -r 64cd054aa143 -r 0255f48b757f
netbsd-2.0-xen-sparse/sys/arch/xen/i386/pmap.c
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/i386/pmap.c Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,4522 +0,0 @@
-/* $NetBSD: pmap.c,v 1.1.2.1 2004/05/22 15:57:52 he Exp $ */
-/* NetBSD: pmap.c,v 1.172 2004/04/12 13:17:46 yamt Exp */
-
-/*
- *
- * Copyright (c) 1997 Charles D. Cranor and Washington University.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Charles D. Cranor and
- * Washington University.
- * 4. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- * pmap.c: i386 pmap module rewrite
- * Chuck Cranor <chuck@xxxxxxxxxxxxxx>
- * 11-Aug-97
- *
- * history of this pmap module: in addition to my own input, i used
- * the following references for this rewrite of the i386 pmap:
- *
- * [1] the NetBSD i386 pmap. this pmap appears to be based on the
- * BSD hp300 pmap done by Mike Hibler at University of Utah.
- * it was then ported to the i386 by William Jolitz of UUNET
- * Technologies, Inc. Then Charles M. Hannum of the NetBSD
- * project fixed some bugs and provided some speed ups.
- *
- * [2] the FreeBSD i386 pmap. this pmap seems to be the
- * Hibler/Jolitz pmap, as modified for FreeBSD by John S. Dyson
- * and David Greenman.
- *
- * [3] the Mach pmap. this pmap, from CMU, seems to have migrated
- * between several processors. the VAX version was done by
- * Avadis Tevanian, Jr., and Michael Wayne Young. the i386
- * version was done by Lance Berc, Mike Kupfer, Bob Baron,
- * David Golub, and Richard Draves. the alpha version was
- * done by Alessandro Forin (CMU/Mach) and Chris Demetriou
- * (NetBSD/alpha).
- */
-
-#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.1.2.1 2004/05/22 15:57:52 he Exp $");
-
-#include "opt_cputype.h"
-#include "opt_user_ldt.h"
-#include "opt_largepages.h"
-#include "opt_lockdebug.h"
-#include "opt_multiprocessor.h"
-#include "opt_kstack_dr0.h"
-#include "opt_xen.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/proc.h>
-#include <sys/malloc.h>
-#include <sys/pool.h>
-#include <sys/user.h>
-#include <sys/kernel.h>
-
-#include <uvm/uvm.h>
-
-#include <machine/atomic.h>
-#include <machine/cpu.h>
-#include <machine/specialreg.h>
-#include <machine/gdt.h>
-
-#include <dev/isa/isareg.h>
-#include <machine/isa_machdep.h>
-
-#include <machine/xen.h>
-#include <machine/hypervisor.h>
-#include <machine/xenpmap.h>
-
-void xpmap_find_pte(paddr_t);
-
-/* #define XENDEBUG */
-
-#ifdef XENDEBUG
-#define XENPRINTF(x) printf x
-#define XENPRINTK(x) printf x
-#else
-#define XENPRINTF(x)
-#define XENPRINTK(x)
-#endif
-#define PRINTF(x) printf x
-#define PRINTK(x) printf x
-
-
-/*
- * general info:
- *
- * - for an explanation of how the i386 MMU hardware works see
- * the comments in <machine/pte.h>.
- *
- * - for an explanation of the general memory structure used by
- * this pmap (including the recursive mapping), see the comments
- * in <machine/pmap.h>.
- *
- * this file contains the code for the "pmap module." the module's
- * job is to manage the hardware's virtual to physical address mappings.
- * note that there are two levels of mapping in the VM system:
- *
- * [1] the upper layer of the VM system uses vm_map's and vm_map_entry's
- * to map ranges of virtual address space to objects/files. for
- * example, the vm_map may say: "map VA 0x1000 to 0x22000 read-only
- * to the file /bin/ls starting at offset zero." note that
- * the upper layer mapping is not concerned with how individual
- * vm_pages are mapped.
- *
- * [2] the lower layer of the VM system (the pmap) maintains the mappings
- * from virtual addresses. it is concerned with which vm_page is
- * mapped where. for example, when you run /bin/ls and start
- * at page 0x1000 the fault routine may lookup the correct page
- * of the /bin/ls file and then ask the pmap layer to establish
- * a mapping for it.
- *
- * note that information in the lower layer of the VM system can be
- * thrown away since it can easily be reconstructed from the info
- * in the upper layer.
- *
- * data structures we use include:
- *
- * - struct pmap: describes the address space of one thread
- * - struct pv_entry: describes one <PMAP,VA> mapping of a PA
- * - struct pv_head: there is one pv_head per managed page of
- * physical memory. the pv_head points to a list of pv_entry
- * structures which describe all the <PMAP,VA> pairs that this
- * page is mapped in. this is critical for page based operations
- * such as pmap_page_protect() [change protection on _all_ mappings
- * of a page]
- * - pv_page/pv_page_info: pv_entry's are allocated out of pv_page's.
- * if we run out of pv_entry's we allocate a new pv_page and free
- * its pv_entrys.
- * - pmap_remove_record: a list of virtual addresses whose mappings
- * have been changed. used for TLB flushing.
- */
-
-/*
- * memory allocation
- *
- * - there are three data structures that we must dynamically allocate:
- *
- * [A] new process' page directory page (PDP)
- * - plan 1: done at pmap_create() we use
- * uvm_km_alloc(kernel_map, PAGE_SIZE) [fka kmem_alloc] to do this
- * allocation.
- *
- * if we are low in free physical memory then we sleep in
- * uvm_km_alloc -- in this case this is ok since we are creating
- * a new pmap and should not be holding any locks.
- *
- * if the kernel is totally out of virtual space
- * (i.e. uvm_km_alloc returns NULL), then we panic.
- *
- * XXX: the fork code currently has no way to return an "out of
- * memory, try again" error code since uvm_fork [fka vm_fork]
- * is a void function.
- *
- * [B] new page tables pages (PTP)
- * - call uvm_pagealloc()
- * => success: zero page, add to pm_pdir
- * => failure: we are out of free vm_pages, let pmap_enter()
- * tell UVM about it.
- *
- * note: for kernel PTPs, we start with NKPTP of them. as we map
- * kernel memory (at uvm_map time) we check to see if we've grown
- * the kernel pmap. if so, we call the optional function
- * pmap_growkernel() to grow the kernel PTPs in advance.
- *
- * [C] pv_entry structures
- * - plan 1: try to allocate one off the free list
- * => success: done!
- * => failure: no more free pv_entrys on the list
- * - plan 2: try to allocate a new pv_page to add a chunk of
- * pv_entrys to the free list
- * [a] obtain a free, unmapped, VA in kmem_map. either
- * we have one saved from a previous call, or we allocate
- * one now using a "vm_map_lock_try" in uvm_map
- * => success: we have an unmapped VA, continue to [b]
- * => failure: unable to lock kmem_map or out of VA in it.
- * move on to plan 3.
- * [b] allocate a page in kmem_object for the VA
- * => success: map it in, free the pv_entry's, DONE!
- * => failure: kmem_object locked, no free vm_pages, etc.
- * save VA for later call to [a], go to plan 3.
- * If we fail, we simply let pmap_enter() tell UVM about it.
- */
-
-/*
- * locking
- *
- * we have the following locks that we must contend with:
- *
- * "normal" locks:
- *
- * - pmap_main_lock
- * this lock is used to prevent deadlock and/or provide mutex
- * access to the pmap system. most operations lock the pmap
- * structure first, then they lock the pv_lists (if needed).
- * however, some operations such as pmap_page_protect lock
- * the pv_lists and then lock pmaps. in order to prevent a
- * cycle, we require a mutex lock when locking the pv_lists
- * first. thus, the "pmap = >pv_list" lockers must gain a
- * read-lock on pmap_main_lock before locking the pmap. and
- * the "pv_list => pmap" lockers must gain a write-lock on
- * pmap_main_lock before locking. since only one thread
- * can write-lock a lock at a time, this provides mutex.
- *
- * "simple" locks:
- *
- * - pmap lock (per pmap, part of uvm_object)
- * this lock protects the fields in the pmap structure including
- * the non-kernel PDEs in the PDP, and the PTEs. it also locks
- * in the alternate PTE space (since that is determined by the
- * entry in the PDP).
- *
- * - pvh_lock (per pv_head)
- * this lock protects the pv_entry list which is chained off the
- * pv_head structure for a specific managed PA. it is locked
- * when traversing the list (e.g. adding/removing mappings,
- * syncing R/M bits, etc.)
- *
- * - pvalloc_lock
- * this lock protects the data structures which are used to manage
- * the free list of pv_entry structures.
- *
- * - pmaps_lock
- * this lock protects the list of active pmaps (headed by "pmaps").
- * we lock it when adding or removing pmaps from this list.
- *
- */
-
-/*
- * locking data structures
- */
-
-static struct simplelock pvalloc_lock;
-static struct simplelock pmaps_lock;
-
-#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
-static struct lock pmap_main_lock;
-
-#define PMAP_MAP_TO_HEAD_LOCK() \
- (void) spinlockmgr(&pmap_main_lock, LK_SHARED, NULL)
-#define PMAP_MAP_TO_HEAD_UNLOCK() \
- (void) spinlockmgr(&pmap_main_lock, LK_RELEASE, NULL)
-
-#define PMAP_HEAD_TO_MAP_LOCK() \
- (void) spinlockmgr(&pmap_main_lock, LK_EXCLUSIVE, NULL)
-#define PMAP_HEAD_TO_MAP_UNLOCK() \
- spinlockmgr(&pmap_main_lock, LK_RELEASE, (void *) 0)
-
-#else
-
-#define PMAP_MAP_TO_HEAD_LOCK() /* null */
-#define PMAP_MAP_TO_HEAD_UNLOCK() /* null */
-
-#define PMAP_HEAD_TO_MAP_LOCK() /* null */
-#define PMAP_HEAD_TO_MAP_UNLOCK() /* null */
-
-#endif
-
-#define COUNT(x) /* nothing */
-
-/*
- * TLB Shootdown:
- *
- * When a mapping is changed in a pmap, the TLB entry corresponding to
- * the virtual address must be invalidated on all processors. In order
- * to accomplish this on systems with multiple processors, messages are
- * sent from the processor which performs the mapping change to all
- * processors on which the pmap is active. For other processors, the
- * ASN generation numbers for that processor is invalidated, so that
- * the next time the pmap is activated on that processor, a new ASN
- * will be allocated (which implicitly invalidates all TLB entries).
- *
- * Shootdown job queue entries are allocated using a simple special-
- * purpose allocator for speed.
- */
-struct pmap_tlb_shootdown_job {
- TAILQ_ENTRY(pmap_tlb_shootdown_job) pj_list;
- vaddr_t pj_va; /* virtual address */
- pmap_t pj_pmap; /* the pmap which maps the address */
- pt_entry_t pj_pte; /* the PTE bits */
- struct pmap_tlb_shootdown_job *pj_nextfree;
-};
-
-#define PMAP_TLB_SHOOTDOWN_JOB_ALIGN 32
-union pmap_tlb_shootdown_job_al {
- struct pmap_tlb_shootdown_job pja_job;
- char pja_align[PMAP_TLB_SHOOTDOWN_JOB_ALIGN];
-};
-
-struct pmap_tlb_shootdown_q {
- TAILQ_HEAD(, pmap_tlb_shootdown_job) pq_head;
- int pq_pte; /* aggregate PTE bits */
- int pq_count; /* number of pending requests */
- __cpu_simple_lock_t pq_slock; /* spin lock on queue */
- int pq_flushg; /* pending flush global */
- int pq_flushu; /* pending flush user */
-} pmap_tlb_shootdown_q[X86_MAXPROCS];
-
-#define PMAP_TLB_MAXJOBS 16
-
-void pmap_tlb_shootdown_q_drain(struct pmap_tlb_shootdown_q *);
-struct pmap_tlb_shootdown_job *pmap_tlb_shootdown_job_get
- (struct pmap_tlb_shootdown_q *);
-void pmap_tlb_shootdown_job_put(struct pmap_tlb_shootdown_q *,
- struct pmap_tlb_shootdown_job *);
-
-__cpu_simple_lock_t pmap_tlb_shootdown_job_lock;
-union pmap_tlb_shootdown_job_al *pj_page, *pj_free;
-
-/*
- * global data structures
- */
-
-struct pmap kernel_pmap_store; /* the kernel's pmap (proc0) */
-
-/*
- * nkpde is the number of kernel PTPs allocated for the kernel at
- * boot time (NKPTP is a compile time override). this number can
- * grow dynamically as needed (but once allocated, we never free
- * kernel PTPs).
- */
-
-int nkpde = NKPTP;
-#ifdef NKPDE
-#error "obsolete NKPDE: use NKPTP"
-#endif
-
-/*
- * pmap_pg_g: if our processor supports PG_G in the PTE then we
- * set pmap_pg_g to PG_G (otherwise it is zero).
- */
-
-int pmap_pg_g = 0;
-
-#ifdef LARGEPAGES
-/*
- * pmap_largepages: if our processor supports PG_PS and we are
- * using it, this is set to TRUE.
- */
-
-int pmap_largepages;
-#endif
-
-/*
- * i386 physical memory comes in a big contig chunk with a small
- * hole toward the front of it... the following two paddr_t's
- * (shared with machdep.c) describe the physical address space
- * of this machine.
- */
-paddr_t avail_start; /* PA of first available physical page */
-paddr_t avail_end; /* PA of last available physical page */
-
-paddr_t pmap_pa_start; /* PA of first physical page for this domain */
-paddr_t pmap_pa_end; /* PA of last physical page for this domain */
-
- /* MA of last physical page of the machine */
-paddr_t pmap_mem_end = HYPERVISOR_VIRT_START; /* updated for domain-0 */
-
-/*
- * other data structures
- */
-
-static pt_entry_t protection_codes[8]; /* maps MI prot to i386 prot code */
-static boolean_t pmap_initialized = FALSE; /* pmap_init done yet? */
-
-/*
- * the following two vaddr_t's are used during system startup
- * to keep track of how much of the kernel's VM space we have used.
- * once the system is started, the management of the remaining kernel
- * VM space is turned over to the kernel_map vm_map.
- */
-
-static vaddr_t virtual_avail; /* VA of first free KVA */
-static vaddr_t virtual_end; /* VA of last free KVA */
-
-
-/*
- * pv_page management structures: locked by pvalloc_lock
- */
-
-TAILQ_HEAD(pv_pagelist, pv_page);
-static struct pv_pagelist pv_freepages; /* list of pv_pages with free
entrys */
-static struct pv_pagelist pv_unusedpgs; /* list of unused pv_pages */
-static int pv_nfpvents; /* # of free pv entries */
-static struct pv_page *pv_initpage; /* bootstrap page from kernel_map */
-static vaddr_t pv_cachedva; /* cached VA for later use */
-
-#define PVE_LOWAT (PVE_PER_PVPAGE / 2) /* free pv_entry low water mark */
-#define PVE_HIWAT (PVE_LOWAT + (PVE_PER_PVPAGE * 2))
- /* high water mark */
-
-static __inline int
-pv_compare(struct pv_entry *a, struct pv_entry *b)
-{
- if (a->pv_pmap < b->pv_pmap)
- return (-1);
- else if (a->pv_pmap > b->pv_pmap)
- return (1);
- else if (a->pv_va < b->pv_va)
- return (-1);
- else if (a->pv_va > b->pv_va)
- return (1);
- else
- return (0);
-}
-
-SPLAY_PROTOTYPE(pvtree, pv_entry, pv_node, pv_compare);
-SPLAY_GENERATE(pvtree, pv_entry, pv_node, pv_compare);
-
-/*
- * linked list of all non-kernel pmaps
- */
-
-static struct pmap_head pmaps;
-
-/*
- * pool that pmap structures are allocated from
- */
-
-struct pool pmap_pmap_pool;
-
-/*
- * MULTIPROCESSOR: special VA's/ PTE's are actually allocated inside a
- * X86_MAXPROCS*NPTECL array of PTE's, to avoid cache line thrashing
- * due to false sharing.
- */
-
-#ifdef MULTIPROCESSOR
-#define PTESLEW(pte, id) ((pte)+(id)*NPTECL)
-#define VASLEW(va,id) ((va)+(id)*NPTECL*PAGE_SIZE)
-#else
-#define PTESLEW(pte, id) (pte)
-#define VASLEW(va,id) (va)
-#endif
-
-/*
- * special VAs and the PTEs that map them
- */
-static pt_entry_t *csrc_pte, *cdst_pte, *zero_pte, *ptp_pte;
-static caddr_t csrcp, cdstp, zerop, ptpp;
-
-/*
- * pool and cache that PDPs are allocated from
- */
-
-struct pool pmap_pdp_pool;
-struct pool_cache pmap_pdp_cache;
-u_int pmap_pdp_cache_generation;
-
-int pmap_pdp_ctor(void *, void *, int);
-void pmap_pdp_dtor(void *, void *);
-
-caddr_t vmmap; /* XXX: used by mem.c... it should really uvm_map_reserve it */
-
-extern vaddr_t msgbuf_vaddr;
-extern paddr_t msgbuf_paddr;
-
-extern vaddr_t idt_vaddr; /* we allocate IDT early */
-extern paddr_t idt_paddr;
-
-#if defined(I586_CPU)
-/* stuff to fix the pentium f00f bug */
-extern vaddr_t pentium_idt_vaddr;
-#endif
-
-
-/*
- * local prototypes
- */
-
-static struct pv_entry *pmap_add_pvpage(struct pv_page *, boolean_t);
-static struct vm_page *pmap_alloc_ptp(struct pmap *, int);
-static struct pv_entry *pmap_alloc_pv(struct pmap *, int); /* see codes below
*/
-#define ALLOCPV_NEED 0 /* need PV now */
-#define ALLOCPV_TRY 1 /* just try to allocate, don't steal */
-#define ALLOCPV_NONEED 2 /* don't need PV, just growing cache */
-static struct pv_entry *pmap_alloc_pvpage(struct pmap *, int);
-static void pmap_enter_pv(struct pv_head *,
- struct pv_entry *, struct pmap *,
- vaddr_t, struct vm_page *);
-static void pmap_free_pv(struct pmap *, struct pv_entry *);
-static void pmap_free_pvs(struct pmap *, struct pv_entry *);
-static void pmap_free_pv_doit(struct pv_entry *);
-static void pmap_free_pvpage(void);
-static struct vm_page *pmap_get_ptp(struct pmap *, int);
-static boolean_t pmap_is_curpmap(struct pmap *);
-static boolean_t pmap_is_active(struct pmap *, int);
-static pt_entry_t *pmap_map_ptes(struct pmap *);
-static struct pv_entry *pmap_remove_pv(struct pv_head *, struct pmap *,
- vaddr_t);
-static void pmap_do_remove(struct pmap *, vaddr_t, vaddr_t, int);
-static boolean_t pmap_remove_pte(struct pmap *, struct vm_page *,
- pt_entry_t *, vaddr_t, int32_t *, int);
-static void pmap_remove_ptes(struct pmap *, struct vm_page *,
- vaddr_t, vaddr_t, vaddr_t, int32_t *,
- int);
-#define PMAP_REMOVE_ALL 0 /* remove all mappings */
-#define PMAP_REMOVE_SKIPWIRED 1 /* skip wired mappings */
-
-static vaddr_t pmap_tmpmap_pa(paddr_t);
-static pt_entry_t *pmap_tmpmap_pvepte(struct pv_entry *);
-static void pmap_tmpunmap_pa(void);
-static void pmap_tmpunmap_pvepte(struct pv_entry *);
-static void pmap_unmap_ptes(struct pmap *);
-
-static boolean_t pmap_reactivate(struct pmap *);
-
-#ifdef DEBUG
-u_int curapdp;
-#endif
-
-/*
- * p m a p i n l i n e h e l p e r f u n c t i o n s
- */
-
-/*
- * pmap_is_curpmap: is this pmap the one currently loaded [in %cr3]?
- * of course the kernel is always loaded
- */
-
-__inline static boolean_t
-pmap_is_curpmap(pmap)
- struct pmap *pmap;
-{
-
- return((pmap == pmap_kernel()) ||
- (pmap == curcpu()->ci_pmap));
-}
-
-/*
- * pmap_is_active: is this pmap loaded into the specified processor's %cr3?
- */
-
-__inline static boolean_t
-pmap_is_active(pmap, cpu_id)
- struct pmap *pmap;
- int cpu_id;
-{
-
- return (pmap == pmap_kernel() ||
- (pmap->pm_cpus & (1U << cpu_id)) != 0);
-}
-
-/*
- * pmap_tmpmap_pa: map a page in for tmp usage
- */
-
-__inline static vaddr_t
-pmap_tmpmap_pa(pa)
- paddr_t pa;
-{
-#ifdef MULTIPROCESSOR
- int id = cpu_number();
-#endif
- pt_entry_t *ptpte = PTESLEW(ptp_pte, id);
- pt_entry_t *maptp;
- caddr_t ptpva = VASLEW(ptpp, id);
-#if defined(DIAGNOSTIC)
- if (*ptpte)
- panic("pmap_tmpmap_pa: ptp_pte in use?");
-#endif
- maptp = (pt_entry_t *)vtomach((vaddr_t)ptpte);
- PTE_SET(ptpte, maptp, PG_V | PG_RW | pa); /* always a new mapping */
- return((vaddr_t)ptpva);
-}
-
-/*
- * pmap_tmpunmap_pa: unmap a tmp use page (undoes pmap_tmpmap_pa)
- */
-
-__inline static void
-pmap_tmpunmap_pa()
-{
-#ifdef MULTIPROCESSOR
- int id = cpu_number();
-#endif
- pt_entry_t *ptpte = PTESLEW(ptp_pte, id);
- pt_entry_t *maptp;
- caddr_t ptpva = VASLEW(ptpp, id);
-#if defined(DIAGNOSTIC)
- if (!pmap_valid_entry(*ptp_pte))
- panic("pmap_tmpunmap_pa: our pte invalid?");
-#endif
- maptp = (pt_entry_t *)vtomach((vaddr_t)ptpte);
- PTE_CLEAR(ptpte, maptp); /* zap! */
- pmap_update_pg((vaddr_t)ptpva);
-#ifdef MULTIPROCESSOR
- /*
- * No need for tlb shootdown here, since ptp_pte is per-CPU.
- */
-#endif
-}
-
-/*
- * pmap_tmpmap_pvepte: get a quick mapping of a PTE for a pv_entry
- *
- * => do NOT use this on kernel mappings [why? because pv_ptp may be NULL]
- */
-
-__inline static pt_entry_t *
-pmap_tmpmap_pvepte(pve)
- struct pv_entry *pve;
-{
-#ifdef DIAGNOSTIC
- if (pve->pv_pmap == pmap_kernel())
- panic("pmap_tmpmap_pvepte: attempt to map kernel");
-#endif
-
- /* is it current pmap? use direct mapping... */
- if (pmap_is_curpmap(pve->pv_pmap))
- return(vtopte(pve->pv_va));
-
- return(((pt_entry_t *)pmap_tmpmap_pa(VM_PAGE_TO_PHYS(pve->pv_ptp)))
- + ptei((unsigned)pve->pv_va));
-}
-
-/*
- * pmap_tmpunmap_pvepte: release a mapping obtained with pmap_tmpmap_pvepte
- */
-
-__inline static void
-pmap_tmpunmap_pvepte(pve)
- struct pv_entry *pve;
-{
- /* was it current pmap? if so, return */
- if (pmap_is_curpmap(pve->pv_pmap))
- return;
-
- pmap_tmpunmap_pa();
-}
-
-__inline static void
-pmap_apte_flush(struct pmap *pmap)
-{
-#if defined(MULTIPROCESSOR)
- struct pmap_tlb_shootdown_q *pq;
- struct cpu_info *ci, *self = curcpu();
- CPU_INFO_ITERATOR cii;
- int s;
-#endif
-
- tlbflush(); /* flush TLB on current processor */
-#if defined(MULTIPROCESSOR)
- /*
- * Flush the APTE mapping from all other CPUs that
- * are using the pmap we are using (who's APTE space
- * is the one we've just modified).
- *
- * XXXthorpej -- find a way to defer the IPI.
- */
- for (CPU_INFO_FOREACH(cii, ci)) {
- if (ci == self)
- continue;
- if (pmap_is_active(pmap, ci->ci_cpuid)) {
- pq = &pmap_tlb_shootdown_q[ci->ci_cpuid];
- s = splipi();
- __cpu_simple_lock(&pq->pq_slock);
- pq->pq_flushu++;
- __cpu_simple_unlock(&pq->pq_slock);
- splx(s);
- x86_send_ipi(ci, X86_IPI_TLB);
- }
- }
-#endif
-}
-
-/*
- * pmap_map_ptes: map a pmap's PTEs into KVM and lock them in
- *
- * => we lock enough pmaps to keep things locked in
- * => must be undone with pmap_unmap_ptes before returning
- */
-
-__inline static pt_entry_t *
-pmap_map_ptes(pmap)
- struct pmap *pmap;
-{
- pd_entry_t opde;
- pd_entry_t *mapdp;
- struct pmap *ourpmap;
- struct cpu_info *ci;
-
- /* the kernel's pmap is always accessible */
- if (pmap == pmap_kernel()) {
- return(PTE_BASE);
- }
-
- ci = curcpu();
- if (ci->ci_want_pmapload &&
- vm_map_pmap(&ci->ci_curlwp->l_proc->p_vmspace->vm_map) == pmap)
- pmap_load();
-
- /* if curpmap then we are always mapped */
- if (pmap_is_curpmap(pmap)) {
- simple_lock(&pmap->pm_obj.vmobjlock);
- return(PTE_BASE);
- }
-
- ourpmap = ci->ci_pmap;
-
- /* need to lock both curpmap and pmap: use ordered locking */
- if ((unsigned) pmap < (unsigned) ourpmap) {
- simple_lock(&pmap->pm_obj.vmobjlock);
- simple_lock(&ourpmap->pm_obj.vmobjlock);
- } else {
- simple_lock(&ourpmap->pm_obj.vmobjlock);
- simple_lock(&pmap->pm_obj.vmobjlock);
- }
-
- /* need to load a new alternate pt space into curpmap? */
- COUNT(apdp_pde_map);
- opde = PDE_GET(APDP_PDE);
- if (!pmap_valid_entry(opde) || (opde & PG_FRAME) != pmap->pm_pdirpa) {
- XENPRINTF(("APDP_PDE %p %p/%p set %p/%p\n",
- pmap,
- (void *)vtophys((vaddr_t)APDP_PDE),
- (void *)xpmap_ptom(vtophys((vaddr_t)APDP_PDE)),
- (void *)pmap->pm_pdirpa,
- (void *)xpmap_ptom(pmap->pm_pdirpa)));
- mapdp = (pt_entry_t *)vtomach((vaddr_t)APDP_PDE);
- PDE_SET(APDP_PDE, mapdp, pmap->pm_pdirpa /* | PG_RW */ | PG_V);
-#ifdef DEBUG
- curapdp = pmap->pm_pdirpa;
-#endif
- if (pmap_valid_entry(opde))
- pmap_apte_flush(ourpmap);
- XENPRINTF(("APDP_PDE set done\n"));
- }
- return(APTE_BASE);
-}
-
-/*
- * pmap_unmap_ptes: unlock the PTE mapping of "pmap"
- */
-
-__inline static void
-pmap_unmap_ptes(pmap)
- struct pmap *pmap;
-{
-#if defined(MULTIPROCESSOR)
- pd_entry_t *mapdp;
-#endif
-
- if (pmap == pmap_kernel()) {
- return;
- }
- if (pmap_is_curpmap(pmap)) {
- simple_unlock(&pmap->pm_obj.vmobjlock);
- } else {
- struct pmap *ourpmap = curcpu()->ci_pmap;
-
-#if defined(MULTIPROCESSOR)
- mapdp = (pt_entry_t *)vtomach((vaddr_t)APDP_PDE);
- PDE_CLEAR(APDP_PDE, mapdp);
- pmap_apte_flush(ourpmap);
-#endif
-#ifdef DEBUG
- curapdp = 0;
-#endif
- XENPRINTF(("APDP_PDE clear %p/%p set %p/%p\n",
- (void *)vtophys((vaddr_t)APDP_PDE),
- (void *)xpmap_ptom(vtophys((vaddr_t)APDP_PDE)),
- (void *)pmap->pm_pdirpa,
- (void *)xpmap_ptom(pmap->pm_pdirpa)));
- COUNT(apdp_pde_unmap);
- simple_unlock(&pmap->pm_obj.vmobjlock);
- simple_unlock(&ourpmap->pm_obj.vmobjlock);
- }
-}
-
-__inline static void
-pmap_exec_account(struct pmap *pm, vaddr_t va, pt_entry_t opte, pt_entry_t
npte)
-{
- if (curproc == NULL || curproc->p_vmspace == NULL ||
- pm != vm_map_pmap(&curproc->p_vmspace->vm_map))
- return;
-
- if ((opte ^ npte) & PG_X)
- pmap_update_pg(va);
-
- /*
- * Executability was removed on the last executable change.
- * Reset the code segment to something conservative and
- * let the trap handler deal with setting the right limit.
- * We can't do that because of locking constraints on the vm map.
- */
-
- if ((opte & PG_X) && (npte & PG_X) == 0 && va == pm->pm_hiexec) {
- struct trapframe *tf = curlwp->l_md.md_regs;
- struct pcb *pcb = &curlwp->l_addr->u_pcb;
-
- pcb->pcb_cs = tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
- pm->pm_hiexec = I386_MAX_EXE_ADDR;
- }
-}
-
-__inline static pt_entry_t
-pte_mtop(pt_entry_t pte)
-{
- pt_entry_t ppte;
-
- KDASSERT(pmap_valid_entry(pte));
- ppte = xpmap_mtop(pte);
- if ((ppte & PG_FRAME) == XPMAP_OFFSET) {
- XENPRINTF(("pte_mtop: null page %08x -> %08x\n",
- ppte, pte));
- ppte = pte;
- }
-
- return ppte;
-}
-
-__inline static pt_entry_t
-pte_get_ma(pt_entry_t *pte)
-{
-
- return *pte;
-}
-
-__inline static pt_entry_t
-pte_get(pt_entry_t *pte)
-{
-
- if (pmap_valid_entry(*pte))
- return pte_mtop(*pte);
- return *pte;
-}
-
-__inline static pt_entry_t
-pte_atomic_update_ma(pt_entry_t *pte, pt_entry_t *mapte, pt_entry_t npte)
-{
- pt_entry_t opte;
-
- XENPRINTK(("pte_atomic_update_ma pte %p mapte %p npte %08x\n",
- pte, mapte, npte));
- opte = PTE_GET_MA(pte);
- if (opte > pmap_mem_end) {
- /* must remove opte unchecked */
- if (npte > pmap_mem_end)
- /* must set npte unchecked */
- xpq_queue_unchecked_pte_update(mapte, npte);
- else {
- /* must set npte checked */
- xpq_queue_unchecked_pte_update(mapte, 0);
- xpq_queue_pte_update(mapte, npte);
- }
- } else {
- /* must remove opte checked */
- if (npte > pmap_mem_end) {
- /* must set npte unchecked */
- xpq_queue_pte_update(mapte, 0);
- xpq_queue_unchecked_pte_update(mapte, npte);
- } else
- /* must set npte checked */
- xpq_queue_pte_update(mapte, npte);
- }
- xpq_flush_queue();
-
- return opte;
-}
-
-__inline static pt_entry_t
-pte_atomic_update(pt_entry_t *pte, pt_entry_t *mapte, pt_entry_t npte)
-{
- pt_entry_t opte;
-
- opte = pte_atomic_update_ma(pte, mapte, npte);
-
- return pte_mtop(opte);
-}
-
-/*
- * Fixup the code segment to cover all potential executable mappings.
- * returns 0 if no changes to the code segment were made.
- */
-
-int
-pmap_exec_fixup(struct vm_map *map, struct trapframe *tf, struct pcb *pcb)
-{
- struct vm_map_entry *ent;
- struct pmap *pm = vm_map_pmap(map);
- vaddr_t va = 0;
-
- vm_map_lock_read(map);
- for (ent = (&map->header)->next; ent != &map->header; ent = ent->next) {
-
- /*
- * This entry has greater va than the entries before.
- * We need to make it point to the last page, not past it.
- */
-
- if (ent->protection & VM_PROT_EXECUTE)
- va = trunc_page(ent->end) - PAGE_SIZE;
- }
- vm_map_unlock_read(map);
- if (va == pm->pm_hiexec && tf->tf_cs == GSEL(GUCODEBIG_SEL, SEL_UPL))
- return (0);
-
- pm->pm_hiexec = va;
- if (pm->pm_hiexec > I386_MAX_EXE_ADDR) {
- pcb->pcb_cs = tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
- } else {
- pcb->pcb_cs = tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
- return (0);
- }
- return (1);
-}
-
-/*
- * p m a p k e n t e r f u n c t i o n s
- *
- * functions to quickly enter/remove pages from the kernel address
- * space. pmap_kremove is exported to MI kernel. we make use of
- * the recursive PTE mappings.
- */
-
-/*
- * pmap_kenter_pa: enter a kernel mapping without R/M (pv_entry) tracking
- *
- * => no need to lock anything, assume va is already allocated
- * => should be faster than normal pmap enter function
- */
-
-void
-pmap_kenter_pa(va, pa, prot)
- vaddr_t va;
- paddr_t pa;
- vm_prot_t prot;
-{
- pt_entry_t *pte, opte, npte;
- pt_entry_t *maptp;
-
- if (va < VM_MIN_KERNEL_ADDRESS)
- pte = vtopte(va);
- else
- pte = kvtopte(va);
-
- npte = ((prot & VM_PROT_WRITE) ? PG_RW : PG_RO) |
- PG_V | pmap_pg_g;
-
- if (pa >= pmap_pa_start && pa < pmap_pa_end) {
- npte |= xpmap_ptom(pa);
- } else {
- XENPRINTF(("pmap_kenter: va %08lx outside pa range %08lx\n",
- va, pa));
- npte |= pa;
- }
-
- maptp = (pt_entry_t *)vtomach((vaddr_t)pte);
- opte = pte_atomic_update_ma(pte, maptp, npte); /* zap! */
- XENPRINTK(("pmap_kenter_pa(%p,%p) %p, was %08x now %08x\n", (void *)va,
- (void *)pa, pte, opte, npte));
-#ifdef LARGEPAGES
- /* XXX For now... */
- if (opte & PG_PS)
- panic("pmap_kenter_pa: PG_PS");
-#endif
- if ((opte & (PG_V | PG_U)) == (PG_V | PG_U)) {
-#if defined(MULTIPROCESSOR)
- int32_t cpumask = 0;
-
- pmap_tlb_shootdown(pmap_kernel(), va, opte, &cpumask);
- pmap_tlb_shootnow(cpumask);
-#else
- /* Don't bother deferring in the single CPU case. */
- pmap_update_pg(va);
-#endif
- }
-}
-
-/*
- * pmap_kenter_ma: enter a kernel mapping without R/M (pv_entry) tracking
- *
- * => no need to lock anything, assume va is already allocated
- * => should be faster than normal pmap enter function
- */
-
-void pmap_kenter_ma __P((vaddr_t, paddr_t, vm_prot_t));
-
-void
-pmap_kenter_ma(va, ma, prot)
- vaddr_t va;
- paddr_t ma;
- vm_prot_t prot;
-{
- pt_entry_t *pte, opte, npte;
- pt_entry_t *maptp;
-
- KASSERT (va >= VM_MIN_KERNEL_ADDRESS);
- pte = kvtopte(va);
-
- npte = ma | ((prot & VM_PROT_WRITE) ? PG_RW : PG_RO) |
- PG_V | pmap_pg_g;
-
- maptp = (pt_entry_t *)vtomach((vaddr_t)pte);
- opte = pte_atomic_update_ma(pte, maptp, npte); /* zap! */
- XENPRINTK(("pmap_kenter_ma(%p,%p) %p, was %08x\n", (void *)va,
- (void *)ma, pte, opte));
-#ifdef LARGEPAGES
- /* XXX For now... */
- if (opte & PG_PS)
- panic("pmap_kenter_ma: PG_PS");
-#endif
- if ((opte & (PG_V | PG_U)) == (PG_V | PG_U)) {
-#if defined(MULTIPROCESSOR)
- int32_t cpumask = 0;
-
- pmap_tlb_shootdown(pmap_kernel(), va, opte, &cpumask);
- pmap_tlb_shootnow(cpumask);
-#else
- /* Don't bother deferring in the single CPU case. */
- pmap_update_pg(va);
-#endif
- }
-}
-
-/*
- * pmap_kremove: remove a kernel mapping(s) without R/M (pv_entry) tracking
- *
- * => no need to lock anything
- * => caller must dispose of any vm_page mapped in the va range
- * => note: not an inline function
- * => we assume the va is page aligned and the len is a multiple of PAGE_SIZE
- * => we assume kernel only unmaps valid addresses and thus don't bother
- * checking the valid bit before doing TLB flushing
- */
-
-void
-pmap_kremove(va, len)
- vaddr_t va;
- vsize_t len;
-{
- pt_entry_t *pte, opte;
- pt_entry_t *maptp;
- int32_t cpumask = 0;
-
- XENPRINTK(("pmap_kremove va %p, len %08lx\n", (void *)va, len));
- len >>= PAGE_SHIFT;
- for ( /* null */ ; len ; len--, va += PAGE_SIZE) {
- if (va < VM_MIN_KERNEL_ADDRESS)
- pte = vtopte(va);
- else
- pte = kvtopte(va);
- maptp = (pt_entry_t *)vtomach((vaddr_t)pte);
- opte = pte_atomic_update_ma(pte, maptp, 0); /* zap! */
- XENPRINTK(("pmap_kremove pte %p, was %08x\n", pte, opte));
-#ifdef LARGEPAGES
- /* XXX For now... */
- if (opte & PG_PS)
- panic("pmap_kremove: PG_PS");
-#endif
-#ifdef DIAGNOSTIC
- if (opte & PG_PVLIST)
- panic("pmap_kremove: PG_PVLIST mapping for 0x%lx",
- va);
-#endif
- if ((opte & (PG_V | PG_U)) == (PG_V | PG_U))
- pmap_tlb_shootdown(pmap_kernel(), va, opte, &cpumask);
- }
- pmap_tlb_shootnow(cpumask);
-}
-
-/*
- * p m a p i n i t f u n c t i o n s
- *
- * pmap_bootstrap and pmap_init are called during system startup
- * to init the pmap module. pmap_bootstrap() does a low level
- * init just to get things rolling. pmap_init() finishes the job.
- */
-
-/*
- * pmap_bootstrap: get the system in a state where it can run with VM
- * properly enabled (called before main()). the VM system is
- * fully init'd later...
- *
- * => on i386, locore.s has already enabled the MMU by allocating
- * a PDP for the kernel, and nkpde PTP's for the kernel.
- * => kva_start is the first free virtual address in kernel space
- */
-
-void
-pmap_bootstrap(kva_start)
- vaddr_t kva_start;
-{
- struct pmap *kpm;
- vaddr_t kva;
- pt_entry_t *pte;
- pt_entry_t *maptp;
- int i;
-
- /*
- * set up our local static global vars that keep track of the
- * usage of KVM before kernel_map is set up
- */
-
- virtual_avail = kva_start; /* first free KVA */
- virtual_end = VM_MAX_KERNEL_ADDRESS; /* last KVA */
-
- /*
- * find out where physical memory ends on the real hardware.
- */
-
- if (xen_start_info.flags & SIF_PRIVILEGED)
- pmap_mem_end = find_pmap_mem_end(kva_start);
-
- /*
- * set up protection_codes: we need to be able to convert from
- * a MI protection code (some combo of VM_PROT...) to something
- * we can jam into a i386 PTE.
- */
-
- protection_codes[VM_PROT_NONE] = 0; /* --- */
- protection_codes[VM_PROT_EXECUTE] = PG_X; /* --x */
- protection_codes[VM_PROT_READ] = PG_RO; /* -r- */
- protection_codes[VM_PROT_READ|VM_PROT_EXECUTE] = PG_RO|PG_X;/* -rx */
- protection_codes[VM_PROT_WRITE] = PG_RW; /* w-- */
- protection_codes[VM_PROT_WRITE|VM_PROT_EXECUTE] = PG_RW|PG_X;/* w-x */
- protection_codes[VM_PROT_WRITE|VM_PROT_READ] = PG_RW; /* wr- */
- protection_codes[VM_PROT_ALL] = PG_RW|PG_X; /* wrx */
-
- /*
- * now we init the kernel's pmap
- *
- * the kernel pmap's pm_obj is not used for much. however, in
- * user pmaps the pm_obj contains the list of active PTPs.
- * the pm_obj currently does not have a pager. it might be possible
- * to add a pager that would allow a process to read-only mmap its
- * own page tables (fast user level vtophys?). this may or may not
- * be useful.
- */
-
- kpm = pmap_kernel();
- simple_lock_init(&kpm->pm_obj.vmobjlock);
- kpm->pm_obj.pgops = NULL;
- TAILQ_INIT(&kpm->pm_obj.memq);
- kpm->pm_obj.uo_npages = 0;
- kpm->pm_obj.uo_refs = 1;
- memset(&kpm->pm_list, 0, sizeof(kpm->pm_list)); /* pm_list not used */
- kpm->pm_pdir = (pd_entry_t *)(lwp0.l_addr->u_pcb.pcb_cr3 + KERNBASE);
- XENPRINTF(("pm_pdirpa %p PTDpaddr %p\n",
- (void *)lwp0.l_addr->u_pcb.pcb_cr3, (void *)PTDpaddr));
- kpm->pm_pdirpa = (u_int32_t) lwp0.l_addr->u_pcb.pcb_cr3;
- kpm->pm_stats.wired_count = kpm->pm_stats.resident_count =
- x86_btop(kva_start - VM_MIN_KERNEL_ADDRESS);
-
- /*
- * the above is just a rough estimate and not critical to the proper
- * operation of the system.
- */
-
- /*
- * Begin to enable global TLB entries if they are supported.
- * The G bit has no effect until the CR4_PGE bit is set in CR4,
- * which happens in cpu_init(), which is run on each cpu
- * (and happens later)
- */
-
- if (cpu_feature & CPUID_PGE) {
- pmap_pg_g = PG_G; /* enable software */
-
- /* add PG_G attribute to already mapped kernel pages */
- for (kva = VM_MIN_KERNEL_ADDRESS ; kva < virtual_avail ;
- kva += PAGE_SIZE)
- if (pmap_valid_entry(PTE_BASE[x86_btop(kva)])) {
-#if !defined(XEN)
- PTE_BASE[x86_btop(kva)] |= PG_G;
-#else
- maptp = (pt_entry_t *)vtomach(
- (vaddr_t)&PTE_BASE[x86_btop(kva)]);
- PTE_SETBITS(&PTE_BASE[x86_btop(kva)], maptp,
- PG_G);
- }
- PTE_UPDATES_FLUSH();
-#endif
- }
-
-#ifdef LARGEPAGES
- /*
- * enable large pages if they are supported.
- */
-
- if (cpu_feature & CPUID_PSE) {
- paddr_t pa;
- vaddr_t kva_end;
- pd_entry_t *pde;
- pd_entry_t *mapdp;
- extern char _etext;
-
- lcr4(rcr4() | CR4_PSE); /* enable hardware (via %cr4) */
- pmap_largepages = 1; /* enable software */
-
- /*
- * the TLB must be flushed after enabling large pages
- * on Pentium CPUs, according to section 3.6.2.2 of
- * "Intel Architecture Software Developer's Manual,
- * Volume 3: System Programming".
- */
- tlbflush();
-
- /*
- * now, remap the kernel text using large pages. we
- * assume that the linker has properly aligned the
- * .data segment to a 4MB boundary.
- */
- kva_end = roundup((vaddr_t)&_etext, NBPD);
- for (pa = 0, kva = KERNBASE; kva < kva_end;
- kva += NBPD, pa += NBPD) {
- pde = &kpm->pm_pdir[pdei(kva)];
- mapdp = (pt_entry_t *)vtomach((vaddr_t)pde);
- PDE_SET(pde, mapdp, pa | pmap_pg_g | PG_PS |
- PG_KR | PG_V); /* zap! */
- tlbflush();
- }
- }
-#endif /* LARGEPAGES */
-
- /*
- * now we allocate the "special" VAs which are used for tmp mappings
- * by the pmap (and other modules). we allocate the VAs by advancing
- * virtual_avail (note that there are no pages mapped at these VAs).
- * we find the PTE that maps the allocated VA via the linear PTE
- * mapping.
- */
-
- pte = PTE_BASE + x86_btop(virtual_avail);
-
-#ifdef MULTIPROCESSOR
- /*
- * Waste some VA space to avoid false sharing of cache lines
- * for page table pages: Give each possible CPU a cache line
- * of PTE's (8) to play with, though we only need 4. We could
- * recycle some of this waste by putting the idle stacks here
- * as well; we could waste less space if we knew the largest
- * CPU ID beforehand.
- */
- csrcp = (caddr_t) virtual_avail; csrc_pte = pte;
-
- cdstp = (caddr_t) virtual_avail+PAGE_SIZE; cdst_pte = pte+1;
-
- zerop = (caddr_t) virtual_avail+PAGE_SIZE*2; zero_pte = pte+2;
-
- ptpp = (caddr_t) virtual_avail+PAGE_SIZE*3; ptp_pte = pte+3;
-
- virtual_avail += PAGE_SIZE * X86_MAXPROCS * NPTECL;
- pte += X86_MAXPROCS * NPTECL;
-#else
- csrcp = (caddr_t) virtual_avail; csrc_pte = pte; /* allocate */
- virtual_avail += PAGE_SIZE; pte++; /* advance
*/
-
- cdstp = (caddr_t) virtual_avail; cdst_pte = pte;
- virtual_avail += PAGE_SIZE; pte++;
-
- zerop = (caddr_t) virtual_avail; zero_pte = pte;
- virtual_avail += PAGE_SIZE; pte++;
-
- ptpp = (caddr_t) virtual_avail; ptp_pte = pte;
- virtual_avail += PAGE_SIZE; pte++;
-#endif
-
- XENPRINTK(("pmap_bootstrap csrcp %p cdstp %p zerop %p ptpp %p\n",
- csrc_pte, cdst_pte, zero_pte, ptp_pte));
- /*
- * Nothing after this point actually needs pte;
- */
- pte = (void *)0xdeadbeef;
-
- /* XXX: vmmap used by mem.c... should be uvm_map_reserve */
- vmmap = (char *)virtual_avail; /* don't need pte */
- virtual_avail += PAGE_SIZE;
-
- msgbuf_vaddr = virtual_avail; /* don't need pte */
- virtual_avail += round_page(MSGBUFSIZE);
-
- idt_vaddr = virtual_avail; /* don't need pte */
- virtual_avail += PAGE_SIZE;
- idt_paddr = avail_start; /* steal a page */
- avail_start += PAGE_SIZE;
-
-#if defined(I586_CPU)
- /* pentium f00f bug stuff */
- pentium_idt_vaddr = virtual_avail; /* don't need pte */
- virtual_avail += PAGE_SIZE;
-#endif
-
- /*
- * now we reserve some VM for mapping pages when doing a crash dump
- */
-
- virtual_avail = reserve_dumppages(virtual_avail);
-
- /*
- * init the static-global locks and global lists.
- */
-
-#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
- spinlockinit(&pmap_main_lock, "pmaplk", 0);
-#endif
- simple_lock_init(&pvalloc_lock);
- simple_lock_init(&pmaps_lock);
- LIST_INIT(&pmaps);
- TAILQ_INIT(&pv_freepages);
- TAILQ_INIT(&pv_unusedpgs);
-
- /*
- * initialize the pmap pool.
- */
-
- pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, 0, 0, "pmappl",
- &pool_allocator_nointr);
-
- /*
- * Initialize the TLB shootdown queues.
- */
-
- __cpu_simple_lock_init(&pmap_tlb_shootdown_job_lock);
-
- for (i = 0; i < X86_MAXPROCS; i++) {
- TAILQ_INIT(&pmap_tlb_shootdown_q[i].pq_head);
- __cpu_simple_lock_init(&pmap_tlb_shootdown_q[i].pq_slock);
- }
-
- /*
- * initialize the PDE pool and cache.
- */
- pool_init(&pmap_pdp_pool, PAGE_SIZE, 0, 0, 0, "pdppl",
- &pool_allocator_nointr);
- pool_cache_init(&pmap_pdp_cache, &pmap_pdp_pool,
- pmap_pdp_ctor, pmap_pdp_dtor, NULL);
-
- /*
- * ensure the TLB is sync'd with reality by flushing it...
- */
-
- tlbflush();
-}
-
-/*
- * pmap_init: called from uvm_init, our job is to get the pmap
- * system ready to manage mappings... this mainly means initing
- * the pv_entry stuff.
- */
-
-void
-pmap_init()
-{
- int i;
-
- /*
- * now we need to free enough pv_entry structures to allow us to get
- * the kmem_map/kmem_object allocated and inited (done after this
- * function is finished). to do this we allocate one bootstrap page out
- * of kernel_map and use it to provide an initial pool of pv_entry
- * structures. we never free this page.
- */
-
- pv_initpage = (struct pv_page *) uvm_km_alloc(kernel_map, PAGE_SIZE);
- if (pv_initpage == NULL)
- panic("pmap_init: pv_initpage");
- pv_cachedva = 0; /* a VA we have allocated but not used yet */
- pv_nfpvents = 0;
- (void) pmap_add_pvpage(pv_initpage, FALSE);
-
- pj_page = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE);
- if (pj_page == NULL)
- panic("pmap_init: pj_page");
-
- for (i = 0;
- i < (PAGE_SIZE / sizeof (union pmap_tlb_shootdown_job_al) - 1);
- i++)
- pj_page[i].pja_job.pj_nextfree = &pj_page[i + 1].pja_job;
- pj_page[i].pja_job.pj_nextfree = NULL;
- pj_free = &pj_page[0];
-
- /*
- * done: pmap module is up (and ready for business)
- */
-
- pmap_initialized = TRUE;
-}
-
-/*
- * p v _ e n t r y f u n c t i o n s
- */
-
-/*
- * pv_entry allocation functions:
- * the main pv_entry allocation functions are:
- * pmap_alloc_pv: allocate a pv_entry structure
- * pmap_free_pv: free one pv_entry
- * pmap_free_pvs: free a list of pv_entrys
- *
- * the rest are helper functions
- */
-
-/*
- * pmap_alloc_pv: inline function to allocate a pv_entry structure
- * => we lock pvalloc_lock
- * => if we fail, we call out to pmap_alloc_pvpage
- * => 3 modes:
- * ALLOCPV_NEED = we really need a pv_entry, even if we have to steal it
- * ALLOCPV_TRY = we want a pv_entry, but not enough to steal
- * ALLOCPV_NONEED = we are trying to grow our free list, don't really need
- * one now
- *
- * "try" is for optional functions like pmap_copy().
- */
-
-__inline static struct pv_entry *
-pmap_alloc_pv(pmap, mode)
- struct pmap *pmap;
- int mode;
-{
- struct pv_page *pvpage;
- struct pv_entry *pv;
-
- simple_lock(&pvalloc_lock);
-
- pvpage = TAILQ_FIRST(&pv_freepages);
- if (pvpage != NULL) {
- pvpage->pvinfo.pvpi_nfree--;
- if (pvpage->pvinfo.pvpi_nfree == 0) {
- /* nothing left in this one? */
- TAILQ_REMOVE(&pv_freepages, pvpage, pvinfo.pvpi_list);
- }
- pv = pvpage->pvinfo.pvpi_pvfree;
- KASSERT(pv);
- pvpage->pvinfo.pvpi_pvfree = SPLAY_RIGHT(pv, pv_node);
- pv_nfpvents--; /* took one from pool */
- } else {
- pv = NULL; /* need more of them */
- }
-
- /*
- * if below low water mark or we didn't get a pv_entry we try and
- * create more pv_entrys ...
- */
-
- if (pv_nfpvents < PVE_LOWAT || pv == NULL) {
- if (pv == NULL)
- pv = pmap_alloc_pvpage(pmap, (mode == ALLOCPV_TRY) ?
- mode : ALLOCPV_NEED);
- else
- (void) pmap_alloc_pvpage(pmap, ALLOCPV_NONEED);
- }
- simple_unlock(&pvalloc_lock);
- return(pv);
-}
-
-/*
- * pmap_alloc_pvpage: maybe allocate a new pvpage
- *
- * if need_entry is false: try and allocate a new pv_page
- * if need_entry is true: try and allocate a new pv_page and return a
- * new pv_entry from it. if we are unable to allocate a pv_page
- * we make a last ditch effort to steal a pv_page from some other
- * mapping. if that fails, we panic...
- *
- * => we assume that the caller holds pvalloc_lock
- */
-
-static struct pv_entry *
-pmap_alloc_pvpage(pmap, mode)
- struct pmap *pmap;
- int mode;
-{
- struct vm_page *pg;
- struct pv_page *pvpage;
- struct pv_entry *pv;
- int s;
-
- /*
- * if we need_entry and we've got unused pv_pages, allocate from there
- */
-
- pvpage = TAILQ_FIRST(&pv_unusedpgs);
- if (mode != ALLOCPV_NONEED && pvpage != NULL) {
-
- /* move it to pv_freepages list */
- TAILQ_REMOVE(&pv_unusedpgs, pvpage, pvinfo.pvpi_list);
- TAILQ_INSERT_HEAD(&pv_freepages, pvpage, pvinfo.pvpi_list);
-
- /* allocate a pv_entry */
- pvpage->pvinfo.pvpi_nfree--; /* can't go to zero */
- pv = pvpage->pvinfo.pvpi_pvfree;
- KASSERT(pv);
- pvpage->pvinfo.pvpi_pvfree = SPLAY_RIGHT(pv, pv_node);
- pv_nfpvents--; /* took one from pool */
- return(pv);
- }
-
- /*
- * see if we've got a cached unmapped VA that we can map a page in.
- * if not, try to allocate one.
- */
-
- if (pv_cachedva == 0) {
- s = splvm(); /* must protect kmem_map with splvm! */
- pv_cachedva = uvm_km_kmemalloc(kmem_map, NULL, PAGE_SIZE,
- UVM_KMF_TRYLOCK|UVM_KMF_VALLOC);
- splx(s);
- if (pv_cachedva == 0) {
- return (NULL);
- }
- }
-
- pg = uvm_pagealloc(NULL, pv_cachedva - vm_map_min(kernel_map), NULL,
- UVM_PGA_USERESERVE);
- if (pg == NULL)
- return (NULL);
- pg->flags &= ~PG_BUSY; /* never busy */
-
- /*
- * add a mapping for our new pv_page and free its entrys (save one!)
- *
- * NOTE: If we are allocating a PV page for the kernel pmap, the
- * pmap is already locked! (...but entering the mapping is safe...)
- */
-
- pmap_kenter_pa(pv_cachedva, VM_PAGE_TO_PHYS(pg),
- VM_PROT_READ | VM_PROT_WRITE);
- pmap_update(pmap_kernel());
- pvpage = (struct pv_page *) pv_cachedva;
- pv_cachedva = 0;
- return (pmap_add_pvpage(pvpage, mode != ALLOCPV_NONEED));
-}
-
-/*
- * pmap_add_pvpage: add a pv_page's pv_entrys to the free list
- *
- * => caller must hold pvalloc_lock
- * => if need_entry is true, we allocate and return one pv_entry
- */
-
-static struct pv_entry *
-pmap_add_pvpage(pvp, need_entry)
- struct pv_page *pvp;
- boolean_t need_entry;
-{
- int tofree, lcv;
-
- /* do we need to return one? */
- tofree = (need_entry) ? PVE_PER_PVPAGE - 1 : PVE_PER_PVPAGE;
-
- pvp->pvinfo.pvpi_pvfree = NULL;
- pvp->pvinfo.pvpi_nfree = tofree;
- for (lcv = 0 ; lcv < tofree ; lcv++) {
- SPLAY_RIGHT(&pvp->pvents[lcv], pv_node) =
- pvp->pvinfo.pvpi_pvfree;
- pvp->pvinfo.pvpi_pvfree = &pvp->pvents[lcv];
- }
- if (need_entry)
- TAILQ_INSERT_TAIL(&pv_freepages, pvp, pvinfo.pvpi_list);
- else
- TAILQ_INSERT_TAIL(&pv_unusedpgs, pvp, pvinfo.pvpi_list);
- pv_nfpvents += tofree;
- return((need_entry) ? &pvp->pvents[lcv] : NULL);
-}
-
-/*
- * pmap_free_pv_doit: actually free a pv_entry
- *
- * => do not call this directly! instead use either
- * 1. pmap_free_pv ==> free a single pv_entry
- * 2. pmap_free_pvs => free a list of pv_entrys
- * => we must be holding pvalloc_lock
- */
-
-__inline static void
-pmap_free_pv_doit(pv)
- struct pv_entry *pv;
-{
- struct pv_page *pvp;
-
- pvp = (struct pv_page *) x86_trunc_page(pv);
- pv_nfpvents++;
- pvp->pvinfo.pvpi_nfree++;
-
- /* nfree == 1 => fully allocated page just became partly allocated */
- if (pvp->pvinfo.pvpi_nfree == 1) {
- TAILQ_INSERT_HEAD(&pv_freepages, pvp, pvinfo.pvpi_list);
- }
-
- /* free it */
- SPLAY_RIGHT(pv, pv_node) = pvp->pvinfo.pvpi_pvfree;
- pvp->pvinfo.pvpi_pvfree = pv;
-
- /*
- * are all pv_page's pv_entry's free? move it to unused queue.
- */
-
- if (pvp->pvinfo.pvpi_nfree == PVE_PER_PVPAGE) {
- TAILQ_REMOVE(&pv_freepages, pvp, pvinfo.pvpi_list);
- TAILQ_INSERT_HEAD(&pv_unusedpgs, pvp, pvinfo.pvpi_list);
- }
-}
-
-/*
- * pmap_free_pv: free a single pv_entry
- *
- * => we gain the pvalloc_lock
- */
-
-__inline static void
-pmap_free_pv(pmap, pv)
- struct pmap *pmap;
- struct pv_entry *pv;
-{
- simple_lock(&pvalloc_lock);
- pmap_free_pv_doit(pv);
-
- /*
- * Can't free the PV page if the PV entries were associated with
- * the kernel pmap; the pmap is already locked.
- */
- if (pv_nfpvents > PVE_HIWAT && TAILQ_FIRST(&pv_unusedpgs) != NULL &&
- pmap != pmap_kernel())
- pmap_free_pvpage();
-
- simple_unlock(&pvalloc_lock);
-}
-
-/*
- * pmap_free_pvs: free a list of pv_entrys
- *
- * => we gain the pvalloc_lock
- */
-
-__inline static void
-pmap_free_pvs(pmap, pvs)
- struct pmap *pmap;
- struct pv_entry *pvs;
-{
- struct pv_entry *nextpv;
-
- simple_lock(&pvalloc_lock);
-
- for ( /* null */ ; pvs != NULL ; pvs = nextpv) {
- nextpv = SPLAY_RIGHT(pvs, pv_node);
- pmap_free_pv_doit(pvs);
- }
-
- /*
- * Can't free the PV page if the PV entries were associated with
- * the kernel pmap; the pmap is already locked.
- */
- if (pv_nfpvents > PVE_HIWAT && TAILQ_FIRST(&pv_unusedpgs) != NULL &&
- pmap != pmap_kernel())
- pmap_free_pvpage();
-
- simple_unlock(&pvalloc_lock);
-}
-
-
-/*
- * pmap_free_pvpage: try and free an unused pv_page structure
- *
- * => assume caller is holding the pvalloc_lock and that
- * there is a page on the pv_unusedpgs list
- * => if we can't get a lock on the kmem_map we try again later
- */
-
-static void
-pmap_free_pvpage()
-{
- int s;
- struct vm_map *map;
- struct vm_map_entry *dead_entries;
- struct pv_page *pvp;
-
- s = splvm(); /* protect kmem_map */
-
- pvp = TAILQ_FIRST(&pv_unusedpgs);
-
- /*
- * note: watch out for pv_initpage which is allocated out of
- * kernel_map rather than kmem_map.
- */
-
- if (pvp == pv_initpage)
- map = kernel_map;
- else
- map = kmem_map;
- if (vm_map_lock_try(map)) {
-
- /* remove pvp from pv_unusedpgs */
- TAILQ_REMOVE(&pv_unusedpgs, pvp, pvinfo.pvpi_list);
-
- /* unmap the page */
- dead_entries = NULL;
- uvm_unmap_remove(map, (vaddr_t)pvp, ((vaddr_t)pvp) + PAGE_SIZE,
- &dead_entries);
- vm_map_unlock(map);
-
- if (dead_entries != NULL)
- uvm_unmap_detach(dead_entries, 0);
-
- pv_nfpvents -= PVE_PER_PVPAGE; /* update free count */
- }
- if (pvp == pv_initpage)
- /* no more initpage, we've freed it */
- pv_initpage = NULL;
-
- splx(s);
-}
-
-/*
- * pmap_lock_pvhs: Lock pvh1 and optional pvh2
- * Observe locking order when locking both pvhs
- */
-
-__inline static void
-pmap_lock_pvhs(struct pv_head *pvh1, struct pv_head *pvh2)
-{
-
- if (pvh2 == NULL) {
- simple_lock(&pvh1->pvh_lock);
- return;
- }
-
- if (pvh1 < pvh2) {
- simple_lock(&pvh1->pvh_lock);
- simple_lock(&pvh2->pvh_lock);
- } else {
- simple_lock(&pvh2->pvh_lock);
- simple_lock(&pvh1->pvh_lock);
- }
-}
-
-
-/*
- * main pv_entry manipulation functions:
- * pmap_enter_pv: enter a mapping onto a pv_head list
- * pmap_remove_pv: remove a mappiing from a pv_head list
- *
- * NOTE: Both pmap_enter_pv and pmap_remove_pv expect the caller to lock
- * the pvh before calling
- */
-
-/*
- * pmap_enter_pv: enter a mapping onto a pv_head lst
- *
- * => caller should hold the proper lock on pmap_main_lock
- * => caller should have pmap locked
- * => caller should have the pv_head locked
- * => caller should adjust ptp's wire_count before calling
- */
-
-__inline static void
-pmap_enter_pv(pvh, pve, pmap, va, ptp)
- struct pv_head *pvh;
- struct pv_entry *pve; /* preallocated pve for us to use */
- struct pmap *pmap;
- vaddr_t va;
- struct vm_page *ptp; /* PTP in pmap that maps this VA */
-{
- pve->pv_pmap = pmap;
- pve->pv_va = va;
- pve->pv_ptp = ptp; /* NULL for kernel pmap */
- SPLAY_INSERT(pvtree, &pvh->pvh_root, pve); /* add to locked list */
-}
-
-/*
- * pmap_remove_pv: try to remove a mapping from a pv_list
- *
- * => caller should hold proper lock on pmap_main_lock
- * => pmap should be locked
- * => caller should hold lock on pv_head [so that attrs can be adjusted]
- * => caller should adjust ptp's wire_count and free PTP if needed
- * => we return the removed pve
- */
-
-__inline static struct pv_entry *
-pmap_remove_pv(pvh, pmap, va)
- struct pv_head *pvh;
- struct pmap *pmap;
- vaddr_t va;
-{
- struct pv_entry tmp, *pve;
-
- tmp.pv_pmap = pmap;
- tmp.pv_va = va;
- pve = SPLAY_FIND(pvtree, &pvh->pvh_root, &tmp);
- if (pve == NULL)
- return (NULL);
- SPLAY_REMOVE(pvtree, &pvh->pvh_root, pve);
- return(pve); /* return removed pve */
-}
-
-/*
- * p t p f u n c t i o n s
- */
-
-/*
- * pmap_alloc_ptp: allocate a PTP for a PMAP
- *
- * => pmap should already be locked by caller
- * => we use the ptp's wire_count to count the number of active mappings
- * in the PTP (we start it at one to prevent any chance this PTP
- * will ever leak onto the active/inactive queues)
- */
-
-__inline static struct vm_page *
-pmap_alloc_ptp(pmap, pde_index)
- struct pmap *pmap;
- int pde_index;
-{
- struct vm_page *ptp;
- pd_entry_t *mapdp;
-
- ptp = uvm_pagealloc(&pmap->pm_obj, ptp_i2o(pde_index), NULL,
- UVM_PGA_USERESERVE|UVM_PGA_ZERO);
- if (ptp == NULL)
- return(NULL);
-
- /* got one! */
- ptp->flags &= ~PG_BUSY; /* never busy */
- ptp->wire_count = 1; /* no mappings yet */
- mapdp = (pt_entry_t *)vtomach((vaddr_t)&pmap->pm_pdir[pde_index]);
- PDE_SET(&pmap->pm_pdir[pde_index], mapdp,
- (pd_entry_t) (VM_PAGE_TO_PHYS(ptp) | PG_u | PG_RW | PG_V));
- pmap->pm_stats.resident_count++; /* count PTP as resident */
- pmap->pm_ptphint = ptp;
- return(ptp);
-}
-
-/*
- * pmap_get_ptp: get a PTP (if there isn't one, allocate a new one)
- *
- * => pmap should NOT be pmap_kernel()
- * => pmap should be locked
- */
-
-static struct vm_page *
-pmap_get_ptp(pmap, pde_index)
- struct pmap *pmap;
- int pde_index;
-{
- struct vm_page *ptp;
-
- if (pmap_valid_entry(pmap->pm_pdir[pde_index])) {
-
- /* valid... check hint (saves us a PA->PG lookup) */
- if (pmap->pm_ptphint &&
- (PDE_GET(&pmap->pm_pdir[pde_index]) & PG_FRAME) ==
- VM_PAGE_TO_PHYS(pmap->pm_ptphint))
- return(pmap->pm_ptphint);
-
- ptp = uvm_pagelookup(&pmap->pm_obj, ptp_i2o(pde_index));
-#ifdef DIAGNOSTIC
- if (ptp == NULL)
- panic("pmap_get_ptp: unmanaged user PTP");
-#endif
- pmap->pm_ptphint = ptp;
- return(ptp);
- }
-
- /* allocate a new PTP (updates ptphint) */
- return(pmap_alloc_ptp(pmap, pde_index));
-}
-
-/*
- * p m a p l i f e c y c l e f u n c t i o n s
- */
-
-/*
- * pmap_pdp_ctor: constructor for the PDP cache.
- */
-
-int
-pmap_pdp_ctor(void *arg, void *object, int flags)
-{
- pd_entry_t *pdir = object;
- paddr_t pdirpa;
-
- /*
- * NOTE: The `pmap_lock' is held when the PDP is allocated.
- * WE MUST NOT BLOCK!
- */
-
- /* fetch the physical address of the page directory. */
- (void) pmap_extract(pmap_kernel(), (vaddr_t) pdir, &pdirpa);
-
- XENPRINTF(("pmap_pdp_ctor %p %p\n", pdir, (void *)pdirpa));
-
- /* zero init area */
- memset(pdir, 0, PDSLOT_PTE * sizeof(pd_entry_t));
-
- /* put in recursive PDE to map the PTEs */
- pdir[PDSLOT_PTE] = xpmap_ptom(pdirpa | PG_V /* | PG_KW */);
-
- /* put in kernel VM PDEs */
- memcpy(&pdir[PDSLOT_KERN], &PDP_BASE[PDSLOT_KERN],
- nkpde * sizeof(pd_entry_t));
-
- /* zero the rest */
- memset(&pdir[PDSLOT_KERN + nkpde], 0,
- PAGE_SIZE - ((PDSLOT_KERN + nkpde) * sizeof(pd_entry_t)));
-
- pmap_enter(pmap_kernel(), (vaddr_t)pdir, pdirpa, VM_PROT_READ,
- VM_PROT_READ);
- pmap_update(pmap_kernel());
-
- /* pin page type */
- xpq_queue_pin_table(xpmap_ptom(pdirpa), XPQ_PIN_L2_TABLE);
- xpq_flush_queue();
-
- return (0);
-}
-
-void
-pmap_pdp_dtor(void *arg, void *object)
-{
- pd_entry_t *pdir = object;
- paddr_t pdirpa;
-
- /* fetch the physical address of the page directory. */
- pdirpa = PDE_GET(&pdir[PDSLOT_PTE]) & PG_FRAME;
-
- XENPRINTF(("pmap_pdp_dtor %p %p\n", pdir, (void *)pdirpa));
-
- /* unpin page type */
- xpq_queue_unpin_table(xpmap_ptom(pdirpa));
- xpq_flush_queue();
-}
-
-/*
- * pmap_create: create a pmap
- *
- * => note: old pmap interface took a "size" args which allowed for
- * the creation of "software only" pmaps (not in bsd).
- */
-
-struct pmap *
-pmap_create()
-{
- struct pmap *pmap;
- u_int gen;
-
- XENPRINTF(("pmap_create\n"));
- pmap = pool_get(&pmap_pmap_pool, PR_WAITOK);
-
- /* init uvm_object */
- simple_lock_init(&pmap->pm_obj.vmobjlock);
- pmap->pm_obj.pgops = NULL; /* currently not a mappable object */
- TAILQ_INIT(&pmap->pm_obj.memq);
- pmap->pm_obj.uo_npages = 0;
- pmap->pm_obj.uo_refs = 1;
- pmap->pm_stats.wired_count = 0;
- pmap->pm_stats.resident_count = 1; /* count the PDP allocd below */
- pmap->pm_ptphint = NULL;
- pmap->pm_hiexec = 0;
- pmap->pm_flags = 0;
- pmap->pm_cpus = 0;
-
- /* init the LDT */
- pmap->pm_ldt = NULL;
- pmap->pm_ldt_len = 0;
- pmap->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL);
-
- /* allocate PDP */
-
- /*
- * we need to lock pmaps_lock to prevent nkpde from changing on
- * us. note that there is no need to splvm to protect us from
- * malloc since malloc allocates out of a submap and we should
- * have already allocated kernel PTPs to cover the range...
- *
- * NOTE: WE MUST NOT BLOCK WHILE HOLDING THE `pmap_lock', nor
- * must we call pmap_growkernel() while holding it!
- */
-
- try_again:
- gen = pmap_pdp_cache_generation;
- pmap->pm_pdir = pool_cache_get(&pmap_pdp_cache, PR_WAITOK);
-
- simple_lock(&pmaps_lock);
-
- if (gen != pmap_pdp_cache_generation) {
- simple_unlock(&pmaps_lock);
- pool_cache_destruct_object(&pmap_pdp_cache, pmap->pm_pdir);
- goto try_again;
- }
-
- pmap->pm_pdirpa = PDE_GET(&pmap->pm_pdir[PDSLOT_PTE]) & PG_FRAME;
- XENPRINTF(("pmap_create %p set pm_pdirpa %p/%p slotval %p\n", pmap,
- (void *)pmap->pm_pdirpa,
- (void *)xpmap_ptom(pmap->pm_pdirpa),
- (void *)pmap->pm_pdir[PDSLOT_PTE]));
-
- LIST_INSERT_HEAD(&pmaps, pmap, pm_list);
-
- simple_unlock(&pmaps_lock);
-
- return (pmap);
-}
-
-/*
- * pmap_destroy: drop reference count on pmap. free pmap if
- * reference count goes to zero.
- */
-
-void
-pmap_destroy(pmap)
- struct pmap *pmap;
-{
- int refs;
-#ifdef DIAGNOSTIC
- struct cpu_info *ci;
- CPU_INFO_ITERATOR cii;
-#endif /* DIAGNOSTIC */
-
- /*
- * drop reference count
- */
-
- simple_lock(&pmap->pm_obj.vmobjlock);
- refs = --pmap->pm_obj.uo_refs;
- simple_unlock(&pmap->pm_obj.vmobjlock);
- if (refs > 0) {
- return;
- }
-
-#ifdef DIAGNOSTIC
- for (CPU_INFO_FOREACH(cii, ci))
- if (ci->ci_pmap == pmap)
- panic("destroying pmap being used");
-#endif /* DIAGNOSTIC */
-
- /*
- * reference count is zero, free pmap resources and then free pmap.
- */
-
- XENPRINTF(("pmap_destroy %p pm_pdirpa %p/%p\n", pmap,
- (void *)pmap->pm_pdirpa,
- (void *)xpmap_ptom(pmap->pm_pdirpa)));
-
- /*
- * remove it from global list of pmaps
- */
-
- simple_lock(&pmaps_lock);
- LIST_REMOVE(pmap, pm_list);
- simple_unlock(&pmaps_lock);
-
- /*
- * destroyed pmap shouldn't have remaining PTPs
- */
-
- KASSERT(pmap->pm_obj.uo_npages == 0);
- KASSERT(TAILQ_EMPTY(&pmap->pm_obj.memq));
-
- /*
- * MULTIPROCESSOR -- no need to flush out of other processors'
- * APTE space because we do that in pmap_unmap_ptes().
- */
- pool_cache_put(&pmap_pdp_cache, pmap->pm_pdir);
-
-#ifdef USER_LDT
- if (pmap->pm_flags & PMF_USER_LDT) {
- /*
- * no need to switch the LDT; this address space is gone,
- * nothing is using it.
- *
- * No need to lock the pmap for ldt_free (or anything else),
- * we're the last one to use it.
- */
- ldt_free(pmap);
- uvm_km_free(kernel_map, (vaddr_t)pmap->pm_ldt,
- pmap->pm_ldt_len * sizeof(union descriptor));
- }
-#endif
-
- pool_put(&pmap_pmap_pool, pmap);
-}
-
-/*
- * Add a reference to the specified pmap.
- */
-
-void
-pmap_reference(pmap)
- struct pmap *pmap;
-{
- simple_lock(&pmap->pm_obj.vmobjlock);
- pmap->pm_obj.uo_refs++;
- simple_unlock(&pmap->pm_obj.vmobjlock);
-}
-
-#if defined(PMAP_FORK)
-/*
- * pmap_fork: perform any necessary data structure manipulation when
- * a VM space is forked.
- */
-
-void
-pmap_fork(pmap1, pmap2)
- struct pmap *pmap1, *pmap2;
-{
- simple_lock(&pmap1->pm_obj.vmobjlock);
- simple_lock(&pmap2->pm_obj.vmobjlock);
-
-#ifdef USER_LDT
- /* Copy the LDT, if necessary. */
- if (pmap1->pm_flags & PMF_USER_LDT) {
- union descriptor *new_ldt;
- size_t len;
-
- len = pmap1->pm_ldt_len * sizeof(union descriptor);
- new_ldt = (union descriptor *)uvm_km_alloc(kernel_map, len);
- memcpy(new_ldt, pmap1->pm_ldt, len);
- pmap2->pm_ldt = new_ldt;
- pmap2->pm_ldt_len = pmap1->pm_ldt_len;
- pmap2->pm_flags |= PMF_USER_LDT;
- ldt_alloc(pmap2, new_ldt, len);
- }
-#endif /* USER_LDT */
-
- simple_unlock(&pmap2->pm_obj.vmobjlock);
- simple_unlock(&pmap1->pm_obj.vmobjlock);
-}
-#endif /* PMAP_FORK */
-
-#ifdef USER_LDT
-/*
- * pmap_ldt_cleanup: if the pmap has a local LDT, deallocate it, and
- * restore the default.
- */
-
-void
-pmap_ldt_cleanup(l)
- struct lwp *l;
-{
- struct pcb *pcb = &l->l_addr->u_pcb;
- pmap_t pmap = l->l_proc->p_vmspace->vm_map.pmap;
- union descriptor *old_ldt = NULL;
- size_t len = 0;
-
- simple_lock(&pmap->pm_obj.vmobjlock);
-
- if (pmap->pm_flags & PMF_USER_LDT) {
- ldt_free(pmap);
- pmap->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL);
- pcb->pcb_ldt_sel = pmap->pm_ldt_sel;
- if (pcb == curpcb)
- lldt(pcb->pcb_ldt_sel);
- old_ldt = pmap->pm_ldt;
- len = pmap->pm_ldt_len * sizeof(union descriptor);
- pmap->pm_ldt = NULL;
- pmap->pm_ldt_len = 0;
- pmap->pm_flags &= ~PMF_USER_LDT;
- }
-
- simple_unlock(&pmap->pm_obj.vmobjlock);
-
- if (old_ldt != NULL)
- uvm_km_free(kernel_map, (vaddr_t)old_ldt, len);
-}
-#endif /* USER_LDT */
-
-/*
- * pmap_activate: activate a process' pmap
- *
- * => called from cpu_switch()
- * => if lwp is the curlwp, then set ci_want_pmapload so that
- * actual MMU context switch will be done by pmap_load() later
- */
-
-void
-pmap_activate(l)
- struct lwp *l;
-{
- struct cpu_info *ci = curcpu();
- struct pmap *pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map);
-
- if (l == ci->ci_curlwp) {
- struct pcb *pcb;
-
- KASSERT(ci->ci_want_pmapload == 0);
- KASSERT(ci->ci_tlbstate != TLBSTATE_VALID);
-#ifdef KSTACK_CHECK_DR0
- /*
- * setup breakpoint on the top of stack
- */
- if (l == &lwp0)
- dr0(0, 0, 0, 0);
- else
- dr0(KSTACK_LOWEST_ADDR(l), 1, 3, 1);
-#endif
-
- /*
- * no need to switch to kernel vmspace because
- * it's a subset of any vmspace.
- */
-
- if (pmap == pmap_kernel()) {
- ci->ci_want_pmapload = 0;
- return;
- }
-
- pcb = &l->l_addr->u_pcb;
- pcb->pcb_ldt_sel = pmap->pm_ldt_sel;
-
- ci->ci_want_pmapload = 1;
- }
-}
-
-/*
- * pmap_reactivate: try to regain reference to the pmap.
- */
-
-static boolean_t
-pmap_reactivate(struct pmap *pmap)
-{
- struct cpu_info *ci = curcpu();
- u_int32_t cpumask = 1U << ci->ci_cpuid;
- int s;
- boolean_t result;
- u_int32_t oldcpus;
-
- /*
- * if we still have a lazy reference to this pmap,
- * we can assume that there was no tlb shootdown
- * for this pmap in the meantime.
- */
-
- s = splipi(); /* protect from tlb shootdown ipis. */
- oldcpus = pmap->pm_cpus;
- x86_atomic_setbits_l(&pmap->pm_cpus, cpumask);
- if (oldcpus & cpumask) {
- KASSERT(ci->ci_tlbstate == TLBSTATE_LAZY);
- /* got it */
- result = TRUE;
- } else {
- KASSERT(ci->ci_tlbstate == TLBSTATE_STALE);
- result = FALSE;
- }
- ci->ci_tlbstate = TLBSTATE_VALID;
- splx(s);
-
- return result;
-}
-
-/*
- * pmap_load: actually switch pmap. (fill in %cr3 and LDT info)
- */
-
-void
-pmap_load()
-{
- struct cpu_info *ci = curcpu();
- u_int32_t cpumask = 1U << ci->ci_cpuid;
- struct pmap *pmap;
- struct pmap *oldpmap;
- struct lwp *l;
- struct pcb *pcb;
- pd_entry_t *mapdp;
- int s;
-
- KASSERT(ci->ci_want_pmapload);
-
- l = ci->ci_curlwp;
- KASSERT(l != NULL);
- pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map);
- KASSERT(pmap != pmap_kernel());
- oldpmap = ci->ci_pmap;
-
- pcb = ci->ci_curpcb;
- KASSERT(pcb == &l->l_addr->u_pcb);
- /* loaded by pmap_activate */
- KASSERT(pcb->pcb_ldt_sel == pmap->pm_ldt_sel);
-
- if (pmap == oldpmap) {
- if (!pmap_reactivate(pmap)) {
-
- /*
- * pmap has been changed during deactivated.
- * our tlb may be stale.
- */
-
- tlbflush();
- }
-
- ci->ci_want_pmapload = 0;
- return;
- }
-
- /*
- * actually switch pmap.
- */
-
- x86_atomic_clearbits_l(&oldpmap->pm_cpus, cpumask);
-
- KASSERT((pmap->pm_cpus & cpumask) == 0);
-
- KERNEL_LOCK(LK_EXCLUSIVE | LK_CANRECURSE);
- pmap_reference(pmap);
- KERNEL_UNLOCK();
-
- /*
- * mark the pmap in use by this processor.
- */
-
- s = splipi();
- x86_atomic_setbits_l(&pmap->pm_cpus, cpumask);
- ci->ci_pmap = pmap;
- ci->ci_tlbstate = TLBSTATE_VALID;
- splx(s);
-
- /*
- * clear apdp slot before loading %cr3 since Xen only allows
- * linear pagetable mappings in the current pagetable.
- */
- KDASSERT(curapdp == 0);
- mapdp = (pt_entry_t *)vtomach((vaddr_t)APDP_PDE);
- PDE_CLEAR(APDP_PDE, mapdp);
-
- /*
- * update tss and load corresponding registers.
- */
-
- lldt(pcb->pcb_ldt_sel);
- pcb->pcb_cr3 = pmap->pm_pdirpa;
- lcr3(pcb->pcb_cr3);
-
- ci->ci_want_pmapload = 0;
-
- KERNEL_LOCK(LK_EXCLUSIVE | LK_CANRECURSE);
- pmap_destroy(oldpmap);
- KERNEL_UNLOCK();
-}
-
-/*
- * pmap_deactivate: deactivate a process' pmap
- */
-
-void
-pmap_deactivate(l)
- struct lwp *l;
-{
-
- if (l == curlwp)
- pmap_deactivate2(l);
-}
-
-/*
- * pmap_deactivate2: context switch version of pmap_deactivate.
- * always treat l as curlwp.
- */
-
-void
-pmap_deactivate2(l)
- struct lwp *l;
-{
- struct pmap *pmap;
- struct cpu_info *ci = curcpu();
-
- if (ci->ci_want_pmapload) {
- KASSERT(vm_map_pmap(&l->l_proc->p_vmspace->vm_map)
- != pmap_kernel());
- KASSERT(vm_map_pmap(&l->l_proc->p_vmspace->vm_map)
- != ci->ci_pmap || ci->ci_tlbstate != TLBSTATE_VALID);
-
- /*
- * userspace has not been touched.
- * nothing to do here.
- */
-
- ci->ci_want_pmapload = 0;
- return;
- }
-
- pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map);
-
- if (pmap == pmap_kernel()) {
- return;
- }
-
- KASSERT(ci->ci_pmap == pmap);
-
- KASSERT(ci->ci_tlbstate == TLBSTATE_VALID);
- ci->ci_tlbstate = TLBSTATE_LAZY;
- XENPRINTF(("pmap_deactivate %p ebp %p esp %p\n",
- l, (void *)l->l_addr->u_pcb.pcb_ebp,
- (void *)l->l_addr->u_pcb.pcb_esp));
-}
-
-/*
- * end of lifecycle functions
- */
-
-/*
- * some misc. functions
- */
-
-/*
- * pmap_extract: extract a PA for the given VA
- */
-
-boolean_t
-pmap_extract(pmap, va, pap)
- struct pmap *pmap;
- vaddr_t va;
- paddr_t *pap;
-{
- pt_entry_t *ptes, pte;
- pd_entry_t pde;
-
- if (__predict_true((pde = PDE_GET(&pmap->pm_pdir[pdei(va)])) != 0)) {
-#ifdef LARGEPAGES
- if (pde & PG_PS) {
- if (pap != NULL)
- *pap = (pde & PG_LGFRAME) | (va & ~PG_LGFRAME);
- return (TRUE);
- }
-#endif
-
- ptes = pmap_map_ptes(pmap);
- pte = PTE_GET(&ptes[x86_btop(va)]);
- pmap_unmap_ptes(pmap);
-
- if (__predict_true((pte & PG_V) != 0)) {
- if (pap != NULL)
- *pap = (pte & PG_FRAME) | (va & ~PG_FRAME);
- return (TRUE);
- }
- }
- return (FALSE);
-}
-
-
-/*
- * vtophys: virtual address to physical address. For use by
- * machine-dependent code only.
- */
-
-paddr_t
-vtophys(va)
- vaddr_t va;
-{
- paddr_t pa;
-
- if (pmap_extract(pmap_kernel(), va, &pa) == TRUE)
- return (pa);
- return (0);
-}
-
-
-/*
- * pmap_virtual_space: used during bootup [pmap_steal_memory] to
- * determine the bounds of the kernel virtual addess space.
- */
-
-void
-pmap_virtual_space(startp, endp)
- vaddr_t *startp;
- vaddr_t *endp;
-{
- *startp = virtual_avail;
- *endp = virtual_end;
-}
-
-/*
- * pmap_map: map a range of PAs into kvm
- *
- * => used during crash dump
- * => XXX: pmap_map() should be phased out?
- */
-
-vaddr_t
-pmap_map(va, spa, epa, prot)
- vaddr_t va;
- paddr_t spa, epa;
- vm_prot_t prot;
-{
- while (spa < epa) {
- pmap_enter(pmap_kernel(), va, spa, prot, 0);
- va += PAGE_SIZE;
- spa += PAGE_SIZE;
- }
- pmap_update(pmap_kernel());
- return va;
-}
-
-/*
- * pmap_zero_page: zero a page
- */
-
-void
-pmap_zero_page(pa)
- paddr_t pa;
-{
-#ifdef MULTIPROCESSOR
- int id = cpu_number();
-#endif
- pt_entry_t *zpte = PTESLEW(zero_pte, id);
- pt_entry_t *maptp;
- caddr_t zerova = VASLEW(zerop, id);
-
-#ifdef DIAGNOSTIC
- if (PTE_GET(zpte))
- panic("pmap_zero_page: lock botch");
-#endif
-
- maptp = (pt_entry_t *)vtomach((vaddr_t)zpte);
- PTE_SET(zpte, maptp, (pa & PG_FRAME) | PG_V | PG_RW); /* map in */
- pmap_update_pg((vaddr_t)zerova); /* flush TLB */
-
- memset(zerova, 0, PAGE_SIZE); /* zero */
- PTE_CLEAR(zpte, maptp); /* zap! */
-}
-
-/*
- * pmap_pagezeroidle: the same, for the idle loop page zero'er.
- * Returns TRUE if the page was zero'd, FALSE if we aborted for
- * some reason.
- */
-
-boolean_t
-pmap_pageidlezero(pa)
- paddr_t pa;
-{
-#ifdef MULTIPROCESSOR
- int id = cpu_number();
-#endif
- pt_entry_t *zpte = PTESLEW(zero_pte, id);
- pt_entry_t *maptp;
- caddr_t zerova = VASLEW(zerop, id);
- boolean_t rv = TRUE;
- int i, *ptr;
-
-#ifdef DIAGNOSTIC
- if (PTE_GET(zpte))
- panic("pmap_zero_page_uncached: lock botch");
-#endif
- maptp = (pt_entry_t *)vtomach((vaddr_t)zpte);
- PTE_SET(zpte, maptp, (pa & PG_FRAME) | PG_V | PG_RW); /* map in */
- pmap_update_pg((vaddr_t)zerova); /* flush TLB */
- for (i = 0, ptr = (int *) zerova; i < PAGE_SIZE / sizeof(int); i++) {
- if (sched_whichqs != 0) {
-
- /*
- * A process has become ready. Abort now,
- * so we don't keep it waiting while we
- * do slow memory access to finish this
- * page.
- */
-
- rv = FALSE;
- break;
- }
- *ptr++ = 0;
- }
-
- PTE_CLEAR(zpte, maptp); /* zap! */
- return (rv);
-}
-
-/*
- * pmap_copy_page: copy a page
- */
-
-void
-pmap_copy_page(srcpa, dstpa)
- paddr_t srcpa, dstpa;
-{
-#ifdef MULTIPROCESSOR
- int id = cpu_number();
-#endif
- pt_entry_t *spte = PTESLEW(csrc_pte,id), *maspte;
- pt_entry_t *dpte = PTESLEW(cdst_pte,id), *madpte;
- caddr_t csrcva = VASLEW(csrcp, id);
- caddr_t cdstva = VASLEW(cdstp, id);
-
-#ifdef DIAGNOSTIC
- if (PTE_GET(spte) || PTE_GET(dpte))
- panic("pmap_copy_page: lock botch");
-#endif
-
- maspte = (pt_entry_t *)vtomach((vaddr_t)spte);
- madpte = (pt_entry_t *)vtomach((vaddr_t)dpte);
- PTE_SET(spte, maspte, (srcpa & PG_FRAME) | PG_V | PG_RW);
- PTE_SET(dpte, madpte, (dstpa & PG_FRAME) | PG_V | PG_RW);
- pmap_update_2pg((vaddr_t)csrcva, (vaddr_t)cdstva);
- memcpy(cdstva, csrcva, PAGE_SIZE);
- PTE_CLEAR(spte, maspte); /* zap! */
- PTE_CLEAR(dpte, madpte); /* zap! */
-}
-
-/*
- * p m a p r e m o v e f u n c t i o n s
- *
- * functions that remove mappings
- */
-
-/*
- * pmap_remove_ptes: remove PTEs from a PTP
- *
- * => must have proper locking on pmap_master_lock
- * => caller must hold pmap's lock
- * => PTP must be mapped into KVA
- * => PTP should be null if pmap == pmap_kernel()
- */
-
-static void
-pmap_remove_ptes(pmap, ptp, ptpva, startva, endva, cpumaskp, flags)
- struct pmap *pmap;
- struct vm_page *ptp;
- vaddr_t ptpva;
- vaddr_t startva, endva;
- int32_t *cpumaskp;
- int flags;
-{
- struct pv_entry *pv_tofree = NULL; /* list of pv_entrys to free */
- struct pv_entry *pve;
- pt_entry_t *pte = (pt_entry_t *) ptpva;
- pt_entry_t opte;
- pt_entry_t *maptp;
-
- /*
- * note that ptpva points to the PTE that maps startva. this may
- * or may not be the first PTE in the PTP.
- *
- * we loop through the PTP while there are still PTEs to look at
- * and the wire_count is greater than 1 (because we use the wire_count
- * to keep track of the number of real PTEs in the PTP).
- */
-
- for (/*null*/; startva < endva && (ptp == NULL || ptp->wire_count > 1)
- ; pte++, startva += PAGE_SIZE) {
- struct vm_page *pg;
- struct vm_page_md *mdpg;
-
- if (!pmap_valid_entry(*pte))
- continue; /* VA not mapped */
- if ((flags & PMAP_REMOVE_SKIPWIRED) && (*pte & PG_W)) {
- continue;
- }
-
- /* atomically save the old PTE and zap! it */
- maptp = (pt_entry_t *)vtomach((vaddr_t)pte);
- opte = pte_atomic_update(pte, maptp, 0);
- pmap_exec_account(pmap, startva, opte, 0);
-
- if (opte & PG_W)
- pmap->pm_stats.wired_count--;
- pmap->pm_stats.resident_count--;
-
- if (opte & PG_U)
- pmap_tlb_shootdown(pmap, startva, opte, cpumaskp);
-
- if (ptp) {
- ptp->wire_count--; /* dropping a PTE */
- /* Make sure that the PDE is flushed */
- if ((ptp->wire_count <= 1) && !(opte & PG_U))
- pmap_tlb_shootdown(pmap, startva, opte,
- cpumaskp);
- }
-
- /*
- * if we are not on a pv_head list we are done.
- */
-
- if ((opte & PG_PVLIST) == 0) {
-#if defined(DIAGNOSTIC) && !defined(DOM0OPS)
- if (PHYS_TO_VM_PAGE(opte & PG_FRAME) != NULL)
- panic("pmap_remove_ptes: managed page without "
- "PG_PVLIST for 0x%lx", startva);
-#endif
- continue;
- }
-
- pg = PHYS_TO_VM_PAGE(opte & PG_FRAME);
-#ifdef DIAGNOSTIC
- if (pg == NULL)
- panic("pmap_remove_ptes: unmanaged page marked "
- "PG_PVLIST, va = 0x%lx, pa = 0x%lx",
- startva, (u_long)(opte & PG_FRAME));
-#endif
- mdpg = &pg->mdpage;
-
- /* sync R/M bits */
- simple_lock(&mdpg->mp_pvhead.pvh_lock);
- mdpg->mp_attrs |= (opte & (PG_U|PG_M));
- pve = pmap_remove_pv(&mdpg->mp_pvhead, pmap, startva);
- simple_unlock(&mdpg->mp_pvhead.pvh_lock);
-
- if (pve) {
- SPLAY_RIGHT(pve, pv_node) = pv_tofree;
- pv_tofree = pve;
- }
-
- /* end of "for" loop: time for next pte */
- }
- if (pv_tofree)
- pmap_free_pvs(pmap, pv_tofree);
-}
-
-
-/*
- * pmap_remove_pte: remove a single PTE from a PTP
- *
- * => must have proper locking on pmap_master_lock
- * => caller must hold pmap's lock
- * => PTP must be mapped into KVA
- * => PTP should be null if pmap == pmap_kernel()
- * => returns true if we removed a mapping
- */
-
-static boolean_t
-pmap_remove_pte(pmap, ptp, pte, va, cpumaskp, flags)
- struct pmap *pmap;
- struct vm_page *ptp;
- pt_entry_t *pte;
- vaddr_t va;
- int32_t *cpumaskp;
- int flags;
-{
- pt_entry_t opte;
- pt_entry_t *maptp;
- struct pv_entry *pve;
- struct vm_page *pg;
- struct vm_page_md *mdpg;
-
- if (!pmap_valid_entry(*pte))
- return(FALSE); /* VA not mapped */
- if ((flags & PMAP_REMOVE_SKIPWIRED) && (*pte & PG_W)) {
- return(FALSE);
- }
-
- /* atomically save the old PTE and zap! it */
- maptp = (pt_entry_t *)vtomach((vaddr_t)pte);
- opte = pte_atomic_update(pte, maptp, 0);
-
- XENPRINTK(("pmap_remove_pte %p, was %08x\n", pte, opte));
- pmap_exec_account(pmap, va, opte, 0);
-
- if (opte & PG_W)
- pmap->pm_stats.wired_count--;
- pmap->pm_stats.resident_count--;
-
- if (opte & PG_U)
- pmap_tlb_shootdown(pmap, va, opte, cpumaskp);
-
- if (ptp) {
- ptp->wire_count--; /* dropping a PTE */
- /* Make sure that the PDE is flushed */
- if ((ptp->wire_count <= 1) && !(opte & PG_U))
- pmap_tlb_shootdown(pmap, va, opte, cpumaskp);
-
- }
- /*
- * if we are not on a pv_head list we are done.
- */
-
- if ((opte & PG_PVLIST) == 0) {
-#if defined(DIAGNOSTIC) && !defined(DOM0OPS)
- if (PHYS_TO_VM_PAGE(opte & PG_FRAME) != NULL)
- panic("pmap_remove_pte: managed page without "
- "PG_PVLIST for 0x%lx", va);
-#endif
- return(TRUE);
- }
-
- pg = PHYS_TO_VM_PAGE(opte & PG_FRAME);
-#ifdef DIAGNOSTIC
- if (pg == NULL)
- panic("pmap_remove_pte: unmanaged page marked "
- "PG_PVLIST, va = 0x%lx, pa = 0x%lx", va,
- (u_long)(opte & PG_FRAME));
-#endif
- mdpg = &pg->mdpage;
-
- /* sync R/M bits */
- simple_lock(&mdpg->mp_pvhead.pvh_lock);
- mdpg->mp_attrs |= (opte & (PG_U|PG_M));
- pve = pmap_remove_pv(&mdpg->mp_pvhead, pmap, va);
- simple_unlock(&mdpg->mp_pvhead.pvh_lock);
-
- if (pve)
- pmap_free_pv(pmap, pve);
- return(TRUE);
-}
-
-/*
- * pmap_remove: top level mapping removal function
- *
- * => caller should not be holding any pmap locks
- */
-
-void
-pmap_remove(pmap, sva, eva)
- struct pmap *pmap;
- vaddr_t sva, eva;
-{
- pmap_do_remove(pmap, sva, eva, PMAP_REMOVE_ALL);
-}
-
-/*
- * pmap_do_remove: mapping removal guts
- *
- * => caller should not be holding any pmap locks
- */
-
-static void
-pmap_do_remove(pmap, sva, eva, flags)
- struct pmap *pmap;
- vaddr_t sva, eva;
- int flags;
-{
- pt_entry_t *ptes, opte;
- pt_entry_t *maptp;
- boolean_t result;
- paddr_t ptppa;
- vaddr_t blkendva;
- struct vm_page *ptp;
- int32_t cpumask = 0;
- TAILQ_HEAD(, vm_page) empty_ptps;
- struct cpu_info *ci;
- struct pmap *curpmap;
-
- /*
- * we lock in the pmap => pv_head direction
- */
-
- TAILQ_INIT(&empty_ptps);
-
- PMAP_MAP_TO_HEAD_LOCK();
-
- ptes = pmap_map_ptes(pmap); /* locks pmap */
-
- ci = curcpu();
- curpmap = ci->ci_pmap;
-
- /*
- * removing one page? take shortcut function.
- */
-
- if (sva + PAGE_SIZE == eva) {
- if (pmap_valid_entry(pmap->pm_pdir[pdei(sva)])) {
-
- /* PA of the PTP */
- ptppa = PDE_GET(&pmap->pm_pdir[pdei(sva)]) & PG_FRAME;
-
- /* get PTP if non-kernel mapping */
- if (pmap == pmap_kernel()) {
- /* we never free kernel PTPs */
- ptp = NULL;
- } else {
- if (pmap->pm_ptphint &&
- VM_PAGE_TO_PHYS(pmap->pm_ptphint) ==
- ptppa) {
- ptp = pmap->pm_ptphint;
- } else {
- ptp = PHYS_TO_VM_PAGE(ptppa);
-#ifdef DIAGNOSTIC
- if (ptp == NULL)
- panic("pmap_remove: unmanaged "
- "PTP detected");
-#endif
- }
- }
-
- /* do it! */
- result = pmap_remove_pte(pmap, ptp,
- &ptes[x86_btop(sva)], sva, &cpumask, flags);
-
- /*
- * if mapping removed and the PTP is no longer
- * being used, free it!
- */
-
- if (result && ptp && ptp->wire_count <= 1) {
- /* zap! */
- maptp = (pt_entry_t *)vtomach(
- (vaddr_t)&pmap->pm_pdir[pdei(sva)]);
- PTE_ATOMIC_CLEAR(&pmap->pm_pdir[pdei(sva)],
- maptp, opte);
-#if defined(MULTIPROCESSOR)
- /*
- * XXXthorpej Redundant shootdown can happen
- * here if we're using APTE space.
- */
-#endif
- pmap_tlb_shootdown(curpmap,
- ((vaddr_t)ptes) + ptp->offset, opte,
- &cpumask);
-#if defined(MULTIPROCESSOR)
- /*
- * Always shoot down the pmap's self-mapping
- * of the PTP.
- * XXXthorpej Redundant shootdown can happen
- * here if pmap == curpmap (not APTE space).
- */
- pmap_tlb_shootdown(pmap,
- ((vaddr_t)PTE_BASE) + ptp->offset, opte,
- &cpumask);
-#endif
- pmap->pm_stats.resident_count--;
- if (pmap->pm_ptphint == ptp)
- pmap->pm_ptphint =
- TAILQ_FIRST(&pmap->pm_obj.memq);
- ptp->wire_count = 0;
- ptp->flags |= PG_ZERO;
- uvm_pagerealloc(ptp, NULL, 0);
- TAILQ_INSERT_TAIL(&empty_ptps, ptp, listq);
- }
- }
- pmap_tlb_shootnow(cpumask);
- pmap_unmap_ptes(pmap); /* unlock pmap */
- PMAP_MAP_TO_HEAD_UNLOCK();
- /* Now we can free unused ptps */
- TAILQ_FOREACH(ptp, &empty_ptps, listq)
- uvm_pagefree(ptp);
- return;
- }
-
- cpumask = 0;
-
- for (/* null */ ; sva < eva ; sva = blkendva) {
-
- /* determine range of block */
- blkendva = x86_round_pdr(sva+1);
- if (blkendva > eva)
- blkendva = eva;
-
- /*
- * XXXCDC: our PTE mappings should never be removed
- * with pmap_remove! if we allow this (and why would
- * we?) then we end up freeing the pmap's page
- * directory page (PDP) before we are finished using
- * it when we hit in in the recursive mapping. this
- * is BAD.
- *
- * long term solution is to move the PTEs out of user
- * address space. and into kernel address space (up
- * with APTE). then we can set VM_MAXUSER_ADDRESS to
- * be VM_MAX_ADDRESS.
- */
-
- if (pdei(sva) == PDSLOT_PTE)
- /* XXXCDC: ugly hack to avoid freeing PDP here */
- continue;
-
- if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)]))
- /* valid block? */
- continue;
-
- /* PA of the PTP */
- ptppa = (PDE_GET(&pmap->pm_pdir[pdei(sva)]) & PG_FRAME);
-
- /* get PTP if non-kernel mapping */
- if (pmap == pmap_kernel()) {
- /* we never free kernel PTPs */
- ptp = NULL;
- } else {
- if (pmap->pm_ptphint &&
- VM_PAGE_TO_PHYS(pmap->pm_ptphint) == ptppa) {
- ptp = pmap->pm_ptphint;
- } else {
- ptp = PHYS_TO_VM_PAGE(ptppa);
-#ifdef DIAGNOSTIC
- if (ptp == NULL)
- panic("pmap_remove: unmanaged PTP "
- "detected");
-#endif
- }
- }
- pmap_remove_ptes(pmap, ptp, (vaddr_t)&ptes[x86_btop(sva)],
- sva, blkendva, &cpumask, flags);
-
- /* if PTP is no longer being used, free it! */
- if (ptp && ptp->wire_count <= 1) {
- /* zap! */
- maptp = (pt_entry_t *)vtomach(
- (vaddr_t)&pmap->pm_pdir[pdei(sva)]);
- PTE_ATOMIC_CLEAR(&pmap->pm_pdir[pdei(sva)],
- maptp, opte);
-#if defined(MULTIPROCESSOR)
- /*
- * XXXthorpej Redundant shootdown can happen here
- * if we're using APTE space.
- */
-#endif
- pmap_tlb_shootdown(curpmap,
- ((vaddr_t)ptes) + ptp->offset, opte, &cpumask);
-#if defined(MULTIPROCESSOR)
- /*
- * Always shoot down the pmap's self-mapping
- * of the PTP.
- * XXXthorpej Redundant shootdown can happen here
- * if pmap == curpmap (not APTE space).
- */
- pmap_tlb_shootdown(pmap,
- ((vaddr_t)PTE_BASE) + ptp->offset, opte, &cpumask);
-#endif
- pmap->pm_stats.resident_count--;
- if (pmap->pm_ptphint == ptp) /* update hint? */
- pmap->pm_ptphint = pmap->pm_obj.memq.tqh_first;
- ptp->wire_count = 0;
- ptp->flags |= PG_ZERO;
- /* Postpone free to shootdown */
- uvm_pagerealloc(ptp, NULL, 0);
- TAILQ_INSERT_TAIL(&empty_ptps, ptp, listq);
- }
- }
-
- pmap_tlb_shootnow(cpumask);
- pmap_unmap_ptes(pmap);
- PMAP_MAP_TO_HEAD_UNLOCK();
- /* Now we can free unused ptps */
- TAILQ_FOREACH(ptp, &empty_ptps, listq)
- uvm_pagefree(ptp);
-}
-
-/*
- * pmap_page_remove: remove a managed vm_page from all pmaps that map it
- *
- * => we set pv_head => pmap locking
- * => R/M bits are sync'd back to attrs
- */
-
-void
-pmap_page_remove(pg)
- struct vm_page *pg;
-{
- struct pv_head *pvh;
- struct pv_entry *pve, *npve, *killlist = NULL;
- pt_entry_t *ptes, opte;
- pt_entry_t *maptp;
- int32_t cpumask = 0;
- TAILQ_HEAD(, vm_page) empty_ptps;
- struct vm_page *ptp;
- struct cpu_info *ci;
- struct pmap *curpmap;
-
-#ifdef DIAGNOSTIC
- int bank, off;
-
- bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off);
- if (bank == -1)
- panic("pmap_page_remove: unmanaged page?");
-#endif
-
- pvh = &pg->mdpage.mp_pvhead;
- if (SPLAY_ROOT(&pvh->pvh_root) == NULL) {
- return;
- }
-
- TAILQ_INIT(&empty_ptps);
-
- /* set pv_head => pmap locking */
- PMAP_HEAD_TO_MAP_LOCK();
-
- ci = curcpu();
- curpmap = ci->ci_pmap;
-
- /* XXX: needed if we hold head->map lock? */
- simple_lock(&pvh->pvh_lock);
-
- for (pve = SPLAY_MIN(pvtree, &pvh->pvh_root); pve != NULL; pve = npve) {
- npve = SPLAY_NEXT(pvtree, &pvh->pvh_root, pve);
- ptes = pmap_map_ptes(pve->pv_pmap); /* locks pmap */
-
-#ifdef DIAGNOSTIC
- if (pve->pv_ptp &&
- (PDE_GET(&pve->pv_pmap->pm_pdir[pdei(pve->pv_va)]) &
- PG_FRAME) != VM_PAGE_TO_PHYS(pve->pv_ptp)) {
- printf("pmap_page_remove: pg=%p: va=%lx, pv_ptp=%p\n",
- pg, pve->pv_va, pve->pv_ptp);
- printf("pmap_page_remove: PTP's phys addr: "
- "actual=%lx, recorded=%lx\n",
- (PDE_GET(&pve->pv_pmap->pm_pdir[pdei(pve->pv_va)])
- & PG_FRAME), VM_PAGE_TO_PHYS(pve->pv_ptp));
- panic("pmap_page_remove: mapped managed page has "
- "invalid pv_ptp field");
- }
-#endif
-
- /* atomically save the old PTE and zap! it */
- maptp = (pt_entry_t *)vtomach(
- (vaddr_t)&ptes[x86_btop(pve->pv_va)]);
- opte = pte_atomic_update(&ptes[x86_btop(pve->pv_va)],
- maptp, 0);
-
- if (opte & PG_W)
- pve->pv_pmap->pm_stats.wired_count--;
- pve->pv_pmap->pm_stats.resident_count--;
-
- /* Shootdown only if referenced */
- if (opte & PG_U)
- pmap_tlb_shootdown(pve->pv_pmap, pve->pv_va, opte,
- &cpumask);
-
- /* sync R/M bits */
- pg->mdpage.mp_attrs |= (opte & (PG_U|PG_M));
-
- /* update the PTP reference count. free if last reference. */
- if (pve->pv_ptp) {
- pve->pv_ptp->wire_count--;
- if (pve->pv_ptp->wire_count <= 1) {
- /*
- * Do we have to shootdown the page just to
- * get the pte out of the TLB ?
- */
- if(!(opte & PG_U))
- pmap_tlb_shootdown(pve->pv_pmap,
- pve->pv_va, opte, &cpumask);
-
- /* zap! */
- maptp = (pt_entry_t *)vtomach((vaddr_t)
- &pve->pv_pmap->pm_pdir[pdei(pve->pv_va)]);
- PTE_ATOMIC_CLEAR(&pve->pv_pmap->pm_pdir
- [pdei(pve->pv_va)], maptp, opte);
- pmap_tlb_shootdown(curpmap,
- ((vaddr_t)ptes) + pve->pv_ptp->offset,
- opte, &cpumask);
-#if defined(MULTIPROCESSOR)
- /*
- * Always shoot down the other pmap's
- * self-mapping of the PTP.
- */
- pmap_tlb_shootdown(pve->pv_pmap,
- ((vaddr_t)PTE_BASE) + pve->pv_ptp->offset,
- opte, &cpumask);
-#endif
- pve->pv_pmap->pm_stats.resident_count--;
- /* update hint? */
- if (pve->pv_pmap->pm_ptphint == pve->pv_ptp)
- pve->pv_pmap->pm_ptphint =
- pve->pv_pmap->pm_obj.memq.tqh_first;
- pve->pv_ptp->wire_count = 0;
- pve->pv_ptp->flags |= PG_ZERO;
- /* Free only after the shootdown */
- uvm_pagerealloc(pve->pv_ptp, NULL, 0);
- TAILQ_INSERT_TAIL(&empty_ptps, pve->pv_ptp,
- listq);
- }
- }
- pmap_unmap_ptes(pve->pv_pmap); /* unlocks pmap */
- SPLAY_REMOVE(pvtree, &pvh->pvh_root, pve); /* remove it */
- SPLAY_RIGHT(pve, pv_node) = killlist; /* mark it for death */
- killlist = pve;
- }
- pmap_free_pvs(NULL, killlist);
- simple_unlock(&pvh->pvh_lock);
- PMAP_HEAD_TO_MAP_UNLOCK();
- pmap_tlb_shootnow(cpumask);
-
- /* Now we can free unused ptps */
- TAILQ_FOREACH(ptp, &empty_ptps, listq)
- uvm_pagefree(ptp);
-}
-
-/*
- * p m a p a t t r i b u t e f u n c t i o n s
- * functions that test/change managed page's attributes
- * since a page can be mapped multiple times we must check each PTE that
- * maps it by going down the pv lists.
- */
-
-/*
- * pmap_test_attrs: test a page's attributes
- *
- * => we set pv_head => pmap locking
- */
-
-boolean_t
-pmap_test_attrs(pg, testbits)
- struct vm_page *pg;
- int testbits;
-{
- struct vm_page_md *mdpg;
- int *myattrs;
- struct pv_head *pvh;
- struct pv_entry *pve;
- volatile pt_entry_t *ptes;
- pt_entry_t pte;
-
-#if DIAGNOSTIC
- int bank, off;
-
- bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off);
- if (bank == -1)
- panic("pmap_test_attrs: unmanaged page?");
-#endif
- mdpg = &pg->mdpage;
-
- /*
- * before locking: see if attributes are already set and if so,
- * return!
- */
-
- myattrs = &mdpg->mp_attrs;
- if (*myattrs & testbits)
- return(TRUE);
-
- /* test to see if there is a list before bothering to lock */
- pvh = &mdpg->mp_pvhead;
- if (SPLAY_ROOT(&pvh->pvh_root) == NULL) {
- return(FALSE);
- }
-
- /* nope, gonna have to do it the hard way */
- PMAP_HEAD_TO_MAP_LOCK();
- /* XXX: needed if we hold head->map lock? */
- simple_lock(&pvh->pvh_lock);
-
- for (pve = SPLAY_MIN(pvtree, &pvh->pvh_root);
- pve != NULL && (*myattrs & testbits) == 0;
- pve = SPLAY_NEXT(pvtree, &pvh->pvh_root, pve)) {
- ptes = pmap_map_ptes(pve->pv_pmap);
- pte = PTE_GET(&ptes[x86_btop(pve->pv_va)]); /* XXX flags only?
*/
- pmap_unmap_ptes(pve->pv_pmap);
- *myattrs |= pte;
- }
-
- /*
- * note that we will exit the for loop with a non-null pve if
- * we have found the bits we are testing for.
- */
-
- simple_unlock(&pvh->pvh_lock);
- PMAP_HEAD_TO_MAP_UNLOCK();
- return((*myattrs & testbits) != 0);
-}
-
-/*
- * pmap_clear_attrs: clear the specified attribute for a page.
- *
- * => we set pv_head => pmap locking
- * => we return TRUE if we cleared one of the bits we were asked to
- */
-
-boolean_t
-pmap_clear_attrs(pg, clearbits)
- struct vm_page *pg;
- int clearbits;
-{
- struct vm_page_md *mdpg;
- u_int32_t result;
- struct pv_head *pvh;
- struct pv_entry *pve;
- pt_entry_t *ptes, opte;
- pt_entry_t *maptp;
- int *myattrs;
- int32_t cpumask = 0;
-
-#ifdef DIAGNOSTIC
- int bank, off;
-
- bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off);
- if (bank == -1)
- panic("pmap_change_attrs: unmanaged page?");
-#endif
- mdpg = &pg->mdpage;
-
- PMAP_HEAD_TO_MAP_LOCK();
- pvh = &mdpg->mp_pvhead;
- /* XXX: needed if we hold head->map lock? */
- simple_lock(&pvh->pvh_lock);
-
- myattrs = &mdpg->mp_attrs;
- result = *myattrs & clearbits;
- *myattrs &= ~clearbits;
-
- SPLAY_FOREACH(pve, pvtree, &pvh->pvh_root) {
-#ifdef DIAGNOSTIC
- if (!pmap_valid_entry(pve->pv_pmap->pm_pdir[pdei(pve->pv_va)]))
- panic("pmap_change_attrs: mapping without PTP "
- "detected");
-#endif
-
- ptes = pmap_map_ptes(pve->pv_pmap); /* locks pmap */
- opte = PTE_GET(&ptes[x86_btop(pve->pv_va)]);
- if (opte & clearbits) {
- /* We need to do something */
- if (clearbits == PG_RW) {
- result |= PG_RW;
-
- /*
- * On write protect we might not need to flush
- * the TLB
- */
-
- /* First zap the RW bit! */
- maptp = (pt_entry_t *)vtomach(
- (vaddr_t)&ptes[x86_btop(pve->pv_va)]);
- PTE_ATOMIC_CLEARBITS(
- &ptes[x86_btop(pve->pv_va)],
- maptp, PG_RW);
- opte = PTE_GET(&ptes[x86_btop(pve->pv_va)]);
-
- /*
- * Then test if it is not cached as RW the TLB
- */
- if (!(opte & PG_M))
- goto no_tlb_shootdown;
- }
-
- /*
- * Since we need a shootdown me might as well
- * always clear PG_U AND PG_M.
- */
-
- /* zap! */
- maptp = (pt_entry_t *)vtomach(
- (vaddr_t)&ptes[x86_btop(pve->pv_va)]);
- PTE_ATOMIC_SET(&ptes[x86_btop(pve->pv_va)], maptp,
- (opte & ~(PG_U | PG_M)), opte);
-
- result |= (opte & clearbits);
- *myattrs |= (opte & ~(clearbits));
-
- pmap_tlb_shootdown(pve->pv_pmap, pve->pv_va, opte,
- &cpumask);
- }
-no_tlb_shootdown:
- pmap_unmap_ptes(pve->pv_pmap); /* unlocks pmap */
- }
-
- simple_unlock(&pvh->pvh_lock);
- PMAP_HEAD_TO_MAP_UNLOCK();
-
- pmap_tlb_shootnow(cpumask);
- return(result != 0);
-}
-
-
-/*
- * p m a p p r o t e c t i o n f u n c t i o n s
- */
-
-/*
- * pmap_page_protect: change the protection of all recorded mappings
- * of a managed page
- *
- * => NOTE: this is an inline function in pmap.h
- */
-
-/* see pmap.h */
-
-/*
- * pmap_protect: set the protection in of the pages in a pmap
- *
- * => NOTE: this is an inline function in pmap.h
- */
-
-/* see pmap.h */
-
-/*
- * pmap_write_protect: write-protect pages in a pmap
- */
-
-void
-pmap_write_protect(pmap, sva, eva, prot)
- struct pmap *pmap;
- vaddr_t sva, eva;
- vm_prot_t prot;
-{
- pt_entry_t *ptes, *epte;
- pt_entry_t *maptp;
-#ifndef XEN
- volatile
-#endif
- pt_entry_t *spte;
- vaddr_t blockend;
- int32_t cpumask = 0;
-
- ptes = pmap_map_ptes(pmap); /* locks pmap */
-
- /* should be ok, but just in case ... */
- sva &= PG_FRAME;
- eva &= PG_FRAME;
-
- for (/* null */ ; sva < eva ; sva = blockend) {
-
- blockend = (sva & PD_MASK) + NBPD;
- if (blockend > eva)
- blockend = eva;
-
- /*
- * XXXCDC: our PTE mappings should never be write-protected!
- *
- * long term solution is to move the PTEs out of user
- * address space. and into kernel address space (up
- * with APTE). then we can set VM_MAXUSER_ADDRESS to
- * be VM_MAX_ADDRESS.
- */
-
- /* XXXCDC: ugly hack to avoid freeing PDP here */
- if (pdei(sva) == PDSLOT_PTE)
- continue;
-
- /* empty block? */
- if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)]))
- continue;
-
-#ifdef DIAGNOSTIC
- if (sva >= VM_MAXUSER_ADDRESS &&
- sva < VM_MAX_ADDRESS)
- panic("pmap_write_protect: PTE space");
-#endif
-
- spte = &ptes[x86_btop(sva)];
- epte = &ptes[x86_btop(blockend)];
-
- for (/*null */; spte < epte ; spte++) {
- if ((PTE_GET(spte) & (PG_RW|PG_V)) == (PG_RW|PG_V)) {
- maptp = (pt_entry_t *)vtomach((vaddr_t)spte);
- PTE_ATOMIC_CLEARBITS(spte, maptp, PG_RW);
- if (PTE_GET(spte) & PG_M)
- pmap_tlb_shootdown(pmap,
- x86_ptob(spte - ptes),
- PTE_GET(spte), &cpumask);
- }
- }
- }
-
- /*
- * if we kept a removal record and removed some pages update the TLB
- */
-
- pmap_tlb_shootnow(cpumask);
- pmap_unmap_ptes(pmap); /* unlocks pmap */
-}
-
-/*
- * end of protection functions
- */
-
-/*
- * pmap_unwire: clear the wired bit in the PTE
- *
- * => mapping should already be in map
- */
-
-void
-pmap_unwire(pmap, va)
- struct pmap *pmap;
- vaddr_t va;
-{
- pt_entry_t *ptes;
- pt_entry_t *maptp;
-
- if (pmap_valid_entry(pmap->pm_pdir[pdei(va)])) {
- ptes = pmap_map_ptes(pmap); /* locks pmap */
-
-#ifdef DIAGNOSTIC
- if (!pmap_valid_entry(ptes[x86_btop(va)]))
- panic("pmap_unwire: invalid (unmapped) va 0x%lx", va);
-#endif
- if ((ptes[x86_btop(va)] & PG_W) != 0) {
- maptp = (pt_entry_t *)vtomach(
- (vaddr_t)&ptes[x86_btop(va)]);
- PTE_ATOMIC_CLEARBITS(&ptes[x86_btop(va)], maptp, PG_W);
- pmap->pm_stats.wired_count--;
- }
-#ifdef DIAGNOSTIC
- else {
- printf("pmap_unwire: wiring for pmap %p va 0x%lx "
- "didn't change!\n", pmap, va);
- }
-#endif
- pmap_unmap_ptes(pmap); /* unlocks map */
- }
-#ifdef DIAGNOSTIC
- else {
- panic("pmap_unwire: invalid PDE");
- }
-#endif
-}
-
-/*
- * pmap_collect: free resources held by a pmap
- *
- * => optional function.
- * => called when a process is swapped out to free memory.
- */
-
-void
-pmap_collect(pmap)
- struct pmap *pmap;
-{
- /*
- * free all of the pt pages by removing the physical mappings
- * for its entire address space.
- */
-
- pmap_do_remove(pmap, VM_MIN_ADDRESS, VM_MAX_ADDRESS,
- PMAP_REMOVE_SKIPWIRED);
-}
-
-/*
- * pmap_copy: copy mappings from one pmap to another
- *
- * => optional function
- * void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
- */
-
-/*
- * defined as macro in pmap.h
- */
-
-/*
- * pmap_enter: enter a mapping into a pmap
- *
- * => must be done "now" ... no lazy-evaluation
- * => we set pmap => pv_head locking
- */
-
-int
-pmap_enter(pmap, va, pa, prot, flags)
- struct pmap *pmap;
- vaddr_t va;
- paddr_t pa;
- vm_prot_t prot;
- int flags;
-{
- pt_entry_t *ptes, opte, npte;
- struct vm_page *ptp, *pg;
- struct vm_page_md *mdpg;
- struct pv_head *old_pvh, *new_pvh;
- struct pv_entry *pve = NULL; /* XXX gcc */
- int error;
- boolean_t wired = (flags & PMAP_WIRED) != 0;
- pt_entry_t *maptp;
-
- XENPRINTK(("pmap_enter(%p, %p, %p, %08x, %08x)\n",
- pmap, (void *)va, (void *)pa, prot, flags));
-
-#ifdef DIAGNOSTIC
- /* sanity check: totally out of range? */
- if (va >= VM_MAX_KERNEL_ADDRESS)
- panic("pmap_enter: too big");
-
- if (va == (vaddr_t) PDP_BASE || va == (vaddr_t) APDP_BASE)
- panic("pmap_enter: trying to map over PDP/APDP!");
-
- /* sanity check: kernel PTPs should already have been pre-allocated */
- if (va >= VM_MIN_KERNEL_ADDRESS &&
- !pmap_valid_entry(pmap->pm_pdir[pdei(va)]))
- panic("pmap_enter: missing kernel PTP!");
-#endif
-
- npte = protection_codes[prot] | PG_V;
-
- if (pa >= pmap_pa_start && pa < pmap_pa_end)
- npte |= xpmap_ptom(pa);
- else {
- XENPRINTF(("pmap_enter: va %08lx outside pa range %08lx\n",
- va, pa));
- npte |= pa;
- }
-
- /* XENPRINTK(("npte %p\n", npte)); */
-
- if (wired)
- npte |= PG_W;
-
- if (va < VM_MAXUSER_ADDRESS)
- npte |= PG_u;
- else if (va < VM_MAX_ADDRESS)
- npte |= (PG_u | PG_RW); /* XXXCDC: no longer needed? */
- if (pmap == pmap_kernel())
- npte |= pmap_pg_g;
-
- /* get lock */
- PMAP_MAP_TO_HEAD_LOCK();
-
- ptes = pmap_map_ptes(pmap); /* locks pmap */
- if (pmap == pmap_kernel()) {
- ptp = NULL;
- } else {
- ptp = pmap_get_ptp(pmap, pdei(va));
- if (ptp == NULL) {
- if (flags & PMAP_CANFAIL) {
- error = ENOMEM;
- goto out;
- }
- panic("pmap_enter: get ptp failed");
- }
- }
-
- /*
- * Get first view on old PTE
- * on SMP the PTE might gain PG_U and PG_M flags
- * before we zap it later
- */
- opte = pte_get(&ptes[x86_btop(va)]); /* old PTE */
- XENPRINTK(("npte %p opte %p ptes %p idx %03x\n",
- (void *)npte, (void *)opte, ptes, x86_btop(va)));
-
- /*
- * is there currently a valid mapping at our VA and does it
- * map to the same PA as the one we want to map ?
- */
-
- if (pmap_valid_entry(opte) && ((opte & PG_FRAME) == pa)) {
-
- /*
- * first, calculate pm_stats updates. resident count will not
- * change since we are replacing/changing a valid mapping.
- * wired count might change...
- */
- pmap->pm_stats.wired_count +=
- ((npte & PG_W) ? 1 : 0 - (opte & PG_W) ? 1 : 0);
-
- npte |= (opte & PG_PVLIST);
-
- XENPRINTK(("pmap update opte == pa"));
- /* zap! */
- maptp = (pt_entry_t *)vtomach((vaddr_t)&ptes[x86_btop(va)]);
- opte = pte_atomic_update_ma(&ptes[x86_btop(va)], maptp, npte);
-
- /*
- * Any change in the protection level that the CPU
- * should know about ?
- */
- if ((npte & PG_RW)
- || ((opte & (PG_M | PG_RW)) != (PG_M | PG_RW))) {
- XENPRINTK(("pmap update opte == pa, prot change"));
- /*
- * No need to flush the TLB.
- * Just add old PG_M, ... flags in new entry.
- */
- PTE_ATOMIC_SETBITS(&ptes[x86_btop(va)], maptp,
- opte & (PG_M | PG_U));
- goto out_ok;
- }
-
- /*
- * Might be cached in the TLB as being writable
- * if this is on the PVLIST, sync R/M bit
- */
- if (opte & PG_PVLIST) {
- pg = PHYS_TO_VM_PAGE(pa);
-#ifdef DIAGNOSTIC
- if (pg == NULL)
- panic("pmap_enter: same pa PG_PVLIST "
- "mapping with unmanaged page "
- "pa = 0x%lx (0x%lx)", pa,
- atop(pa));
-#endif
- mdpg = &pg->mdpage;
- old_pvh = &mdpg->mp_pvhead;
- simple_lock(&old_pvh->pvh_lock);
- mdpg->mp_attrs |= opte;
- simple_unlock(&old_pvh->pvh_lock);
- }
- goto shootdown_now;
- }
-
- pg = PHYS_TO_VM_PAGE(pa);
- XENPRINTK(("pg %p from %p, init %d\n", pg, (void *)pa,
- pmap_initialized));
- if (pmap_initialized && pg != NULL) {
- /* This is a managed page */
- npte |= PG_PVLIST;
- mdpg = &pg->mdpage;
- new_pvh = &mdpg->mp_pvhead;
- if ((opte & (PG_PVLIST | PG_V)) != (PG_PVLIST | PG_V)) {
- /* We can not steal a pve - allocate one */
- pve = pmap_alloc_pv(pmap, ALLOCPV_NEED);
- if (pve == NULL) {
- if (!(flags & PMAP_CANFAIL))
- panic("pmap_enter: "
- "no pv entries available");
- error = ENOMEM;
- goto out;
- }
- }
- } else {
- new_pvh = NULL;
- }
-
- /*
- * is there currently a valid mapping at our VA?
- */
-
- if (pmap_valid_entry(opte)) {
-
- /*
- * changing PAs: we must remove the old one first
- */
-
- /*
- * first, calculate pm_stats updates. resident count will not
- * change since we are replacing/changing a valid mapping.
- * wired count might change...
- */
- pmap->pm_stats.wired_count +=
- ((npte & PG_W) ? 1 : 0 - (opte & PG_W) ? 1 : 0);
-
- if (opte & PG_PVLIST) {
- pg = PHYS_TO_VM_PAGE(opte & PG_FRAME);
-#ifdef DIAGNOSTIC
- if (pg == NULL)
- panic("pmap_enter: PG_PVLIST mapping with "
- "unmanaged page "
- "pa = 0x%lx (0x%lx)", pa, atop(pa));
-#endif
- mdpg = &pg->mdpage;
- old_pvh = &mdpg->mp_pvhead;
-
- /* new_pvh is NULL if page will not be managed */
- pmap_lock_pvhs(old_pvh, new_pvh);
-
- XENPRINTK(("pmap change pa"));
- /* zap! */
- maptp = (pt_entry_t *)vtomach(
- (vaddr_t)&ptes[x86_btop(va)]);
- opte = pte_atomic_update_ma(&ptes[x86_btop(va)], maptp,
- npte);
-
- pve = pmap_remove_pv(old_pvh, pmap, va);
- KASSERT(pve != 0);
- mdpg->mp_attrs |= opte;
-
- if (new_pvh) {
- pmap_enter_pv(new_pvh, pve, pmap, va, ptp);
- simple_unlock(&new_pvh->pvh_lock);
- } else
- pmap_free_pv(pmap, pve);
- simple_unlock(&old_pvh->pvh_lock);
-
- goto shootdown_test;
- }
- } else { /* opte not valid */
- pmap->pm_stats.resident_count++;
- if (wired)
- pmap->pm_stats.wired_count++;
- if (ptp)
- ptp->wire_count++;
- }
-
- if (new_pvh) {
- simple_lock(&new_pvh->pvh_lock);
- pmap_enter_pv(new_pvh, pve, pmap, va, ptp);
- simple_unlock(&new_pvh->pvh_lock);
- }
-
- XENPRINTK(("pmap initial setup\n"));
- maptp = (pt_entry_t *)vtomach((vaddr_t)&ptes[x86_btop(va)]);
- opte = pte_atomic_update_ma(&ptes[x86_btop(va)],
- maptp, npte); /* zap! */
-
-shootdown_test:
- /* Update page attributes if needed */
- if ((opte & (PG_V | PG_U)) == (PG_V | PG_U)) {
-#if defined(MULTIPROCESSOR)
- int32_t cpumask = 0;
-#endif
-shootdown_now:
-#if defined(MULTIPROCESSOR)
- pmap_tlb_shootdown(pmap, va, opte, &cpumask);
- pmap_tlb_shootnow(cpumask);
-#else
- /* Don't bother deferring in the single CPU case. */
- if (pmap_is_curpmap(pmap))
- pmap_update_pg(va);
-#endif
- }
-
-out_ok:
- error = 0;
-
-out:
- pmap_unmap_ptes(pmap);
- PMAP_MAP_TO_HEAD_UNLOCK();
-
- XENPRINTK(("pmap_enter: %d\n", error));
- return error;
-}
-
-/*
- * pmap_enter_ma: enter a mapping into a pmap
- *
- * => must be done "now" ... no lazy-evaluation
- * => we set pmap => pv_head locking
- */
-
-int
-pmap_enter_ma(pmap, va, pa, prot, flags)
- struct pmap *pmap;
- vaddr_t va;
- paddr_t pa;
- vm_prot_t prot;
- int flags;
-{
- pt_entry_t *ptes, opte, npte;
- pt_entry_t *maptp;
- struct vm_page *ptp, *pg;
- struct vm_page_md *mdpg;
- struct pv_head *old_pvh;
- struct pv_entry *pve = NULL; /* XXX gcc */
- int error;
- boolean_t wired = (flags & PMAP_WIRED) != 0;
-
- XENPRINTK(("pmap_enter_ma(%p, %p, %p, %08x, %08x)\n",
- pmap, (void *)va, (void *)pa, prot, flags));
-
-#ifdef DIAGNOSTIC
- /* sanity check: totally out of range? */
- if (va >= VM_MAX_KERNEL_ADDRESS)
- panic("pmap_enter: too big");
-
- if (va == (vaddr_t) PDP_BASE || va == (vaddr_t) APDP_BASE)
- panic("pmap_enter: trying to map over PDP/APDP!");
-
- /* sanity check: kernel PTPs should already have been pre-allocated */
- if (va >= VM_MIN_KERNEL_ADDRESS &&
- !pmap_valid_entry(pmap->pm_pdir[pdei(va)]))
- panic("pmap_enter: missing kernel PTP!");
-#endif
-
- npte = pa | protection_codes[prot] | PG_V;
- /* XENPRINTK(("npte %p\n", npte)); */
-
- if (wired)
- npte |= PG_W;
-
- if (va < VM_MAXUSER_ADDRESS)
- npte |= PG_u;
- else if (va < VM_MAX_ADDRESS)
- npte |= (PG_u | PG_RW); /* XXXCDC: no longer needed? */
- if (pmap == pmap_kernel())
- npte |= pmap_pg_g;
-
- /* get lock */
- PMAP_MAP_TO_HEAD_LOCK();
-
- ptes = pmap_map_ptes(pmap); /* locks pmap */
- if (pmap == pmap_kernel()) {
- ptp = NULL;
- } else {
- ptp = pmap_get_ptp(pmap, pdei(va));
- if (ptp == NULL) {
- if (flags & PMAP_CANFAIL) {
- error = ENOMEM;
- goto out;
- }
- panic("pmap_enter: get ptp failed");
- }
- }
-
- /*
- * Get first view on old PTE
- * on SMP the PTE might gain PG_U and PG_M flags
- * before we zap it later
- */
- opte = pte_get_ma(&ptes[x86_btop(va)]); /* old PTE */
- XENPRINTK(("npte %p opte %p ptes %p idx %03x\n",
- (void *)npte, (void *)opte, ptes, x86_btop(va)));
- XENPRINTF(("pmap_enter_ma pa %08lx va %08lx opte %08x npte %08x "
- "wired %d count %ld\n", pa, va, opte, npte, wired,
- pmap->pm_stats.wired_count));
-
- /*
- * is there currently a valid mapping at our VA and does it
- * map to the same MA as the one we want to map ?
- */
-
- if (pmap_valid_entry(opte) && ((opte & PG_FRAME) == pa)) {
-
- /*
- * first, calculate pm_stats updates. resident count will not
- * change since we are replacing/changing a valid mapping.
- * wired count might change...
- */
- pmap->pm_stats.wired_count +=
- ((npte & PG_W) ? 1 : 0 - (opte & PG_W) ? 1 : 0);
-
- XENPRINTK(("pmap update opte == pa"));
- /* zap! */
- maptp = (pt_entry_t *)vtomach((vaddr_t)&ptes[x86_btop(va)]);
- opte = pte_atomic_update_ma(&ptes[x86_btop(va)], maptp, npte);
-
- /*
- * Any change in the protection level that the CPU
- * should know about ?
- */
- if ((npte & PG_RW)
- || ((opte & (PG_M | PG_RW)) != (PG_M | PG_RW))) {
- XENPRINTK(("pmap update opte == pa, prot change"));
- /*
- * No need to flush the TLB.
- * Just add old PG_M, ... flags in new entry.
- */
- PTE_ATOMIC_SETBITS(&ptes[x86_btop(va)], maptp,
- opte & (PG_M | PG_U));
- goto out_ok;
- }
-
- /*
- * Might be cached in the TLB as being writable
- * if this is on the PVLIST, sync R/M bit
- */
- KDASSERT((opte & PG_PVLIST) == 0);
- goto shootdown_now;
- }
-
- /*
- * no managed mapping for pages mapped through pmap_enter_ma.
- */
-
- /*
- * is there currently a valid mapping at our VA?
- */
-
- if (pmap_valid_entry(opte)) {
-
- /*
- * changing PAs: we must remove the old one first
- */
-
- /*
- * first, calculate pm_stats updates. resident count will not
- * change since we are replacing/changing a valid mapping.
- * wired count might change...
- */
- pmap->pm_stats.wired_count +=
- ((npte & PG_W) ? 1 : 0 - (opte & PG_W) ? 1 : 0);
-
- if (opte & PG_PVLIST) {
- opte = xpmap_mtop(opte);
- KDASSERT((opte & PG_FRAME) !=
- (KERNTEXTOFF - KERNBASE_LOCORE));
-
- pg = PHYS_TO_VM_PAGE(opte & PG_FRAME);
-#ifdef DIAGNOSTIC
- if (pg == NULL)
- panic("pmap_enter: PG_PVLIST mapping with "
- "unmanaged page "
- "pa = 0x%lx (0x%lx)", pa, atop(pa));
-#endif
- mdpg = &pg->mdpage;
- old_pvh = &mdpg->mp_pvhead;
-
- /* NULL new_pvh since page will not be managed */
- pmap_lock_pvhs(old_pvh, NULL);
-
- XENPRINTK(("pmap change pa"));
- /* zap! */
- maptp = (pt_entry_t *)vtomach(
- (vaddr_t)&ptes[x86_btop(va)]);
- opte = pte_atomic_update_ma(&ptes[x86_btop(va)], maptp,
- npte);
-
- pve = pmap_remove_pv(old_pvh, pmap, va);
- KASSERT(pve != 0);
- mdpg->mp_attrs |= opte;
-
- pmap_free_pv(pmap, pve);
- simple_unlock(&old_pvh->pvh_lock);
-
- goto shootdown_test;
- }
- } else { /* opte not valid */
- pmap->pm_stats.resident_count++;
- if (wired)
- pmap->pm_stats.wired_count++;
- if (ptp)
- ptp->wire_count++;
- }
-
- XENPRINTK(("pmap initial setup"));
- maptp = (pt_entry_t *)vtomach((vaddr_t)&ptes[x86_btop(va)]);
- opte = pte_atomic_update_ma(&ptes[x86_btop(va)],
- maptp, npte); /* zap! */
-
-shootdown_test:
- /* Update page attributes if needed */
- if ((opte & (PG_V | PG_U)) == (PG_V | PG_U)) {
-#if defined(MULTIPROCESSOR)
- int32_t cpumask = 0;
-#endif
-shootdown_now:
-#if defined(MULTIPROCESSOR)
- pmap_tlb_shootdown(pmap, va, opte, &cpumask);
- pmap_tlb_shootnow(cpumask);
-#else
- /* Don't bother deferring in the single CPU case. */
- if (pmap_is_curpmap(pmap))
- pmap_update_pg(va);
-#endif
- }
-
-out_ok:
- error = 0;
-
-out:
- pmap_unmap_ptes(pmap);
- PMAP_MAP_TO_HEAD_UNLOCK();
-
- XENPRINTK(("pmap_enter: %d\n", error));
- return error;
-}
-
-/*
- * pmap_growkernel: increase usage of KVM space
- *
- * => we allocate new PTPs for the kernel and install them in all
- * the pmaps on the system.
- */
-
-vaddr_t
-pmap_growkernel(maxkvaddr)
- vaddr_t maxkvaddr;
-{
- struct pmap *kpm = pmap_kernel(), *pm;
- pd_entry_t *mapdp;
- pt_entry_t *maptp;
- int needed_kpde; /* needed number of kernel PTPs */
- int s;
- paddr_t ptaddr;
-
- needed_kpde = (u_int)(maxkvaddr - VM_MIN_KERNEL_ADDRESS + (NBPD-1))
- / NBPD;
- XENPRINTF(("pmap_growkernel %p: %d -> %d\n", (void *)maxkvaddr,
- nkpde, needed_kpde));
- if (needed_kpde <= nkpde)
- goto out; /* we are OK */
-
- /*
- * whoops! we need to add kernel PTPs
- */
-
- s = splhigh(); /* to be safe */
- simple_lock(&kpm->pm_obj.vmobjlock);
-
- for (/*null*/ ; nkpde < needed_kpde ; nkpde++) {
-
- mapdp = (pt_entry_t
*)vtomach((vaddr_t)&kpm->pm_pdir[PDSLOT_KERN + nkpde]);
- if (uvm.page_init_done == FALSE) {
-
- /*
- * we're growing the kernel pmap early (from
- * uvm_pageboot_alloc()). this case must be
- * handled a little differently.
- */
-
- if (uvm_page_physget(&ptaddr) == FALSE)
- panic("pmap_growkernel: out of memory");
- pmap_zero_page(ptaddr);
-
- XENPRINTF(("xxxx maybe not PG_RW\n"));
- PDE_SET(&kpm->pm_pdir[PDSLOT_KERN + nkpde], mapdp,
ptaddr | PG_RW | PG_V);
-
- /* count PTP as resident */
- kpm->pm_stats.resident_count++;
- continue;
- }
-
- /*
- * THIS *MUST* BE CODED SO AS TO WORK IN THE
- * pmap_initialized == FALSE CASE! WE MAY BE
- * INVOKED WHILE pmap_init() IS RUNNING!
- */
-
- if (pmap_alloc_ptp(kpm, PDSLOT_KERN + nkpde) == NULL) {
- panic("pmap_growkernel: alloc ptp failed");
- }
-
- /* PG_u not for kernel */
- PDE_CLEARBITS(&kpm->pm_pdir[PDSLOT_KERN + nkpde], mapdp, PG_u);
-
- /* distribute new kernel PTP to all active pmaps */
- simple_lock(&pmaps_lock);
- for (pm = pmaps.lh_first; pm != NULL;
- pm = pm->pm_list.le_next) {
- XENPRINTF(("update\n"));
- maptp = (pt_entry_t *)vtomach(
- (vaddr_t)&pm->pm_pdir[PDSLOT_KERN + nkpde]);
- PDE_COPY(&pm->pm_pdir[PDSLOT_KERN + nkpde], maptp,
- &kpm->pm_pdir[PDSLOT_KERN + nkpde]);
- }
-
- /* Invalidate the PDP cache. */
- pool_cache_invalidate(&pmap_pdp_cache);
- pmap_pdp_cache_generation++;
-
- simple_unlock(&pmaps_lock);
- }
-
- simple_unlock(&kpm->pm_obj.vmobjlock);
- splx(s);
-
-out:
- XENPRINTF(("pmap_growkernel return %d %p\n", nkpde,
- (void *)(VM_MIN_KERNEL_ADDRESS + (nkpde * NBPD))));
- return (VM_MIN_KERNEL_ADDRESS + (nkpde * NBPD));
-}
-
-#ifdef DEBUG
-void pmap_dump(struct pmap *, vaddr_t, vaddr_t);
-
-/*
- * pmap_dump: dump all the mappings from a pmap
- *
- * => caller should not be holding any pmap locks
- */
-
-void
-pmap_dump(pmap, sva, eva)
- struct pmap *pmap;
- vaddr_t sva, eva;
-{
- pt_entry_t *ptes, *pte;
- vaddr_t blkendva;
-
- /*
- * if end is out of range truncate.
- * if (end == start) update to max.
- */
-
- if (eva > VM_MAXUSER_ADDRESS || eva <= sva)
- eva = VM_MAXUSER_ADDRESS;
-
- /*
- * we lock in the pmap => pv_head direction
- */
-
- PMAP_MAP_TO_HEAD_LOCK();
- ptes = pmap_map_ptes(pmap); /* locks pmap */
-
- /*
- * dumping a range of pages: we dump in PTP sized blocks (4MB)
- */
-
- for (/* null */ ; sva < eva ; sva = blkendva) {
-
- /* determine range of block */
- blkendva = x86_round_pdr(sva+1);
- if (blkendva > eva)
- blkendva = eva;
-
- /* valid block? */
- if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)]))
- continue;
-
- pte = &ptes[x86_btop(sva)];
- for (/* null */; sva < blkendva ; sva += PAGE_SIZE, pte++) {
- if (!pmap_valid_entry(*pte))
- continue;
- XENPRINTF(("va %#lx -> pa %#lx (pte=%#lx)\n",
- sva, PTE_GET(pte), PTE_GET(pte) & PG_FRAME));
- }
- }
- pmap_unmap_ptes(pmap);
- PMAP_MAP_TO_HEAD_UNLOCK();
-}
-#endif
-
-/******************** TLB shootdown code ********************/
-
-
-void
-pmap_tlb_shootnow(int32_t cpumask)
-{
- struct cpu_info *self;
-#ifdef MULTIPROCESSOR
- struct cpu_info *ci;
- CPU_INFO_ITERATOR cii;
- int s;
-#ifdef DIAGNOSTIC
- int count = 0;
-#endif
-#endif
-
- if (cpumask == 0)
- return;
-
- self = curcpu();
-#ifdef MULTIPROCESSOR
- s = splipi();
- self->ci_tlb_ipi_mask = cpumask;
-#endif
-
- pmap_do_tlb_shootdown(self); /* do *our* work. */
-
-#ifdef MULTIPROCESSOR
- splx(s);
-
- /*
- * Send the TLB IPI to other CPUs pending shootdowns.
- */
- for (CPU_INFO_FOREACH(cii, ci)) {
- if (ci == self)
- continue;
- if (cpumask & (1U << ci->ci_cpuid))
- if (x86_send_ipi(ci, X86_IPI_TLB) != 0)
- x86_atomic_clearbits_l(&self->ci_tlb_ipi_mask,
- (1U << ci->ci_cpuid));
- }
-
- while (self->ci_tlb_ipi_mask != 0) {
-#ifdef DIAGNOSTIC
- if (count++ > 10000000)
- panic("TLB IPI rendezvous failed (mask %x)",
- self->ci_tlb_ipi_mask);
-#endif
- x86_pause();
- }
-#endif
-}
-
-/*
- * pmap_tlb_shootdown:
- *
- * Cause the TLB entry for pmap/va to be shot down.
- */
-void
-pmap_tlb_shootdown(pmap, va, pte, cpumaskp)
- pmap_t pmap;
- vaddr_t va;
- pt_entry_t pte;
- int32_t *cpumaskp;
-{
- struct cpu_info *ci, *self;
- struct pmap_tlb_shootdown_q *pq;
- struct pmap_tlb_shootdown_job *pj;
- CPU_INFO_ITERATOR cii;
- int s;
-
-#ifdef LARGEPAGES
- if (pte & PG_PS)
- va &= PG_LGFRAME;
-#endif
-
- if (pmap_initialized == FALSE || cpus_attached == 0) {
- pmap_update_pg(va);
- return;
- }
-
- self = curcpu();
-
- s = splipi();
-#if 0
- printf("dshootdown %lx\n", va);
-#endif
-
- for (CPU_INFO_FOREACH(cii, ci)) {
- /* Note: we queue shootdown events for ourselves here! */
- if (pmap_is_active(pmap, ci->ci_cpuid) == 0)
- continue;
- if (ci != self && !(ci->ci_flags & CPUF_RUNNING))
- continue;
- pq = &pmap_tlb_shootdown_q[ci->ci_cpuid];
- __cpu_simple_lock(&pq->pq_slock);
-
- /*
- * If there's a global flush already queued, or a
- * non-global flush, and this pte doesn't have the G
- * bit set, don't bother.
- */
- if (pq->pq_flushg > 0 ||
- (pq->pq_flushu > 0 && (pte & pmap_pg_g) == 0)) {
- __cpu_simple_unlock(&pq->pq_slock);
- continue;
- }
-
-#ifdef I386_CPU
- /*
- * i386 CPUs can't invalidate a single VA, only
- * flush the entire TLB, so don't bother allocating
- * jobs for them -- just queue a `flushu'.
- *
- * XXX note that this can be executed for non-i386
- * when called * early (before identifycpu() has set
- * cpu_class)
- */
- if (cpu_class == CPUCLASS_386) {
- pq->pq_flushu++;
- *cpumaskp |= 1U << ci->ci_cpuid;
- __cpu_simple_unlock(&pq->pq_slock);
- continue;
- }
-#endif
-
- pj = pmap_tlb_shootdown_job_get(pq);
- pq->pq_pte |= pte;
- if (pj == NULL) {
- /*
- * Couldn't allocate a job entry.
- * Kill it now for this CPU, unless the failure
- * was due to too many pending flushes; otherwise,
- * tell other cpus to kill everything..
- */
- if (ci == self && pq->pq_count < PMAP_TLB_MAXJOBS) {
- pmap_update_pg(va);
- __cpu_simple_unlock(&pq->pq_slock);
- continue;
- } else {
- if (pq->pq_pte & pmap_pg_g)
- pq->pq_flushg++;
- else
- pq->pq_flushu++;
- /*
- * Since we've nailed the whole thing,
- * drain the job entries pending for that
- * processor.
- */
- pmap_tlb_shootdown_q_drain(pq);
- *cpumaskp |= 1U << ci->ci_cpuid;
- }
- } else {
- pj->pj_pmap = pmap;
- pj->pj_va = va;
- pj->pj_pte = pte;
- TAILQ_INSERT_TAIL(&pq->pq_head, pj, pj_list);
- *cpumaskp |= 1U << ci->ci_cpuid;
- }
- __cpu_simple_unlock(&pq->pq_slock);
- }
- splx(s);
-}
-
-/*
- * pmap_do_tlb_shootdown_checktlbstate: check and update ci_tlbstate.
- *
- * => called at splipi.
- * => return TRUE if we need to maintain user tlbs.
- */
-static __inline boolean_t
-pmap_do_tlb_shootdown_checktlbstate(struct cpu_info *ci)
-{
-
- KASSERT(ci == curcpu());
-
- if (ci->ci_tlbstate == TLBSTATE_LAZY) {
- KASSERT(ci->ci_pmap != pmap_kernel());
- /*
- * mostly KASSERT(ci->ci_pmap->pm_cpus & (1U << ci->ci_cpuid));
- */
-
- /*
- * we no longer want tlb shootdown ipis for this pmap.
- * mark the pmap no longer in use by this processor.
- */
-
- x86_atomic_clearbits_l(&ci->ci_pmap->pm_cpus,
- 1U << ci->ci_cpuid);
- ci->ci_tlbstate = TLBSTATE_STALE;
- }
-
- if (ci->ci_tlbstate == TLBSTATE_STALE)
- return FALSE;
-
- return TRUE;
-}
-
-/*
- * pmap_do_tlb_shootdown:
- *
- * Process pending TLB shootdown operations for this processor.
- */
-void
-pmap_do_tlb_shootdown(struct cpu_info *self)
-{
- u_long cpu_id = self->ci_cpuid;
- struct pmap_tlb_shootdown_q *pq = &pmap_tlb_shootdown_q[cpu_id];
- struct pmap_tlb_shootdown_job *pj;
- int s;
-#ifdef MULTIPROCESSOR
- struct cpu_info *ci;
- CPU_INFO_ITERATOR cii;
-#endif
- KASSERT(self == curcpu());
-
- s = splipi();
-
- __cpu_simple_lock(&pq->pq_slock);
-
- if (pq->pq_flushg) {
- COUNT(flushg);
- pmap_do_tlb_shootdown_checktlbstate(self);
- tlbflushg();
- pq->pq_flushg = 0;
- pq->pq_flushu = 0;
- pmap_tlb_shootdown_q_drain(pq);
- } else {
- /*
- * TLB flushes for PTEs with PG_G set may be in the queue
- * after a flushu, they need to be dealt with.
- */
- if (pq->pq_flushu) {
- COUNT(flushu);
- pmap_do_tlb_shootdown_checktlbstate(self);
- tlbflush();
- }
- while ((pj = TAILQ_FIRST(&pq->pq_head)) != NULL) {
- TAILQ_REMOVE(&pq->pq_head, pj, pj_list);
-
- if ((pj->pj_pte & pmap_pg_g) ||
- pj->pj_pmap == pmap_kernel()) {
- pmap_update_pg(pj->pj_va);
- } else if (!pq->pq_flushu &&
- pj->pj_pmap == self->ci_pmap) {
- if (pmap_do_tlb_shootdown_checktlbstate(self))
- pmap_update_pg(pj->pj_va);
- }
-
- pmap_tlb_shootdown_job_put(pq, pj);
- }
-
- pq->pq_flushu = pq->pq_pte = 0;
- }
-
-#ifdef MULTIPROCESSOR
- for (CPU_INFO_FOREACH(cii, ci))
- x86_atomic_clearbits_l(&ci->ci_tlb_ipi_mask,
- (1U << cpu_id));
-#endif
- __cpu_simple_unlock(&pq->pq_slock);
-
- splx(s);
-}
-
-
-/*
- * pmap_tlb_shootdown_q_drain:
- *
- * Drain a processor's TLB shootdown queue. We do not perform
- * the shootdown operations. This is merely a convenience
- * function.
- *
- * Note: We expect the queue to be locked.
- */
-void
-pmap_tlb_shootdown_q_drain(pq)
- struct pmap_tlb_shootdown_q *pq;
-{
- struct pmap_tlb_shootdown_job *pj;
-
- while ((pj = TAILQ_FIRST(&pq->pq_head)) != NULL) {
- TAILQ_REMOVE(&pq->pq_head, pj, pj_list);
- pmap_tlb_shootdown_job_put(pq, pj);
- }
- pq->pq_pte = 0;
-}
-
-/*
- * pmap_tlb_shootdown_job_get:
- *
- * Get a TLB shootdown job queue entry. This places a limit on
- * the number of outstanding jobs a processor may have.
- *
- * Note: We expect the queue to be locked.
- */
-struct pmap_tlb_shootdown_job *
-pmap_tlb_shootdown_job_get(pq)
- struct pmap_tlb_shootdown_q *pq;
-{
- struct pmap_tlb_shootdown_job *pj;
-
- if (pq->pq_count >= PMAP_TLB_MAXJOBS)
- return (NULL);
-
- __cpu_simple_lock(&pmap_tlb_shootdown_job_lock);
- if (pj_free == NULL) {
- __cpu_simple_unlock(&pmap_tlb_shootdown_job_lock);
- return NULL;
- }
- pj = &pj_free->pja_job;
- pj_free =
- (union pmap_tlb_shootdown_job_al *)pj_free->pja_job.pj_nextfree;
- __cpu_simple_unlock(&pmap_tlb_shootdown_job_lock);
-
- pq->pq_count++;
- return (pj);
-}
-
-/*
- * pmap_tlb_shootdown_job_put:
- *
- * Put a TLB shootdown job queue entry onto the free list.
- *
- * Note: We expect the queue to be locked.
- */
-void
-pmap_tlb_shootdown_job_put(pq, pj)
- struct pmap_tlb_shootdown_q *pq;
- struct pmap_tlb_shootdown_job *pj;
-{
-
-#ifdef DIAGNOSTIC
- if (pq->pq_count == 0)
- panic("pmap_tlb_shootdown_job_put: queue length inconsistency");
-#endif
- __cpu_simple_lock(&pmap_tlb_shootdown_job_lock);
- pj->pj_nextfree = &pj_free->pja_job;
- pj_free = (union pmap_tlb_shootdown_job_al *)pj;
- __cpu_simple_unlock(&pmap_tlb_shootdown_job_lock);
-
- pq->pq_count--;
-}
diff -r 64cd054aa143 -r 0255f48b757f
netbsd-2.0-xen-sparse/sys/arch/xen/i386/sys_machdep.c
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/i386/sys_machdep.c Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,550 +0,0 @@
-/* $NetBSD: sys_machdep.c,v 1.1.2.1 2004/05/22 15:59:21 he Exp $ */
-/* NetBSD: sys_machdep.c,v 1.70 2003/10/27 14:11:47 junyoung Exp */
-
-/*-
- * Copyright (c) 1998 The NetBSD Foundation, Inc.
- * All rights reserved.
- *
- * This code is derived from software contributed to The NetBSD Foundation
- * by Charles M. Hannum.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the NetBSD
- * Foundation, Inc. and its contributors.
- * 4. Neither the name of The NetBSD Foundation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: sys_machdep.c,v 1.1.2.1 2004/05/22 15:59:21 he Exp
$");
-
-#include "opt_compat_netbsd.h"
-#include "opt_mtrr.h"
-#include "opt_perfctrs.h"
-#include "opt_user_ldt.h"
-#include "opt_vm86.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/ioctl.h>
-#include <sys/file.h>
-#include <sys/time.h>
-#include <sys/proc.h>
-#include <sys/user.h>
-#include <sys/uio.h>
-#include <sys/kernel.h>
-#include <sys/buf.h>
-#include <sys/signal.h>
-#include <sys/malloc.h>
-
-#include <sys/mount.h>
-#include <sys/sa.h>
-#include <sys/syscallargs.h>
-
-#include <uvm/uvm_extern.h>
-
-#include <machine/cpu.h>
-#include <machine/cpufunc.h>
-#include <machine/gdt.h>
-#include <machine/psl.h>
-#include <machine/reg.h>
-#include <machine/sysarch.h>
-#include <machine/mtrr.h>
-
-#ifdef VM86
-#include <machine/vm86.h>
-#endif
-
-#ifdef PERFCTRS
-#include <machine/pmc.h>
-#endif
-
-extern struct vm_map *kernel_map;
-
-int i386_iopl(struct lwp *, void *, register_t *);
-int i386_get_ioperm(struct lwp *, void *, register_t *);
-int i386_set_ioperm(struct lwp *, void *, register_t *);
-int i386_get_mtrr(struct lwp *, void *, register_t *);
-int i386_set_mtrr(struct lwp *, void *, register_t *);
-
-#ifdef USER_LDT
-
-#ifdef LDT_DEBUG
-static void i386_print_ldt(int, const struct segment_descriptor *);
-
-static void
-i386_print_ldt(i, d)
- int i;
- const struct segment_descriptor *d;
-{
- printf("[%d] lolimit=0x%x, lobase=0x%x, type=%u, dpl=%u, p=%u, "
- "hilimit=0x%x, xx=%x, def32=%u, gran=%u, hibase=0x%x\n",
- i, d->sd_lolimit, d->sd_lobase, d->sd_type, d->sd_dpl, d->sd_p,
- d->sd_hilimit, d->sd_xx, d->sd_def32, d->sd_gran, d->sd_hibase);
-}
-#endif
-
-int
-i386_get_ldt(l, args, retval)
- struct lwp *l;
- void *args;
- register_t *retval;
-{
- int error;
- struct proc *p = l->l_proc;
- pmap_t pmap = p->p_vmspace->vm_map.pmap;
- int nldt, num;
- union descriptor *lp, *cp;
- struct i386_get_ldt_args ua;
-
- if ((error = copyin(args, &ua, sizeof(ua))) != 0)
- return (error);
-
-#ifdef LDT_DEBUG
- printf("i386_get_ldt: start=%d num=%d descs=%p\n", ua.start,
- ua.num, ua.desc);
-#endif
-
- if (ua.start < 0 || ua.num < 0 || ua.start > 8192 || ua.num > 8192 ||
- ua.start + ua.num > 8192)
- return (EINVAL);
-
- cp = malloc(ua.num * sizeof(union descriptor), M_TEMP, M_WAITOK);
- if (cp == NULL)
- return ENOMEM;
-
- simple_lock(&pmap->pm_lock);
-
- if (pmap->pm_flags & PMF_USER_LDT) {
- nldt = pmap->pm_ldt_len;
- lp = pmap->pm_ldt;
- } else {
- nldt = NLDT;
- lp = ldt;
- }
-
- if (ua.start > nldt) {
- simple_unlock(&pmap->pm_lock);
- free(cp, M_TEMP);
- return (EINVAL);
- }
-
- lp += ua.start;
- num = min(ua.num, nldt - ua.start);
-#ifdef LDT_DEBUG
- {
- int i;
- for (i = 0; i < num; i++)
- i386_print_ldt(i, &lp[i].sd);
- }
-#endif
-
- memcpy(cp, lp, num * sizeof(union descriptor));
- simple_unlock(&pmap->pm_lock);
-
- error = copyout(cp, ua.desc, num * sizeof(union descriptor));
- if (error == 0)
- *retval = num;
-
- free(cp, M_TEMP);
- return (error);
-}
-
-int
-i386_set_ldt(l, args, retval)
- struct lwp *l;
- void *args;
- register_t *retval;
-{
- int error, i, n;
- struct proc *p = l->l_proc;
- struct pcb *pcb = &l->l_addr->u_pcb;
- pmap_t pmap = p->p_vmspace->vm_map.pmap;
- struct i386_set_ldt_args ua;
- union descriptor *descv;
- size_t old_len, new_len, ldt_len;
- union descriptor *old_ldt, *new_ldt;
-
- if ((error = copyin(args, &ua, sizeof(ua))) != 0)
- return (error);
-
- if (ua.start < 0 || ua.num < 0 || ua.start > 8192 || ua.num > 8192 ||
- ua.start + ua.num > 8192)
- return (EINVAL);
-
- descv = malloc(sizeof (*descv) * ua.num, M_TEMP, M_NOWAIT);
- if (descv == NULL)
- return (ENOMEM);
-
- if ((error = copyin(ua.desc, descv, sizeof (*descv) * ua.num)) != 0)
- goto out;
-
- /* Check descriptors for access violations. */
- for (i = 0; i < ua.num; i++) {
- union descriptor *desc = &descv[i];
-
- switch (desc->sd.sd_type) {
- case SDT_SYSNULL:
- desc->sd.sd_p = 0;
- break;
- case SDT_SYS286CGT:
- case SDT_SYS386CGT:
- /*
- * Only allow call gates targeting a segment
- * in the LDT or a user segment in the fixed
- * part of the gdt. Segments in the LDT are
- * constrained (below) to be user segments.
- */
- if (desc->gd.gd_p != 0 &&
- !ISLDT(desc->gd.gd_selector) &&
- ((IDXSEL(desc->gd.gd_selector) >= NGDT) ||
- (gdt[IDXSEL(desc->gd.gd_selector)].sd.sd_dpl !=
- SEL_UPL))) {
- error = EACCES;
- goto out;
- }
- break;
- case SDT_MEMEC:
- case SDT_MEMEAC:
- case SDT_MEMERC:
- case SDT_MEMERAC:
- /* Must be "present" if executable and conforming. */
- if (desc->sd.sd_p == 0) {
- error = EACCES;
- goto out;
- }
- break;
- case SDT_MEMRO:
- case SDT_MEMROA:
- case SDT_MEMRW:
- case SDT_MEMRWA:
- case SDT_MEMROD:
- case SDT_MEMRODA:
- case SDT_MEMRWD:
- case SDT_MEMRWDA:
- case SDT_MEME:
- case SDT_MEMEA:
- case SDT_MEMER:
- case SDT_MEMERA:
- break;
- default:
- /*
- * Make sure that unknown descriptor types are
- * not marked present.
- */
- if (desc->sd.sd_p != 0) {
- error = EACCES;
- goto out;
- }
- break;
- }
-
- if (desc->sd.sd_p != 0) {
- /* Only user (ring-3) descriptors may be present. */
- if (desc->sd.sd_dpl != SEL_UPL) {
- error = EACCES;
- goto out;
- }
- }
- }
-
- /* allocate user ldt */
- simple_lock(&pmap->pm_lock);
- if (pmap->pm_ldt == 0 || (ua.start + ua.num) > pmap->pm_ldt_len) {
- if (pmap->pm_flags & PMF_USER_LDT)
- ldt_len = pmap->pm_ldt_len;
- else
- ldt_len = 512;
- while ((ua.start + ua.num) > ldt_len)
- ldt_len *= 2;
- new_len = ldt_len * sizeof(union descriptor);
-
- simple_unlock(&pmap->pm_lock);
- new_ldt = (union descriptor *)uvm_km_alloc(kernel_map,
- new_len);
- simple_lock(&pmap->pm_lock);
-
- if (pmap->pm_ldt != NULL && ldt_len <= pmap->pm_ldt_len) {
- /*
- * Another thread (re)allocated the LDT to
- * sufficient size while we were blocked in
- * uvm_km_alloc. Oh well. The new entries
- * will quite probably not be right, but
- * hey.. not our problem if user applications
- * have race conditions like that.
- */
- uvm_km_free(kernel_map, (vaddr_t)new_ldt, new_len);
- goto copy;
- }
-
- old_ldt = pmap->pm_ldt;
-
- if (old_ldt != NULL) {
- old_len = pmap->pm_ldt_len * sizeof(union descriptor);
- } else {
- old_len = NLDT * sizeof(union descriptor);
- old_ldt = ldt;
- }
-
- memcpy(new_ldt, old_ldt, old_len);
- memset((caddr_t)new_ldt + old_len, 0, new_len - old_len);
-
- if (old_ldt != ldt)
- uvm_km_free(kernel_map, (vaddr_t)old_ldt, old_len);
-
- pmap->pm_ldt = new_ldt;
- pmap->pm_ldt_len = ldt_len;
-
- if (pmap->pm_flags & PMF_USER_LDT)
- ldt_free(pmap);
- else
- pmap->pm_flags |= PMF_USER_LDT;
- ldt_alloc(pmap, new_ldt, new_len);
- pcb->pcb_ldt_sel = pmap->pm_ldt_sel;
- if (pcb == curpcb)
- lldt(pcb->pcb_ldt_sel);
-
- }
-copy:
- /* Now actually replace the descriptors. */
- for (i = 0, n = ua.start; i < ua.num; i++, n++)
- pmap->pm_ldt[n] = descv[i];
-
- simple_unlock(&pmap->pm_lock);
-
- *retval = ua.start;
-
-out:
- free(descv, M_TEMP);
- return (error);
-}
-#endif /* USER_LDT */
-
-int
-i386_iopl(l, args, retval)
- struct lwp *l;
- void *args;
- register_t *retval;
-{
- int error;
- struct proc *p = l->l_proc;
- struct pcb *pcb = &l->l_addr->u_pcb;
- struct i386_iopl_args ua;
- dom0_op_t op;
-
- if ((xen_start_info.flags & SIF_PRIVILEGED) == 0)
- return EPERM;
-
- if (securelevel > 1)
- return EPERM;
-
- if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
- return error;
-
- if ((error = copyin(args, &ua, sizeof(ua))) != 0)
- return error;
-
- pcb->pcb_tss.tss_ioopt &= ~SEL_RPL;
- if (ua.iopl)
- pcb->pcb_tss.tss_ioopt |= SEL_UPL; /* i/o pl */
- else
- pcb->pcb_tss.tss_ioopt |= SEL_KPL; /* i/o pl */
-
- /* Force the change at ring 0. */
- op.cmd = DOM0_IOPL;
- op.u.iopl.domain = DOMID_SELF;
- op.u.iopl.iopl = pcb->pcb_tss.tss_ioopt & SEL_RPL; /* i/o pl */
- HYPERVISOR_dom0_op(&op);
-
- return 0;
-}
-
-int
-i386_get_ioperm(l, args, retval)
- struct lwp *l;
- void *args;
- register_t *retval;
-{
- int error;
- struct pcb *pcb = &l->l_addr->u_pcb;
- struct i386_get_ioperm_args ua;
-
- if ((error = copyin(args, &ua, sizeof(ua))) != 0)
- return (error);
-
- return copyout(pcb->pcb_iomap, ua.iomap, sizeof(pcb->pcb_iomap));
-}
-
-int
-i386_set_ioperm(l, args, retval)
- struct lwp *l;
- void *args;
- register_t *retval;
-{
- int error;
- struct proc *p = l->l_proc;
- struct pcb *pcb = &l->l_addr->u_pcb;
- struct i386_set_ioperm_args ua;
-
- if (securelevel > 1)
- return EPERM;
-
- if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
- return error;
-
- if ((error = copyin(args, &ua, sizeof(ua))) != 0)
- return (error);
-
- return copyin(ua.iomap, pcb->pcb_iomap, sizeof(pcb->pcb_iomap));
-}
-
-#ifdef MTRR
-int
-i386_get_mtrr(struct lwp *l, void *args, register_t *retval)
-{
- struct i386_get_mtrr_args ua;
- int error, n;
- struct proc *p = l->l_proc;
-
- if (mtrr_funcs == NULL)
- return ENOSYS;
-
- error = copyin(args, &ua, sizeof ua);
- if (error != 0)
- return error;
-
- error = copyin(ua.n, &n, sizeof n);
- if (error != 0)
- return error;
-
- error = mtrr_get(ua.mtrrp, &n, p, MTRR_GETSET_USER);
-
- copyout(&n, ua.n, sizeof (int));
-
- return error;
-}
-
-int
-i386_set_mtrr(struct lwp *l, void *args, register_t *retval)
-{
- int error, n;
- struct i386_set_mtrr_args ua;
- struct proc *p = l->l_proc;
-
- if (mtrr_funcs == NULL)
- return ENOSYS;
-
- error = suser(p->p_ucred, &p->p_acflag);
- if (error != 0)
- return error;
-
- error = copyin(args, &ua, sizeof ua);
- if (error != 0)
- return error;
-
- error = copyin(ua.n, &n, sizeof n);
- if (error != 0)
- return error;
-
- error = mtrr_set(ua.mtrrp, &n, p, MTRR_GETSET_USER);
- if (n != 0)
- mtrr_commit();
-
- copyout(&n, ua.n, sizeof n);
-
- return error;
-}
-#endif
-
-int
-sys_sysarch(struct lwp *l, void *v, register_t *retval)
-{
- struct sys_sysarch_args /* {
- syscallarg(int) op;
- syscallarg(void *) parms;
- } */ *uap = v;
- int error = 0;
-
- switch(SCARG(uap, op)) {
-#ifdef USER_LDT
- case I386_GET_LDT:
- error = i386_get_ldt(l, SCARG(uap, parms), retval);
- break;
-
- case I386_SET_LDT:
- error = i386_set_ldt(l, SCARG(uap, parms), retval);
- break;
-#endif
-
- case I386_IOPL:
- error = i386_iopl(l, SCARG(uap, parms), retval);
- break;
-
- case I386_GET_IOPERM:
- error = i386_get_ioperm(l, SCARG(uap, parms), retval);
- break;
-
- case I386_SET_IOPERM:
- error = i386_set_ioperm(l, SCARG(uap, parms), retval);
- break;
-
-#ifdef VM86
- case I386_VM86:
- error = i386_vm86(l, SCARG(uap, parms), retval);
- break;
-#ifdef COMPAT_16
- case I386_OLD_VM86:
- error = compat_16_i386_vm86(l, SCARG(uap, parms), retval);
- break;
-#endif
-#endif
-#ifdef MTRR
- case I386_GET_MTRR:
- error = i386_get_mtrr(l, SCARG(uap, parms), retval);
- break;
- case I386_SET_MTRR:
- error = i386_set_mtrr(l, SCARG(uap, parms), retval);
- break;
-#endif
-#ifdef PERFCTRS
- case I386_PMC_INFO:
- error = pmc_info(l, SCARG(uap, parms), retval);
- break;
-
- case I386_PMC_STARTSTOP:
- error = pmc_startstop(l, SCARG(uap, parms), retval);
- break;
-
- case I386_PMC_READ:
- error = pmc_read(l, SCARG(uap, parms), retval);
- break;
-#endif
-
- default:
- error = EINVAL;
- break;
- }
- return (error);
-}
diff -r 64cd054aa143 -r 0255f48b757f
netbsd-2.0-xen-sparse/sys/arch/xen/i386/vector.S
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/i386/vector.S Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,1588 +0,0 @@
-/* $NetBSD: vector.S,v 1.1.2.1 2004/05/22 15:57:16 he Exp $ */
-/* NetBSD: 1.13 2004/03/11 11:39:26 yamt Exp */
-
-/*
- * Copyright 2002 (c) Wasabi Systems, Inc.
- * All rights reserved.
- *
- * Written by Frank van der Linden for Wasabi Systems, Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed for the NetBSD Project by
- * Wasabi Systems, Inc.
- * 4. The name of Wasabi Systems, Inc. may not be used to endorse
- * or promote products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*-
- * Copyright (c) 1998 The NetBSD Foundation, Inc.
- * All rights reserved.
- *
- * This code is derived from software contributed to The NetBSD Foundation
- * by Charles M. Hannum.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the NetBSD
- * Foundation, Inc. and its contributors.
- * 4. Neither the name of The NetBSD Foundation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "opt_ddb.h"
-#include "opt_multiprocessor.h"
-#include "opt_ipkdb.h"
-#include "opt_vm86.h"
-#include "opt_xen.h"
-
-#ifndef XEN
-#include <machine/i8259.h>
-#endif
-#include <machine/i82093reg.h>
-#include <machine/i82489reg.h>
-#include <machine/asm.h>
-#include <machine/frameasm.h>
-#include <machine/segments.h>
-#include <machine/trap.h>
-#include <machine/intr.h>
-#include <machine/psl.h>
-#ifdef XEN
-#include <machine/xen.h>
-#endif
-
-#include <net/netisr.h>
-
-#include "ioapic.h"
-#include "lapic.h"
-
-#include "npx.h"
-#include "assym.h"
-
-#define __HAVE_GENERIC_SOFT_INTERRUPTS /* XXX */
-
-
-/*
- * Macros for interrupt entry, call to handler, and exit.
- *
- * XXX
- * The interrupt frame is set up to look like a trap frame. This may be a
- * waste. The only handler which needs a frame is the clock handler, and it
- * only needs a few bits. Xdoreti() needs a trap frame for handling ASTs, but
- * it could easily convert the frame on demand.
- *
- * The direct costs of setting up a trap frame are two pushl's (error code and
- * trap number), an addl to get rid of these, and pushing and popping the
- * callee-saved registers %esi, %edi, %ebx, and %ebp twice.
- *
- * If the interrupt frame is made more flexible, INTR can push %eax first and
- * decide the ipending case with less overhead, e.g., by avoiding loading the
- * segment registers.
- *
- */
-
-#define MY_COUNT _C_LABEL(uvmexp)
-
-/* XXX See comment in locore.s */
-#ifdef __ELF__
-#define XINTR(name,num) Xintr_/**/name/**/num
-#define XSTRAY(name,num) Xstray_/**/name/**/num
-#define XINTR_TSS(irq_num) Xintr_tss_ ## irq_num
-#else
-#define XINTR(name,num) _Xintr_/**/name/**/num
-#define XSTRAY(name,num) _Xstray_/**/name/**/num
-#define XINTR_TSS(irq_num) Xintr_tss_/**/irq_num
-#endif
-
-/*
- * Store address of TSS in %eax, given a selector in %eax.
- * Clobbers %eax, %ecx, %edx, but that's ok for its usage.
- * This is a bit complicated, but it's done to make as few
- * assumptions as possible about the validity of the environment.
- * The GDT and the current and previous TSS are known to be OK,
- * otherwise we would not be here. The only other thing that needs
- * to be OK is the cpu_info structure for the current CPU.
- */
-#define GET_TSS \
- andl $0xfff8,%eax ;\
- addl CPUVAR(GDT),%eax ;\
- movl 2(%eax),%edx ;\
- andl $0xffffff,%edx ;\
- movzbl 7(%eax),%eax ;\
- shl $24,%eax ;\
- orl %edx,%eax
-
-#if NLAPIC > 0
-#ifdef MULTIPROCESSOR
-IDTVEC(recurse_lapic_ipi)
- pushfl
- pushl %cs
- pushl %esi
- pushl $0
- pushl $T_ASTFLT
- INTRENTRY
-IDTVEC(resume_lapic_ipi)
- cli
- jmp 1f
-IDTVEC(intr_lapic_ipi)
- pushl $0
- pushl $T_ASTFLT
- INTRENTRY
- movl $0,_C_LABEL(local_apic)+LAPIC_EOI
- movl CPUVAR(ILEVEL),%ebx
- cmpl $IPL_IPI,%ebx
- jae 2f
-1:
- incl CPUVAR(IDEPTH)
- movl $IPL_IPI,CPUVAR(ILEVEL)
- sti
- pushl %ebx
- call _C_LABEL(x86_ipi_handler)
- jmp _C_LABEL(Xdoreti)
-2:
- orl $(1 << LIR_IPI),CPUVAR(IPENDING)
- sti
- INTRFASTEXIT
-
-#if defined(DDB)
-IDTVEC(intrddbipi)
-1:
- str %ax
- GET_TSS
- movzwl (%eax),%eax
- GET_TSS
- pushl %eax
- movl $0xff,_C_LABEL(lapic_tpr)
- movl $0,_C_LABEL(local_apic)+LAPIC_EOI
- sti
- call _C_LABEL(ddb_ipi_tss)
- addl $4,%esp
- movl $0,_C_LABEL(lapic_tpr)
- iret
- jmp 1b
-#endif /* DDB */
-#endif /* MULTIPROCESSOR */
-
- /*
- * Interrupt from the local APIC timer.
- */
-IDTVEC(recurse_lapic_ltimer)
- pushfl
- pushl %cs
- pushl %esi
- pushl $0
- pushl $T_ASTFLT
- INTRENTRY
-IDTVEC(resume_lapic_ltimer)
- cli
- jmp 1f
-IDTVEC(intr_lapic_ltimer)
- pushl $0
- pushl $T_ASTFLT
- INTRENTRY
- movl $0,_C_LABEL(local_apic)+LAPIC_EOI
- movl CPUVAR(ILEVEL),%ebx
- cmpl $IPL_CLOCK,%ebx
- jae 2f
-1:
- incl CPUVAR(IDEPTH)
- movl $IPL_CLOCK,CPUVAR(ILEVEL)
- sti
- pushl %ebx
- pushl $0
- call _C_LABEL(lapic_clockintr)
- addl $4,%esp
- jmp _C_LABEL(Xdoreti)
-2:
- orl $(1 << LIR_TIMER),CPUVAR(IPENDING)
- sti
- INTRFASTEXIT
-#endif /* NLAPIC > 0 */
-
-#ifdef MULTIPROCESSOR
-#define LOCK_KERNEL pushl %esp ; call _C_LABEL(x86_intlock) ; addl $4,%esp
-#define UNLOCK_KERNEL pushl %esp ; call _C_LABEL(x86_intunlock) ; addl $4,%esp
-#else
-#define LOCK_KERNEL
-#define UNLOCK_KERNEL
-#endif
-
-#define voidop(num)
-
-
-#define XENINTRSTUB(name, num, early_ack, late_ack, mask, unmask,
level_mask) \
-IDTVEC(recurse_/**/name/**/num)
;\
- pushfl ;\
- pushl %cs ;\
- pushl %esi ;\
- subl $4,%esp ;\
- pushl $T_ASTFLT /* trap # for doing ASTs */ ;\
- INTRENTRY ;\
-IDTVEC(resume_/**/name/**/num) \
- /*movl %esp,%ecx*/ ;\
- movl $IREENT_MAGIC,TF_ERR(%esp) ;\
- movl %ebx,%esi ;\
- movl CPUVAR(ISOURCES) + (num) * 4, %ebp ;\
- movl IS_MAXLEVEL(%ebp),%ebx ;\
- jmp 1f ;\
-IDTVEC(intr_/**/name/**/num) ;\
- pushl $0 /* dummy error code */ ;\
- pushl $T_ASTFLT /* trap # for doing ASTs */ ;\
- INTRENTRY ;\
- /*movl %esp,%ecx*/ ;\
- movl CPUVAR(ISOURCES) + (num) * 4, %ebp ;\
- mask(num) /* mask it in hardware */ ;\
- early_ack(num) /* and allow other intrs */ ;\
- testl %ebp,%ebp ;\
- jz 9f /* stray */ ;\
- movl IS_MAXLEVEL(%ebp),%ebx ;\
- movl CPUVAR(ILEVEL),%esi ;\
- cmpl %ebx,%esi ;\
- jae 10f /* currently masked; hold it */ ;\
- incl MY_COUNT+V_INTR /* statistical info */ ;\
- addl $1,IS_EVCNTLO(%ebp) /* inc event counter */ ;\
- adcl $0,IS_EVCNTHI(%ebp) ;\
-1: \
- pushl %esi ;\
- movl %ebx,CPUVAR(ILEVEL) ;\
- STI(%eax) ;\
- incl CPUVAR(IDEPTH) ;\
- movl IS_HANDLERS(%ebp),%ebx ;\
- LOCK_KERNEL ;\
-6: \
- movl IH_LEVEL(%ebx),%edi ;\
- cmpl %esi,%edi ;\
- jle 7f ;\
- pushl %esp ;\
- pushl IH_ARG(%ebx) ;\
- movl %edi,CPUVAR(ILEVEL) ;\
- call *IH_FUN(%ebx) /* call it */ ;\
- addl $8,%esp /* toss the arg */ ;\
- movl IH_NEXT(%ebx),%ebx /* next handler in chain */ ;\
- testl %ebx,%ebx ;\
- jnz 6b ;\
-5: \
- UNLOCK_KERNEL ;\
- CLI(%eax) ;\
- unmask(num) /* unmask it in hardware */ ;\
- late_ack(num) ;\
- STI(%eax) ;\
- jmp _C_LABEL(Xdoreti) /* lower spl and do ASTs */ ;\
-7: \
- UNLOCK_KERNEL ;\
- CLI(%eax) ;\
- orl $(1 << num),CPUVAR(IPENDING) ;\
- level_mask(num) ;\
- late_ack(num) ;\
- STI(%eax) ;\
- jmp _C_LABEL(Xdoreti) /* lower spl and do ASTs */ ;\
-10: \
- CLI(%eax) ;\
- orl $(1 << num),CPUVAR(IPENDING) ;\
- level_mask(num) ;\
-6: ; \
- late_ack(num) ;\
- STIC(%eax) ;\
- jz 4f ; \
- call _C_LABEL(stipending) ; \
- testl %eax,%eax ; \
- jnz 1b ; \
-4: INTRFASTEXIT ;\
-9: \
- unmask(num) ;\
- jmp 6b
-
-#define hypervisor_asm_unmask(num) \
- movl irq_to_evtchn + (num) * 4,%ecx ;\
- movl HYPERVISOR_shared_info,%eax ;\
- lock ;\
- btrl %ecx,EVENTS_MASK(%eax)
-
-XENINTRSTUB(xenev,0,voidop,voidop,voidop,hypervisor_asm_unmask,voidop)
-XENINTRSTUB(xenev,1,voidop,voidop,voidop,hypervisor_asm_unmask,voidop)
-XENINTRSTUB(xenev,2,voidop,voidop,voidop,hypervisor_asm_unmask,voidop)
-XENINTRSTUB(xenev,3,voidop,voidop,voidop,hypervisor_asm_unmask,voidop)
-XENINTRSTUB(xenev,4,voidop,voidop,voidop,hypervisor_asm_unmask,voidop)
-XENINTRSTUB(xenev,5,voidop,voidop,voidop,hypervisor_asm_unmask,voidop)
-XENINTRSTUB(xenev,6,voidop,voidop,voidop,hypervisor_asm_unmask,voidop)
-XENINTRSTUB(xenev,7,voidop,voidop,voidop,hypervisor_asm_unmask,voidop)
-XENINTRSTUB(xenev,8,voidop,voidop,voidop,hypervisor_asm_unmask,voidop)
-XENINTRSTUB(xenev,9,voidop,voidop,voidop,hypervisor_asm_unmask,voidop)
-XENINTRSTUB(xenev,10,voidop,voidop,voidop,hypervisor_asm_unmask,voidop)
-XENINTRSTUB(xenev,11,voidop,voidop,voidop,hypervisor_asm_unmask,voidop)
-XENINTRSTUB(xenev,12,voidop,voidop,voidop,hypervisor_asm_unmask,voidop)
-XENINTRSTUB(xenev,13,voidop,voidop,voidop,hypervisor_asm_unmask,voidop)
-XENINTRSTUB(xenev,14,voidop,voidop,voidop,hypervisor_asm_unmask,voidop)
-XENINTRSTUB(xenev,15,voidop,voidop,voidop,hypervisor_asm_unmask,voidop)
-XENINTRSTUB(xenev,16,voidop,voidop,voidop,hypervisor_asm_unmask,voidop)
-XENINTRSTUB(xenev,17,voidop,voidop,voidop,hypervisor_asm_unmask,voidop)
-XENINTRSTUB(xenev,18,voidop,voidop,voidop,hypervisor_asm_unmask,voidop)
-XENINTRSTUB(xenev,19,voidop,voidop,voidop,hypervisor_asm_unmask,voidop)
-XENINTRSTUB(xenev,20,voidop,voidop,voidop,hypervisor_asm_unmask,voidop)
-XENINTRSTUB(xenev,21,voidop,voidop,voidop,hypervisor_asm_unmask,voidop)
-XENINTRSTUB(xenev,22,voidop,voidop,voidop,hypervisor_asm_unmask,voidop)
-XENINTRSTUB(xenev,23,voidop,voidop,voidop,hypervisor_asm_unmask,voidop)
-XENINTRSTUB(xenev,24,voidop,voidop,voidop,hypervisor_asm_unmask,voidop)
-XENINTRSTUB(xenev,25,voidop,voidop,voidop,hypervisor_asm_unmask,voidop)
-XENINTRSTUB(xenev,26,voidop,voidop,voidop,hypervisor_asm_unmask,voidop)
-XENINTRSTUB(xenev,27,voidop,voidop,voidop,hypervisor_asm_unmask,voidop)
-XENINTRSTUB(xenev,28,voidop,voidop,voidop,hypervisor_asm_unmask,voidop)
-XENINTRSTUB(xenev,29,voidop,voidop,voidop,hypervisor_asm_unmask,voidop)
-XENINTRSTUB(xenev,30,voidop,voidop,voidop,hypervisor_asm_unmask,voidop)
-XENINTRSTUB(xenev,31,voidop,voidop,voidop,hypervisor_asm_unmask,voidop)
-
-.globl _C_LABEL(xenev_stubs)
-_C_LABEL(xenev_stubs):
- .long _C_LABEL(Xintr_xenev0), _C_LABEL(Xrecurse_xenev0)
- .long _C_LABEL(Xresume_xenev0)
- .long _C_LABEL(Xintr_xenev1), _C_LABEL(Xrecurse_xenev1)
- .long _C_LABEL(Xresume_xenev1)
- .long _C_LABEL(Xintr_xenev2), _C_LABEL(Xrecurse_xenev2)
- .long _C_LABEL(Xresume_xenev2)
- .long _C_LABEL(Xintr_xenev3), _C_LABEL(Xrecurse_xenev3)
- .long _C_LABEL(Xresume_xenev3)
- .long _C_LABEL(Xintr_xenev4), _C_LABEL(Xrecurse_xenev4)
- .long _C_LABEL(Xresume_xenev4)
- .long _C_LABEL(Xintr_xenev5), _C_LABEL(Xrecurse_xenev5)
- .long _C_LABEL(Xresume_xenev5)
- .long _C_LABEL(Xintr_xenev6), _C_LABEL(Xrecurse_xenev6)
- .long _C_LABEL(Xresume_xenev6)
- .long _C_LABEL(Xintr_xenev7), _C_LABEL(Xrecurse_xenev7)
- .long _C_LABEL(Xresume_xenev7)
- .long _C_LABEL(Xintr_xenev8), _C_LABEL(Xrecurse_xenev8)
- .long _C_LABEL(Xresume_xenev8)
- .long _C_LABEL(Xintr_xenev9), _C_LABEL(Xrecurse_xenev9)
- .long _C_LABEL(Xresume_xenev9)
- .long _C_LABEL(Xintr_xenev10), _C_LABEL(Xrecurse_xenev10)
- .long _C_LABEL(Xresume_xenev10)
- .long _C_LABEL(Xintr_xenev11), _C_LABEL(Xrecurse_xenev11)
- .long _C_LABEL(Xresume_xenev11)
- .long _C_LABEL(Xintr_xenev12), _C_LABEL(Xrecurse_xenev12)
- .long _C_LABEL(Xresume_xenev12)
- .long _C_LABEL(Xintr_xenev13), _C_LABEL(Xrecurse_xenev13)
- .long _C_LABEL(Xresume_xenev13)
- .long _C_LABEL(Xintr_xenev14), _C_LABEL(Xrecurse_xenev14)
- .long _C_LABEL(Xresume_xenev14)
- .long _C_LABEL(Xintr_xenev15), _C_LABEL(Xrecurse_xenev15)
- .long _C_LABEL(Xresume_xenev15)
- .long _C_LABEL(Xintr_xenev16), _C_LABEL(Xrecurse_xenev16)
- .long _C_LABEL(Xresume_xenev16)
- .long _C_LABEL(Xintr_xenev17), _C_LABEL(Xrecurse_xenev17)
- .long _C_LABEL(Xresume_xenev17)
- .long _C_LABEL(Xintr_xenev18), _C_LABEL(Xrecurse_xenev18)
- .long _C_LABEL(Xresume_xenev18)
- .long _C_LABEL(Xintr_xenev19), _C_LABEL(Xrecurse_xenev19)
- .long _C_LABEL(Xresume_xenev19)
- .long _C_LABEL(Xintr_xenev20), _C_LABEL(Xrecurse_xenev20)
- .long _C_LABEL(Xresume_xenev20)
- .long _C_LABEL(Xintr_xenev21), _C_LABEL(Xrecurse_xenev21)
- .long _C_LABEL(Xresume_xenev21)
- .long _C_LABEL(Xintr_xenev22), _C_LABEL(Xrecurse_xenev22)
- .long _C_LABEL(Xresume_xenev22)
- .long _C_LABEL(Xintr_xenev23), _C_LABEL(Xrecurse_xenev23)
- .long _C_LABEL(Xresume_xenev23)
- .long _C_LABEL(Xintr_xenev24), _C_LABEL(Xrecurse_xenev24)
- .long _C_LABEL(Xresume_xenev24)
- .long _C_LABEL(Xintr_xenev25), _C_LABEL(Xrecurse_xenev25)
- .long _C_LABEL(Xresume_xenev25)
- .long _C_LABEL(Xintr_xenev26), _C_LABEL(Xrecurse_xenev26)
- .long _C_LABEL(Xresume_xenev26)
- .long _C_LABEL(Xintr_xenev27), _C_LABEL(Xrecurse_xenev27)
- .long _C_LABEL(Xresume_xenev27)
- .long _C_LABEL(Xintr_xenev28), _C_LABEL(Xrecurse_xenev28)
- .long _C_LABEL(Xresume_xenev28)
- .long _C_LABEL(Xintr_xenev29), _C_LABEL(Xrecurse_xenev29)
- .long _C_LABEL(Xresume_xenev29)
- .long _C_LABEL(Xintr_xenev30), _C_LABEL(Xrecurse_xenev30)
- .long _C_LABEL(Xresume_xenev30)
- .long _C_LABEL(Xintr_xenev31), _C_LABEL(Xrecurse_xenev31)
- .long _C_LABEL(Xresume_xenev31)
-
-#ifndef XEN
-/*
- * This macro defines the generic stub code. Its arguments modifiy it
- * for specific PICs.
- */
-
-#define INTRSTUB(name, num, early_ack, late_ack, mask, unmask,
level_mask) \
-IDTVEC(recurse_/**/name/**/num)
;\
- pushfl ;\
- pushl %cs ;\
- pushl %esi ;\
- subl $4,%esp ;\
- pushl $T_ASTFLT /* trap # for doing ASTs */ ;\
- INTRENTRY ;\
-IDTVEC(resume_/**/name/**/num) \
- movl $IREENT_MAGIC,TF_ERR(%esp) ;\
- movl %ebx,%esi ;\
- movl CPUVAR(ISOURCES) + (num) * 4, %ebp ;\
- movl IS_MAXLEVEL(%ebp),%ebx ;\
- jmp 1f ;\
-IDTVEC(intr_/**/name/**/num) ;\
- pushl $0 /* dummy error code */ ;\
- pushl $T_ASTFLT /* trap # for doing ASTs */ ;\
- INTRENTRY ;\
- movl CPUVAR(ISOURCES) + (num) * 4, %ebp ;\
- mask(num) /* mask it in hardware */ ;\
- early_ack(num) /* and allow other intrs */ ;\
- testl %ebp,%ebp ;\
- jz 9f /* stray */ ;\
- movl IS_MAXLEVEL(%ebp),%ebx ;\
- movl CPUVAR(ILEVEL),%esi ;\
- cmpl %ebx,%esi ;\
- jae 10f /* currently masked; hold it */ ;\
- incl MY_COUNT+V_INTR /* statistical info */ ;\
- addl $1,IS_EVCNTLO(%ebp) /* inc event counter */ ;\
- adcl $0,IS_EVCNTHI(%ebp) ;\
-1: \
- pushl %esi ;\
- movl %ebx,CPUVAR(ILEVEL) ;\
- STI(%eax) ;\
- incl CPUVAR(IDEPTH) ;\
- movl IS_HANDLERS(%ebp),%ebx ;\
- LOCK_KERNEL ;\
-6: \
- movl IH_LEVEL(%ebx),%edi ;\
- cmpl %esi,%edi ;\
- jle 7f ;\
- pushl IH_ARG(%ebx) ;\
- movl %edi,CPUVAR(ILEVEL) ;\
- call *IH_FUN(%ebx) /* call it */ ;\
- addl $4,%esp /* toss the arg */ ;\
- movl IH_NEXT(%ebx),%ebx /* next handler in chain */ ;\
- testl %ebx,%ebx ;\
- jnz 6b ;\
-5: \
- UNLOCK_KERNEL ;\
- CLI(%eax) ;\
- unmask(num) /* unmask it in hardware */ ;\
- late_ack(num) ;\
- STI(%eax) ;\
- jmp _C_LABEL(Xdoreti) /* lower spl and do ASTs */ ;\
-7: \
- UNLOCK_KERNEL ;\
- CLI(%eax) ;\
- orl $(1 << num),CPUVAR(IPENDING) ;\
- level_mask(num) ;\
- late_ack(num) ;\
- STI(%eax) ;\
- jmp _C_LABEL(Xdoreti) /* lower spl and do ASTs */ ;\
-10: \
- CLI(%eax) ;\
- orl $(1 << num),CPUVAR(IPENDING) ;\
- level_mask(num) ;\
- late_ack(num) ;\
- STIC(%eax) ;\
- jz 4f ; \
- call _C_LABEL(stipending) ; \
- testl %eax,%eax ; \
- jnz 1b ; \
-4: INTRFASTEXIT ;\
-9: \
- unmask(num) ;\
- late_ack(num) ;\
- STIC(%eax) ;\
- jz 4f ; \
- call _C_LABEL(stipending) ; \
- testl %eax,%eax ; \
- jnz 1b ; \
-4: INTRFASTEXIT
-
-#define ICUADDR IO_ICU1
-
-INTRSTUB(legacy,0,i8259_asm_ack1,voidop,i8259_asm_mask,i8259_asm_unmask,
- voidop)
-INTRSTUB(legacy,1,i8259_asm_ack1,voidop,i8259_asm_mask,i8259_asm_unmask,
- voidop)
-INTRSTUB(legacy,2,i8259_asm_ack1,voidop,i8259_asm_mask,i8259_asm_unmask,
- voidop)
-INTRSTUB(legacy,3,i8259_asm_ack1,voidop,i8259_asm_mask,i8259_asm_unmask,
- voidop)
-INTRSTUB(legacy,4,i8259_asm_ack1,voidop,i8259_asm_mask,i8259_asm_unmask,
- voidop)
-INTRSTUB(legacy,5,i8259_asm_ack1,voidop,i8259_asm_mask,i8259_asm_unmask,
- voidop)
-INTRSTUB(legacy,6,i8259_asm_ack1,voidop,i8259_asm_mask,i8259_asm_unmask,
- voidop)
-INTRSTUB(legacy,7,i8259_asm_ack1,voidop,i8259_asm_mask,i8259_asm_unmask,
- voidop)
-#undef ICUADDR
-#define ICUADDR IO_ICU2
-
-INTRSTUB(legacy,8,i8259_asm_ack2,voidop,i8259_asm_mask,i8259_asm_unmask,
- voidop)
-INTRSTUB(legacy,9,i8259_asm_ack2,voidop,i8259_asm_mask,i8259_asm_unmask,
- voidop)
-INTRSTUB(legacy,10,i8259_asm_ack2,voidop,i8259_asm_mask,i8259_asm_unmask,
- voidop)
-INTRSTUB(legacy,11,i8259_asm_ack2,voidop,i8259_asm_mask,i8259_asm_unmask,
- voidop)
-INTRSTUB(legacy,12,i8259_asm_ack2,voidop,i8259_asm_mask,i8259_asm_unmask,
- voidop)
-INTRSTUB(legacy,13,i8259_asm_ack2,voidop,i8259_asm_mask,i8259_asm_unmask,
- voidop)
-INTRSTUB(legacy,14,i8259_asm_ack2,voidop,i8259_asm_mask,i8259_asm_unmask,
- voidop)
-INTRSTUB(legacy,15,i8259_asm_ack2,voidop,i8259_asm_mask,i8259_asm_unmask,
- voidop)
-#endif
-
-#if NIOAPIC > 0
-
-INTRSTUB(ioapic_edge,0,voidop,ioapic_asm_ack,voidop,voidop,voidop)
-INTRSTUB(ioapic_edge,1,voidop,ioapic_asm_ack,voidop,voidop,voidop)
-INTRSTUB(ioapic_edge,2,voidop,ioapic_asm_ack,voidop,voidop,voidop)
-INTRSTUB(ioapic_edge,3,voidop,ioapic_asm_ack,voidop,voidop,voidop)
-INTRSTUB(ioapic_edge,4,voidop,ioapic_asm_ack,voidop,voidop,voidop)
-INTRSTUB(ioapic_edge,5,voidop,ioapic_asm_ack,voidop,voidop,voidop)
-INTRSTUB(ioapic_edge,6,voidop,ioapic_asm_ack,voidop,voidop,voidop)
-INTRSTUB(ioapic_edge,7,voidop,ioapic_asm_ack,voidop,voidop,voidop)
-INTRSTUB(ioapic_edge,8,voidop,ioapic_asm_ack,voidop,voidop,voidop)
-INTRSTUB(ioapic_edge,9,voidop,ioapic_asm_ack,voidop,voidop,voidop)
-INTRSTUB(ioapic_edge,10,voidop,ioapic_asm_ack,voidop,voidop,voidop)
-INTRSTUB(ioapic_edge,11,voidop,ioapic_asm_ack,voidop,voidop,voidop)
-INTRSTUB(ioapic_edge,12,voidop,ioapic_asm_ack,voidop,voidop,voidop)
-INTRSTUB(ioapic_edge,13,voidop,ioapic_asm_ack,voidop,voidop,voidop)
-INTRSTUB(ioapic_edge,14,voidop,ioapic_asm_ack,voidop,voidop,voidop)
-INTRSTUB(ioapic_edge,15,voidop,ioapic_asm_ack,voidop,voidop,voidop)
-INTRSTUB(ioapic_edge,16,voidop,ioapic_asm_ack,voidop,voidop,voidop)
-INTRSTUB(ioapic_edge,17,voidop,ioapic_asm_ack,voidop,voidop,voidop)
-INTRSTUB(ioapic_edge,18,voidop,ioapic_asm_ack,voidop,voidop,voidop)
-INTRSTUB(ioapic_edge,19,voidop,ioapic_asm_ack,voidop,voidop,voidop)
-INTRSTUB(ioapic_edge,20,voidop,ioapic_asm_ack,voidop,voidop,voidop)
-INTRSTUB(ioapic_edge,21,voidop,ioapic_asm_ack,voidop,voidop,voidop)
-INTRSTUB(ioapic_edge,22,voidop,ioapic_asm_ack,voidop,voidop,voidop)
-INTRSTUB(ioapic_edge,23,voidop,ioapic_asm_ack,voidop,voidop,voidop)
-INTRSTUB(ioapic_edge,24,voidop,ioapic_asm_ack,voidop,voidop,voidop)
-INTRSTUB(ioapic_edge,25,voidop,ioapic_asm_ack,voidop,voidop,voidop)
-INTRSTUB(ioapic_edge,26,voidop,ioapic_asm_ack,voidop,voidop,voidop)
-INTRSTUB(ioapic_edge,27,voidop,ioapic_asm_ack,voidop,voidop,voidop)
-INTRSTUB(ioapic_edge,28,voidop,ioapic_asm_ack,voidop,voidop,voidop)
-INTRSTUB(ioapic_edge,29,voidop,ioapic_asm_ack,voidop,voidop,voidop)
-INTRSTUB(ioapic_edge,30,voidop,ioapic_asm_ack,voidop,voidop,voidop)
-INTRSTUB(ioapic_edge,31,voidop,ioapic_asm_ack,voidop,voidop,voidop)
-
-INTRSTUB(ioapic_level,0,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask)
-INTRSTUB(ioapic_level,1,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask)
-INTRSTUB(ioapic_level,2,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask)
-INTRSTUB(ioapic_level,3,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask)
-INTRSTUB(ioapic_level,4,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask)
-INTRSTUB(ioapic_level,5,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask)
-INTRSTUB(ioapic_level,6,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask)
-INTRSTUB(ioapic_level,7,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask)
-INTRSTUB(ioapic_level,8,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask)
-INTRSTUB(ioapic_level,9,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask)
-INTRSTUB(ioapic_level,10,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask)
-INTRSTUB(ioapic_level,11,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask)
-INTRSTUB(ioapic_level,12,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask)
-INTRSTUB(ioapic_level,13,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask)
-INTRSTUB(ioapic_level,14,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask)
-INTRSTUB(ioapic_level,15,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask)
-INTRSTUB(ioapic_level,16,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask)
-INTRSTUB(ioapic_level,17,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask)
-INTRSTUB(ioapic_level,18,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask)
-INTRSTUB(ioapic_level,19,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask)
-INTRSTUB(ioapic_level,20,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask)
-INTRSTUB(ioapic_level,21,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask)
-INTRSTUB(ioapic_level,22,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask)
-INTRSTUB(ioapic_level,23,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask)
-INTRSTUB(ioapic_level,24,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask)
-INTRSTUB(ioapic_level,25,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask)
-INTRSTUB(ioapic_level,26,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask)
-INTRSTUB(ioapic_level,27,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask)
-INTRSTUB(ioapic_level,28,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask)
-INTRSTUB(ioapic_level,29,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask)
-INTRSTUB(ioapic_level,30,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask)
-INTRSTUB(ioapic_level,31,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask)
-
-#endif
-
-#ifndef XEN
-.globl _C_LABEL(i8259_stubs)
-_C_LABEL(i8259_stubs):
- .long _C_LABEL(Xintr_legacy0), _C_LABEL(Xrecurse_legacy0)
- .long _C_LABEL(Xresume_legacy0)
- .long _C_LABEL(Xintr_legacy1), _C_LABEL(Xrecurse_legacy1)
- .long _C_LABEL(Xresume_legacy1)
- .long _C_LABEL(Xintr_legacy2), _C_LABEL(Xrecurse_legacy2)
- .long _C_LABEL(Xresume_legacy2)
- .long _C_LABEL(Xintr_legacy3), _C_LABEL(Xrecurse_legacy3)
- .long _C_LABEL(Xresume_legacy3)
- .long _C_LABEL(Xintr_legacy4), _C_LABEL(Xrecurse_legacy4)
- .long _C_LABEL(Xresume_legacy4)
- .long _C_LABEL(Xintr_legacy5), _C_LABEL(Xrecurse_legacy5)
- .long _C_LABEL(Xresume_legacy5)
- .long _C_LABEL(Xintr_legacy6), _C_LABEL(Xrecurse_legacy6)
- .long _C_LABEL(Xresume_legacy6)
- .long _C_LABEL(Xintr_legacy7), _C_LABEL(Xrecurse_legacy7)
- .long _C_LABEL(Xresume_legacy7)
- .long _C_LABEL(Xintr_legacy8), _C_LABEL(Xrecurse_legacy8)
- .long _C_LABEL(Xresume_legacy8)
- .long _C_LABEL(Xintr_legacy9), _C_LABEL(Xrecurse_legacy9)
- .long _C_LABEL(Xresume_legacy9)
- .long _C_LABEL(Xintr_legacy10), _C_LABEL(Xrecurse_legacy10)
- .long _C_LABEL(Xresume_legacy10)
- .long _C_LABEL(Xintr_legacy11), _C_LABEL(Xrecurse_legacy11)
- .long _C_LABEL(Xresume_legacy11)
- .long _C_LABEL(Xintr_legacy12), _C_LABEL(Xrecurse_legacy12)
- .long _C_LABEL(Xresume_legacy12)
- .long _C_LABEL(Xintr_legacy13), _C_LABEL(Xrecurse_legacy13)
- .long _C_LABEL(Xresume_legacy13)
- .long _C_LABEL(Xintr_legacy14), _C_LABEL(Xrecurse_legacy14)
- .long _C_LABEL(Xresume_legacy14)
- .long _C_LABEL(Xintr_legacy15), _C_LABEL(Xrecurse_legacy15)
- .long _C_LABEL(Xresume_legacy15)
-#endif
-
-#if NIOAPIC > 0
-.globl _C_LABEL(ioapic_edge_stubs)
-_C_LABEL(ioapic_edge_stubs):
- .long _C_LABEL(Xintr_ioapic_edge0), _C_LABEL(Xrecurse_ioapic_edge0)
- .long _C_LABEL(Xresume_ioapic_edge0)
- .long _C_LABEL(Xintr_ioapic_edge1), _C_LABEL(Xrecurse_ioapic_edge1)
- .long _C_LABEL(Xresume_ioapic_edge1)
- .long _C_LABEL(Xintr_ioapic_edge2), _C_LABEL(Xrecurse_ioapic_edge2)
- .long _C_LABEL(Xresume_ioapic_edge2)
- .long _C_LABEL(Xintr_ioapic_edge3), _C_LABEL(Xrecurse_ioapic_edge3)
- .long _C_LABEL(Xresume_ioapic_edge3)
- .long _C_LABEL(Xintr_ioapic_edge4), _C_LABEL(Xrecurse_ioapic_edge4)
- .long _C_LABEL(Xresume_ioapic_edge4)
- .long _C_LABEL(Xintr_ioapic_edge5), _C_LABEL(Xrecurse_ioapic_edge5)
- .long _C_LABEL(Xresume_ioapic_edge5)
- .long _C_LABEL(Xintr_ioapic_edge6), _C_LABEL(Xrecurse_ioapic_edge6)
- .long _C_LABEL(Xresume_ioapic_edge6)
- .long _C_LABEL(Xintr_ioapic_edge7), _C_LABEL(Xrecurse_ioapic_edge7)
- .long _C_LABEL(Xresume_ioapic_edge7)
- .long _C_LABEL(Xintr_ioapic_edge8), _C_LABEL(Xrecurse_ioapic_edge8)
- .long _C_LABEL(Xresume_ioapic_edge8)
- .long _C_LABEL(Xintr_ioapic_edge9), _C_LABEL(Xrecurse_ioapic_edge9)
- .long _C_LABEL(Xresume_ioapic_edge9)
- .long _C_LABEL(Xintr_ioapic_edge10), _C_LABEL(Xrecurse_ioapic_edge10)
- .long _C_LABEL(Xresume_ioapic_edge10)
- .long _C_LABEL(Xintr_ioapic_edge11), _C_LABEL(Xrecurse_ioapic_edge11)
- .long _C_LABEL(Xresume_ioapic_edge11)
- .long _C_LABEL(Xintr_ioapic_edge12), _C_LABEL(Xrecurse_ioapic_edge12)
- .long _C_LABEL(Xresume_ioapic_edge12)
- .long _C_LABEL(Xintr_ioapic_edge13), _C_LABEL(Xrecurse_ioapic_edge13)
- .long _C_LABEL(Xresume_ioapic_edge13)
- .long _C_LABEL(Xintr_ioapic_edge14), _C_LABEL(Xrecurse_ioapic_edge14)
- .long _C_LABEL(Xresume_ioapic_edge14)
- .long _C_LABEL(Xintr_ioapic_edge15), _C_LABEL(Xrecurse_ioapic_edge15)
- .long _C_LABEL(Xresume_ioapic_edge15)
- .long _C_LABEL(Xintr_ioapic_edge16), _C_LABEL(Xrecurse_ioapic_edge16)
- .long _C_LABEL(Xresume_ioapic_edge16)
- .long _C_LABEL(Xintr_ioapic_edge17), _C_LABEL(Xrecurse_ioapic_edge17)
- .long _C_LABEL(Xresume_ioapic_edge17)
- .long _C_LABEL(Xintr_ioapic_edge18), _C_LABEL(Xrecurse_ioapic_edge18)
- .long _C_LABEL(Xresume_ioapic_edge18)
- .long _C_LABEL(Xintr_ioapic_edge19), _C_LABEL(Xrecurse_ioapic_edge19)
- .long _C_LABEL(Xresume_ioapic_edge19)
- .long _C_LABEL(Xintr_ioapic_edge20), _C_LABEL(Xrecurse_ioapic_edge20)
- .long _C_LABEL(Xresume_ioapic_edge20)
- .long _C_LABEL(Xintr_ioapic_edge21), _C_LABEL(Xrecurse_ioapic_edge21)
- .long _C_LABEL(Xresume_ioapic_edge21)
- .long _C_LABEL(Xintr_ioapic_edge22), _C_LABEL(Xrecurse_ioapic_edge22)
- .long _C_LABEL(Xresume_ioapic_edge22)
- .long _C_LABEL(Xintr_ioapic_edge23), _C_LABEL(Xrecurse_ioapic_edge23)
- .long _C_LABEL(Xresume_ioapic_edge23)
- .long _C_LABEL(Xintr_ioapic_edge24), _C_LABEL(Xrecurse_ioapic_edge24)
- .long _C_LABEL(Xresume_ioapic_edge24)
- .long _C_LABEL(Xintr_ioapic_edge25), _C_LABEL(Xrecurse_ioapic_edge25)
- .long _C_LABEL(Xresume_ioapic_edge25)
- .long _C_LABEL(Xintr_ioapic_edge26), _C_LABEL(Xrecurse_ioapic_edge26)
- .long _C_LABEL(Xresume_ioapic_edge26)
- .long _C_LABEL(Xintr_ioapic_edge27), _C_LABEL(Xrecurse_ioapic_edge27)
- .long _C_LABEL(Xresume_ioapic_edge27)
- .long _C_LABEL(Xintr_ioapic_edge28), _C_LABEL(Xrecurse_ioapic_edge28)
- .long _C_LABEL(Xresume_ioapic_edge28)
- .long _C_LABEL(Xintr_ioapic_edge29), _C_LABEL(Xrecurse_ioapic_edge29)
- .long _C_LABEL(Xresume_ioapic_edge29)
- .long _C_LABEL(Xintr_ioapic_edge30), _C_LABEL(Xrecurse_ioapic_edge30)
- .long _C_LABEL(Xresume_ioapic_edge30)
- .long _C_LABEL(Xintr_ioapic_edge31), _C_LABEL(Xrecurse_ioapic_edge31)
- .long _C_LABEL(Xresume_ioapic_edge31)
-
-.globl _C_LABEL(ioapic_level_stubs)
-_C_LABEL(ioapic_level_stubs):
- .long _C_LABEL(Xintr_ioapic_level0), _C_LABEL(Xrecurse_ioapic_level0)
- .long _C_LABEL(Xresume_ioapic_level0)
- .long _C_LABEL(Xintr_ioapic_level1), _C_LABEL(Xrecurse_ioapic_level1)
- .long _C_LABEL(Xresume_ioapic_level1)
- .long _C_LABEL(Xintr_ioapic_level2), _C_LABEL(Xrecurse_ioapic_level2)
- .long _C_LABEL(Xresume_ioapic_level2)
- .long _C_LABEL(Xintr_ioapic_level3), _C_LABEL(Xrecurse_ioapic_level3)
- .long _C_LABEL(Xresume_ioapic_level3)
- .long _C_LABEL(Xintr_ioapic_level4), _C_LABEL(Xrecurse_ioapic_level4)
- .long _C_LABEL(Xresume_ioapic_level4)
- .long _C_LABEL(Xintr_ioapic_level5), _C_LABEL(Xrecurse_ioapic_level5)
- .long _C_LABEL(Xresume_ioapic_level5)
- .long _C_LABEL(Xintr_ioapic_level6), _C_LABEL(Xrecurse_ioapic_level6)
- .long _C_LABEL(Xresume_ioapic_level6)
- .long _C_LABEL(Xintr_ioapic_level7), _C_LABEL(Xrecurse_ioapic_level7)
- .long _C_LABEL(Xresume_ioapic_level7)
- .long _C_LABEL(Xintr_ioapic_level8), _C_LABEL(Xrecurse_ioapic_level8)
- .long _C_LABEL(Xresume_ioapic_level8)
- .long _C_LABEL(Xintr_ioapic_level9), _C_LABEL(Xrecurse_ioapic_level9)
- .long _C_LABEL(Xresume_ioapic_level9)
- .long _C_LABEL(Xintr_ioapic_level10), _C_LABEL(Xrecurse_ioapic_level10)
- .long _C_LABEL(Xresume_ioapic_level10)
- .long _C_LABEL(Xintr_ioapic_level11), _C_LABEL(Xrecurse_ioapic_level11)
- .long _C_LABEL(Xresume_ioapic_level11)
- .long _C_LABEL(Xintr_ioapic_level12), _C_LABEL(Xrecurse_ioapic_level12)
- .long _C_LABEL(Xresume_ioapic_level12)
- .long _C_LABEL(Xintr_ioapic_level13), _C_LABEL(Xrecurse_ioapic_level13)
- .long _C_LABEL(Xresume_ioapic_level13)
- .long _C_LABEL(Xintr_ioapic_level14), _C_LABEL(Xrecurse_ioapic_level14)
- .long _C_LABEL(Xresume_ioapic_level14)
- .long _C_LABEL(Xintr_ioapic_level15), _C_LABEL(Xrecurse_ioapic_level15)
- .long _C_LABEL(Xresume_ioapic_level15)
- .long _C_LABEL(Xintr_ioapic_level16), _C_LABEL(Xrecurse_ioapic_level16)
- .long _C_LABEL(Xresume_ioapic_level16)
- .long _C_LABEL(Xintr_ioapic_level17), _C_LABEL(Xrecurse_ioapic_level17)
- .long _C_LABEL(Xresume_ioapic_level17)
- .long _C_LABEL(Xintr_ioapic_level18), _C_LABEL(Xrecurse_ioapic_level18)
- .long _C_LABEL(Xresume_ioapic_level18)
- .long _C_LABEL(Xintr_ioapic_level19), _C_LABEL(Xrecurse_ioapic_level19)
- .long _C_LABEL(Xresume_ioapic_level19)
- .long _C_LABEL(Xintr_ioapic_level20), _C_LABEL(Xrecurse_ioapic_level20)
- .long _C_LABEL(Xresume_ioapic_level20)
- .long _C_LABEL(Xintr_ioapic_level21), _C_LABEL(Xrecurse_ioapic_level21)
- .long _C_LABEL(Xresume_ioapic_level21)
- .long _C_LABEL(Xintr_ioapic_level22), _C_LABEL(Xrecurse_ioapic_level22)
- .long _C_LABEL(Xresume_ioapic_level22)
- .long _C_LABEL(Xintr_ioapic_level23), _C_LABEL(Xrecurse_ioapic_level23)
- .long _C_LABEL(Xresume_ioapic_level23)
- .long _C_LABEL(Xintr_ioapic_level24), _C_LABEL(Xrecurse_ioapic_level24)
- .long _C_LABEL(Xresume_ioapic_level24)
- .long _C_LABEL(Xintr_ioapic_level25), _C_LABEL(Xrecurse_ioapic_level25)
- .long _C_LABEL(Xresume_ioapic_level25)
- .long _C_LABEL(Xintr_ioapic_level26), _C_LABEL(Xrecurse_ioapic_level26)
- .long _C_LABEL(Xresume_ioapic_level26)
- .long _C_LABEL(Xintr_ioapic_level27), _C_LABEL(Xrecurse_ioapic_level27)
- .long _C_LABEL(Xresume_ioapic_level27)
- .long _C_LABEL(Xintr_ioapic_level28), _C_LABEL(Xrecurse_ioapic_level28)
- .long _C_LABEL(Xresume_ioapic_level28)
- .long _C_LABEL(Xintr_ioapic_level29), _C_LABEL(Xrecurse_ioapic_level29)
- .long _C_LABEL(Xresume_ioapic_level29)
- .long _C_LABEL(Xintr_ioapic_level30), _C_LABEL(Xrecurse_ioapic_level30)
- .long _C_LABEL(Xresume_ioapic_level30)
- .long _C_LABEL(Xintr_ioapic_level31), _C_LABEL(Xrecurse_ioapic_level31)
- .long _C_LABEL(Xresume_ioapic_level31)
-#endif
-
-/*
- * Symbols that vmstat -i wants, even though they're not used.
- */
-.globl _C_LABEL(intrnames)
-_C_LABEL(intrnames):
-.globl _C_LABEL(eintrnames)
-_C_LABEL(eintrnames):
-
-.globl _C_LABEL(intrcnt)
-_C_LABEL(intrcnt):
-.globl _C_LABEL(eintrcnt)
-_C_LABEL(eintrcnt):
-
-/*
- * Soft interrupt handlers
- */
-
-IDTVEC(softserial)
- movl $IPL_SOFTSERIAL, CPUVAR(ILEVEL)
- incl CPUVAR(IDEPTH)
-#ifdef MULTIPROCESSOR
- call _C_LABEL(x86_softintlock)
-#endif
- movl CPUVAR(ISOURCES) + SIR_SERIAL * 4, %edi
- addl $1,IS_EVCNTLO(%edi)
- adcl $0,IS_EVCNTHI(%edi)
- pushl $X86_SOFTINTR_SOFTSERIAL
- call _C_LABEL(softintr_dispatch)
- addl $4,%esp
-#ifdef MULTIPROCESSOR
- call _C_LABEL(x86_softintunlock)
-#endif
- decl CPUVAR(IDEPTH)
- jmp *%esi
-
-IDTVEC(softnet)
- movl $IPL_SOFTNET, CPUVAR(ILEVEL)
- incl CPUVAR(IDEPTH)
-#ifdef MULTIPROCESSOR
- call _C_LABEL(x86_softintlock)
-#endif
- movl CPUVAR(ISOURCES) + SIR_NET * 4, %edi
- addl $1,IS_EVCNTLO(%edi)
- adcl $0,IS_EVCNTHI(%edi)
-
- xorl %edi,%edi
- xchgl _C_LABEL(netisr),%edi
-
- /* XXX Do the legacy netisrs here for now. */
-#define DONETISR(s, c) \
- .globl _C_LABEL(c) ;\
- testl $(1 << s),%edi ;\
- jz 1f ;\
- call _C_LABEL(c) ;\
-1:
-#include <net/netisr_dispatch.h>
-
- pushl $X86_SOFTINTR_SOFTNET
- call _C_LABEL(softintr_dispatch)
- addl $4,%esp
-#ifdef MULTIPROCESSOR
- call _C_LABEL(x86_softintunlock)
-#endif
- decl CPUVAR(IDEPTH)
- jmp *%esi
-
-IDTVEC(softclock)
- movl $IPL_SOFTCLOCK, CPUVAR(ILEVEL)
- incl CPUVAR(IDEPTH)
-#ifdef MULTIPROCESSOR
- call _C_LABEL(x86_softintlock)
-#endif
- movl CPUVAR(ISOURCES) + SIR_CLOCK * 4, %edi
- addl $1,IS_EVCNTLO(%edi)
- adcl $0,IS_EVCNTHI(%edi)
-
- pushl $X86_SOFTINTR_SOFTCLOCK
- call _C_LABEL(softintr_dispatch)
- addl $4,%esp
-#ifdef MULTIPROCESSOR
- call _C_LABEL(x86_softintunlock)
-#endif
- decl CPUVAR(IDEPTH)
- jmp *%esi
-
-/*
- * Trap and fault vector routines
- *
- * On exit from the kernel to user mode, we always need to check for ASTs. In
- * addition, we need to do this atomically; otherwise an interrupt may occur
- * which causes an AST, but it won't get processed until the next kernel entry
- * (possibly the next clock tick). Thus, we disable interrupt before checking,
- * and only enable them again on the final `iret' or before calling the AST
- * handler.
- */
-
-#define TRAP(a) pushl $(a) ; jmp _C_LABEL(alltraps)
-#define ZTRAP(a) pushl $0 ; TRAP(a)
-
-#ifdef IPKDB
-#define BPTTRAP(a) pushl $0; pushl $(a); jmp _C_LABEL(bpttraps)
-#else
-#define BPTTRAP(a) ZTRAP(a)
-#endif
-
-
- .text
-IDTVEC(trap00)
- ZTRAP(T_DIVIDE)
-IDTVEC(trap01)
- BPTTRAP(T_TRCTRAP)
-IDTVEC(trap02)
- ZTRAP(T_NMI)
-IDTVEC(trap03)
- BPTTRAP(T_BPTFLT)
-IDTVEC(trap04)
- ZTRAP(T_OFLOW)
-IDTVEC(trap05)
- ZTRAP(T_BOUND)
-IDTVEC(trap06)
- ZTRAP(T_PRIVINFLT)
-IDTVEC(trap07)
-#if NNPX > 0
- pushl $0 # dummy error code
- pushl $T_DNA
- INTRENTRY
-#ifdef XENDEBUG_LOW
- pushl %esp
-#endif
- pushl CPUVAR(SELF)
- call *_C_LABEL(npxdna_func)
- addl $4,%esp
-#ifdef XENDEBUG_LOW
- addl $4,%esp
-#endif
- testl %eax,%eax
- jz calltrap
- INTRFASTEXIT
-#else
- ZTRAP(T_DNA)
-#endif
-IDTVEC(trap08)
- TRAP(T_DOUBLEFLT)
-IDTVEC(trap09)
- ZTRAP(T_FPOPFLT)
-IDTVEC(trap0a)
- TRAP(T_TSSFLT)
-IDTVEC(trap0b)
- TRAP(T_SEGNPFLT)
-IDTVEC(trap0c)
- TRAP(T_STKFLT)
-IDTVEC(trap0d)
- TRAP(T_PROTFLT)
-#ifndef XEN
-IDTVEC(trap0e)
-#ifndef I586_CPU
- TRAP(T_PAGEFLT)
-#else
- pushl $T_PAGEFLT
- INTRENTRY
- testb $PGEX_U,TF_ERR(%esp)
- jnz calltrap
- movl %cr2,%eax
- subl _C_LABEL(pentium_idt),%eax
- cmpl $(6*8),%eax
- jne calltrap
- movb $T_PRIVINFLT,TF_TRAPNO(%esp)
- jmp calltrap
-#endif
-#endif
-
-IDTVEC(intrspurious)
-IDTVEC(trap0f)
- /*
- * The Pentium Pro local APIC may erroneously call this vector for a
- * default IR7. Just ignore it.
- *
- * (The local APIC does this when CPL is raised while it's on the
- * way to delivering an interrupt.. presumably enough has been set
- * up that it's inconvenient to abort delivery completely..)
- */
- iret
-
-IDTVEC(trap10)
-#if NNPX > 0
- /*
- * Handle like an interrupt so that we can call npxintr to clear the
- * error. It would be better to handle npx interrupts as traps but
- * this is difficult for nested interrupts.
- */
- pushl $0 # dummy error code
- pushl $T_ASTFLT
- INTRENTRY
- pushl CPUVAR(ILEVEL)
- pushl %esp
- incl _C_LABEL(uvmexp)+V_TRAP
- call _C_LABEL(npxintr)
- addl $8,%esp
- INTRFASTEXIT
-#else
- ZTRAP(T_ARITHTRAP)
-#endif
-IDTVEC(trap11)
- TRAP(T_ALIGNFLT)
-IDTVEC(trap12)
-IDTVEC(trap13)
-IDTVEC(trap14)
-IDTVEC(trap15)
-IDTVEC(trap16)
-IDTVEC(trap17)
-IDTVEC(trap18)
-IDTVEC(trap19)
-IDTVEC(trap1a)
-IDTVEC(trap1b)
-IDTVEC(trap1c)
-IDTVEC(trap1d)
-IDTVEC(trap1e)
-IDTVEC(trap1f)
- /* 18 - 31 reserved for future exp */
- ZTRAP(T_RESERVED)
-
-IDTVEC(exceptions)
-#ifndef XENDEBUG_LOW
- .long _C_LABEL(Xtrap00), _C_LABEL(Xtrap01)
- .long _C_LABEL(Xtrap02), _C_LABEL(Xtrap03)
- .long _C_LABEL(Xtrap04), _C_LABEL(Xtrap05)
- .long _C_LABEL(Xtrap06), _C_LABEL(Xtrap07)
- .long _C_LABEL(Xtrap08), _C_LABEL(Xtrap09)
- .long _C_LABEL(Xtrap0a), _C_LABEL(Xtrap0b)
- .long _C_LABEL(Xtrap0c), _C_LABEL(Xtrap0d)
- .long _C_LABEL(Xtrap0e), _C_LABEL(Xtrap0f)
- .long _C_LABEL(Xtrap10), _C_LABEL(Xtrap11)
- .long _C_LABEL(Xtrap12), _C_LABEL(Xtrap13)
- .long _C_LABEL(Xtrap14), _C_LABEL(Xtrap15)
- .long _C_LABEL(Xtrap16), _C_LABEL(Xtrap17)
- .long _C_LABEL(Xtrap18), _C_LABEL(Xtrap19)
- .long _C_LABEL(Xtrap1a), _C_LABEL(Xtrap1b)
- .long _C_LABEL(Xtrap1c), _C_LABEL(Xtrap1d)
- .long _C_LABEL(Xtrap1e), _C_LABEL(Xtrap1f)
-#else
- .long _C_LABEL(divide_error), _C_LABEL(debug)
- .long _C_LABEL(Xtrap02), _C_LABEL(Xtrap03) #int3)
- .long _C_LABEL(overflow), _C_LABEL(bounds)
- .long _C_LABEL(invalid_op), _C_LABEL(device_not_available)
- .long _C_LABEL(double_fault), _C_LABEL(coprocessor_segment_overrun)
- .long _C_LABEL(invalid_TSS), _C_LABEL(segment_not_present)
- .long _C_LABEL(stack_segment)
- #.long _C_LABEL(general_protection)
- .long _C_LABEL(Xtrap0d)
- #.long _C_LABEL(page_fault)
- .long _C_LABEL(Xtrap0e)
- .long _C_LABEL(spurious_interrupt_bug)
- .long _C_LABEL(coprocessor_error), _C_LABEL(alignment_check)
- .long _C_LABEL(machine_check), _C_LABEL(simd_coprocessor_error)
- .long _C_LABEL(Xtrap14), _C_LABEL(Xtrap15)
- .long _C_LABEL(Xtrap16), _C_LABEL(Xtrap17)
- .long _C_LABEL(Xtrap18), _C_LABEL(Xtrap19)
- .long _C_LABEL(Xtrap1a), _C_LABEL(Xtrap1b)
- .long _C_LABEL(Xtrap1c), _C_LABEL(Xtrap1d)
- .long _C_LABEL(Xtrap1e), _C_LABEL(Xtrap1f)
-#endif
-
-
-IDTVEC(tss_trap08)
-1:
- str %ax
- GET_TSS
- movzwl (%eax),%eax
- GET_TSS
- pushl $T_DOUBLEFLT
- pushl %eax
- call _C_LABEL(trap_tss)
- addl $12,%esp
- iret
- jmp 1b
-
-/* LINTSTUB: Ignore */
-NENTRY(alltraps)
- INTRENTRY
-calltrap:
-#ifdef DIAGNOSTIC
- movl CPUVAR(ILEVEL),%ebx
-#endif /* DIAGNOSTIC */
- pushl %esp
- call _C_LABEL(trap)
- addl $4,%esp
- testb $CHK_UPL,TF_CS(%esp)
- jnz alltraps_checkast
-#ifdef VM86
- testl $PSL_VM,TF_EFLAGS(%esp)
- jz 6f
-#else
- jmp 6f
-#endif
-alltraps_checkast:
- /* Check for ASTs on exit to user mode. */
- CLI(%eax)
- CHECK_ASTPENDING(%eax)
- jz 3f
-5: CLEAR_ASTPENDING(%eax)
- STI(%eax)
- movl $T_ASTFLT,TF_TRAPNO(%esp)
- pushl %esp
- call _C_LABEL(trap)
- addl $4,%esp
- jmp alltraps_checkast /* re-check ASTs */
-3: CHECK_DEFERRED_SWITCH(%eax)
- jnz 9f
-6: STIC(%eax)
- jz 4f
- call _C_LABEL(stipending)
- #testl %eax,%eax /* XXXcl */
- #jnz 1b
-4:
-#ifndef DIAGNOSTIC
- INTRFASTEXIT
-#else
- cmpl CPUVAR(ILEVEL),%ebx
- jne 3f
- INTRFASTEXIT
-3: pushl $4f
- call _C_LABEL(printf)
- addl $4,%esp
-#ifdef DDB
- int $3
-#endif /* DDB */
- movl %ebx,CPUVAR(ILEVEL)
- jmp alltraps_checkast /* re-check ASTs */
-4: .asciz "WARNING: SPL NOT LOWERED ON TRAP EXIT\n"
-#endif /* DIAGNOSTIC */
-9: STI(%eax)
- call _C_LABEL(pmap_load)
- jmp alltraps_checkast /* re-check ASTs */
-
-/* LINTSTUB: Ignore */
-IDTVEC(trap0e)
- INTRENTRY
- movl TF_TRAPNO(%esp),%eax
- movl $T_PAGEFLT,TF_TRAPNO(%esp)
-#ifdef DIAGNOSTIC
- movl CPUVAR(ILEVEL),%ebx
-#endif /* DIAGNOSTIC */
- #pushl %esp
- pushl %eax
- movl %esp,%eax
- addl $4,%eax
- pushl %eax
- call _C_LABEL(trap)
- addl $4,%esp
- addl $4,%esp
- testb $CHK_UPL,TF_CS(%esp)
- jnz trap0e_checkast
-#ifdef VM86
- testl $PSL_VM,TF_EFLAGS(%esp)
- jz 6f
-#else
- jmp 6f
-#endif
-trap0e_checkast:
- /* Check for ASTs on exit to user mode. */
- CLI(%eax)
- CHECK_ASTPENDING(%eax)
- jz 3f
-5: CLEAR_ASTPENDING(%eax)
- STI(%eax)
- movl $T_ASTFLT,TF_TRAPNO(%esp)
- pushl %esp
- call _C_LABEL(trap)
- addl $4,%esp
- jmp trap0e_checkast /* re-check ASTs */
-3: CHECK_DEFERRED_SWITCH(%eax)
- jnz 9f
-6: STIC(%eax)
- jz 4f
- call _C_LABEL(stipending)
- #testl %eax,%eax /* XXXcl */
- #jnz 1b
-4:
-#ifndef DIAGNOSTIC
- INTRFASTEXIT
-#else
- cmpl CPUVAR(ILEVEL),%ebx
- jne 3f
- INTRFASTEXIT
-3: pushl $4f
- call _C_LABEL(printf)
- addl $4,%esp
-#ifdef DDB
- int $3
-#endif /* DDB */
- movl %ebx,CPUVAR(ILEVEL)
- jmp trap0e_checkast /* re-check ASTs */
-4: .asciz "WARNING: SPL NOT LOWERED ON TRAP EXIT\n"
-#endif /* DIAGNOSTIC */
-9: STI(%eax)
- call _C_LABEL(pmap_load)
- jmp trap0e_checkast /* re-check ASTs */
-
-#ifdef IPKDB
-/* LINTSTUB: Ignore */
-NENTRY(bpttraps)
- INTRENTRY
- call _C_LABEL(ipkdb_trap_glue)
- testl %eax,%eax
- jz calltrap
- INTRFASTEXIT
-
-ipkdbsetup:
- popl %ecx
-
- /* Disable write protection: */
- movl %cr0,%eax
- pushl %eax
- andl $~CR0_WP,%eax
- movl %eax,%cr0
-
- /* Substitute Protection & Page Fault handlers: */
- movl _C_LABEL(idt),%edx
- pushl 13*8(%edx)
- pushl 13*8+4(%edx)
- pushl 14*8(%edx)
- pushl 14*8+4(%edx)
- movl $fault,%eax
- movw %ax,13*8(%edx)
- movw %ax,14*8(%edx)
- shrl $16,%eax
- movw %ax,13*8+6(%edx)
- movw %ax,14*8+6(%edx)
-
- pushl %ecx
- ret
-
-ipkdbrestore:
- popl %ecx
-
- /* Restore Protection & Page Fault handlers: */
- movl _C_LABEL(idt),%edx
- popl 14*8+4(%edx)
- popl 14*8(%edx)
- popl 13*8+4(%edx)
- popl 13*8(%edx)
-
- /* Restore write protection: */
- popl %edx
- movl %edx,%cr0
-
- pushl %ecx
- ret
-#endif /* IPKDB */
-
-
-/*
- * If an error is detected during trap, syscall, or interrupt exit, trap() will
- * change %eip to point to one of these labels. We clean up the stack, if
- * necessary, and resume as if we were handling a general protection fault.
- * This will cause the process to get a SIGBUS.
- */
-/* LINTSTUB: Var: char resume_iret[1]; */
-NENTRY(resume_iret)
- ZTRAP(T_PROTFLT)
-/* LINTSTUB: Var: char resume_pop_ds[1]; */
-NENTRY(resume_pop_ds)
- movl %es,TF_ES(%esp)
- movl $GSEL(GDATA_SEL, SEL_KPL),%eax
- movw %ax,%es
-/* LINTSTUB: Var: char resume_pop_es[1]; */
-NENTRY(resume_pop_es)
- movl %fs,TF_FS(%esp)
- movl $GSEL(GDATA_SEL, SEL_KPL),%eax
- movw %ax,%fs
-/* LINTSTUB: Var: char resume_pop_fs[1]; */
-NENTRY(resume_pop_fs)
- movl %gs,TF_GS(%esp)
- movl $GSEL(GDATA_SEL, SEL_KPL),%eax
- movw %ax,%gs
-/* LINTSTUB: Var: char resume_pop_gs[1]; */
-NENTRY(resume_pop_gs)
- movl $T_PROTFLT,TF_TRAPNO(%esp)
- jmp calltrap
-
-#ifdef IPKDB
-/* LINTSTUB: Func: int ipkdbfbyte(u_char *c) */
-NENTRY(ipkdbfbyte)
- pushl %ebp
- movl %esp,%ebp
- call ipkdbsetup
- movl 8(%ebp),%edx
- movzbl (%edx),%eax
-faultexit:
- call ipkdbrestore
- popl %ebp
- ret
-
-/* LINTSTUB: Func: int ipkdbsbyte(u_char *c, int i) */
-NENTRY(ipkdbsbyte)
- pushl %ebp
- movl %esp,%ebp
- call ipkdbsetup
- movl 8(%ebp),%edx
- movl 12(%ebp),%eax
- movb %al,(%edx)
- call ipkdbrestore
- popl %ebp
- ret
-
-fault:
- popl %eax /* error code */
- movl $faultexit,%eax
- movl %eax,(%esp)
- movl $-1,%eax
- iret
-#endif /* IPKDB */
-
-
-
-# A note on the "critical region" in our callback handler.
-# We want to avoid stacking callback handlers due to events occurring
-# during handling of the last event. To do this, we keep events disabled
-# until weve done all processing. HOWEVER, we must enable events before
-# popping the stack frame (cant be done atomically) and so it would still
-# be possible to get enough handler activations to overflow the stack.
-# Although unlikely, bugs of that kind are hard to track down, so wed
-# like to avoid the possibility.
-# So, on entry to the handler we detect whether we interrupted an
-# existing activation in its critical region -- if so, we pop the current
-# activation and restart the handler using the previous one.
-ENTRY(hypervisor_callback)
- pushl $0 # dummy error code
- pushl $T_ASTFLT
- INTRENTRY
- movl TF_EIP(%esp),%eax
- cmpl $scrit,%eax
- jb 11f
- cmpl $ecrit,%eax
- jb critical_region_fixup
-11: pushl CPUVAR(ILEVEL)
- push %esp
- call do_hypervisor_callback
- add $8,%esp
- movl HYPERVISOR_shared_info,%esi
- xorl %eax,%eax
- movb TF_CS(%esp),%cl
- test $CHK_UPL,%cl # slow return to ring 2 or 3
- je safesti
- movl CPUVAR(ILEVEL),%ebx
- jmp doreti_checkast
-safesti:XEN_UNBLOCK_EVENTS(%esi) # reenable event callbacks
-scrit: /**** START OF CRITICAL REGION ****/
- testb $1,evtchn_upcall_pending(%esi)
- jnz 14f # process more events if necessary...
- INTRFASTEXIT
-critiret:
-14: XEN_BLOCK_EVENTS(%esi)
- jmp 11b
-ecrit: /**** END OF CRITICAL REGION ****/
-# [How we do the fixup]. We want to merge the current stack frame with the
-# just-interrupted frame. How we do this depends on where in the critical
-# region the interrupted handler was executing, and so how many saved
-# registers are in each frame. We do this quickly using the lookup table
-# 'critical_fixup_table'. For each byte offset in the critical region, it
-# provides the number of bytes which have already been popped from the
-# interrupted stack frame.
-critical_region_fixup:
- cmpl $(critiret-1),%eax # eip points to iret?
- jne 1f
- movl $(TF_PUSHSIZE+0x8),%eax
- jmp 2f
-1: xorl %eax,%eax
-2:
- # %eax contains num bytes popped
- mov %esp,%esi
- add %eax,%esi # %esi points at end of src region
- mov %esp,%edi
- add $(TF_PUSHSIZE+0x8+0xC),%edi # %edi points at end of dst region
- mov %eax,%ecx
- shr $2,%ecx # convert words to bytes
- je 16f # skip loop if nothing to copy
-15: subl $4,%esi # pre-decrementing copy loop
- subl $4,%edi
- movl (%esi),%eax
- movl %eax,(%edi)
- loop 15b
-16: movl %edi,%esp # final %edi is top of merged stack
- jmp 11b
-
-
-# Hypervisor uses this for application faults while it executes.
-ENTRY(failsafe_callback)
- pop %ds
- pop %es
- pop %fs
- pop %gs
- call _C_LABEL(xen_failsafe_handler)
- iret
-
-#ifdef XENDEBUG_LOW
-
-ES = 0x20
-ORIG_EAX = 0x24
-EIP = 0x28
-CS = 0x2C
-
-#define SAVE_ALL \
- cld; \
- pushl %es; \
- pushl %ds; \
- pushl %eax; \
- pushl %ebp; \
- pushl %edi; \
- pushl %esi; \
- pushl %edx; \
- pushl %ecx; \
- pushl %ebx; \
- movl $GSEL(GDATA_SEL, SEL_KPL),%edx; \
- movl %edx,%ds; \
- movl %edx,%es;
-
-#define RESTORE_ALL \
- popl %ebx; \
- popl %ecx; \
- popl %edx; \
- popl %esi; \
- popl %edi; \
- popl %ebp; \
- popl %eax; \
- popl %ds; \
- popl %es; \
- addl $4,%esp; \
- iret; \
-
-ret_from_exception:
- movb CS(%esp),%cl
- test $2,%cl # slow return to ring 2 or 3
- jne safesti
- RESTORE_ALL
-
-
-ENTRY(divide_error)
- pushl $0 # no error code
- pushl $do_divide_error
-do_exception:
- pushl %ds
- pushl %eax
- xorl %eax,%eax
- pushl %ebp
- pushl %edi
- pushl %esi
- pushl %edx
- decl %eax # eax = -1
- pushl %ecx
- pushl %ebx
- cld
- movl %es,%ecx
- movl ORIG_EAX(%esp), %esi # get the error code
- movl ES(%esp), %edi # get the function address
- movl %eax, ORIG_EAX(%esp)
- movl %ecx, ES(%esp)
- movl %esp,%edx
- pushl %esi # push the error code
- pushl %edx # push the pt_regs pointer
- movl $(__KERNEL_DS),%edx
- movl %edx,%ds
- movl %edx,%es
- call *%edi
- addl $8,%esp
- jmp ret_from_exception
-
-ENTRY(coprocessor_error)
- pushl $0
- pushl $do_coprocessor_error
- jmp do_exception
-
-ENTRY(simd_coprocessor_error)
- pushl $0
- pushl $do_simd_coprocessor_error
- jmp do_exception
-
-ENTRY(device_not_available)
- iret
-
-ENTRY(debug)
- pushl $0
- pushl $do_debug
- jmp do_exception
-
-ENTRY(int3)
- pushl $0
- pushl $do_int3
- jmp do_exception
-
-ENTRY(overflow)
- pushl $0
- pushl $do_overflow
- jmp do_exception
-
-ENTRY(bounds)
- pushl $0
- pushl $do_bounds
- jmp do_exception
-
-ENTRY(invalid_op)
- pushl $0
- pushl $do_invalid_op
- jmp do_exception
-
-ENTRY(coprocessor_segment_overrun)
- pushl $0
- pushl $do_coprocessor_segment_overrun
- jmp do_exception
-
-ENTRY(double_fault)
- pushl $do_double_fault
- jmp do_exception
-
-ENTRY(invalid_TSS)
- pushl $do_invalid_TSS
- jmp do_exception
-
-ENTRY(segment_not_present)
- pushl $do_segment_not_present
- jmp do_exception
-
-ENTRY(stack_segment)
- pushl $do_stack_segment
- jmp do_exception
-
-ENTRY(general_protection)
- pushl $do_general_protection
- jmp do_exception
-
-ENTRY(alignment_check)
- pushl $do_alignment_check
- jmp do_exception
-
-# This handler is special, because it gets an extra value on its stack,
-# which is the linear faulting address.
-ENTRY(page_fault)
- pushl %ds
- pushl %eax
- xorl %eax,%eax
- pushl %ebp
- pushl %edi
- pushl %esi
- pushl %edx
- decl %eax # eax = -1
- pushl %ecx
- pushl %ebx
- cld
- movl %es,%ecx
- movl ORIG_EAX(%esp), %esi # get the error code
- movl ES(%esp), %edi # get the faulting address
- movl %eax, ORIG_EAX(%esp)
- movl %ecx, ES(%esp)
- movl %esp,%edx
- pushl %edi # push the faulting address
- pushl %esi # push the error code
- pushl %edx # push the pt_regs pointer
- movl $(__KERNEL_DS),%edx
- movl %edx,%ds
- movl %edx,%es
- call do_page_fault
- addl $12,%esp
- jmp ret_from_exception
-
-ENTRY(machine_check)
- pushl $0
- pushl $do_machine_check
- jmp do_exception
-
-ENTRY(spurious_interrupt_bug)
- pushl $0
- pushl $do_spurious_interrupt_bug
- jmp do_exception
-#endif
diff -r 64cd054aa143 -r 0255f48b757f
netbsd-2.0-xen-sparse/sys/arch/xen/i386/xen_machdep.c
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/i386/xen_machdep.c Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,680 +0,0 @@
-/* $NetBSD: xen_machdep.c,v 1.1.2.1 2004/05/22 15:57:33 he Exp $ */
-
-/*
- *
- * Copyright (c) 2004 Christian Limpach.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Christian Limpach.
- * 4. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xen_machdep.c,v 1.1.2.1 2004/05/22 15:57:33 he Exp
$");
-
-#include "opt_xen.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/mount.h>
-
-#include <uvm/uvm.h>
-
-#include <machine/gdt.h>
-#include <machine/xenfunc.h>
-#include <machine/xenpmap.h>
-
-/* #define XENDEBUG */
-/* #define XENDEBUG_LOW */
-
-#ifdef XENDEBUG
-#define XENPRINTF(x) printf x
-#define XENPRINTK(x) printk x
-#define XENPRINTK2(x) /* printk x */
-
-static char XBUF[256];
-#else
-#define XENPRINTF(x)
-#define XENPRINTK(x)
-#define XENPRINTK2(x)
-#endif
-void printk(char *, ...);
-#define PRINTF(x) printf x
-#define PRINTK(x) printk x
-
-shared_info_t *HYPERVISOR_shared_info;
-union start_info_union start_info_union;
-
-void xen_failsafe_handler(void);
-
-void
-xen_failsafe_handler(void)
-{
-
- panic("xen_failsafe_handler called!\n");
-}
-
-
-void
-xen_update_descriptor(union descriptor *table, union descriptor *entry)
-{
- paddr_t pa;
- pt_entry_t *ptp;
-
- ptp = kvtopte((vaddr_t)table);
- pa = (*ptp & PG_FRAME) | ((vaddr_t)table & ~PG_FRAME);
- if (HYPERVISOR_update_descriptor(pa, entry->raw[0], entry->raw[1]))
- panic("HYPERVISOR_update_descriptor failed\n");
-}
-
-void
-xen_set_ldt(vaddr_t base, uint32_t entries)
-{
- vaddr_t va;
- pt_entry_t *ptp, *maptp;
-
- for (va = base; va < base + entries * sizeof(union descriptor);
- va += PAGE_SIZE) {
- KASSERT(va >= VM_MIN_KERNEL_ADDRESS);
- ptp = kvtopte(va);
- maptp = (pt_entry_t *)vtomach((vaddr_t)ptp);
- XENPRINTF(("xen_set_ldt %p %d %p %p\n", (void *)base,
- entries, ptp, maptp));
- PTE_CLEARBITS(ptp, maptp, PG_RW);
- }
- PTE_UPDATES_FLUSH();
-
- xpq_queue_set_ldt(base, entries);
- xpq_flush_queue();
-}
-
-void
-lgdt(struct region_descriptor *rdp)
-{
-
- panic("lgdt %p %08x\n", (void *)rdp->rd_base, rdp->rd_limit);
-}
-
-void
-xen_parse_cmdline(int what, union xen_cmdline_parseinfo *xcp)
-{
- char *cmd_line, *opt, *s;
- int b, i, ipidx = 0;
- uint32_t xi_ip[5];
-
- cmd_line = xen_start_info.cmd_line;
-
- switch (what) {
- case XEN_PARSE_BOOTDEV:
- xcp->xcp_bootdev[0] = 0;
- break;
- case XEN_PARSE_CONSOLE:
- xcp->xcp_console[0] = 0;
- break;
- }
-
- while (cmd_line && *cmd_line) {
- opt = cmd_line;
- cmd_line = strchr(opt, ' ');
- if (cmd_line)
- *cmd_line = 0;
-
- switch (what) {
- case XEN_PARSE_BOOTDEV:
- if (strncasecmp(opt, "bootdev=", 8) == 0)
- strncpy(xcp->xcp_bootdev, opt + 8,
- sizeof(xcp->xcp_console));
- break;
-
- case XEN_PARSE_NETINFO:
- if (xcp->xcp_netinfo.xi_root &&
- strncasecmp(opt, "nfsroot=", 8) == 0)
- strncpy(xcp->xcp_netinfo.xi_root, opt + 8,
- MNAMELEN);
-
- if (strncasecmp(opt, "ip=", 3) == 0) {
- memset(xi_ip, 0, sizeof(xi_ip));
- opt += 3;
- ipidx = 0;
- while (opt && *opt) {
- s = opt;
- opt = strchr(opt, ':');
- if (opt)
- *opt = 0;
-
- switch (ipidx) {
- case 0: /* ip */
- case 1: /* nfs server */
- case 2: /* gw */
- case 3: /* mask */
- case 4: /* host */
- if (*s == 0)
- break;
- for (i = 0; i < 4; i++) {
- b = strtoul(s, &s, 10);
- xi_ip[ipidx] = b + 256
- * xi_ip[ipidx];
- if (*s != '.')
- break;
- s++;
- }
- if (i < 3)
- xi_ip[ipidx] = 0;
- break;
- case 5: /* interface */
- if (!strncmp(s, "xennet", 6))
- s += 6;
- else if (!strncmp(s, "eth", 3))
- s += 3;
- else
- break;
- if (xcp->xcp_netinfo.xi_ifno
- == strtoul(s, NULL, 10))
- memcpy(xcp->
- xcp_netinfo.xi_ip,
- xi_ip,
- sizeof(xi_ip));
- break;
- }
- ipidx++;
-
- if (opt)
- *opt++ = ':';
- }
- }
- break;
-
- case XEN_PARSE_CONSOLE:
- if (strncasecmp(opt, "console=", 8) == 0)
- strncpy(xcp->xcp_console, opt + 8,
- sizeof(xcp->xcp_console));
- break;
-
- }
-
- if (cmd_line)
- *cmd_line++ = ' ';
- }
-}
-
-
-
-
-
-#define XEN_PAGE_OFFSET 0xC0100000
-
-static pd_entry_t
-xpmap_get_bootpde(paddr_t va)
-{
-
- return ((pd_entry_t *)xen_start_info.pt_base)[va >> PDSHIFT];
-}
-
-static pd_entry_t
-xpmap_get_vbootpde(paddr_t va)
-{
- pd_entry_t pde;
-
- pde = xpmap_get_bootpde(va);
- if ((pde & PG_V) == 0)
- return (pde & ~PG_FRAME);
- return (pde & ~PG_FRAME) |
- (xpmap_mtop(pde & PG_FRAME) + KERNBASE);
-}
-
-static pt_entry_t *
-xpmap_get_bootptep(paddr_t va)
-{
- pd_entry_t pde;
-
- pde = xpmap_get_vbootpde(va);
- if ((pde & PG_V) == 0)
- return (void *)-1;
- return &(((pt_entry_t *)(pde & PG_FRAME))[(va & PT_MASK) >>
PAGE_SHIFT]);
-}
-
-static pt_entry_t
-xpmap_get_bootpte(paddr_t va)
-{
-
- return xpmap_get_bootptep(va)[0];
-}
-
-#if defined(XENDEBUG)
-static void
-xpmap_dump_pt(pt_entry_t *ptp, int p)
-{
- pt_entry_t pte;
- int j;
- int bufpos;
-
- pte = xpmap_ptom((uint32_t)ptp - KERNBASE);
- PRINTK(("%03x: %p(%p) %08x\n", p, ptp, (void *)pte, p << PDSHIFT));
-
- bufpos = 0;
- for (j = 0; j < PTES_PER_PTP; j++) {
- if ((ptp[j] & PG_V) == 0)
- continue;
- pte = ptp[j] /* & PG_FRAME */;
- bufpos += sprintf(XBUF + bufpos, "%x:%03x:%08x ",
- p, j, pte);
- if (bufpos > 70) {
- int k;
- sprintf(XBUF + bufpos, "\n");
- PRINTK((XBUF));
- bufpos = 0;
- for (k = 0; k < 1000000; k++);
- }
- }
- if (bufpos) {
- PRINTK((XBUF));
- PRINTK(("\n"));
- bufpos = 0;
- }
-}
-#endif
-
-void
-xpmap_init(void)
-{
- pd_entry_t *xen_pdp;
- pt_entry_t *ptp, *sysptp;
- pt_entry_t pte;
- uint32_t i, j;
- int bufpos;
-#if defined(XENDEBUG_LOW)
- extern char kernel_text, _etext, __bss_start, end, *esym;
-#endif
-
- xpmap_phys_to_machine_mapping = (void *)xen_start_info.mfn_list;
-
- xen_pdp = (pd_entry_t *)xen_start_info.pt_base;
-
- XENPRINTK(("text %p data %p bss %p end %p esym %p\n", &kernel_text,
- &_etext, &__bss_start, &end, esym));
- XENPRINTK(("xpmap_init PTD %p nkpde %d upages %d xen_PTD %p p2m-map
%p\n",
- (void *)PTDpaddr, nkpde, UPAGES, xen_pdp,
- xpmap_phys_to_machine_mapping));
-
- bufpos = 0;
-
- XENPRINTK(("shared_inf %08x\n", (paddr_t)xen_start_info.shared_info));
- XENPRINTK(("c0100000: %08x\n",
- xpmap_get_bootpte(0xc0100000)));
-
- /* Map kernel. */
-
- /* Map kernel data/bss/tables. */
-
- /* Map ISA I/O memory. */
-
- /* Map kernel PDEs. */
-
- /* Install a PDE recursively mapping page directory as a page table! */
-
- sysptp = (pt_entry_t *)(PTDpaddr + ((1 + UPAGES) << PAGE_SHIFT));
-
- /* make xen's PDE and PTE pages read-only in our pagetable */
- for (i = 0; i < xen_start_info.nr_pt_frames; i++) {
- /* mark PTE page read-only in our table */
- sysptp[((xen_start_info.pt_base +
- (i << PAGE_SHIFT) - KERNBASE_LOCORE) &
- (PD_MASK | PT_MASK)) >> PAGE_SHIFT] &= ~PG_RW;
- }
-
- xpq_flush_queue();
-
- for (i = 0; i < 1 + UPAGES + nkpde; i++) {
- /* mark PTE page read-only in xen's table */
- ptp = xpmap_get_bootptep(PTDpaddr + (i << PAGE_SHIFT));
- xpq_queue_pte_update(
- (void *)xpmap_ptom((unsigned long)ptp - KERNBASE), *ptp &
~PG_RW);
- XENPRINTK(("%03x: %p(%p) -> %08x\n", i, ptp,
- (unsigned long)ptp - KERNTEXTOFF, *ptp));
-
- /* mark PTE page read-only in our table */
- sysptp[((PTDpaddr + (i << PAGE_SHIFT) - KERNBASE_LOCORE) &
- (PD_MASK | PT_MASK)) >> PAGE_SHIFT] &= ~PG_RW;
-
- /* update our pte's */
- ptp = (pt_entry_t *)(PTDpaddr + (i << PAGE_SHIFT));
-#if 0
- pte = xpmap_ptom((uint32_t)ptp - KERNBASE);
- XENPRINTK(("%03x: %p(%p) %08x\n", i, ptp, pte, i << PDSHIFT));
-#endif
- for (j = 0; j < PTES_PER_PTP; j++) {
- if ((ptp[j] & PG_V) == 0)
- continue;
- if (ptp[j] == 0xffffffff)
- ptp[j] = xen_start_info.shared_info |
- (PG_V|PG_RW);
- if (ptp[j] >= KERNTEXTOFF) {
- pte = ptp[j];
- ptp[j] = (pte & ~PG_FRAME) |
- (xpmap_get_bootpte(pte & PG_FRAME) &
- PG_FRAME);
- }
-#if defined(XENDEBUG) && 0
- pte = ptp[j] /* & PG_FRAME */;
- bufpos += sprintf(XBUF + bufpos, "%x:%03x:%08x ",
- i, j, pte);
- if (bufpos > 70) {
- int k;
- sprintf(XBUF + bufpos, "\n");
- XENPRINTK((XBUF));
- bufpos = 0;
- for (k = 0; k < 1000000; k++);
- }
- }
- if (bufpos) {
- XENPRINTK((XBUF));
- bufpos = 0;
-#endif
- }
- if (i == 0)
- i = 1 + UPAGES - 1;
- }
-
-#if 0
- for (i = 0x300; i < 0x305; i++)
- if (((pt_entry_t *)xen_start_info.pt_base)[i] & PG_V)
- xpmap_dump_pt((pt_entry_t *)
- (xpmap_mtop(((pt_entry_t
*)xen_start_info.pt_base)[i] &
- PG_FRAME) + KERNBASE), i);
- xpmap_dump_pt((pt_entry_t *)xen_start_info.pt_base, 0);
-#endif
-
- XENPRINTK(("switching pdp: %p, %08lx, %p, %p, %p\n", (void *)PTDpaddr,
- PTDpaddr - KERNBASE,
- (void *)xpmap_ptom(PTDpaddr - KERNBASE),
- (void *)xpmap_get_bootpte(PTDpaddr),
- (void *)xpmap_mtop(xpmap_ptom(PTDpaddr - KERNBASE))));
-
-#if defined(XENDEBUG)
- xpmap_dump_pt((pt_entry_t *)PTDpaddr, 0);
-#endif
-
- xpq_flush_queue();
-
- xpq_queue_pin_table(xpmap_get_bootpte(PTDpaddr) & PG_FRAME,
- XPQ_PIN_L2_TABLE);
- xpq_queue_pt_switch(xpmap_get_bootpte(PTDpaddr) & PG_FRAME);
- xpq_queue_unpin_table(
- xpmap_get_bootpte(xen_start_info.pt_base) & PG_FRAME);
-
- /* make xen's PDE and PTE pages writable in our pagetable */
- for (i = 0; i < xen_start_info.nr_pt_frames; i++) {
- /* mark PTE page writable in our table */
- ptp = &sysptp[((xen_start_info.pt_base +
- (i << PAGE_SHIFT) - KERNBASE_LOCORE) &
- (PD_MASK | PT_MASK)) >> PAGE_SHIFT];
- xpq_queue_pte_update(
- (void *)xpmap_ptom((unsigned long)ptp - KERNBASE), *ptp |
- PG_RW);
- }
-
- xpq_flush_queue();
- XENPRINTK(("pt_switch done!\n"));
-}
-
-/*
- * Do a binary search to find out where physical memory ends on the
- * real hardware. Xen will fail our updates if they are beyond the
- * last available page (max_page in xen/common/memory.c).
- */
-paddr_t
-find_pmap_mem_end(vaddr_t va)
-{
- mmu_update_t r;
- int start, end, ok;
- pt_entry_t old;
-
- start = xen_start_info.nr_pages;
- end = HYPERVISOR_VIRT_START >> PAGE_SHIFT;
-
- r.ptr = (unsigned long)&PTE_BASE[x86_btop(va)];
- old = PTE_BASE[x86_btop(va)];
-
- while (start + 1 < end) {
- r.val = (((start + end) / 2) << PAGE_SHIFT) | PG_V;
-
- if (HYPERVISOR_mmu_update(&r, 1, &ok) < 0)
- end = (start + end) / 2;
- else
- start = (start + end) / 2;
- }
- r.val = old;
- if (HYPERVISOR_mmu_update(&r, 1, &ok) < 0)
- printf("pmap_mem_end find: old update failed %08x\n",
- old);
-
- return end << PAGE_SHIFT;
-}
-
-
-#if 0
-void xpmap_find_memory(paddr_t);
-void
-xpmap_find_memory(paddr_t first_avail)
-{
- char buf[256];
- uint32_t i;
- int bufpos;
- paddr_t p;
-
- bufpos = 0;
- for (i = ((first_avail - KERNTEXTOFF) >> PAGE_SHIFT);
- i < xen_start_info.nr_pages; i++) {
- /* if (xpmap_phys_to_machine_mapping[i] */
- bufpos += sprintf(buf + bufpos, "%03x:%08x:%08x ",
- i, (uint32_t)xpmap_phys_to_machine_mapping[i],
- (uint32_t)xpmap_mtop(xpmap_phys_to_machine_mapping[i] <<
- PAGE_SHIFT));
- p = xpmap_phys_to_machine_mapping[i];
- uvm_page_physload(p, p + 1, p, p + 1, VM_FREELIST_DEFAULT);
-
- if (bufpos > 70) {
- int k;
- sprintf(buf + bufpos, "\n");
- XENPRINTK((buf));
- bufpos = 0;
- for (k = 0; k < 1000000; k++);
- }
- }
- if (bufpos) {
- XENPRINTK((buf));
- bufpos = 0;
- }
-}
-#endif
-
-
-#ifdef XENDEBUG
-void xpq_debug_dump(void);
-#endif
-
-#define XPQUEUE_SIZE 2048
-typedef union xpq_queue {
- struct {
- pd_entry_t *ptr;
- pd_entry_t val;
- } pde;
- struct {
- pt_entry_t *ptr;
- pt_entry_t val;
- } pte;
- struct {
- paddr_t ptr;
- uint32_t val;
- } pa;
-} xpq_queue_t;
-static xpq_queue_t xpq_queue[XPQUEUE_SIZE];
-static int xpq_idx = 0;
-
-void
-xpq_flush_queue()
-{
- int i, ok;
-
- XENPRINTK2(("flush queue %p entries %d\n", xpq_queue, xpq_idx));
- for (i = 0; i < xpq_idx; i++)
- XENPRINTK2(("%d: %p %08x\n", i, xpq_queue[i].pde.ptr,
- xpq_queue[i].pde.val));
- if (xpq_idx != 0 &&
- HYPERVISOR_mmu_update((mmu_update_t *)xpq_queue, xpq_idx, &ok) < 0)
- panic("HYPERVISOR_mmu_update failed\n");
- xpq_idx = 0;
-}
-
-static inline void
-xpq_increment_idx(void)
-{
-
- xpq_idx++;
- if (__predict_false(xpq_idx == XPQUEUE_SIZE))
- xpq_flush_queue();
-}
-
-void
-xpq_queue_invlpg(vaddr_t va)
-{
-
- XENPRINTK2(("xpq_queue_invlpg %p\n", (void *)va));
- xpq_queue[xpq_idx].pa.ptr = (va & PG_FRAME) | MMU_EXTENDED_COMMAND;
- xpq_queue[xpq_idx].pa.val = MMUEXT_INVLPG;
- xpq_increment_idx();
-}
-
-void
-xpq_queue_pde_update(pd_entry_t *ptr, pd_entry_t val)
-{
-
- xpq_queue[xpq_idx].pde.ptr = ptr;
- xpq_queue[xpq_idx].pde.val = val;
- xpq_increment_idx();
-}
-
-void
-xpq_queue_pte_update(pt_entry_t *ptr, pt_entry_t val)
-{
-
- xpq_queue[xpq_idx].pte.ptr = ptr;
- xpq_queue[xpq_idx].pte.val = val;
- xpq_increment_idx();
-}
-
-void
-xpq_queue_unchecked_pte_update(pt_entry_t *ptr, pt_entry_t val)
-{
-
- xpq_queue[xpq_idx].pa.ptr = (paddr_t)ptr | MMU_NORMAL_PT_UPDATE;
- /* XXXcl UNCHECKED_PT_UPDATE */
- xpq_queue[xpq_idx].pa.val = val;
- xpq_increment_idx();
-}
-
-void
-xpq_queue_pt_switch(paddr_t pa)
-{
-
- XENPRINTK2(("xpq_queue_pt_switch: %p %p\n", (void *)pa, (void *)pa));
- xpq_queue[xpq_idx].pa.ptr = pa | MMU_EXTENDED_COMMAND;
- xpq_queue[xpq_idx].pa.val = MMUEXT_NEW_BASEPTR;
- xpq_increment_idx();
-}
-
-void
-xpq_queue_pin_table(paddr_t pa, int type)
-{
-
- XENPRINTK2(("xpq_queue_pin_table: %p %p\n", (void *)pa, (void *)pa));
- xpq_queue[xpq_idx].pa.ptr = pa | MMU_EXTENDED_COMMAND;
- switch (type) {
- case XPQ_PIN_L1_TABLE:
- xpq_queue[xpq_idx].pa.val = MMUEXT_PIN_L1_TABLE;
- break;
- case XPQ_PIN_L2_TABLE:
- xpq_queue[xpq_idx].pa.val = MMUEXT_PIN_L2_TABLE;
- break;
- }
- xpq_increment_idx();
-}
-
-void
-xpq_queue_unpin_table(paddr_t pa)
-{
-
- XENPRINTK2(("xpq_queue_unpin_table: %p %p\n", (void *)pa, (void *)pa));
- xpq_queue[xpq_idx].pa.ptr = pa | MMU_EXTENDED_COMMAND;
- xpq_queue[xpq_idx].pa.val = MMUEXT_UNPIN_TABLE;
- xpq_increment_idx();
-}
-
-void
-xpq_queue_set_ldt(vaddr_t va, uint32_t entries)
-{
-
- XENPRINTK2(("xpq_queue_set_ldt\n"));
- KASSERT(va == (va & PG_FRAME));
- xpq_queue[xpq_idx].pa.ptr = MMU_EXTENDED_COMMAND | va;
- xpq_queue[xpq_idx].pa.val = MMUEXT_SET_LDT |
- (entries << MMUEXT_CMD_SHIFT);
- xpq_increment_idx();
-}
-
-void
-xpq_queue_tlb_flush()
-{
-
- XENPRINTK2(("xpq_queue_tlb_flush\n"));
- xpq_queue[xpq_idx].pa.ptr = MMU_EXTENDED_COMMAND;
- xpq_queue[xpq_idx].pa.val = MMUEXT_TLB_FLUSH;
- xpq_increment_idx();
-}
-
-#ifdef XENDEBUG
-void
-xpq_debug_dump()
-{
- int i;
-
- XENPRINTK2(("idx: %d\n", xpq_idx));
- for (i = 0; i < xpq_idx; i++) {
- sprintf(XBUF, "%p %08x ", xpq_queue[i].pte.ptr,
- xpq_queue[i].pte.val);
- if (++i < xpq_idx)
- sprintf(XBUF + strlen(XBUF), "%p %08x ",
- xpq_queue[i].pte.ptr, xpq_queue[i].pte.val);
- if (++i < xpq_idx)
- sprintf(XBUF + strlen(XBUF), "%p %08x ",
- xpq_queue[i].pte.ptr, xpq_queue[i].pte.val);
- if (++i < xpq_idx)
- sprintf(XBUF + strlen(XBUF), "%p %08x ",
- xpq_queue[i].pte.ptr, xpq_queue[i].pte.val);
- XENPRINTK2(("%d: %s\n", xpq_idx, XBUF));
- }
-}
-#endif
diff -r 64cd054aa143 -r 0255f48b757f
netbsd-2.0-xen-sparse/sys/arch/xen/include/ctrl_if.h
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/include/ctrl_if.h Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,139 +0,0 @@
-/******************************************************************************
- * ctrl_if.h
- *
- * Management functions for special interface to the domain controller.
- *
- * Copyright (c) 2004, K A Fraser
- */
-
-#ifndef __ASM_XEN__CTRL_IF_H__
-#define __ASM_XEN__CTRL_IF_H__
-
-typedef control_msg_t ctrl_msg_t;
-
-/*
- * Callback function type. Called for asynchronous processing of received
- * request messages, and responses to previously-transmitted request messages.
- * The parameters are (@msg, @id).
- * @msg: Original request/response message (not a copy). The message can be
- * modified in-place by the handler (e.g., a response callback can
- * turn a request message into a response message in place). The message
- * is no longer accessible after the callback handler returns -- if the
- * message is required to persist for longer then it must be copied.
- * @id: (Response callbacks only) The 'id' that was specified when the
- * original request message was queued for transmission.
- */
-typedef void (*ctrl_msg_handler_t)(ctrl_msg_t *, unsigned long);
-
-/*
- * Send @msg to the domain controller. Execute @hnd when a response is
- * received, passing the response message and the specified @id. This
- * operation will not block: it will return -EAGAIN if there is no space.
- * Notes:
- * 1. The @msg is copied if it is transmitted and so can be freed after this
- * function returns.
- * 2. If @hnd is NULL then no callback is executed.
- */
-int
-ctrl_if_send_message_noblock(
- ctrl_msg_t *msg,
- ctrl_msg_handler_t hnd,
- unsigned long id);
-
-/*
- * Send @msg to the domain controller. Execute @hnd when a response is
- * received, passing the response message and the specified @id. This
- * operation will block until the message is sent, or a signal is received
- * for the calling process (unless @wait_state is TASK_UNINTERRUPTIBLE).
- * Notes:
- * 1. The @msg is copied if it is transmitted and so can be freed after this
- * function returns.
- * 2. If @hnd is NULL then no callback is executed.
- */
-int
-ctrl_if_send_message_block(
- ctrl_msg_t *msg,
- ctrl_msg_handler_t hnd,
- unsigned long id,
- long wait_state);
-
-/*
- * Send @msg to the domain controller. Block until the response is received,
- * and then copy it into the provided buffer, @rmsg.
- */
-int
-ctrl_if_send_message_and_get_response(
- ctrl_msg_t *msg,
- ctrl_msg_t *rmsg,
- long wait_state);
-
-#ifdef notyet
-/*
- * Request a callback when there is /possibly/ space to immediately send a
- * message to the domain controller. This function returns 0 if there is
- * already space to trasnmit a message --- in this case the callback task /may/
- * still be executed. If this function returns 1 then the callback /will/ be
- * executed when space becomes available.
- */
-int
-ctrl_if_enqueue_space_callback(
- struct tq_struct *task);
-#endif
-
-/*
- * Send a response (@msg) to a message from the domain controller. This will
- * never block.
- * Notes:
- * 1. The @msg is copied and so can be freed after this function returns.
- * 2. The @msg may be the original request message, modified in-place.
- */
-void
-ctrl_if_send_response(
- ctrl_msg_t *msg);
-
-/*
- * Register a receiver for typed messages from the domain controller. The
- * handler (@hnd) is called for every received message of specified @type.
- * Returns TRUE (non-zero) if the handler was successfully registered.
- * If CALLBACK_IN_BLOCKING CONTEXT is specified in @flags then callbacks will
- * occur in a context in which it is safe to yield (i.e., process context).
- */
-#define CALLBACK_IN_BLOCKING_CONTEXT 1
-int ctrl_if_register_receiver(
- uint8_t type,
- ctrl_msg_handler_t hnd,
- unsigned int flags);
-
-/*
- * Unregister a receiver for typed messages from the domain controller. The
- * handler (@hnd) will not be executed after this function returns.
- */
-void
-ctrl_if_unregister_receiver(
- uint8_t type, ctrl_msg_handler_t hnd);
-
-/* Suspend/resume notifications. */
-void ctrl_if_suspend(void);
-void ctrl_if_resume(void);
-
-/* Start-of-day setup. */
-void ctrl_if_early_init(void);
-void ctrl_if_init(void);
-
-/*
- * Returns TRUE if there are no outstanding message requests at the domain
- * controller. This can be used to ensure that messages have really flushed
- * through when it is not possible to use the response-callback interface.
- * WARNING: If other subsystems are using the control interface then this
- * function might never return TRUE!
- */
-int ctrl_if_transmitter_empty(void); /* !! DANGEROUS FUNCTION !! */
-
-/*
- * Manually discard response messages from the domain controller.
- * WARNING: This is usually done automatically -- this function should only
- * be called when normal interrupt mechanisms are disabled!
- */
-void ctrl_if_discard_responses(void); /* !! DANGEROUS FUNCTION !! */
-
-#endif /* __ASM_XEN__CONTROL_IF_H__ */
diff -r 64cd054aa143 -r 0255f48b757f
netbsd-2.0-xen-sparse/sys/arch/xen/include/evtchn.h
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/include/evtchn.h Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,53 +0,0 @@
-/* $NetBSD$ */
-
-/*
- *
- * Copyright (c) 2004 Christian Limpach.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Christian Limpach.
- * 4. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _XEN_EVENTS_H_
-#define _XEN_EVENTS_H_
-
-#define NR_IRQS 32
-
-extern int evtchn_to_irq[];
-
-/* typedef unsigned int (*ev_handler_t)(int, struct pt_regs *); */
-typedef int (*ev_handler_t)(void *);
-
-void events_default_setup(void);
-void init_events(void);
-unsigned int do_event(int, struct intrframe *);
-int event_set_handler(int, ev_handler_t, void *, int);
-
-int bind_virq_to_irq(int);
-void unbind_virq_from_irq(int);
-int bind_evtchn_to_irq(int);
-
-#endif /* _XEN_EVENTS_H_ */
diff -r 64cd054aa143 -r 0255f48b757f
netbsd-2.0-xen-sparse/sys/arch/xen/include/frameasm.h
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/include/frameasm.h Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,130 +0,0 @@
-/* $NetBSD: frameasm.h,v 1.1 2004/03/11 21:44:08 cl Exp $ */
-/* NetBSD: frameasm.h,v 1.4 2004/02/20 17:35:01 yamt Exp */
-
-#ifndef _I386_FRAMEASM_H_
-#define _I386_FRAMEASM_H_
-
-#ifdef _KERNEL_OPT
-#include "opt_multiprocessor.h"
-#endif
-
-/* XXX assym.h */
-#define TRAP_INSTR int $0x82
-#define __HYPERVISOR_stack_switch 4
-#define __HYPERVISOR_fpu_taskswitch 7
-
-#ifndef TRAPLOG
-#define TLOG /**/
-#else
-/*
- * Fill in trap record
- */
-#define TLOG \
-9: \
- movl %fs:CPU_TLOG_OFFSET, %eax; \
- movl %fs:CPU_TLOG_BASE, %ebx; \
- addl $SIZEOF_TREC,%eax; \
- andl $SIZEOF_TLOG-1,%eax; \
- addl %eax,%ebx; \
- movl %eax,%fs:CPU_TLOG_OFFSET; \
- movl %esp,TREC_SP(%ebx); \
- movl $9b,TREC_HPC(%ebx); \
- movl TF_EIP(%esp),%eax; \
- movl %eax,TREC_IPC(%ebx); \
- rdtsc ; \
- movl %eax,TREC_TSC(%ebx); \
- movl $MSR_LASTBRANCHFROMIP,%ecx; \
- rdmsr ; \
- movl %eax,TREC_LBF(%ebx); \
- incl %ecx ; \
- rdmsr ; \
- movl %eax,TREC_LBT(%ebx); \
- incl %ecx ; \
- rdmsr ; \
- movl %eax,TREC_IBF(%ebx); \
- incl %ecx ; \
- rdmsr ; \
- movl %eax,TREC_IBT(%ebx)
-#endif
-
-/*
- * These are used on interrupt or trap entry or exit.
- */
-#define INTRENTRY \
- cld; \
- subl $TF_PUSHSIZE,%esp ; \
- movl %gs,TF_GS(%esp) ; \
- movl %fs,TF_FS(%esp) ; \
- movl %eax,TF_EAX(%esp) ; \
- movl %es,TF_ES(%esp) ; \
- movl %ds,TF_DS(%esp) ; \
- movl $GSEL(GDATA_SEL, SEL_KPL),%eax ; \
- movl %edi,TF_EDI(%esp) ; \
- movl %esi,TF_ESI(%esp) ; \
- movl %eax,%ds ; \
- movl %ebp,TF_EBP(%esp) ; \
- movl %eax,%es ; \
- movl %ebx,TF_EBX(%esp) ; \
- movl %eax,%gs ; \
- movl %edx,TF_EDX(%esp) ; \
- movl $GSEL(GCPU_SEL, SEL_KPL),%eax ; \
- movl %ecx,TF_ECX(%esp) ; \
- movl %eax,%fs ; \
- TLOG
-
-#define INTRFASTEXIT \
- movl TF_GS(%esp),%gs ; \
- movl TF_FS(%esp),%fs ; \
- movl TF_ES(%esp),%es ; \
- movl TF_DS(%esp),%ds ; \
- movl TF_EDI(%esp),%edi ; \
- movl TF_ESI(%esp),%esi ; \
- movl TF_EBP(%esp),%ebp ; \
- movl TF_EBX(%esp),%ebx ; \
- movl TF_EDX(%esp),%edx ; \
- movl TF_ECX(%esp),%ecx ; \
- movl TF_EAX(%esp),%eax ; \
- addl $(TF_PUSHSIZE+8),%esp ; \
- iret
-
-#define DO_DEFERRED_SWITCH(reg) \
- cmpl $0, CPUVAR(WANT_PMAPLOAD) ; \
- jz 1f ; \
- call _C_LABEL(pmap_load) ; \
- 1:
-
-#define CHECK_DEFERRED_SWITCH(reg) \
- cmpl $0, CPUVAR(WANT_PMAPLOAD)
-
-#define CHECK_ASTPENDING(reg) movl CPUVAR(CURLWP),reg ; \
- cmpl $0, reg ; \
- je 1f ; \
- movl L_PROC(reg),reg ; \
- cmpl $0, P_MD_ASTPENDING(reg); \
- 1:
-#define CLEAR_ASTPENDING(reg) movl $0, P_MD_ASTPENDING(reg)
-
-#if !defined(XEN)
-#define CLI(reg) cli
-#define STI(reg) sti
-#else
-/* XXX assym.h */
-#define EVENTS_MASK 136
-/* Offsets into shared_info_t. */
-#define evtchn_upcall_pending /* 0 */
-#define evtchn_upcall_mask 1
-
-#define XEN_BLOCK_EVENTS(reg) movb $1,evtchn_upcall_mask(reg)
-#define XEN_UNBLOCK_EVENTS(reg) movb $0,evtchn_upcall_mask(reg)
-#define XEN_TEST_PENDING(reg) testb $0xFF,evtchn_upcall_pending(%reg)
-
-#define CLI(reg) movl _C_LABEL(HYPERVISOR_shared_info),reg ; \
- XEN_BLOCK_EVENTS(reg)
-#define STI(reg) movl _C_LABEL(HYPERVISOR_shared_info),reg ; \
- XEN_UNBLOCK_EVENTS(reg)
-#define STIC(reg) movl _C_LABEL(HYPERVISOR_shared_info),reg ; \
- XEN_UNBLOCK_EVENTS(reg) ; \
- testb $1,evtchn_upcall_pending(reg)
-#endif
-
-#endif /* _I386_FRAMEASM_H_ */
diff -r 64cd054aa143 -r 0255f48b757f
netbsd-2.0-xen-sparse/sys/arch/xen/include/hypervisor.h
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/include/hypervisor.h Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,517 +0,0 @@
-/* $NetBSD: hypervisor.h,v 1.1.2.2 2004/06/17 09:23:19 tron Exp $ */
-
-/*
- *
- * Communication to/from hypervisor.
- *
- * Copyright (c) 2002-2004, K A Fraser
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-
-#ifndef _XEN_HYPERVISOR_H_
-#define _XEN_HYPERVISOR_H_
-
-
-struct hypervisor_attach_args {
- const char *haa_busname;
-};
-
-struct xencons_attach_args {
- const char *xa_device;
-};
-
-struct xen_npx_attach_args {
- const char *xa_device;
-};
-
-
-#define u8 uint8_t
-#define u16 uint16_t
-#define u32 uint32_t
-#define u64 uint64_t
-#define s8 int8_t
-#define s16 int16_t
-#define s32 int32_t
-#define s64 int64_t
-
-/* include the hypervisor interface */
-#include <sys/systm.h>
-#include <machine/xen-public/xen.h>
-#include <machine/xen-public/dom0_ops.h>
-#include <machine/xen-public/event_channel.h>
-#include <machine/xen-public/io/domain_controller.h>
-#include <machine/xen-public/io/netif.h>
-#include <machine/xen-public/io/blkif.h>
-
-#undef u8
-#undef u16
-#undef u32
-#undef u64
-#undef s8
-#undef s16
-#undef s32
-#undef s64
-
-
-/*
- * a placeholder for the start of day information passed up from the hypervisor
- */
-union start_info_union
-{
- start_info_t start_info;
- char padding[512];
-};
-extern union start_info_union start_info_union;
-#define xen_start_info (start_info_union.start_info)
-
-
-/* hypervisor.c */
-void do_hypervisor_callback(struct intrframe *regs);
-void hypervisor_notify_via_evtchn(unsigned int);
-void hypervisor_enable_irq(unsigned int);
-void hypervisor_disable_irq(unsigned int);
-void hypervisor_acknowledge_irq(unsigned int);
-
-/* hypervisor_machdep.c */
-void hypervisor_unmask_event(unsigned int);
-void hypervisor_mask_event(unsigned int);
-void hypervisor_clear_event(unsigned int);
-void hypervisor_force_callback(void);
-
-/*
- * Assembler stubs for hyper-calls.
- */
-
-static inline int
-HYPERVISOR_set_trap_table(trap_info_t *table)
-{
- int ret;
- unsigned long ign1;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1)
- : "0" (__HYPERVISOR_set_trap_table), "1" (table)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_mmu_update(mmu_update_t *req, int count, int *success_count)
-{
- int ret;
- unsigned long ign1, ign2, ign3;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
- : "0" (__HYPERVISOR_mmu_update), "1" (req), "2" (count),
- "3" (success_count)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_set_gdt(unsigned long *frame_list, int entries)
-{
- int ret;
- unsigned long ign1, ign2;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2)
- : "0" (__HYPERVISOR_set_gdt), "1" (frame_list), "2" (entries)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp)
-{
- int ret;
- unsigned long ign1, ign2;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2)
- : "0" (__HYPERVISOR_stack_switch), "1" (ss), "2" (esp)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_set_callbacks(
- unsigned long event_selector, unsigned long event_address,
- unsigned long failsafe_selector, unsigned long failsafe_address)
-{
- int ret;
- unsigned long ign1, ign2, ign3, ign4;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
- : "0" (__HYPERVISOR_set_callbacks), "1" (event_selector),
- "2" (event_address), "3" (failsafe_selector), "4" (failsafe_address)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_fpu_taskswitch(int set)
-{
- int ret;
- unsigned long ign;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign)
- : "0" (__HYPERVISOR_fpu_taskswitch), "1" (set)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_yield(void)
-{
- int ret;
- unsigned long ign1;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1)
- : "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_yield)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_block(void)
-{
- int ret;
- unsigned long ign1;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1)
- : "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_block)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_shutdown(void)
-{
- int ret;
- unsigned long ign1;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1)
- : "0" (__HYPERVISOR_sched_op),
- "1" (SCHEDOP_shutdown | (SHUTDOWN_poweroff << SCHEDOP_reasonshift))
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_reboot(void)
-{
- int ret;
- unsigned long ign1;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1)
- : "0" (__HYPERVISOR_sched_op),
- "1" (SCHEDOP_shutdown | (SHUTDOWN_reboot << SCHEDOP_reasonshift))
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_suspend(unsigned long srec)
-{
- int ret;
- unsigned long ign1, ign2;
-
- /* NB. On suspend, control software expects a suspend record in %esi. */
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=S" (ign2)
- : "0" (__HYPERVISOR_sched_op),
- "b" (SCHEDOP_shutdown | (SHUTDOWN_suspend << SCHEDOP_reasonshift)),
- "S" (srec) : "memory");
-
- return ret;
-}
-
-static inline long
-HYPERVISOR_set_timer_op(uint64_t timeout)
-{
- int ret;
- unsigned long timeout_hi = (unsigned long)(timeout>>32);
- unsigned long timeout_lo = (unsigned long)timeout;
- unsigned long ign1, ign2;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2)
- : "0" (__HYPERVISOR_set_timer_op), "b" (timeout_lo), "c" (timeout_hi)
- : "memory");
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_dom0_op(dom0_op_t *dom0_op)
-{
- int ret;
- unsigned long ign1;
-
- dom0_op->interface_version = DOM0_INTERFACE_VERSION;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1)
- : "0" (__HYPERVISOR_dom0_op), "1" (dom0_op)
- : "memory");
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_set_debugreg(int reg, unsigned long value)
-{
- int ret;
- unsigned long ign1, ign2;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2)
- : "0" (__HYPERVISOR_set_debugreg), "1" (reg), "2" (value)
- : "memory" );
-
- return ret;
-}
-
-static inline unsigned long
-HYPERVISOR_get_debugreg(int reg)
-{
- unsigned long ret;
- unsigned long ign1;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1)
- : "0" (__HYPERVISOR_get_debugreg), "1" (reg)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_update_descriptor(unsigned long pa, unsigned long word1,
- unsigned long word2)
-{
- int ret;
- unsigned long ign1, ign2, ign3;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
- : "0" (__HYPERVISOR_update_descriptor), "1" (pa), "2" (word1),
- "3" (word2)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_dom_mem_op(unsigned int op, unsigned long *extent_list,
- unsigned long nr_extents, unsigned int extent_order)
-{
- int ret;
- unsigned long ign1, ign2, ign3, ign4, ign5;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4),
- "=D" (ign5)
- : "0" (__HYPERVISOR_dom_mem_op), "1" (op), "2" (extent_list),
- "3" (nr_extents), "4" (extent_order), "5" (DOMID_SELF)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_multicall(void *call_list, int nr_calls)
-{
- int ret;
- unsigned long ign1, ign2;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2)
- : "0" (__HYPERVISOR_multicall), "1" (call_list), "2" (nr_calls)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_update_va_mapping(unsigned long va, unsigned long new_val,
- unsigned long flags)
-{
- int ret;
- unsigned long ign1, ign2, ign3;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
- : "0" (__HYPERVISOR_update_va_mapping),
- "1" (va), "2" (new_val), "3" (flags)
- : "memory" );
-
- if (__predict_false(ret < 0))
- panic("Failed update VA mapping: %08lx, %08lx, %08lx",
- va, new_val, flags);
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_event_channel_op(void *op)
-{
- int ret;
- unsigned long ign1;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1)
- : "0" (__HYPERVISOR_event_channel_op), "1" (op)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_xen_version(int cmd)
-{
- int ret;
- unsigned long ign1;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1)
- : "0" (__HYPERVISOR_xen_version), "1" (cmd)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_console_io(int cmd, int count, char *str)
-{
- int ret;
- unsigned long ign1, ign2, ign3;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
- : "0" (__HYPERVISOR_console_io), "1" (cmd), "2" (count), "3" (str)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_physdev_op(void *physdev_op)
-{
- int ret;
- unsigned long ign1;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1)
- : "0" (__HYPERVISOR_physdev_op), "1" (physdev_op)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count)
-{
- int ret;
- unsigned long ign1, ign2, ign3;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
- : "0" (__HYPERVISOR_grant_table_op), "1" (cmd), "2" (count), "3" (uop)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_update_va_mapping_otherdomain(unsigned long va,
- unsigned long new_val, unsigned long flags, domid_t domid)
-{
- int ret;
- unsigned long ign1, ign2, ign3, ign4;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
- : "0" (__HYPERVISOR_update_va_mapping_otherdomain),
- "1" (va), "2" (new_val), "3" (flags), "4" (domid) :
- "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type)
-{
- int ret;
- unsigned long ign1, ign2;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2)
- : "0" (__HYPERVISOR_vm_assist), "1" (cmd), "2" (type)
- : "memory" );
-
- return ret;
-}
-
-#endif /* _XEN_HYPERVISOR_H_ */
diff -r 64cd054aa143 -r 0255f48b757f
netbsd-2.0-xen-sparse/sys/arch/xen/include/if_xennetvar.h
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/include/if_xennetvar.h Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,117 +0,0 @@
-/* $NetBSD: if_xennetvar.h,v 1.1.2.1 2004/05/22 15:59:31 he Exp $ */
-
-/*
- *
- * Copyright (c) 2004 Christian Limpach.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Christian Limpach.
- * 4. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-#ifndef _XEN_IF_XENNETVAR_H_
-#define _XEN_IF_XENNETVAR_H_
-
-#include <machine/xen.h>
-
-union xennet_bufarray {
- struct {
- struct mbuf *xbtx_m;
- } xb_tx;
- struct {
- vaddr_t xbrx_va;
- paddr_t xbrx_pa;
- struct xennet_softc *xbrx_sc;
- } xb_rx;
- int xb_next;
-};
-
-struct xennet_txbuf {
- SLIST_ENTRY(xennet_txbuf) xt_next;
- struct xennet_softc *xt_sc;
- paddr_t xt_pa;
- u_char xt_buf[0];
-};
-#define TXBUF_PER_PAGE 2
-#define TXBUF_BUFSIZE (PAGE_SIZE / TXBUF_PER_PAGE) - sizeof(struct
xennet_txbuf)
-
-struct xennet_softc {
- struct device sc_dev; /* base device glue */
- struct ethercom sc_ethercom; /* Ethernet common part */
-
- int sc_ifno;
-
- uint8_t sc_enaddr[6];
-
-#ifdef mediacode
- struct ifmedia sc_media;
-#endif
-
- /* What is the status of our connection to the remote backend? */
-#define BEST_CLOSED 0
-#define BEST_DISCONNECTED 1
-#define BEST_CONNECTED 2
- unsigned int sc_backend_state;
-
- unsigned int sc_evtchn;
- unsigned int sc_irq;
-
- netif_tx_interface_t *sc_tx;
- netif_rx_interface_t *sc_rx;
- struct vm_page *sc_pg_tx;
- struct vm_page *sc_pg_rx;
-
- uint32_t sc_tx_entries;
- uint32_t sc_tx_resp_cons;
-
- uint32_t sc_rx_resp_cons;
- uint32_t sc_rx_bufs_to_notify;
-
- union xennet_bufarray sc_tx_bufa[NETIF_TX_RING_SIZE];
- union xennet_bufarray sc_rx_bufa[NETIF_TX_RING_SIZE];
-
- SLIST_HEAD(, xennet_txbuf) sc_tx_bufs;
-
-#if NRND > 0
- rndsource_element_t sc_rnd_source;
-#endif
-};
-
-struct xennet_attach_args {
- const char *xa_device;
- int xa_handle;
-};
-
-struct nfs_diskless;
-
-int xennet_scan(struct device *, struct xennet_attach_args *, cfprint_t);
-void xennet_scan_finish(struct device *);
-void xennet_start(struct ifnet *);
-int xennet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data);
-void xennet_watchdog(struct ifnet *ifp);
-int xennet_bootstatic_callback(struct nfs_diskless *);
-
-#endif /* _XEN_IF_XENNETVAR_H_ */
diff -r 64cd054aa143 -r 0255f48b757f
netbsd-2.0-xen-sparse/sys/arch/xen/include/pmap.h
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/include/pmap.h Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,533 +0,0 @@
-/* $NetBSD: pmap.h,v 1.1.2.1 2004/05/22 15:59:58 he Exp $ */
-/* NetBSD: pmap.h,v 1.79 2004/02/20 17:35:01 yamt Exp */
-
-/*
- *
- * Copyright (c) 1997 Charles D. Cranor and Washington University.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgment:
- * This product includes software developed by Charles D. Cranor and
- * Washington University.
- * 4. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- * pmap.h: see pmap.c for the history of this pmap module.
- */
-
-#ifndef _I386_PMAP_H_
-#define _I386_PMAP_H_
-
-#if defined(_KERNEL_OPT)
-#include "opt_user_ldt.h"
-#include "opt_largepages.h"
-#endif
-
-#include "opt_xen.h"
-
-#include <machine/cpufunc.h>
-#include <machine/pte.h>
-#include <machine/xenfunc.h>
-#include <machine/xenpmap.h>
-#include <machine/segments.h>
-#include <uvm/uvm_object.h>
-
-/*
- * see pte.h for a description of i386 MMU terminology and hardware
- * interface.
- *
- * a pmap describes a processes' 4GB virtual address space. this
- * virtual address space can be broken up into 1024 4MB regions which
- * are described by PDEs in the PDP. the PDEs are defined as follows:
- *
- * (ranges are inclusive -> exclusive, just like vm_map_entry start/end)
- * (the following assumes that KERNBASE is 0xc0000000)
- *
- * PDE#s VA range usage
- * 0->766 0x0 -> 0xbfc00000 user address space
- * 767 0xbfc00000-> recursive mapping of PDP (used for
- * 0xc0000000 linear mapping of PTPs)
- * 768->1023 0xc0000000-> kernel address space (constant
- * 0xffc00000 across all pmap's/processes)
- * 1023 0xffc00000-> "alternate" recursive PDP
mapping
- * <end> (for other pmaps)
- *
- *
- * note: a recursive PDP mapping provides a way to map all the PTEs for
- * a 4GB address space into a linear chunk of virtual memory. in other
- * words, the PTE for page 0 is the first int mapped into the 4MB recursive
- * area. the PTE for page 1 is the second int. the very last int in the
- * 4MB range is the PTE that maps VA 0xffffe000 (the last page in a 4GB
- * address).
- *
- * all pmap's PD's must have the same values in slots 768->1023 so that
- * the kernel is always mapped in every process. these values are loaded
- * into the PD at pmap creation time.
- *
- * at any one time only one pmap can be active on a processor. this is
- * the pmap whose PDP is pointed to by processor register %cr3. this pmap
- * will have all its PTEs mapped into memory at the recursive mapping
- * point (slot #767 as show above). when the pmap code wants to find the
- * PTE for a virtual address, all it has to do is the following:
- *
- * address of PTE = (767 * 4MB) + (VA / PAGE_SIZE) * sizeof(pt_entry_t)
- * = 0xbfc00000 + (VA / 4096) * 4
- *
- * what happens if the pmap layer is asked to perform an operation
- * on a pmap that is not the one which is currently active? in that
- * case we take the PA of the PDP of non-active pmap and put it in
- * slot 1023 of the active pmap. this causes the non-active pmap's
- * PTEs to get mapped in the final 4MB of the 4GB address space
- * (e.g. starting at 0xffc00000).
- *
- * the following figure shows the effects of the recursive PDP mapping:
- *
- * PDP (%cr3)
- * +----+
- * | 0| -> PTP#0 that maps VA 0x0 -> 0x400000
- * | |
- * | |
- * | 767| -> points back to PDP (%cr3) mapping VA 0xbfc00000 -> 0xc0000000
- * | 768| -> first kernel PTP (maps 0xc0000000 -> 0xf0400000)
- * | |
- * |1023| -> points to alternate pmap's PDP (maps 0xffc00000 -> end)
- * +----+
- *
- * note that the PDE#767 VA (0xbfc00000) is defined as "PTE_BASE"
- * note that the PDE#1023 VA (0xffc00000) is defined as "APTE_BASE"
- *
- * starting at VA 0xbfc00000 the current active PDP (%cr3) acts as a
- * PTP:
- *
- * PTP#767 == PDP(%cr3) => maps VA 0xbfc00000 -> 0xc0000000
- * +----+
- * | 0| -> maps the contents of PTP#0 at VA 0xbfc00000->0xbfc01000
- * | |
- * | |
- * | 767| -> maps contents of PTP#767 (the PDP) at VA 0xbffbf000
- * | 768| -> maps contents of first kernel PTP
- * | |
- * |1023|
- * +----+
- *
- * note that mapping of the PDP at PTP#767's VA (0xbffbf000) is
- * defined as "PDP_BASE".... within that mapping there are two
- * defines:
- * "PDP_PDE" (0xbfeffbfc) is the VA of the PDE in the PDP
- * which points back to itself.
- * "APDP_PDE" (0xbfeffffc) is the VA of the PDE in the PDP which
- * establishes the recursive mapping of the alternate pmap.
- * to set the alternate PDP, one just has to put the correct
- * PA info in *APDP_PDE.
- *
- * note that in the APTE_BASE space, the APDP appears at VA
- * "APDP_BASE" (0xfffff000).
- */
-/* XXX MP should we allocate one APDP_PDE per processor?? */
-
-/*
- * the following defines identify the slots used as described above.
- */
-
-#define PDSLOT_PTE ((KERNBASE/NBPD)-1) /* 767: for recursive PDP map */
-#define PDSLOT_KERN (KERNBASE/NBPD) /* 768: start of kernel space */
-#define PDSLOT_APTE ((unsigned)1023-16) /* 1023: alternative recursive slot
*/
-
-/*
- * the following defines give the virtual addresses of various MMU
- * data structures:
- * PTE_BASE and APTE_BASE: the base VA of the linear PTE mappings
- * PTD_BASE and APTD_BASE: the base VA of the recursive mapping of the PTD
- * PDP_PDE and APDP_PDE: the VA of the PDE that points back to the PDP/APDP
- */
-
-#define PTE_BASE ((pt_entry_t *) (PDSLOT_PTE * NBPD) )
-#define APTE_BASE ((pt_entry_t *) (PDSLOT_APTE * NBPD) )
-#define PDP_BASE ((pd_entry_t *)(((char *)PTE_BASE) + (PDSLOT_PTE *
PAGE_SIZE)))
-#define APDP_BASE ((pd_entry_t *)(((char *)APTE_BASE) + (PDSLOT_APTE *
PAGE_SIZE)))
-#define PDP_PDE (PDP_BASE + PDSLOT_PTE)
-#define APDP_PDE (PDP_BASE + PDSLOT_APTE)
-
-/*
- * the follow define determines how many PTPs should be set up for the
- * kernel by locore.s at boot time. this should be large enough to
- * get the VM system running. once the VM system is running, the
- * pmap module can add more PTPs to the kernel area on demand.
- */
-
-#ifndef NKPTP
-#define NKPTP 4 /* 16MB to start */
-#endif
-#define NKPTP_MIN 4 /* smallest value we allow */
-#define NKPTP_MAX (1024 - (KERNBASE/NBPD) - 1)
- /* largest value (-1 for APTP space) */
-
-/*
- * pdei/ptei: generate index into PDP/PTP from a VA
- */
-#define pdei(VA) (((VA) & PD_MASK) >> PDSHIFT)
-#define ptei(VA) (((VA) & PT_MASK) >> PGSHIFT)
-
-/*
- * PTP macros:
- * a PTP's index is the PD index of the PDE that points to it
- * a PTP's offset is the byte-offset in the PTE space that this PTP is at
- * a PTP's VA is the first VA mapped by that PTP
- *
- * note that PAGE_SIZE == number of bytes in a PTP (4096 bytes == 1024 entries)
- * NBPD == number of bytes a PTP can map (4MB)
- */
-
-#define ptp_i2o(I) ((I) * PAGE_SIZE) /* index => offset */
-#define ptp_o2i(O) ((O) / PAGE_SIZE) /* offset => index */
-#define ptp_i2v(I) ((I) * NBPD) /* index => VA */
-#define ptp_v2i(V) ((V) / NBPD) /* VA => index (same as pdei) */
-
-/*
- * PG_AVAIL usage: we make use of the ignored bits of the PTE
- */
-
-#define PG_W PG_AVAIL1 /* "wired" mapping */
-#define PG_PVLIST PG_AVAIL2 /* mapping has entry on pvlist */
-#define PG_X PG_AVAIL3 /* executable mapping */
-
-/*
- * Number of PTE's per cache line. 4 byte pte, 32-byte cache line
- * Used to avoid false sharing of cache lines.
- */
-#define NPTECL 8
-
-#ifdef _KERNEL
-/*
- * pmap data structures: see pmap.c for details of locking.
- */
-
-struct pmap;
-typedef struct pmap *pmap_t;
-
-/*
- * we maintain a list of all non-kernel pmaps
- */
-
-LIST_HEAD(pmap_head, pmap); /* struct pmap_head: head of a pmap list */
-
-/*
- * the pmap structure
- *
- * note that the pm_obj contains the simple_lock, the reference count,
- * page list, and number of PTPs within the pmap.
- *
- * XXX If we ever support processor numbers higher than 31, we'll have
- * XXX to rethink the CPU mask.
- */
-
-struct pmap {
- struct uvm_object pm_obj; /* object (lck by object lock) */
-#define pm_lock pm_obj.vmobjlock
- LIST_ENTRY(pmap) pm_list; /* list (lck by pm_list lock) */
- pd_entry_t *pm_pdir; /* VA of PD (lck by object lock) */
- u_int32_t pm_pdirpa; /* PA of PD (read-only after create) */
- struct vm_page *pm_ptphint; /* pointer to a PTP in our pmap */
- struct pmap_statistics pm_stats; /* pmap stats (lck by object lock) */
-
- vaddr_t pm_hiexec; /* highest executable mapping */
- int pm_flags; /* see below */
-
- union descriptor *pm_ldt; /* user-set LDT */
- int pm_ldt_len; /* number of LDT entries */
- int pm_ldt_sel; /* LDT selector */
- u_int32_t pm_cpus; /* mask of CPUs using pmap */
-};
-
-/* pm_flags */
-#define PMF_USER_LDT 0x01 /* pmap has user-set LDT */
-
-/*
- * for each managed physical page we maintain a list of <PMAP,VA>'s
- * which it is mapped at. the list is headed by a pv_head structure.
- * there is one pv_head per managed phys page (allocated at boot time).
- * the pv_head structure points to a list of pv_entry structures (each
- * describes one mapping).
- */
-
-struct pv_entry { /* locked by its list's pvh_lock */
- SPLAY_ENTRY(pv_entry) pv_node; /* splay-tree node */
- struct pmap *pv_pmap; /* the pmap */
- vaddr_t pv_va; /* the virtual address */
- struct vm_page *pv_ptp; /* the vm_page of the PTP */
-};
-
-/*
- * pv_entrys are dynamically allocated in chunks from a single page.
- * we keep track of how many pv_entrys are in use for each page and
- * we can free pv_entry pages if needed. there is one lock for the
- * entire allocation system.
- */
-
-struct pv_page_info {
- TAILQ_ENTRY(pv_page) pvpi_list;
- struct pv_entry *pvpi_pvfree;
- int pvpi_nfree;
-};
-
-/*
- * number of pv_entry's in a pv_page
- * (note: won't work on systems where NPBG isn't a constant)
- */
-
-#define PVE_PER_PVPAGE ((PAGE_SIZE - sizeof(struct pv_page_info)) / \
- sizeof(struct pv_entry))
-
-/*
- * a pv_page: where pv_entrys are allocated from
- */
-
-struct pv_page {
- struct pv_page_info pvinfo;
- struct pv_entry pvents[PVE_PER_PVPAGE];
-};
-
-/*
- * global kernel variables
- */
-
-/* PTDpaddr: is the physical address of the kernel's PDP */
-extern u_long PTDpaddr;
-
-extern struct pmap kernel_pmap_store; /* kernel pmap */
-extern int nkpde; /* current # of PDEs for kernel */
-extern int pmap_pg_g; /* do we support PG_G? */
-
-/*
- * macros
- */
-
-#define pmap_kernel() (&kernel_pmap_store)
-#define pmap_resident_count(pmap)
((pmap)->pm_stats.resident_count)
-#define pmap_wired_count(pmap) ((pmap)->pm_stats.wired_count)
-#define pmap_update(pmap) /* nothing (yet) */
-
-#define pmap_clear_modify(pg) pmap_clear_attrs(pg, PG_M)
-#define pmap_clear_reference(pg) pmap_clear_attrs(pg, PG_U)
-#define pmap_copy(DP,SP,D,L,S)
-#define pmap_is_modified(pg) pmap_test_attrs(pg, PG_M)
-#define pmap_is_referenced(pg) pmap_test_attrs(pg, PG_U)
-#define pmap_move(DP,SP,D,L,S)
-#define pmap_phys_address(ppn) x86_ptob(ppn)
-#define pmap_valid_entry(E) ((E) & PG_V) /* is PDE or PTE valid? */
-
-
-/*
- * prototypes
- */
-
-void pmap_activate(struct lwp *);
-void pmap_bootstrap(vaddr_t);
-boolean_t pmap_clear_attrs(struct vm_page *, int);
-void pmap_deactivate(struct lwp *);
-void pmap_deactivate2(struct lwp *);
-void pmap_page_remove (struct vm_page *);
-void pmap_remove(struct pmap *, vaddr_t, vaddr_t);
-boolean_t pmap_test_attrs(struct vm_page *, int);
-void pmap_write_protect(struct pmap *, vaddr_t, vaddr_t, vm_prot_t);
-int pmap_exec_fixup(struct vm_map *, struct trapframe *,
- struct pcb *);
-void pmap_load(void);
-int pmap_enter_ma(struct pmap *, vaddr_t, paddr_t, vm_prot_t,
- int);
-
-vaddr_t reserve_dumppages(vaddr_t); /* XXX: not a pmap fn */
-
-void pmap_tlb_shootdown(pmap_t, vaddr_t, pt_entry_t, int32_t *);
-void pmap_tlb_shootnow(int32_t);
-void pmap_do_tlb_shootdown(struct cpu_info *);
-
-#define PMAP_GROWKERNEL /* turn on pmap_growkernel interface */
-
-/*
- * Do idle page zero'ing uncached to avoid polluting the cache.
- */
-boolean_t pmap_pageidlezero(paddr_t);
-#define PMAP_PAGEIDLEZERO(pa) pmap_pageidlezero((pa))
-
-/*
- * inline functions
- */
-
-/*ARGSUSED*/
-static __inline void
-pmap_remove_all(struct pmap *pmap)
-{
- /* Nothing. */
-}
-
-/*
- * pmap_update_pg: flush one page from the TLB (or flush the whole thing
- * if hardware doesn't support one-page flushing)
- */
-
-__inline static void __attribute__((__unused__))
-pmap_update_pg(vaddr_t va)
-{
-#if defined(I386_CPU)
- if (cpu_class == CPUCLASS_386)
- tlbflush();
- else
-#endif
- invlpg((u_int) va);
-}
-
-/*
- * pmap_update_2pg: flush two pages from the TLB
- */
-
-__inline static void __attribute__((__unused__))
-pmap_update_2pg(vaddr_t va, vaddr_t vb)
-{
-#if defined(I386_CPU)
- if (cpu_class == CPUCLASS_386)
- tlbflush();
- else
-#endif
- {
- invlpg((u_int) va);
- invlpg((u_int) vb);
- }
-}
-
-/*
- * pmap_page_protect: change the protection of all recorded mappings
- * of a managed page
- *
- * => this function is a frontend for pmap_page_remove/pmap_clear_attrs
- * => we only have to worry about making the page more protected.
- * unprotecting a page is done on-demand at fault time.
- */
-
-__inline static void __attribute__((__unused__))
-pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
-{
- if ((prot & VM_PROT_WRITE) == 0) {
- if (prot & (VM_PROT_READ|VM_PROT_EXECUTE)) {
- (void) pmap_clear_attrs(pg, PG_RW);
- } else {
- pmap_page_remove(pg);
- }
- }
-}
-
-/*
- * pmap_protect: change the protection of pages in a pmap
- *
- * => this function is a frontend for pmap_remove/pmap_write_protect
- * => we only have to worry about making the page more protected.
- * unprotecting a page is done on-demand at fault time.
- */
-
-__inline static void __attribute__((__unused__))
-pmap_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
-{
- if ((prot & VM_PROT_WRITE) == 0) {
- if (prot & (VM_PROT_READ|VM_PROT_EXECUTE)) {
- pmap_write_protect(pmap, sva, eva, prot);
- } else {
- pmap_remove(pmap, sva, eva);
- }
- }
-}
-
-/*
- * various address inlines
- *
- * vtopte: return a pointer to the PTE mapping a VA, works only for
- * user and PT addresses
- *
- * kvtopte: return a pointer to the PTE mapping a kernel VA
- */
-
-#include <lib/libkern/libkern.h>
-
-static __inline pt_entry_t * __attribute__((__unused__))
-vtopte(vaddr_t va)
-{
-
- KASSERT(va < (PDSLOT_KERN << PDSHIFT));
-
- return (PTE_BASE + x86_btop(va));
-}
-
-static __inline pt_entry_t * __attribute__((__unused__))
-kvtopte(vaddr_t va)
-{
-
- KASSERT(va >= (PDSLOT_KERN << PDSHIFT));
-
-#ifdef LARGEPAGES
- {
- pd_entry_t *pde;
-
- pde = PDP_BASE + pdei(va);
- if (*pde & PG_PS)
- return ((pt_entry_t *)pde);
- }
-#endif
-
- return (PTE_BASE + x86_btop(va));
-}
-
-/*
- * vtomach: virtual address to machine address. For use by
- * machine-dependent code only.
- */
-
-static inline paddr_t __attribute__((__unused__))
-vtomach(vaddr_t va)
-{
- pt_entry_t pte;
-
- pte = PTE_GET(&PTE_BASE[x86_btop(va)]);
- return xpmap_ptom((pte & PG_FRAME) | (va & ~PG_FRAME));
-}
-
-#define pmap_cpu_has_pg_n() (cpu_class != CPUCLASS_386)
-#define pmap_cpu_has_invlpg() (cpu_class != CPUCLASS_386)
-
-paddr_t vtophys(vaddr_t);
-vaddr_t pmap_map(vaddr_t, paddr_t, paddr_t, vm_prot_t);
-
-void pmap_kenter_ma(vaddr_t, paddr_t, vm_prot_t);
-
-#if defined(USER_LDT)
-void pmap_ldt_cleanup(struct lwp *);
-#define PMAP_FORK
-#endif /* USER_LDT */
-
-/*
- * Hooks for the pool allocator.
- */
-#define POOL_VTOPHYS(va) vtophys((vaddr_t) (va))
-
-#endif /* _KERNEL */
-#endif /* _I386_PMAP_H_ */
diff -r 64cd054aa143 -r 0255f48b757f
netbsd-2.0-xen-sparse/sys/arch/xen/include/xbdvar.h
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/include/xbdvar.h Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,60 +0,0 @@
-/* $NetBSD: xbdvar.h,v 1.5 2004/05/07 14:15:11 cl Exp $ */
-
-/*
- *
- * Copyright (c) 2004 Christian Limpach.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Christian Limpach.
- * 4. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-#ifndef _XEN_XBDVAR_H_
-#define _XEN_XBDVAR_H_
-
-struct xbd_softc {
- struct device sc_dev; /* base device glue */
- struct dk_softc sc_dksc; /* generic disk interface */
- unsigned long sc_xd_device; /* cookie identifying device */
- struct dk_intf *sc_di; /* pseudo-disk interface */
- struct simplelock sc_slock; /* our lock */
- int sc_shutdown; /* about to be removed */
-#if NRND > 0
- rndsource_element_t sc_rnd_source;
-#endif
-};
-
-struct xbd_attach_args {
- const char *xa_device;
- vdisk_t *xa_xd;
- struct dk_intf *xa_dkintf;
- struct sysctlnode *xa_diskcookies;
-};
-
-int xbd_scan(struct device *, struct xbd_attach_args *, cfprint_t);
-void xbd_scan_finish(struct device *);
-
-#endif /* _XEN_XBDVAR_H_ */
diff -r 64cd054aa143 -r 0255f48b757f
netbsd-2.0-xen-sparse/sys/arch/xen/include/xen.h
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/include/xen.h Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,249 +0,0 @@
-/* $NetBSD: xen.h,v 1.1.2.2 2004/06/17 09:23:19 tron Exp $ */
-
-/*
- *
- * Copyright (c) 2003, 2004 Keir Fraser (on behalf of the Xen team)
- * All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-
-#ifndef _XEN_H
-#define _XEN_H
-
-#ifndef _LOCORE
-
-struct xen_netinfo {
- uint32_t xi_ifno;
- char *xi_root;
- uint32_t xi_ip[5];
-};
-
-union xen_cmdline_parseinfo {
- char xcp_bootdev[16]; /* sizeof(dv_xname) */
- struct xen_netinfo xcp_netinfo;
- char xcp_console[16];
-};
-
-#define XEN_PARSE_BOOTDEV 0
-#define XEN_PARSE_NETINFO 1
-#define XEN_PARSE_CONSOLE 2
-
-void xen_parse_cmdline(int, union xen_cmdline_parseinfo *);
-
-void xenconscn_attach(void);
-
-void xenmachmem_init(void);
-void xenprivcmd_init(void);
-void xenvfr_init(void);
-
-void idle_block(void);
-
-#ifdef XENDEBUG
-void printk(const char *, ...);
-void vprintk(const char *, va_list);
-#endif
-
-#endif
-
-#endif /* _XEN_H */
-
-/******************************************************************************
- * os.h
- *
- * random collection of macros and definition
- */
-
-#ifndef _OS_H_
-#define _OS_H_
-
-/*
- * These are the segment descriptors provided for us by the hypervisor.
- * For now, these are hardwired -- guest OSes cannot update the GDT
- * or LDT.
- *
- * It shouldn't be hard to support descriptor-table frobbing -- let me
- * know if the BSD or XP ports require flexibility here.
- */
-
-
-/*
- * these are also defined in xen-public/xen.h but can't be pulled in as
- * they are used in start of day assembly. Need to clean up the .h files
- * a bit more...
- */
-
-#ifndef FLAT_RING1_CS
-#define FLAT_RING1_CS 0x0819
-#define FLAT_RING1_DS 0x0821
-#define FLAT_RING3_CS 0x082b
-#define FLAT_RING3_DS 0x0833
-#endif
-
-#define __KERNEL_CS FLAT_RING1_CS
-#define __KERNEL_DS FLAT_RING1_DS
-
-/* Everything below this point is not included by assembler (.S) files. */
-#ifndef _LOCORE
-
-/* some function prototypes */
-void trap_init(void);
-
-
-/*
- * STI/CLI equivalents. These basically set and clear the virtual
- * event_enable flag in the shared_info structure. Note that when
- * the enable bit is set, there may be pending events to be handled.
- * We may therefore call into do_hypervisor_callback() directly.
- */
-
-#define __save_flags(x)
\
-do { \
- (x) = HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask; \
-} while (0)
-
-#define __restore_flags(x) \
-do { \
- shared_info_t *_shared = HYPERVISOR_shared_info; \
- __insn_barrier(); \
- if ((_shared->vcpu_data[0].evtchn_upcall_mask = (x)) == 0) { \
- __insn_barrier(); \
- if
(__predict_false(_shared->vcpu_data[0].evtchn_upcall_pending)) \
- hypervisor_force_callback(); \
- } \
-} while (0)
-
-#define __cli()
\
-do { \
- HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask = 1; \
- __insn_barrier(); \
-} while (0)
-
-#define __sti()
\
-do { \
- shared_info_t *_shared = HYPERVISOR_shared_info; \
- __insn_barrier(); \
- _shared->vcpu_data[0].evtchn_upcall_mask = 0; \
- __insn_barrier(); /* unmask then check (avoid races) */ \
- if (__predict_false(_shared->vcpu_data[0].evtchn_upcall_pending)) \
- hypervisor_force_callback(); \
-} while (0)
-
-#define cli() __cli()
-#define sti() __sti()
-#define save_flags(x) __save_flags(x)
-#define restore_flags(x) __restore_flags(x)
-#define save_and_cli(x) do { \
- __save_flags(x); \
- __cli(); \
-} while (/* CONSTCOND */ 0)
-#define save_and_sti(x) __save_and_sti(x)
-
-#ifdef MULTIPROCESSOR
-#define __LOCK_PREFIX "lock; "
-#else
-#define __LOCK_PREFIX ""
-#endif
-
-static __inline__ uint32_t
-x86_atomic_xchg(uint32_t *ptr, unsigned long val)
-{
- unsigned long result;
-
- __asm __volatile("xchgl %0,%1"
- :"=r" (result)
- :"m" (*ptr), "0" (val)
- :"memory");
-
- return result;
-}
-
-static __inline__ int
-x86_atomic_test_and_clear_bit(volatile void *ptr, int bitno)
-{
- int result;
-
- __asm __volatile(__LOCK_PREFIX
- "btrl %2,%1 ;"
- "sbbl %0,%0"
- :"=r" (result), "=m" (*(volatile uint32_t *)(ptr))
- :"Ir" (bitno) : "memory");
- return result;
-}
-
-static __inline__ int
-x86_atomic_test_and_set_bit(volatile void *ptr, int bitno)
-{
- int result;
-
- __asm __volatile(__LOCK_PREFIX
- "btsl %2,%1 ;"
- "sbbl %0,%0"
- :"=r" (result), "=m" (*(volatile uint32_t *)(ptr))
- :"Ir" (bitno) : "memory");
- return result;
-}
-
-static __inline int
-x86_constant_test_bit(const volatile void *ptr, int bitno)
-{
- return ((1UL << (bitno & 31)) &
- (((const volatile uint32_t *) ptr)[bitno >> 5])) != 0;
-}
-
-static __inline int
-x86_variable_test_bit(const volatile void *ptr, int bitno)
-{
- int result;
-
- __asm __volatile(
- "btl %2,%1 ;"
- "sbbl %0,%0"
- :"=r" (result)
- :"m" (*(volatile uint32_t *)(ptr)), "Ir" (bitno));
- return result;
-}
-
-#define x86_atomic_test_bit(ptr, bitno) \
- (__builtin_constant_p(bitno) ? \
- x86_constant_test_bit((ptr),(bitno)) : \
- x86_variable_test_bit((ptr),(bitno)))
-
-static __inline void
-x86_atomic_set_bit(volatile void *ptr, int bitno)
-{
- __asm __volatile(__LOCK_PREFIX
- "btsl %1,%0"
- :"=m" (*(volatile uint32_t *)(ptr))
- :"Ir" (bitno));
-}
-
-static __inline void
-x86_atomic_clear_bit(volatile void *ptr, int bitno)
-{
- __asm __volatile(__LOCK_PREFIX
- "btrl %1,%0"
- :"=m" (*(volatile uint32_t *)(ptr))
- :"Ir" (bitno));
-}
-
-#endif /* !__ASSEMBLY__ */
-
-#endif /* _OS_H_ */
diff -r 64cd054aa143 -r 0255f48b757f
netbsd-2.0-xen-sparse/sys/arch/xen/include/xenfunc.h
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/include/xenfunc.h Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,135 +0,0 @@
-/* $NetBSD: xenfunc.h,v 1.1.2.1 2004/05/22 15:59:31 he Exp $ */
-
-/*
- *
- * Copyright (c) 2004 Christian Limpach.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Christian Limpach.
- * 4. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-#ifndef _XEN_XENFUNC_H_
-#define _XEN_XENFUNC_H_
-
-#include <machine/xen.h>
-#include <machine/hypervisor.h>
-#include <machine/evtchn.h>
-#include <machine/xenpmap.h>
-#include <machine/pte.h>
-
-#ifdef XENDEBUG_LOW
-#define __PRINTK(x) printk x
-#else
-#define __PRINTK(x)
-#endif
-
-void xen_set_ldt(vaddr_t, uint32_t);
-void xen_update_descriptor(union descriptor *, union descriptor *);
-
-static __inline void
-invlpg(u_int addr)
-{
- xpq_queue_invlpg(addr);
- xpq_flush_queue();
-}
-
-static __inline void
-lldt(u_short sel)
-{
-
- /* __PRINTK(("ldt %x\n", IDXSELN(sel))); */
- if (sel == GSEL(GLDT_SEL, SEL_KPL))
- xen_set_ldt((vaddr_t)ldt, NLDT);
- else
- xen_set_ldt(cpu_info_primary.ci_gdt[IDXSELN(sel)].ld.ld_base,
- cpu_info_primary.ci_gdt[IDXSELN(sel)].ld.ld_entries);
-}
-
-static __inline void
-ltr(u_short sel)
-{
- __PRINTK(("XXX ltr not supported\n"));
-}
-
-static __inline void
-lcr0(u_int val)
-{
- __PRINTK(("XXX lcr0 not supported\n"));
-}
-
-static __inline u_int
-rcr0(void)
-{
- __PRINTK(("XXX rcr0 not supported\n"));
- return 0;
-}
-
-#define lcr3(_v) _lcr3((_v), __FILE__, __LINE__)
-static __inline void
-_lcr3(u_int val, char *file, int line)
-{
-/* __PRINTK(("lcr3 %08x at %s:%d\n", val, file, line)); */
- xpq_queue_pt_switch(xpmap_ptom(val) & PG_FRAME);
- xpq_flush_queue();
-}
-
-static __inline void
-tlbflush(void)
-{
- xpq_queue_tlb_flush();
- xpq_flush_queue();
-}
-
-static __inline u_int
-rdr6(void)
-{
- u_int val;
-
- val = HYPERVISOR_get_debugreg(6);
- return val;
-}
-
-static __inline void
-ldr6(u_int val)
-{
-
- HYPERVISOR_set_debugreg(6, val);
-}
-
-static __inline void
-disable_intr(void)
-{
- __cli();
-}
-
-static __inline void
-enable_intr(void)
-{
- __sti();
-}
-
-#endif /* _XEN_XENFUNC_H_ */
diff -r 64cd054aa143 -r 0255f48b757f
netbsd-2.0-xen-sparse/sys/arch/xen/include/xenpmap.h
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/include/xenpmap.h Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,193 +0,0 @@
-/* $NetBSD: xenpmap.h,v 1.1.2.1 2004/05/22 15:59:58 he Exp $ */
-
-/*
- *
- * Copyright (c) 2004 Christian Limpach.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Christian Limpach.
- * 4. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-#ifndef _XEN_XENPMAP_H_
-#define _XEN_XENPMAP_H_
-
-#define INVALID_P2M_ENTRY (~0UL)
-
-void xpq_queue_invlpg(vaddr_t);
-void xpq_queue_pde_update(pd_entry_t *, pd_entry_t);
-void xpq_queue_pte_update(pt_entry_t *, pt_entry_t);
-void xpq_queue_unchecked_pte_update(pt_entry_t *, pt_entry_t);
-void xpq_queue_pt_switch(paddr_t);
-void xpq_flush_queue(void);
-void xpq_queue_set_ldt(vaddr_t, uint32_t);
-void xpq_queue_tlb_flush(void);
-void xpq_queue_pin_table(paddr_t, int);
-void xpq_queue_unpin_table(paddr_t);
-
-extern paddr_t *xpmap_phys_to_machine_mapping;
-
-#define XPQ_PIN_L1_TABLE 1
-#define XPQ_PIN_L2_TABLE 2
-
-#ifndef XEN
-#define PDE_GET(_pdp) \
- *(_pdp)
-#define PDE_SET(_pdp,_mapdp,_npde) \
- *(_mapdp) = (_npde)
-#define PDE_CLEAR(_pdp,_mapdp) \
- *(_mapdp) = 0
-#define PTE_SET(_ptp,_maptp,_npte) \
- *(_maptp) = (_npte)
-#define PTE_CLEAR(_ptp,_maptp) \
- *(_maptp) = 0
-#define PTE_ATOMIC_SET(_ptp,_maptp,_npte,_opte) \
- (_opte) = x86_atomic_testset_ul((_maptp), (_npte))
-#define PTE_ATOMIC_CLEAR(_ptp,_maptp,_opte) \
- (_opte) = x86_atomic_testset_ul((_maptp), 0)
-#define PDE_CLEARBITS(_pdp,_mapdp,_bits) \
- *(_mapdp) &= ~(_bits)
-#define PTE_ATOMIC_CLEARBITS(_ptp,_maptp,_bits) \
- x86_atomic_clearbits_l((_maptp), (_bits))
-#define PTE_SETBITS(_ptp,_maptp,_bits) \
- *(_maptp) |= (_bits)
-#define PTE_ATOMIC_SETBITS(_ptp,_maptp,_bits) \
- x86_atomic_setbits_l((_maptp), (_bits))
-#else
-paddr_t *xpmap_phys_to_machine_mapping;
-
-#define PDE_GET(_pdp) \
- (pmap_valid_entry(*(_pdp)) ? xpmap_mtop(*(_pdp)) : *(_pdp))
-#define PDE_SET(_pdp,_mapdp,_npde) do { \
- xpq_queue_pde_update((_mapdp), xpmap_ptom((_npde))); \
- xpq_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PDE_CLEAR(_pdp,_mapdp) do { \
- xpq_queue_pde_update((_mapdp), 0); \
- xpq_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PTE_GET(_ptp) \
- (pmap_valid_entry(*(_ptp)) ? xpmap_mtop(*(_ptp)) : *(_ptp))
-#define PTE_GET_MA(_ptp) \
- *(_ptp)
-#define PTE_SET(_ptp,_maptp,_npte) do { \
- xpq_queue_pte_update((_maptp), xpmap_ptom((_npte))); \
- xpq_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PTE_SET_MA(_ptp,_maptp,_npte) do { \
- xpq_queue_pte_update((_maptp), (_npte)); \
- xpq_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PTE_SET_MA_UNCHECKED(_ptp,_maptp,_npte) do { \
- xpq_queue_unchecked_pte_update((_maptp), (_npte)); \
- xpq_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PTE_CLEAR(_ptp,_maptp) do { \
- xpq_queue_pte_update((_maptp), 0); \
- xpq_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PTE_ATOMIC_SET(_ptp,_maptp,_npte,_opte) do { \
- (_opte) = PTE_GET(_ptp); \
- xpq_queue_pte_update((_maptp), xpmap_ptom((_npte))); \
- xpq_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PTE_ATOMIC_SET_MA(_ptp,_maptp,_npte,_opte) do { \
- (_opte) = *(_ptp); \
- xpq_queue_pte_update((_maptp), (_npte)); \
- xpq_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PTE_ATOMIC_CLEAR(_ptp,_maptp,_opte) do { \
- (_opte) = PTE_GET(_ptp); \
- xpq_queue_pte_update((_maptp), 0); \
- xpq_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PTE_ATOMIC_CLEAR_MA(_ptp,_maptp,_opte) do { \
- (_opte) = *(_ptp); \
- xpq_queue_pte_update((_maptp), 0); \
- xpq_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PDE_CLEARBITS(_pdp,_mapdp,_bits) do { \
- xpq_queue_pte_update((_mapdp), *(_pdp) & ~((_bits) & ~PG_FRAME)); \
- xpq_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PTE_CLEARBITS(_ptp,_maptp,_bits) do { \
- xpq_queue_pte_update((_maptp), *(_ptp) & ~((_bits) & ~PG_FRAME)); \
- xpq_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PDE_ATOMIC_CLEARBITS(_pdp,_mapdp,_bits) do { \
- xpq_queue_pde_update((_mapdp), *(_pdp) & ~((_bits) & ~PG_FRAME)); \
- xpq_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PTE_ATOMIC_CLEARBITS(_ptp,_maptp,_bits) do { \
- xpq_queue_pte_update((_maptp), *(_ptp) & ~((_bits) & ~PG_FRAME)); \
- xpq_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PTE_SETBITS(_ptp,_maptp,_bits) do { \
- xpq_queue_pte_update((_maptp), *(_ptp) | ((_bits) & ~PG_FRAME)); \
- xpq_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PDE_ATOMIC_SETBITS(_pdp,_mapdp,_bits) do { \
- xpq_queue_pde_update((_mapdp), *(_pdp) | ((_bits) & ~PG_FRAME)); \
- xpq_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PTE_ATOMIC_SETBITS(_ptp,_maptp,_bits) do { \
- xpq_queue_pte_update((_maptp), *(_ptp) | ((_bits) & ~PG_FRAME)); \
- xpq_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PDE_COPY(_dpdp,_madpdp,_spdp) do { \
- xpq_queue_pde_update((_madpdp), *(_spdp)); \
- xpq_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PTE_UPDATES_FLUSH() do { \
- xpq_flush_queue(); \
-} while (/*CONSTCOND*/0)
-
-#endif
-
-#define XPMAP_OFFSET (KERNTEXTOFF - KERNBASE_LOCORE)
-static __inline paddr_t
-xpmap_mtop(paddr_t mpa)
-{
- return ((machine_to_phys_mapping[mpa >> PAGE_SHIFT] << PAGE_SHIFT) +
- XPMAP_OFFSET) | (mpa & ~PG_FRAME);
-}
-
-static __inline paddr_t
-xpmap_ptom(paddr_t ppa)
-{
- return (xpmap_phys_to_machine_mapping[(ppa -
- XPMAP_OFFSET) >> PAGE_SHIFT] << PAGE_SHIFT)
- | (ppa & ~PG_FRAME);
-}
-
-static __inline paddr_t
-xpmap_ptom_masked(paddr_t ppa)
-{
- return (xpmap_phys_to_machine_mapping[(ppa -
- XPMAP_OFFSET) >> PAGE_SHIFT] << PAGE_SHIFT);
-}
-
-#endif /* _XEN_XENPMAP_H_ */
diff -r 64cd054aa143 -r 0255f48b757f
netbsd-2.0-xen-sparse/sys/arch/xen/x86/bus_space.c
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/x86/bus_space.c Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,505 +0,0 @@
-/* $NetBSD: bus_space.c,v 1.2.2.1 2004/05/22 15:57:25 he Exp $ */
-/* NetBSD: bus_space.c,v 1.2 2003/03/14 18:47:53 christos Exp */
-
-/*-
- * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
- * All rights reserved.
- *
- * This code is derived from software contributed to The NetBSD Foundation
- * by Charles M. Hannum and by Jason R. Thorpe of the Numerical Aerospace
- * Simulation Facility, NASA Ames Research Center.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the NetBSD
- * Foundation, Inc. and its contributors.
- * 4. Neither the name of The NetBSD Foundation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: bus_space.c,v 1.2.2.1 2004/05/22 15:57:25 he Exp
$");
-
-#include "opt_xen.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/malloc.h>
-#include <sys/extent.h>
-
-#include <uvm/uvm_extern.h>
-
-#include <machine/bus.h>
-
-#include <dev/isa/isareg.h>
-#include <machine/isa_machdep.h>
-
-#include <machine/hypervisor.h>
-#include <machine/xenpmap.h>
-
-/*
- * Extent maps to manage I/O and memory space. Allocate
- * storage for 8 regions in each, initially. Later, ioport_malloc_safe
- * will indicate that it's safe to use malloc() to dynamically allocate
- * region descriptors.
- *
- * N.B. At least two regions are _always_ allocated from the iomem
- * extent map; (0 -> ISA hole) and (end of ISA hole -> end of RAM).
- *
- * The extent maps are not static! Machine-dependent ISA and EISA
- * routines need access to them for bus address space allocation.
- */
-static long ioport_ex_storage[EXTENT_FIXED_STORAGE_SIZE(8) / sizeof(long)];
-static long iomem_ex_storage[EXTENT_FIXED_STORAGE_SIZE(8) / sizeof(long)];
-struct extent *ioport_ex;
-struct extent *iomem_ex;
-static int ioport_malloc_safe;
-
-int x86_mem_add_mapping __P((bus_addr_t, bus_size_t,
- int, bus_space_handle_t *));
-
-void
-x86_bus_space_init()
-{
- /*
- * Initialize the I/O port and I/O mem extent maps.
- * Note: we don't have to check the return value since
- * creation of a fixed extent map will never fail (since
- * descriptor storage has already been allocated).
- *
- * N.B. The iomem extent manages _all_ physical addresses
- * on the machine. When the amount of RAM is found, the two
- * extents of RAM are allocated from the map (0 -> ISA hole
- * and end of ISA hole -> end of RAM).
- */
- ioport_ex = extent_create("ioport", 0x0, 0xffff, M_DEVBUF,
- (caddr_t)ioport_ex_storage, sizeof(ioport_ex_storage),
- EX_NOCOALESCE|EX_NOWAIT);
- iomem_ex = extent_create("iomem", 0x0, 0xffffffff, M_DEVBUF,
- (caddr_t)iomem_ex_storage, sizeof(iomem_ex_storage),
- EX_NOCOALESCE|EX_NOWAIT);
-
- /* We are privileged guest os - should have IO privileges. */
- if (xen_start_info.flags & SIF_PRIVILEGED) {
- dom0_op_t op;
- op.cmd = DOM0_IOPL;
- op.u.iopl.domain = DOMID_SELF;
- op.u.iopl.iopl = 1;
- if (HYPERVISOR_dom0_op(&op) != 0)
- panic("Unable to obtain IOPL, "
- "despite being SIF_PRIVILEGED");
- }
-}
-
-void
-x86_bus_space_mallocok()
-{
-
- ioport_malloc_safe = 1;
-}
-
-int
-x86_memio_map(t, bpa, size, flags, bshp)
- bus_space_tag_t t;
- bus_addr_t bpa;
- bus_size_t size;
- int flags;
- bus_space_handle_t *bshp;
-{
- int error;
- struct extent *ex;
-
- /*
- * Pick the appropriate extent map.
- */
- if (t == X86_BUS_SPACE_IO) {
- if (flags & BUS_SPACE_MAP_LINEAR)
- return (EOPNOTSUPP);
- ex = ioport_ex;
- } else if (t == X86_BUS_SPACE_MEM)
- ex = iomem_ex;
- else
- panic("x86_memio_map: bad bus space tag");
-
- /*
- * Before we go any further, let's make sure that this
- * region is available.
- */
- error = extent_alloc_region(ex, bpa, size,
- EX_NOWAIT | (ioport_malloc_safe ? EX_MALLOCOK : 0));
- if (error)
- return (error);
-
- /*
- * For I/O space, that's all she wrote.
- */
- if (t == X86_BUS_SPACE_IO) {
- *bshp = bpa;
- return (0);
- }
-
- /*
- * For memory space, map the bus physical address to
- * a kernel virtual address.
- */
- error = x86_mem_add_mapping(bpa, size,
- (flags & BUS_SPACE_MAP_CACHEABLE) != 0, bshp);
- if (error) {
- if (extent_free(ex, bpa, size, EX_NOWAIT |
- (ioport_malloc_safe ? EX_MALLOCOK : 0))) {
- printf("x86_memio_map: pa 0x%lx, size 0x%lx\n",
- bpa, size);
- printf("x86_memio_map: can't free region\n");
- }
- }
-
- return (error);
-}
-
-int
-_x86_memio_map(t, bpa, size, flags, bshp)
- bus_space_tag_t t;
- bus_addr_t bpa;
- bus_size_t size;
- int flags;
- bus_space_handle_t *bshp;
-{
-
- /*
- * For I/O space, just fill in the handle.
- */
- if (t == X86_BUS_SPACE_IO) {
- if (flags & BUS_SPACE_MAP_LINEAR)
- return (EOPNOTSUPP);
- *bshp = bpa;
- return (0);
- }
-
- /*
- * For memory space, map the bus physical address to
- * a kernel virtual address.
- */
- return (x86_mem_add_mapping(bpa, size,
- (flags & BUS_SPACE_MAP_CACHEABLE) != 0, bshp));
-}
-
-int
-x86_memio_alloc(t, rstart, rend, size, alignment, boundary, flags,
- bpap, bshp)
- bus_space_tag_t t;
- bus_addr_t rstart, rend;
- bus_size_t size, alignment, boundary;
- int flags;
- bus_addr_t *bpap;
- bus_space_handle_t *bshp;
-{
- struct extent *ex;
- u_long bpa;
- int error;
-
- /*
- * Pick the appropriate extent map.
- */
- if (t == X86_BUS_SPACE_IO) {
- if (flags & BUS_SPACE_MAP_LINEAR)
- return (EOPNOTSUPP);
- ex = ioport_ex;
- } else if (t == X86_BUS_SPACE_MEM)
- ex = iomem_ex;
- else
- panic("x86_memio_alloc: bad bus space tag");
-
- /*
- * Sanity check the allocation against the extent's boundaries.
- */
- if (rstart < ex->ex_start || rend > ex->ex_end)
- panic("x86_memio_alloc: bad region start/end");
-
- /*
- * Do the requested allocation.
- */
- error = extent_alloc_subregion(ex, rstart, rend, size, alignment,
- boundary,
- EX_FAST | EX_NOWAIT | (ioport_malloc_safe ? EX_MALLOCOK : 0),
- &bpa);
-
- if (error)
- return (error);
-
- /*
- * For I/O space, that's all she wrote.
- */
- if (t == X86_BUS_SPACE_IO) {
- *bshp = *bpap = bpa;
- return (0);
- }
-
- /*
- * For memory space, map the bus physical address to
- * a kernel virtual address.
- */
- error = x86_mem_add_mapping(bpa, size,
- (flags & BUS_SPACE_MAP_CACHEABLE) != 0, bshp);
- if (error) {
- if (extent_free(iomem_ex, bpa, size, EX_NOWAIT |
- (ioport_malloc_safe ? EX_MALLOCOK : 0))) {
- printf("x86_memio_alloc: pa 0x%lx, size 0x%lx\n",
- bpa, size);
- printf("x86_memio_alloc: can't free region\n");
- }
- }
-
- *bpap = bpa;
-
- return (error);
-}
-
-int
-x86_mem_add_mapping(bpa, size, cacheable, bshp)
- bus_addr_t bpa;
- bus_size_t size;
- int cacheable;
- bus_space_handle_t *bshp;
-{
- u_long pa, endpa;
- vaddr_t va;
- pt_entry_t *pte;
- pt_entry_t *maptp;
- int32_t cpumask = 0;
-
- pa = x86_trunc_page(bpa);
- endpa = x86_round_page(bpa + size);
-
-#ifdef DIAGNOSTIC
- if (endpa <= pa)
- panic("x86_mem_add_mapping: overflow");
-#endif
-
- if (bpa >= IOM_BEGIN && (bpa + size) <= IOM_END) {
- va = (vaddr_t)ISA_HOLE_VADDR(pa);
- } else {
- va = uvm_km_valloc(kernel_map, endpa - pa);
- if (va == 0)
- return (ENOMEM);
- }
-
- *bshp = (bus_space_handle_t)(va + (bpa & PGOFSET));
-
- for (; pa < endpa; pa += PAGE_SIZE, va += PAGE_SIZE) {
- pmap_kenter_pa(va, pa, VM_PROT_READ | VM_PROT_WRITE);
-
- /*
- * PG_N doesn't exist on 386's, so we assume that
- * the mainboard has wired up device space non-cacheable
- * on those machines.
- *
- * Note that it's not necessary to use atomic ops to
- * fiddle with the PTE here, because we don't care
- * about mod/ref information.
- *
- * XXX should hand this bit to pmap_kenter_pa to
- * save the extra invalidate!
- *
- * XXX extreme paranoia suggests tlb shootdown belongs here.
- */
- if (pmap_cpu_has_pg_n()) {
- pte = kvtopte(va);
- maptp = (pt_entry_t *)vtomach((vaddr_t)pte);
- if (cacheable)
- PTE_CLEARBITS(pte, maptp, PG_N);
- else
- PTE_SETBITS(pte, maptp, PG_N);
- pmap_tlb_shootdown(pmap_kernel(), va, *pte,
- &cpumask);
- }
- }
-
- pmap_tlb_shootnow(cpumask);
- pmap_update(pmap_kernel());
-
- return 0;
-}
-
-/*
- * void _x86_memio_unmap(bus_space_tag bst, bus_space_handle bsh,
- * bus_size_t size, bus_addr_t *adrp)
- *
- * This function unmaps memory- or io-space mapped by the function
- * _x86_memio_map(). This function works nearly as same as
- * x86_memio_unmap(), but this function does not ask kernel
- * built-in extents and returns physical address of the bus space,
- * for the convenience of the extra extent manager.
- */
-void
-_x86_memio_unmap(t, bsh, size, adrp)
- bus_space_tag_t t;
- bus_space_handle_t bsh;
- bus_size_t size;
- bus_addr_t *adrp;
-{
- u_long va, endva;
- bus_addr_t bpa;
-
- /*
- * Find the correct extent and bus physical address.
- */
- if (t == X86_BUS_SPACE_IO) {
- bpa = bsh;
- } else if (t == X86_BUS_SPACE_MEM) {
- if (bsh >= atdevbase && (bsh + size) <= (atdevbase + IOM_SIZE))
{
- bpa = (bus_addr_t)ISA_PHYSADDR(bsh);
- } else {
-
- va = x86_trunc_page(bsh);
- endva = x86_round_page(bsh + size);
-
-#ifdef DIAGNOSTIC
- if (endva <= va) {
- panic("_x86_memio_unmap: overflow");
- }
-#endif
-
-#if __NetBSD_Version__ > 104050000
- if (pmap_extract(pmap_kernel(), va, &bpa) == FALSE) {
- panic("_x86_memio_unmap:"
- " wrong virtual address");
- }
- bpa += (bsh & PGOFSET);
-#else
- bpa = pmap_extract(pmap_kernel(), va) + (bsh & PGOFSET);
-#endif
-
- pmap_kremove(va, endva - va);
- /*
- * Free the kernel virtual mapping.
- */
- uvm_km_free(kernel_map, va, endva - va);
- }
- } else {
- panic("_x86_memio_unmap: bad bus space tag");
- }
-
- if (adrp != NULL) {
- *adrp = bpa;
- }
-}
-
-void
-x86_memio_unmap(t, bsh, size)
- bus_space_tag_t t;
- bus_space_handle_t bsh;
- bus_size_t size;
-{
- struct extent *ex;
- u_long va, endva;
- bus_addr_t bpa;
-
- /*
- * Find the correct extent and bus physical address.
- */
- if (t == X86_BUS_SPACE_IO) {
- ex = ioport_ex;
- bpa = bsh;
- } else if (t == X86_BUS_SPACE_MEM) {
- ex = iomem_ex;
-
- if (bsh >= atdevbase &&
- (bsh + size) <= (atdevbase + IOM_SIZE)) {
- bpa = (bus_addr_t)ISA_PHYSADDR(bsh);
- goto ok;
- }
-
- va = x86_trunc_page(bsh);
- endva = x86_round_page(bsh + size);
-
-#ifdef DIAGNOSTIC
- if (endva <= va)
- panic("x86_memio_unmap: overflow");
-#endif
-
- (void) pmap_extract(pmap_kernel(), va, &bpa);
- bpa += (bsh & PGOFSET);
-
- pmap_kremove(va, endva - va);
- /*
- * Free the kernel virtual mapping.
- */
- uvm_km_free(kernel_map, va, endva - va);
- } else
- panic("x86_memio_unmap: bad bus space tag");
-
-ok:
- if (extent_free(ex, bpa, size,
- EX_NOWAIT | (ioport_malloc_safe ? EX_MALLOCOK : 0))) {
- printf("x86_memio_unmap: %s 0x%lx, size 0x%lx\n",
- (t == X86_BUS_SPACE_IO) ? "port" : "pa", bpa, size);
- printf("x86_memio_unmap: can't free region\n");
- }
-}
-
-void
-x86_memio_free(t, bsh, size)
- bus_space_tag_t t;
- bus_space_handle_t bsh;
- bus_size_t size;
-{
-
- /* x86_memio_unmap() does all that we need to do. */
- x86_memio_unmap(t, bsh, size);
-}
-
-int
-x86_memio_subregion(t, bsh, offset, size, nbshp)
- bus_space_tag_t t;
- bus_space_handle_t bsh;
- bus_size_t offset, size;
- bus_space_handle_t *nbshp;
-{
-
- *nbshp = bsh + offset;
- return (0);
-}
-
-paddr_t
-x86_memio_mmap(t, addr, off, prot, flags)
- bus_space_tag_t t;
- bus_addr_t addr;
- off_t off;
- int prot;
- int flags;
-{
-
- /* Can't mmap I/O space. */
- if (t == X86_BUS_SPACE_IO)
- return (-1);
-
- /*
- * "addr" is the base address of the device we're mapping.
- * "off" is the offset into that device.
- *
- * Note we are called for each "page" in the device that
- * the upper layers want to map.
- */
- return (x86_btop(addr + off));
-}
diff -r 64cd054aa143 -r 0255f48b757f
netbsd-2.0-xen-sparse/sys/arch/xen/xen/clock.c
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/xen/clock.c Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,273 +0,0 @@
-/* $NetBSD: clock.c,v 1.1.2.2 2004/07/17 16:43:56 he Exp $ */
-
-/*
- *
- * Copyright (c) 2004 Christian Limpach.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Christian Limpach.
- * 4. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "opt_xen.h"
-
-#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: clock.c,v 1.1.2.2 2004/07/17 16:43:56 he Exp $");
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/time.h>
-#include <sys/kernel.h>
-#include <sys/device.h>
-
-#include <machine/xen.h>
-#include <machine/hypervisor.h>
-#include <machine/evtchn.h>
-#include <machine/cpu_counter.h>
-
-#include <dev/clock_subr.h>
-
-#include "config_time.h" /* for CONFIG_TIME */
-
-static int xen_timer_handler(void *, struct intrframe *);
-
-/* These are peridically updated in shared_info, and then copied here. */
-static uint64_t shadow_tsc_stamp;
-static uint64_t shadow_system_time;
-static unsigned long shadow_time_version;
-static struct timeval shadow_tv;
-
-static int timeset;
-
-static uint64_t processed_system_time;
-
-#define NS_PER_TICK (1000000000ULL/hz)
-
-/*
- * Reads a consistent set of time-base values from Xen, into a shadow data
- * area. Must be called at splclock.
- */
-static void
-get_time_values_from_xen(void)
-{
- do {
- shadow_time_version = HYPERVISOR_shared_info->time_version2;
- __insn_barrier();
- shadow_tv.tv_sec = HYPERVISOR_shared_info->wc_sec;
- shadow_tv.tv_usec = HYPERVISOR_shared_info->wc_usec;
- shadow_tsc_stamp = HYPERVISOR_shared_info->tsc_timestamp;
- shadow_system_time = HYPERVISOR_shared_info->system_time;
- __insn_barrier();
- } while (shadow_time_version != HYPERVISOR_shared_info->time_version1);
-}
-
-static uint64_t
-get_tsc_offset_ns(void)
-{
- uint32_t tsc_delta;
- struct cpu_info *ci = curcpu();
-
- tsc_delta = cpu_counter32() - shadow_tsc_stamp;
- return tsc_delta * 1000000000 / cpu_frequency(ci);
-}
-
-void
-inittodr(time_t base)
-{
- int s;
-
- /*
- * if the file system time is more than a year older than the
- * kernel, warn and then set the base time to the CONFIG_TIME.
- */
- if (base && base < (CONFIG_TIME-SECYR)) {
- printf("WARNING: preposterous time in file system\n");
- base = CONFIG_TIME;
- }
-
- s = splclock();
- get_time_values_from_xen();
- splx(s);
-
- time.tv_usec = shadow_tv.tv_usec;
- time.tv_sec = shadow_tv.tv_sec + rtc_offset * 60;
-#ifdef DEBUG_CLOCK
- printf("readclock: %ld (%ld)\n", time.tv_sec, base);
-#endif
- if (base != 0 && base < time.tv_sec - 5*SECYR)
- printf("WARNING: file system time much less than clock time\n");
- else if (base > time.tv_sec + 5*SECYR) {
- printf("WARNING: clock time much less than file system time\n");
- printf("WARNING: using file system time\n");
- goto fstime;
- }
-
- timeset = 1;
- return;
-
-fstime:
- timeset = 1;
- time.tv_sec = base;
- printf("WARNING: CHECK AND RESET THE DATE!\n");
-}
-
-void
-resettodr()
-{
-#ifdef DOM0OPS
- dom0_op_t op;
- int s;
-#endif
-#ifdef DEBUG_CLOCK
- struct clock_ymdhms dt;
-#endif
-
- /*
- * We might have been called by boot() due to a crash early
- * on. Don't reset the clock chip in this case.
- */
- if (!timeset)
- return;
-
-#ifdef DEBUG_CLOCK
- clock_secs_to_ymdhms(time.tv_sec - rtc_offset * 60, &dt);
-
- printf("setclock: %d/%d/%d %02d:%02d:%02d\n", dt.dt_year,
- dt.dt_mon, dt.dt_day, dt.dt_hour, dt.dt_min, dt.dt_sec);
-#endif
-#ifdef DOM0OPS
- if (xen_start_info.dom_id == 0) {
- s = splclock();
-
- op.cmd = DOM0_SETTIME;
- op.u.settime.secs = time.tv_sec - rtc_offset * 60;
- op.u.settime.usecs = time.tv_usec;
- op.u.settime.system_time = shadow_system_time;
- HYPERVISOR_dom0_op(&op);
-
- splx(s);
- }
-#endif
-}
-
-void
-startrtclock()
-{
-
-}
-
-/*
- * Wait approximately `n' microseconds.
- */
-void
-xen_delay(int n)
-{
- uint64_t when;
-
- get_time_values_from_xen();
- when = shadow_system_time + n * 1000;
- while (shadow_system_time < when)
- get_time_values_from_xen();
-}
-
-void
-xen_microtime(struct timeval *tv)
-{
-
- *tv = time;
-}
-
-void
-xen_initclocks()
-{
- int irq = bind_virq_to_irq(VIRQ_TIMER);
-
- get_time_values_from_xen();
- processed_system_time = shadow_system_time;
-
- event_set_handler(irq, (int (*)(void *))xen_timer_handler,
- NULL, IPL_CLOCK);
- hypervisor_enable_irq(irq);
-}
-
-static int
-xen_timer_handler(void *arg, struct intrframe *regs)
-{
- int64_t delta;
-
-#if defined(I586_CPU) || defined(I686_CPU)
- static int microset_iter; /* call cc_microset once/sec */
- struct cpu_info *ci = curcpu();
-
- /*
- * If we have a cycle counter, do the microset thing.
- */
- if (ci->ci_feature_flags & CPUID_TSC) {
- if (
-#if defined(MULTIPROCESSOR)
- CPU_IS_PRIMARY(ci) &&
-#endif
- (microset_iter--) == 0) {
- microset_iter = hz - 1;
-#if defined(MULTIPROCESSOR)
- x86_broadcast_ipi(X86_IPI_MICROSET);
-#endif
- cc_microset_time = time;
- cc_microset(ci);
- }
- }
-#endif
-
- get_time_values_from_xen();
-
- delta = (int64_t)(shadow_system_time + get_tsc_offset_ns() -
- processed_system_time);
- while (delta >= NS_PER_TICK) {
- hardclock(regs);
- delta -= NS_PER_TICK;
- processed_system_time += NS_PER_TICK;
- }
-
- return 0;
-}
-
-void
-setstatclockrate(int arg)
-{
-}
-
-void
-idle_block(void)
-{
-
- /*
- * We set the timer to when we expect the next timer
- * interrupt. We could set the timer to later if we could
- * easily find out when we will have more work (callouts) to
- * process from hardclock.
- */
- if (HYPERVISOR_set_timer_op(processed_system_time + NS_PER_TICK) == 0)
- HYPERVISOR_block();
-}
diff -r 64cd054aa143 -r 0255f48b757f
netbsd-2.0-xen-sparse/sys/arch/xen/xen/ctrl_if.c
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/xen/ctrl_if.c Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,550 +0,0 @@
-/******************************************************************************
- * ctrl_if.c
- *
- * Management functions for special interface to the domain controller.
- *
- * Copyright (c) 2004, K A Fraser
- */
-
-#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD$");
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/proc.h>
-#include <sys/malloc.h>
-
-#include <machine/xen.h>
-#include <machine/hypervisor.h>
-#include <machine/ctrl_if.h>
-#include <machine/evtchn.h>
-
-void printk(char *, ...);
-#if 0
-#define DPRINTK(_f, _a...) printk("(file=%s, line=%d) " _f, \
- __FILE__ , __LINE__ , ## _a )
-#else
-#define DPRINTK(_f, _a...) ((void)0)
-#endif
-
-/*
- * Only used by initial domain which must create its own control-interface
- * event channel. This value is picked up by the user-space domain controller
- * via an ioctl.
- */
-int initdom_ctrlif_domcontroller_port = -1;
-
-/* static */ int ctrl_if_evtchn = -1;
-static int ctrl_if_irq;
-static struct simplelock ctrl_if_lock;
-
-static CONTROL_RING_IDX ctrl_if_tx_resp_cons;
-static CONTROL_RING_IDX ctrl_if_rx_req_cons;
-
-/* Incoming message requests. */
- /* Primary message type -> message handler. */
-static ctrl_msg_handler_t ctrl_if_rxmsg_handler[256];
- /* Primary message type -> callback in process context? */
-static unsigned long ctrl_if_rxmsg_blocking_context[256/sizeof(unsigned long)];
-#if 0
- /* Is it late enough during bootstrap to use schedule_task()? */
-static int safe_to_schedule_task;
-#endif
- /* Queue up messages to be handled in process context. */
-static ctrl_msg_t ctrl_if_rxmsg_deferred[CONTROL_RING_SIZE];
-static CONTROL_RING_IDX ctrl_if_rxmsg_deferred_prod;
-static CONTROL_RING_IDX ctrl_if_rxmsg_deferred_cons;
-
-/* Incoming message responses: message identifier -> message handler/id. */
-static struct {
- ctrl_msg_handler_t fn;
- unsigned long id;
-} ctrl_if_txmsg_id_mapping[CONTROL_RING_SIZE];
-
-/* For received messages that must be deferred to process context. */
-static void __ctrl_if_rxmsg_deferred(void *unused);
-
-#ifdef notyet
-/* Deferred callbacks for people waiting for space in the transmit ring. */
-static int DECLARE_TASK_QUEUE(ctrl_if_tx_tq);
-#endif
-
-static void *ctrl_if_softintr = NULL;
-
-static int ctrl_if_tx_wait;
-static void __ctrl_if_tx_tasklet(unsigned long data);
-
-static void __ctrl_if_rx_tasklet(unsigned long data);
-
-#define get_ctrl_if() ((control_if_t *)((char *)HYPERVISOR_shared_info + 2048))
-#define TX_FULL(_c) \
- (((_c)->tx_req_prod - ctrl_if_tx_resp_cons) == CONTROL_RING_SIZE)
-
-static void ctrl_if_notify_controller(void)
-{
- hypervisor_notify_via_evtchn(ctrl_if_evtchn);
-}
-
-static void ctrl_if_rxmsg_default_handler(ctrl_msg_t *msg, unsigned long id)
-{
- msg->length = 0;
- ctrl_if_send_response(msg);
-}
-
-static void __ctrl_if_tx_tasklet(unsigned long data)
-{
- control_if_t *ctrl_if = get_ctrl_if();
- ctrl_msg_t *msg;
- int was_full = TX_FULL(ctrl_if);
- CONTROL_RING_IDX rp;
-
- rp = ctrl_if->tx_resp_prod;
- __insn_barrier(); /* Ensure we see all requests up to 'rp'. */
-
- while ( ctrl_if_tx_resp_cons != rp )
- {
- msg = &ctrl_if->tx_ring[MASK_CONTROL_IDX(ctrl_if_tx_resp_cons)];
-
- DPRINTK("Rx-Rsp %u/%u :: %d/%d\n",
- ctrl_if_tx_resp_cons,
- ctrl_if->tx_resp_prod,
- msg->type, msg->subtype);
-
- /* Execute the callback handler, if one was specified. */
- if ( msg->id != 0xFF )
- {
- (*ctrl_if_txmsg_id_mapping[msg->id].fn)(
- msg, ctrl_if_txmsg_id_mapping[msg->id].id);
- __insn_barrier(); /* Execute, /then/ free. */
- ctrl_if_txmsg_id_mapping[msg->id].fn = NULL;
- }
-
- /*
- * Step over the message in the ring /after/ finishing reading it. As
- * soon as the index is updated then the message may get blown away.
- */
- __insn_barrier();
- ctrl_if_tx_resp_cons++;
- }
-
- if ( was_full && !TX_FULL(ctrl_if) )
- {
- wakeup(&ctrl_if_tx_wait);
-#ifdef notyet
- run_task_queue(&ctrl_if_tx_tq);
-#endif
- }
-}
-
-static void __ctrl_if_rxmsg_deferred(void *unused)
-{
- ctrl_msg_t *msg;
- CONTROL_RING_IDX dp;
-
- dp = ctrl_if_rxmsg_deferred_prod;
- __insn_barrier(); /* Ensure we see all deferred requests up to 'dp'. */
-
- while ( ctrl_if_rxmsg_deferred_cons != dp )
- {
- msg = &ctrl_if_rxmsg_deferred[
- MASK_CONTROL_IDX(ctrl_if_rxmsg_deferred_cons)];
- (*ctrl_if_rxmsg_handler[msg->type])(msg, 0);
- ctrl_if_rxmsg_deferred_cons++;
- }
-}
-
-static void __ctrl_if_rx_tasklet(unsigned long data)
-{
- control_if_t *ctrl_if = get_ctrl_if();
- ctrl_msg_t msg, *pmsg;
- CONTROL_RING_IDX rp, dp;
-
- dp = ctrl_if_rxmsg_deferred_prod;
- rp = ctrl_if->rx_req_prod;
- __insn_barrier(); /* Ensure we see all requests up to 'rp'. */
-
- while ( ctrl_if_rx_req_cons != rp )
- {
- pmsg = &ctrl_if->rx_ring[MASK_CONTROL_IDX(ctrl_if_rx_req_cons)];
- memcpy(&msg, pmsg, offsetof(ctrl_msg_t, msg));
-
- DPRINTK("Rx-Req %u/%u :: %d/%d\n",
- ctrl_if_rx_req_cons-1,
- ctrl_if->rx_req_prod,
- msg.type, msg.subtype);
-
- if ( msg.length != 0 )
- memcpy(msg.msg, pmsg->msg, msg.length);
-
- if ( x86_atomic_test_bit(
- (unsigned long *)&ctrl_if_rxmsg_blocking_context,
- msg.type) )
- memcpy(&ctrl_if_rxmsg_deferred[MASK_CONTROL_IDX(dp++)],
- &msg, offsetof(ctrl_msg_t, msg) + msg.length);
- else
- (*ctrl_if_rxmsg_handler[msg.type])(&msg, 0);
-
- ctrl_if_rx_req_cons++;
- }
-
- if ( dp != ctrl_if_rxmsg_deferred_prod )
- {
- __insn_barrier();
- ctrl_if_rxmsg_deferred_prod = dp;
- if (ctrl_if_softintr)
- softintr_schedule(ctrl_if_softintr);
- }
-}
-
-static int ctrl_if_interrupt(void *arg)
-{
- control_if_t *ctrl_if = get_ctrl_if();
-
- if ( ctrl_if_tx_resp_cons != ctrl_if->tx_resp_prod )
- __ctrl_if_tx_tasklet(0);
-
- if ( ctrl_if_rx_req_cons != ctrl_if->rx_req_prod )
- __ctrl_if_rx_tasklet(0);
-
- return 0;
-}
-
-int
-ctrl_if_send_message_noblock(
- ctrl_msg_t *msg,
- ctrl_msg_handler_t hnd,
- unsigned long id)
-{
- control_if_t *ctrl_if = get_ctrl_if();
- unsigned long flags;
- int i;
- int s;
-
- save_and_cli(flags);
- simple_lock(&ctrl_if_lock);
-
- if ( TX_FULL(ctrl_if) )
- {
- simple_unlock(&ctrl_if_lock);
- restore_flags(flags);
- s = splhigh();
- if ( ctrl_if_tx_resp_cons != ctrl_if->tx_resp_prod )
- __ctrl_if_tx_tasklet(0);
- splx(s);
- return EAGAIN;
- }
-
- msg->id = 0xFF;
- if ( hnd != NULL )
- {
- for ( i = 0; ctrl_if_txmsg_id_mapping[i].fn != NULL; i++ )
- continue;
- ctrl_if_txmsg_id_mapping[i].fn = hnd;
- ctrl_if_txmsg_id_mapping[i].id = id;
- msg->id = i;
- }
-
- DPRINTK("Tx-Req %u/%u :: %d/%d\n",
- ctrl_if->tx_req_prod,
- ctrl_if_tx_resp_cons,
- msg->type, msg->subtype);
-
- memcpy(&ctrl_if->tx_ring[MASK_CONTROL_IDX(ctrl_if->tx_req_prod)],
- msg, sizeof(*msg));
- __insn_barrier(); /* Write the message before letting the controller peek
at it. */
- ctrl_if->tx_req_prod++;
-
- simple_unlock(&ctrl_if_lock);
- restore_flags(flags);
-
- ctrl_if_notify_controller();
-
- return 0;
-}
-
-int
-ctrl_if_send_message_block(
- ctrl_msg_t *msg,
- ctrl_msg_handler_t hnd,
- unsigned long id,
- long wait_state)
-{
- int rc;
-
- while ((rc = ctrl_if_send_message_noblock(msg, hnd, id)) == EAGAIN) {
- /* XXXcl possible race -> add a lock and ltsleep */
-#if 1
- HYPERVISOR_yield();
-#else
- rc = tsleep((caddr_t) &ctrl_if_tx_wait, PUSER | PCATCH,
- "ctrl_if", 0);
- if (rc)
- break;
-#endif
- }
-
- return rc;
-}
-
-/* Allow a reponse-callback handler to find context of a blocked requester. */
-struct rsp_wait {
- ctrl_msg_t *msg; /* Buffer for the response message. */
- struct task_struct *task; /* The task that is blocked on the response. */
- int done; /* Indicate to 'task' that response is rcv'ed. */
-};
-
-static void __ctrl_if_get_response(ctrl_msg_t *msg, unsigned long id)
-{
- struct rsp_wait *wait = (struct rsp_wait *)id;
-
- memcpy(wait->msg, msg, sizeof(*msg));
- __insn_barrier();
- wait->done = 1;
-
- wakeup(wait);
-}
-
-int
-ctrl_if_send_message_and_get_response(
- ctrl_msg_t *msg,
- ctrl_msg_t *rmsg,
- long wait_state)
-{
- struct rsp_wait wait;
- int rc;
-
- wait.msg = rmsg;
- wait.done = 0;
-
- if ( (rc = ctrl_if_send_message_block(msg, __ctrl_if_get_response,
- (unsigned long)&wait,
- wait_state)) != 0 )
- return rc;
-
- for ( ; ; )
- {
- if ( wait.done )
- break;
- tsleep((caddr_t)&wait, PUSER | PCATCH, "ctrl_if", 0);
- }
-
- return 0;
-}
-
-#ifdef notyet
-int
-ctrl_if_enqueue_space_callback(
- struct tq_struct *task)
-{
- control_if_t *ctrl_if = get_ctrl_if();
-
- /* Fast path. */
- if ( !TX_FULL(ctrl_if) )
- return 0;
-
- (void)queue_task(task, &ctrl_if_tx_tq);
-
- /*
- * We may race execution of the task queue, so return re-checked status. If
- * the task is not executed despite the ring being non-full then we will
- * certainly return 'not full'.
- */
- __insn_barrier();
- return TX_FULL(ctrl_if);
-}
-#endif
-
-void
-ctrl_if_send_response(
- ctrl_msg_t *msg)
-{
- control_if_t *ctrl_if = get_ctrl_if();
- unsigned long flags;
- ctrl_msg_t *dmsg;
-
- /*
- * NB. The response may the original request message, modified in-place.
- * In this situation we may have src==dst, so no copying is required.
- */
- save_and_cli(flags);
- simple_lock(&ctrl_if_lock);
-
- DPRINTK("Tx-Rsp %u :: %d/%d\n",
- ctrl_if->rx_resp_prod,
- msg->type, msg->subtype);
-
- dmsg = &ctrl_if->rx_ring[MASK_CONTROL_IDX(ctrl_if->rx_resp_prod)];
- if ( dmsg != msg )
- memcpy(dmsg, msg, sizeof(*msg));
-
- __insn_barrier(); /* Write the message before letting the controller peek
at it. */
- ctrl_if->rx_resp_prod++;
-
- simple_unlock(&ctrl_if_lock);
- restore_flags(flags);
-
- ctrl_if_notify_controller();
-}
-
-int
-ctrl_if_register_receiver(
- uint8_t type,
- ctrl_msg_handler_t hnd,
- unsigned int flags)
-{
- unsigned long _flags;
- int inuse;
-
- save_and_cli(_flags);
- simple_lock(&ctrl_if_lock);
-
- inuse = (ctrl_if_rxmsg_handler[type] != ctrl_if_rxmsg_default_handler);
-
- if ( inuse )
- {
- printf("Receiver %p already established for control "
- "messages of type %d.\n", ctrl_if_rxmsg_handler[type], type);
- }
- else
- {
- ctrl_if_rxmsg_handler[type] = hnd;
- x86_atomic_clear_bit((unsigned long *)&ctrl_if_rxmsg_blocking_context,
type);
- if ( flags == CALLBACK_IN_BLOCKING_CONTEXT )
- {
- x86_atomic_set_bit((unsigned long
*)&ctrl_if_rxmsg_blocking_context, type);
-#if 0
- if ( !safe_to_schedule_task )
- BUG();
-#endif
- }
- }
-
- simple_unlock(&ctrl_if_lock);
- restore_flags(_flags);
-
- return !inuse;
-}
-
-void
-ctrl_if_unregister_receiver(
- uint8_t type,
- ctrl_msg_handler_t hnd)
-{
- unsigned long flags;
-
- save_and_cli(flags);
- simple_lock(&ctrl_if_lock);
-
- if ( ctrl_if_rxmsg_handler[type] != hnd )
- printf("Receiver %p is not registered for control "
- "messages of type %d.\n", hnd, type);
- else
- ctrl_if_rxmsg_handler[type] = ctrl_if_rxmsg_default_handler;
-
- simple_unlock(&ctrl_if_lock);
- restore_flags(flags);
-
- /* Ensure that @hnd will not be executed after this function returns. */
- if (ctrl_if_softintr)
- softintr_schedule(ctrl_if_softintr);
-}
-
-static void
-ctrl_if_softintr_handler(void *arg)
-{
-
- if ( ctrl_if_rxmsg_deferred_cons != ctrl_if_rxmsg_deferred_prod )
- __ctrl_if_rxmsg_deferred(NULL);
-}
-
-#ifdef notyet
-void ctrl_if_suspend(void)
-{
- free_irq(ctrl_if_irq, NULL);
- unbind_evtchn_from_irq(ctrl_if_evtchn);
-}
-#endif
-
-void ctrl_if_resume(void)
-{
- control_if_t *ctrl_if = get_ctrl_if();
-
- if ( xen_start_info.flags & SIF_INITDOMAIN )
- {
- /*
- * The initial domain must create its own domain-controller link.
- * The controller is probably not running at this point, but will
- * pick up its end of the event channel from
- */
- evtchn_op_t op;
- op.cmd = EVTCHNOP_bind_interdomain;
- op.u.bind_interdomain.dom1 = DOMID_SELF;
- op.u.bind_interdomain.dom2 = DOMID_SELF;
- if ( HYPERVISOR_event_channel_op(&op) != 0 )
- panic("EVTCHNOP_bind_interdomain");
- xen_start_info.domain_controller_evtchn = op.u.bind_interdomain.port1;
- initdom_ctrlif_domcontroller_port = op.u.bind_interdomain.port2;
- }
-
- /* Sync up with shared indexes. */
- ctrl_if_tx_resp_cons = ctrl_if->tx_resp_prod;
- ctrl_if_rx_req_cons = ctrl_if->rx_resp_prod;
-
- ctrl_if_evtchn = xen_start_info.domain_controller_evtchn;
- ctrl_if_irq = bind_evtchn_to_irq(ctrl_if_evtchn);
-
- event_set_handler(ctrl_if_irq, &ctrl_if_interrupt, NULL, IPL_HIGH);
- hypervisor_enable_irq(ctrl_if_irq);
-}
-
-void ctrl_if_early_init(void)
-{
-
- simple_lock_init(&ctrl_if_lock);
-
- ctrl_if_evtchn = xen_start_info.domain_controller_evtchn;
-}
-
-void ctrl_if_init(void)
-{
- int i;
-
- for ( i = 0; i < 256; i++ )
- ctrl_if_rxmsg_handler[i] = ctrl_if_rxmsg_default_handler;
-
- if (ctrl_if_evtchn == -1)
- ctrl_if_early_init();
-
- ctrl_if_softintr = softintr_establish(IPL_SOFTNET,
- ctrl_if_softintr_handler, NULL);
-
- ctrl_if_resume();
-}
-
-
-#if 0
-/* This is called after it is safe to call schedule_task(). */
-static int __init ctrl_if_late_setup(void)
-{
- safe_to_schedule_task = 1;
- return 0;
-}
-__initcall(ctrl_if_late_setup);
-#endif
-
-
-/*
- * !! The following are DANGEROUS FUNCTIONS !!
- * Use with care [for example, see xencons_force_flush()].
- */
-
-int ctrl_if_transmitter_empty(void)
-{
- return (get_ctrl_if()->tx_req_prod == ctrl_if_tx_resp_cons);
-}
-
-void ctrl_if_discard_responses(void)
-{
- ctrl_if_tx_resp_cons = get_ctrl_if()->tx_resp_prod;
-}
-
diff -r 64cd054aa143 -r 0255f48b757f
netbsd-2.0-xen-sparse/sys/arch/xen/xen/evtchn.c
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/xen/evtchn.c Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,367 +0,0 @@
-/* $NetBSD$ */
-
-/*
- *
- * Copyright (c) 2004 Christian Limpach.
- * Copyright (c) 2004, K A Fraser.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Christian Limpach.
- * 4. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD$");
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/proc.h>
-#include <sys/malloc.h>
-#include <sys/reboot.h>
-
-#include <uvm/uvm.h>
-
-#include <machine/intrdefs.h>
-
-#include <machine/xen.h>
-#include <machine/hypervisor.h>
-#include <machine/evtchn.h>
-#include <machine/ctrl_if.h>
-#include <machine/xenfunc.h>
-
-struct pic xenev_pic = {
- .pic_dev = {
- .dv_xname = "xen_fakepic",
- },
- .pic_type = PIC_XEN,
- .pic_lock = __SIMPLELOCK_UNLOCKED,
-};
-
-/*
- * This lock protects updates to the following mapping and reference-count
- * arrays. The lock does not need to be acquired to read the mapping tables.
- */
-static struct simplelock irq_mapping_update_lock = SIMPLELOCK_INITIALIZER;
-
-/* IRQ <-> event-channel mappings. */
-int evtchn_to_irq[NR_EVENT_CHANNELS];
-int irq_to_evtchn[NR_IRQS];
-
-/* IRQ <-> VIRQ mapping. */
-static int virq_to_irq[NR_VIRQS];
-
-/* Reference counts for bindings to IRQs. */
-static int irq_bindcount[NR_IRQS];
-
-#if 0
-static int xen_die_handler(void *);
-#endif
-static int xen_debug_handler(void *);
-
-void
-events_default_setup()
-{
- int i;
-
- /* No VIRQ -> IRQ mappings. */
- for (i = 0; i < NR_VIRQS; i++)
- virq_to_irq[i] = -1;
-
- /* No event-channel -> IRQ mappings. */
- for (i = 0; i < NR_EVENT_CHANNELS; i++) {
- evtchn_to_irq[i] = -1;
- hypervisor_mask_event(i); /* No event channels are 'live' right
now. */
- }
-
- /* No IRQ -> event-channel mappings. */
- for (i = 0; i < NR_IRQS; i++)
- irq_to_evtchn[i] = -1;
-}
-
-void
-init_events()
-{
- int irq;
-
- irq = bind_virq_to_irq(VIRQ_DEBUG);
- event_set_handler(irq, &xen_debug_handler, NULL, IPL_DEBUG);
- hypervisor_enable_irq(irq);
-
- /* This needs to be done early, but after the IRQ subsystem is
- * alive. */
- ctrl_if_init();
-
- enable_intr(); /* at long last... */
-}
-
-unsigned int
-do_event(int irq, struct intrframe *regs)
-{
- struct cpu_info *ci;
- int ilevel;
- struct intrhand *ih;
- int (*ih_fun)(void *, void *);
- extern struct uvmexp uvmexp;
-
- if (irq >= NR_IRQS) {
-#ifdef DIAGNOSTIC
- printf("event irq number %d > NR_IRQS\n", irq);
-#endif
- return ENOENT;
- }
-
- if (0 && irq == 4) {
- ci = &cpu_info_primary;
- printf("do_event %d/%d called, ilevel %d\n", irq,
- irq_to_evtchn[irq], ci->ci_ilevel);
- }
-
- ci = &cpu_info_primary;
-
- hypervisor_acknowledge_irq(irq);
- if (ci->ci_isources[irq] == NULL) {
- hypervisor_enable_irq(irq);
- return 0;
- }
- ilevel = ci->ci_ilevel;
- if (ci->ci_isources[irq]->is_maxlevel <= ilevel) {
- ci->ci_ipending |= 1 << irq;
- /* leave masked */
- return 0;
- }
- uvmexp.intrs++;
- ci->ci_isources[irq]->is_evcnt.ev_count++;
- ci->ci_ilevel = ci->ci_isources[irq]->is_maxlevel;
- /* sti */
- ci->ci_idepth++;
-#ifdef MULTIPROCESSOR
- x86_intlock(regs);
-#endif
- ih = ci->ci_isources[irq]->is_handlers;
- while (ih != NULL) {
- if (ih->ih_level <= ilevel) {
-#ifdef MULTIPROCESSOR
- x86_intunlock(regs);
-#endif
- ci->ci_ipending |= 1 << irq;
- /* leave masked */
- ci->ci_idepth--;
- splx(ilevel);
- return 0;
- }
- ci->ci_ilevel = ih->ih_level;
- ih_fun = (void *)ih->ih_fun;
- ih_fun(ih->ih_arg, regs);
- ih = ih->ih_next;
- }
-#ifdef MULTIPROCESSOR
- x86_intunlock(regs);
-#endif
- hypervisor_enable_irq(irq);
- ci->ci_idepth--;
- splx(ilevel);
-
- if (0 && irq == 4)
- printf("do_event %d done, ipending %08x\n", irq,
- ci->ci_ipending);
-
- return 0;
-}
-
-static int
-find_unbound_irq(void)
-{
- int irq;
-
- for (irq = 0; irq < NR_IRQS; irq++)
- if (irq_bindcount[irq] == 0)
- break;
-
- if (irq == NR_IRQS)
- panic("No available IRQ to bind to: increase NR_IRQS!\n");
-
- return irq;
-}
-
-int
-bind_virq_to_irq(int virq)
-{
- evtchn_op_t op;
- int evtchn, irq;
-
- simple_lock(&irq_mapping_update_lock);
-
- irq = virq_to_irq[virq];
- if (irq == -1) {
- op.cmd = EVTCHNOP_bind_virq;
- op.u.bind_virq.virq = virq;
- if (HYPERVISOR_event_channel_op(&op) != 0)
- panic("Failed to bind virtual IRQ %d\n", virq);
- evtchn = op.u.bind_virq.port;
-
- irq = find_unbound_irq();
- evtchn_to_irq[evtchn] = irq;
- irq_to_evtchn[irq] = evtchn;
-
- virq_to_irq[virq] = irq;
- }
-
- irq_bindcount[irq]++;
-
- simple_unlock(&irq_mapping_update_lock);
-
- return irq;
-}
-
-void
-unbind_virq_from_irq(int virq)
-{
- evtchn_op_t op;
- int irq = virq_to_irq[virq];
- int evtchn = irq_to_evtchn[irq];
-
- simple_lock(&irq_mapping_update_lock);
-
- irq_bindcount[irq]--;
- if (irq_bindcount[irq] == 0) {
- op.cmd = EVTCHNOP_close;
- op.u.close.dom = DOMID_SELF;
- op.u.close.port = evtchn;
- if (HYPERVISOR_event_channel_op(&op) != 0)
- panic("Failed to unbind virtual IRQ %d\n", virq);
-
- evtchn_to_irq[evtchn] = -1;
- irq_to_evtchn[irq] = -1;
- virq_to_irq[virq] = -1;
- }
-
- simple_unlock(&irq_mapping_update_lock);
-}
-
-int bind_evtchn_to_irq(int evtchn)
-{
- int irq;
-
- simple_lock(&irq_mapping_update_lock);
-
- irq = evtchn_to_irq[evtchn];
- if (irq == -1) {
- irq = find_unbound_irq();
- evtchn_to_irq[evtchn] = irq;
- irq_to_evtchn[irq] = evtchn;
- }
-
- irq_bindcount[irq]++;
-
- simple_unlock(&irq_mapping_update_lock);
-
- return irq;
-}
-
-int
-event_set_handler(int irq, ev_handler_t handler, void *arg, int level)
-{
- struct intrsource *isp;
- struct intrhand *ih;
- struct cpu_info *ci;
-
- if (irq >= NR_IRQS) {
-#ifdef DIAGNOSTIC
- printf("irq number %d > NR_IRQS\n", irq);
-#endif
- return ENOENT;
- }
-
-#if 0
- printf("event_set_handler irq %d/%d handler %p level %d\n", irq,
- irq_to_evtchn[irq], handler, level);
-#endif
- /* XXXcl handle already bound irq */
-
- MALLOC(isp, struct intrsource *, sizeof (struct intrsource), M_DEVBUF,
- M_WAITOK|M_ZERO);
- if (isp == NULL)
- panic("can't allocate fixed interrupt source");
- MALLOC(ih, struct intrhand *, sizeof (struct intrhand), M_DEVBUF,
- M_WAITOK|M_ZERO);
- if (ih == NULL)
- panic("can't allocate fixed interrupt source");
-
- ci = &cpu_info_primary;
-
- isp->is_recurse = xenev_stubs[irq].ist_recurse;
- isp->is_resume = xenev_stubs[irq].ist_resume;
- ih->ih_level = level;
- ih->ih_fun = handler;
- ih->ih_arg = arg;
- ih->ih_next = NULL;
- isp->is_handlers = ih;
- isp->is_pic = &xenev_pic;
- ci->ci_isources[irq] = isp;
- evcnt_attach_dynamic(&isp->is_evcnt, EVCNT_TYPE_INTR, NULL,
- ci->ci_dev->dv_xname, "xenev");
-
- intr_calculatemasks(ci);
-
- return 0;
-}
-
-void hypervisor_enable_irq(unsigned int irq)
-{
-
- hypervisor_unmask_event(irq_to_evtchn[irq]);
-}
-
-void hypervisor_disable_irq(unsigned int irq)
-{
-
- hypervisor_mask_event(irq_to_evtchn[irq]);
-}
-
-void hypervisor_acknowledge_irq(unsigned int irq)
-{
-
- hypervisor_mask_event(irq_to_evtchn[irq]);
- hypervisor_clear_event(irq_to_evtchn[irq]);
-}
-
-#if 0
-static int
-xen_die_handler(void *arg)
-{
- printf("hypervisor: DIE event received...\n");
- cpu_reboot(0, NULL);
- /* NOTREACHED */
- return 0;
-}
-#endif
-
-static int
-xen_debug_handler(void *arg)
-{
- printf("debug event\n");
- return 0;
-}
diff -r 64cd054aa143 -r 0255f48b757f
netbsd-2.0-xen-sparse/sys/arch/xen/xen/hypervisor.c
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/xen/hypervisor.c Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,242 +0,0 @@
-/* $NetBSD: hypervisor.c,v 1.7.2.1 2004/05/22 15:58:54 he Exp $ */
-
-/*
- *
- * Copyright (c) 2004 Christian Limpach.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Christian Limpach.
- * 4. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: hypervisor.c,v 1.7.2.1 2004/05/22 15:58:54 he Exp
$");
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/device.h>
-
-#include "xencons.h"
-#include "xennet.h"
-#include "xbd.h"
-#include "xenkbc.h"
-#include "vga_xen.h"
-#include "npx.h"
-
-#include "opt_xen.h"
-
-#include <machine/xen.h>
-#include <machine/hypervisor.h>
-#include <machine/evtchn.h>
-
-#include <sys/dirent.h>
-#include <sys/stat.h>
-#include <sys/tree.h>
-#include <sys/vnode.h>
-#include <miscfs/specfs/specdev.h>
-#include <miscfs/kernfs/kernfs.h>
-#include <machine/kernfs_machdep.h>
-
-#if NXENNET > 0
-#include <net/if.h>
-#include <net/if_ether.h>
-#include <net/if_media.h>
-#include <machine/if_xennetvar.h>
-#endif
-
-#if NXBD > 0
-#include <sys/buf.h>
-#include <sys/disk.h>
-#include <dev/dkvar.h>
-#include <machine/xbdvar.h>
-#endif
-
-#if NXENKBC > 0
-#include <dev/pckbport/pckbportvar.h>
-#include <machine/xenkbcvar.h>
-#endif
-
-#if NVGA_XEN > 0
-#include <machine/bus.h>
-#include <machine/vga_xenvar.h>
-#endif
-
-int hypervisor_match(struct device *, struct cfdata *, void *);
-void hypervisor_attach(struct device *, struct device *, void *);
-
-CFATTACH_DECL(hypervisor, sizeof(struct device),
- hypervisor_match, hypervisor_attach, NULL, NULL);
-
-int hypervisor_print(void *, const char *);
-
-union hypervisor_attach_cookie {
- const char *hac_device; /* first elem of all */
-#if NXENKBC > 0
- struct xenkbc_attach_args hac_xenkbc;
-#endif
-#if NVGA_XEN > 0
- struct xen_vga_attach_args hac_vga_xen;
-#endif
-#if NXENCONS > 0
- struct xencons_attach_args hac_xencons;
-#endif
-#if NXENNET > 0
- struct xennet_attach_args hac_xennet;
-#endif
-#if NXBD > 0
- struct xbd_attach_args hac_xbd;
-#endif
-#if NNPX > 0
- struct xen_npx_attach_args hac_xennpx;
-#endif
-};
-
-
-/*
- * Probe for the hypervisor; always succeeds.
- */
-int
-hypervisor_match(parent, match, aux)
- struct device *parent;
- struct cfdata *match;
- void *aux;
-{
- struct hypervisor_attach_args *haa = aux;
-
- if (strcmp(haa->haa_busname, "hypervisor") == 0)
- return 1;
- return 0;
-}
-
-static void
-scan_finish(struct device *parent)
-{
-
-#if NXENNET > 0
- xennet_scan_finish(parent);
-#endif
-#if NXBD > 0
- xbd_scan_finish(parent);
-#endif
-}
-
-/*
- * Attach the hypervisor.
- */
-void
-hypervisor_attach(parent, self, aux)
- struct device *parent, *self;
- void *aux;
-{
- union hypervisor_attach_cookie hac;
-
- printf("\n");
-
- init_events();
-
-#if NXENKBC > 0
- hac.hac_xenkbc.xa_device = "xenkbc";
- config_found(self, &hac.hac_xenkbc, hypervisor_print);
-#endif
-
-#if NVGA_XEN > 0
- hac.hac_vga_xen.xa_device = "vga_xen";
- hac.hac_vga_xen.xa_iot = X86_BUS_SPACE_IO;
- hac.hac_vga_xen.xa_memt = X86_BUS_SPACE_MEM;
- config_found(self, &hac.hac_vga_xen, hypervisor_print);
-#endif
-
-#if NXENCONS > 0
- hac.hac_xencons.xa_device = "xencons";
- config_found(self, &hac.hac_xencons, hypervisor_print);
-#endif
-#if NXENNET > 0
- hac.hac_xennet.xa_device = "xennet";
- xennet_scan(self, &hac.hac_xennet, hypervisor_print);
-#endif
-#if NXBD > 0
- hac.hac_xbd.xa_device = "xbd";
- xbd_scan(self, &hac.hac_xbd, hypervisor_print);
-#endif
-#if NNPX > 0
- hac.hac_xennpx.xa_device = "npx";
- config_found(self, &hac.hac_xennpx, hypervisor_print);
-#endif
- xenkernfs_init();
-#ifdef DOM0OPS
- if (xen_start_info.flags & SIF_PRIVILEGED) {
- xenprivcmd_init();
- xenmachmem_init();
- xenvfr_init();
- }
-#endif
-#if NXENNET > 0 || NXBD > 0
- config_interrupts(self, scan_finish);
-#endif
-}
-
-int
-hypervisor_print(aux, parent)
- void *aux;
- const char *parent;
-{
- union hypervisor_attach_cookie *hac = aux;
-
- if (parent)
- aprint_normal("%s at %s", hac->hac_device, parent);
- return (UNCONF);
-}
-
-void
-hypervisor_notify_via_evtchn(unsigned int port)
-{
- evtchn_op_t op;
-
- op.cmd = EVTCHNOP_send;
- op.u.send.local_port = port;
- (void)HYPERVISOR_event_channel_op(&op);
-}
-
-#define READ_MODE (S_IRUSR|S_IRGRP|S_IROTH)
-#define DIR_MODE (S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)
-
-kernfs_parentdir_t *kernxen_pkt;
-
-void
-xenkernfs_init()
-{
- kernfs_entry_t *dkt;
-
- KERNFS_ALLOCENTRY(dkt, M_TEMP, M_WAITOK);
- KERNFS_INITENTRY(dkt, DT_DIR, "xen", NULL, KFSsubdir, VDIR, DIR_MODE);
- kernfs_addentry(NULL, dkt);
- kernxen_pkt = KERNFS_ENTOPARENTDIR(dkt);
-
- KERNFS_ALLOCENTRY(dkt, M_TEMP, M_WAITOK);
- KERNFS_INITENTRY(dkt, DT_REG, "cmdline", xen_start_info.cmd_line,
- KFSstring, VREG, READ_MODE);
- kernfs_addentry(kernxen_pkt, dkt);
-}
diff -r 64cd054aa143 -r 0255f48b757f
netbsd-2.0-xen-sparse/sys/arch/xen/xen/if_xennet.c
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/xen/if_xennet.c Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,1284 +0,0 @@
-/* $NetBSD: if_xennet.c,v 1.1.2.1 2004/05/22 15:58:29 he Exp $ */
-
-/*
- *
- * Copyright (c) 2004 Christian Limpach.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Christian Limpach.
- * 4. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: if_xennet.c,v 1.1.2.1 2004/05/22 15:58:29 he Exp
$");
-
-#include "opt_inet.h"
-#include "rnd.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/mbuf.h>
-#include <sys/syslog.h>
-#include <sys/mount.h>
-#include <sys/socket.h>
-#include <sys/socketvar.h>
-#include <sys/device.h>
-#include <sys/ioctl.h>
-#include <sys/errno.h>
-#if NRND > 0
-#include <sys/rnd.h>
-#endif
-
-#include <net/if.h>
-#include <net/if_types.h>
-#include <net/if_dl.h>
-#include <net/if_ether.h>
-
-#ifdef mediacode
-#include <net/if_media.h>
-#endif
-
-#ifdef INET
-#include <netinet/in.h>
-#include <netinet/if_inarp.h>
-#include <netinet/in_systm.h>
-#include <netinet/in_var.h>
-#include <netinet/ip.h>
-#endif
-
-#include <nfs/rpcv2.h>
-
-#include <nfs/nfsproto.h>
-#include <nfs/nfs.h>
-#include <nfs/nfsmount.h>
-#include <nfs/nfsdiskless.h>
-
-#include "bpfilter.h"
-#if NBPFILTER > 0
-#include <net/bpf.h>
-#include <net/bpfdesc.h>
-#endif
-
-#include <uvm/uvm_extern.h>
-#include <uvm/uvm_page.h>
-
-#include <machine/xen.h>
-#include <machine/hypervisor.h>
-#include <machine/evtchn.h>
-#include <machine/ctrl_if.h>
-
-#include <machine/if_xennetvar.h>
-
-#ifdef DEBUG
-#define XENNET_DEBUG
-#endif
-#if defined(XENNET_DEBUG) && !defined(DEBUG)
-#define DEBUG
-#endif
-/* #define XENNET_DEBUG_DUMP */
-
-#ifdef XENNET_DEBUG
-#define XEDB_FOLLOW 0x01
-#define XEDB_INIT 0x02
-#define XEDB_EVENT 0x04
-#define XEDB_MBUF 0x08
-#define XEDB_MEM 0x10
-int xennet_debug = 0x0;
-void printk(char *, ...);
-#define DPRINTF(x) if (xennet_debug) printk x;
-#define DPRINTFN(n,x) if (xennet_debug & (n)) printk x;
-#else
-#define DPRINTF(x)
-#define DPRINTFN(n,x)
-#endif
-#define PRINTF(x) printf x;
-
-#ifdef XENNET_DEBUG_DUMP
-static void xennet_hex_dump(unsigned char *, size_t, char *, int);
-#endif
-
-int xennet_match (struct device *, struct cfdata *, void *);
-void xennet_attach (struct device *, struct device *, void *);
-static void xennet_ctrlif_rx(ctrl_msg_t *, unsigned long);
-static int xennet_driver_count_connected(void);
-static void xennet_driver_status_change(netif_fe_driver_status_t *);
-static void xennet_interface_status_change(netif_fe_interface_status_t *);
-static void xennet_tx_mbuf_free(struct mbuf *, caddr_t, size_t, void *);
-static void xennet_rx_mbuf_free(struct mbuf *, caddr_t, size_t, void *);
-static int xen_network_handler(void *);
-static void network_tx_buf_gc(struct xennet_softc *);
-static void network_alloc_rx_buffers(struct xennet_softc *);
-static void network_alloc_tx_buffers(struct xennet_softc *);
-void xennet_init(struct xennet_softc *);
-void xennet_reset(struct xennet_softc *);
-#ifdef mediacode
-static int xennet_mediachange (struct ifnet *);
-static void xennet_mediastatus(struct ifnet *, struct ifmediareq *);
-#endif
-
-CFATTACH_DECL(xennet, sizeof(struct xennet_softc),
- xennet_match, xennet_attach, NULL, NULL);
-
-#define TX_MAX_ENTRIES (NETIF_TX_RING_SIZE - 2)
-#define RX_MAX_ENTRIES (NETIF_RX_RING_SIZE - 2)
-#define TX_ENTRIES 128
-#define RX_ENTRIES 128
-
-static unsigned long rx_pfn_array[NETIF_RX_RING_SIZE];
-static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE+1];
-static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE];
-
-/** Network interface info. */
-struct xennet_ctrl {
- /** Number of interfaces. */
- int xc_interfaces;
- /** Number of connected interfaces. */
- int xc_connected;
- /** Error code. */
- int xc_err;
- /** Driver status. */
- int xc_up;
-
- cfprint_t xc_cfprint;
- struct device *xc_parent;
-};
-
-static struct xennet_ctrl netctrl = { -1, 0, 0 };
-
-#ifdef mediacode
-static int xennet_media[] = {
- IFM_ETHER|IFM_AUTO,
-};
-static int nxennet_media = (sizeof(xennet_media)/sizeof(xennet_media[0]));
-#endif
-
-
-static int
-xennet_wait_for_interfaces(void)
-{
-
- while (netctrl.xc_interfaces != netctrl.xc_connected)
- HYPERVISOR_yield();
- return 0;
-}
-
-int
-xennet_scan(struct device *self, struct xennet_attach_args *xneta,
- cfprint_t print)
-{
- ctrl_msg_t cmsg;
- netif_fe_driver_status_t st;
-
- if ((xen_start_info.flags & SIF_INITDOMAIN) ||
- (xen_start_info.flags & SIF_NET_BE_DOMAIN))
- return 0;
-
- netctrl.xc_parent = self;
- netctrl.xc_cfprint = print;
-
- printf("Initialising Xen virtual ethernet frontend driver.\n");
-
- (void)ctrl_if_register_receiver(CMSG_NETIF_FE, xennet_ctrlif_rx,
- CALLBACK_IN_BLOCKING_CONTEXT);
-
- /* Send a driver-UP notification to the domain controller. */
- cmsg.type = CMSG_NETIF_FE;
- cmsg.subtype = CMSG_NETIF_FE_DRIVER_STATUS;
- cmsg.length = sizeof(netif_fe_driver_status_t);
- st.status = NETIF_DRIVER_STATUS_UP;
- st.max_handle = 0;
- memcpy(cmsg.msg, &st, sizeof(st));
- ctrl_if_send_message_block(&cmsg, NULL, 0, 0);
-
- return 0;
-}
-
-void
-xennet_scan_finish(struct device *parent)
-{
- int err;
-
- err = xennet_wait_for_interfaces();
- if (err)
- ctrl_if_unregister_receiver(CMSG_NETIF_FE, xennet_ctrlif_rx);
-}
-
-int
-xennet_match(struct device *parent, struct cfdata *match, void *aux)
-{
- struct xennet_attach_args *xa = (struct xennet_attach_args *)aux;
-
- if (strcmp(xa->xa_device, "xennet") == 0)
- return 1;
- return 0;
-}
-
-void
-xennet_attach(struct device *parent, struct device *self, void *aux)
-{
- struct xennet_attach_args *xneta = (struct xennet_attach_args *)aux;
- struct xennet_softc *sc = (struct xennet_softc *)self;
- struct ifnet *ifp = &sc->sc_ethercom.ec_if;
- int idx;
-
- aprint_normal(": Xen Virtual Network Interface\n");
-
- sc->sc_ifno = xneta->xa_handle;
-
- /* Initialize ifnet structure. */
- memcpy(ifp->if_xname, sc->sc_dev.dv_xname, IFNAMSIZ);
- ifp->if_softc = sc;
- ifp->if_start = xennet_start;
- ifp->if_ioctl = xennet_ioctl;
- ifp->if_watchdog = xennet_watchdog;
- ifp->if_flags = IFF_BROADCAST | IFF_NOTRAILERS;
-
-#ifdef mediacode
- ifmedia_init(&sc->sc_media, 0, xennet_mediachange,
- xennet_mediastatus);
- for (idx = 0; idx < nxennet_media; idx++)
- ifmedia_add(&sc->sc_media, xennet_media[idx], 0, NULL);
- ifmedia_set(&sc->sc_media, xennet_media[0]);
-#endif
-
- for (idx = 0; idx < NETIF_TX_RING_SIZE; idx++)
- sc->sc_tx_bufa[idx].xb_next = idx + 1;
- for (idx = 0; idx < NETIF_RX_RING_SIZE; idx++)
- sc->sc_rx_bufa[idx].xb_next = idx + 1;
-}
-
-static struct xennet_softc *
-find_device(int handle)
-{
- struct device *dv;
- struct xennet_softc *xs = NULL;
-
- for (dv = alldevs.tqh_first; dv != NULL; dv = dv->dv_list.tqe_next) {
- if (dv->dv_cfattach == NULL ||
- dv->dv_cfattach->ca_attach != xennet_attach)
- continue;
- xs = (struct xennet_softc *)dv;
- if (xs->sc_ifno == handle)
- break;
- }
- return dv ? xs : NULL;
-}
-
-static void
-xennet_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
-{
- int respond = 1;
-
- DPRINTFN(XEDB_EVENT, ("> ctrlif_rx=%d\n", msg->subtype));
- switch (msg->subtype) {
- case CMSG_NETIF_FE_INTERFACE_STATUS:
- if (msg->length != sizeof(netif_fe_interface_status_t))
- goto error;
- xennet_interface_status_change(
- (netif_fe_interface_status_t *)&msg->msg[0]);
- break;
-
- case CMSG_NETIF_FE_DRIVER_STATUS:
- if (msg->length != sizeof(netif_fe_driver_status_t))
- goto error;
- xennet_driver_status_change(
- (netif_fe_driver_status_t *)&msg->msg[0]);
- break;
-
- error:
- default:
- msg->length = 0;
- break;
- }
-
- if (respond)
- ctrl_if_send_response(msg);
-}
-
-static void
-xennet_driver_status_change(netif_fe_driver_status_t *status)
-{
-
- DPRINTFN(XEDB_EVENT, ("xennet_driver_status_change(%d)\n",
- status->status));
-
- netctrl.xc_up = status->status;
- xennet_driver_count_connected();
-}
-
-static int
-xennet_driver_count_connected(void)
-{
- struct device *dv;
- struct xennet_softc *xs = NULL;
-
- netctrl.xc_interfaces = netctrl.xc_connected = 0;
- for (dv = alldevs.tqh_first; dv != NULL; dv = dv->dv_list.tqe_next) {
- if (dv->dv_cfattach == NULL ||
- dv->dv_cfattach->ca_attach != xennet_attach)
- continue;
- xs = (struct xennet_softc *)dv;
- netctrl.xc_interfaces++;
- if (xs->sc_backend_state == BEST_CONNECTED)
- netctrl.xc_connected++;
- }
-
- return netctrl.xc_connected;
-}
-
-static void
-xennet_interface_status_change(netif_fe_interface_status_t *status)
-{
- ctrl_msg_t cmsg;
- netif_fe_interface_connect_t up;
- struct xennet_softc *sc;
- struct ifnet *ifp;
- struct xennet_attach_args xneta;
-
- DPRINTFN(XEDB_EVENT,
("xennet_interface_status_change(%d,%d,%02x:%02x:%02x:%02x:%02x:%02x)\n",
- status->status,
- status->handle,
- status->mac[0], status->mac[1], status->mac[2],
- status->mac[3], status->mac[4], status->mac[5]));
-
- sc = find_device(status->handle);
- if (sc == NULL) {
- xneta.xa_device = "xennet";
- xneta.xa_handle = status->handle;
- config_found(netctrl.xc_parent, &xneta, netctrl.xc_cfprint);
- sc = find_device(status->handle);
- if (sc == NULL) {
- printf("Status change: invalid netif handle %u\n",
- status->handle);
- return;
- }
- }
- ifp = &sc->sc_ethercom.ec_if;
-
- DPRINTFN(XEDB_EVENT,
("xennet_interface_status_change(%d,%p,%02x:%02x:%02x:%02x:%02x:%02x)\n",
- status->handle, sc,
- status->mac[0], status->mac[1], status->mac[2],
- status->mac[3], status->mac[4], status->mac[5]));
-
- switch (status->status) {
- case NETIF_INTERFACE_STATUS_CLOSED:
- printf("Unexpected netif-CLOSED message in state %d\n",
- sc->sc_backend_state);
- break;
-
- case NETIF_INTERFACE_STATUS_DISCONNECTED:
-#if 0
- if (sc->sc_backend_state != BEST_CLOSED) {
- printk("Unexpected netif-DISCONNECTED message"
- " in state %d\n", sc->sc_backend_state);
- printk("Attempting to reconnect network interface\n");
-
- /* Begin interface recovery.
- *
- * NB. Whilst we're recovering, we turn the
- * carrier state off. We take measures to
- * ensure that this device isn't used for
- * anything. We also stop the queue for this
- * device. Various different approaches
- * (e.g. continuing to buffer packets) have
- * been tested but don't appear to improve the
- * overall impact on TCP connections.
- *
- * TODO: (MAW) Change the Xend<->Guest
- * protocol so that a recovery is initiated by
- * a special "RESET" message - disconnect
- * could just mean we're not allowed to use
- * this interface any more.
- */
-
- /* Stop old i/f to prevent errors whilst we
- * rebuild the state. */
- spin_lock_irq(&np->tx_lock);
- spin_lock(&np->rx_lock);
- netif_stop_queue(dev);
- np->backend_state = BEST_DISCONNECTED;
- spin_unlock(&np->rx_lock);
- spin_unlock_irq(&np->tx_lock);
-
- /* Free resources. */
- free_irq(np->irq, dev);
- unbind_evtchn_from_irq(np->evtchn);
- free_page((unsigned long)np->tx);
- free_page((unsigned long)np->rx);
- }
-#endif
-
- if (sc->sc_backend_state == BEST_CLOSED) {
- /* Move from CLOSED to DISCONNECTED state. */
- sc->sc_tx = (netif_tx_interface_t *)
- uvm_km_valloc_align(kernel_map, PAGE_SIZE,
PAGE_SIZE);
- if (sc->sc_tx == NULL)
- panic("netif: no tx va");
- sc->sc_rx = (netif_rx_interface_t *)
- uvm_km_valloc_align(kernel_map, PAGE_SIZE,
PAGE_SIZE);
- if (sc->sc_rx == NULL)
- panic("netif: no rx va");
- sc->sc_pg_tx = uvm_pagealloc(NULL, 0, NULL,
UVM_PGA_ZERO);
- if (sc->sc_pg_tx == NULL) {
- panic("netif: no tx pages");
- }
- pmap_kenter_pa((vaddr_t)sc->sc_tx,
VM_PAGE_TO_PHYS(sc->sc_pg_tx),
- VM_PROT_READ | VM_PROT_WRITE);
- sc->sc_pg_rx = uvm_pagealloc(NULL, 0, NULL,
UVM_PGA_ZERO);
- if (sc->sc_pg_rx == NULL) {
- panic("netif: no rx pages");
- }
- pmap_kenter_pa((vaddr_t)sc->sc_rx,
VM_PAGE_TO_PHYS(sc->sc_pg_rx),
- VM_PROT_READ | VM_PROT_WRITE);
- sc->sc_backend_state = BEST_DISCONNECTED;
- }
-
- /* Construct an interface-CONNECT message for the
- * domain controller. */
- cmsg.type = CMSG_NETIF_FE;
- cmsg.subtype = CMSG_NETIF_FE_INTERFACE_CONNECT;
- cmsg.length = sizeof(netif_fe_interface_connect_t);
- up.handle = status->handle;
- up.tx_shmem_frame = xpmap_ptom(VM_PAGE_TO_PHYS(sc->sc_pg_tx))
>> PAGE_SHIFT;
- up.rx_shmem_frame = xpmap_ptom(VM_PAGE_TO_PHYS(sc->sc_pg_rx))
>> PAGE_SHIFT;
- memcpy(cmsg.msg, &up, sizeof(up));
-
- /* Tell the controller to bring up the interface. */
- ctrl_if_send_message_block(&cmsg, NULL, 0, 0);
- break;
-
- case NETIF_INTERFACE_STATUS_CONNECTED:
- if (sc->sc_backend_state == BEST_CLOSED) {
- printf("Unexpected netif-CONNECTED message"
- " in state %d\n", sc->sc_backend_state);
- break;
- }
-
- memcpy(sc->sc_enaddr, status->mac, ETHER_ADDR_LEN);
-#if 0
- if (xen_start_info.flags & SIF_PRIVILEGED) {
- /* XXX for domain-0 change out ethernet address to be
- * different than the physical address since arp
- * replies from other domains will report the physical
- * address.
- */
- if (sc->sc_enaddr[0] != 0xaa)
- sc->sc_enaddr[0] = 0xaa;
- else
- sc->sc_enaddr[0] = 0xab;
- }
-#endif
-
- /* Recovery procedure: */
-
- /* Step 1: Reinitialise variables. */
- sc->sc_rx_resp_cons = sc->sc_tx_resp_cons = /* sc->sc_tx_full =
*/ 0;
- sc->sc_rx->event = sc->sc_tx->event = 1;
-
- /* Step 2: Rebuild the RX and TX ring contents. */
- network_alloc_rx_buffers(sc);
- SLIST_INIT(&sc->sc_tx_bufs);
- network_alloc_tx_buffers(sc);
-
- /* Step 3: All public and private state should now be
- * sane. Get ready to start sending and receiving
- * packets and give the driver domain a kick because
- * we've probably just requeued some packets.
- */
- sc->sc_backend_state = BEST_CONNECTED;
- __insn_barrier();
- hypervisor_notify_via_evtchn(status->evtchn);
- network_tx_buf_gc(sc);
-
- if_attach(ifp);
- ether_ifattach(ifp, sc->sc_enaddr);
-
- sc->sc_evtchn = status->evtchn;
- sc->sc_irq = bind_evtchn_to_irq(sc->sc_evtchn);
- event_set_handler(sc->sc_irq, &xen_network_handler, sc,
IPL_NET);
- hypervisor_enable_irq(sc->sc_irq);
- xennet_driver_count_connected();
-
- aprint_normal("%s: MAC address %s\n", sc->sc_dev.dv_xname,
- ether_sprintf(sc->sc_enaddr));
-
-#if NRND > 0
- rnd_attach_source(&sc->sc_rnd_source, sc->sc_dev.dv_xname,
- RND_TYPE_NET, 0);
-#endif
- break;
-
- default:
- printf("Status change to unknown value %d\n",
- status->status);
- break;
- }
- DPRINTFN(XEDB_EVENT, ("xennet_interface_status_change()\n"));
-}
-
-static void
-xennet_tx_mbuf_free(struct mbuf *m, caddr_t buf, size_t size, void *arg)
-{
- struct xennet_txbuf *txbuf = (struct xennet_txbuf *)arg;
-
- DPRINTFN(XEDB_MBUF, ("xennet_tx_mbuf_free %p pa %p\n", txbuf,
- (void *)txbuf->xt_pa));
- SLIST_INSERT_HEAD(&txbuf->xt_sc->sc_tx_bufs, txbuf, xt_next);
- pool_cache_put(&mbpool_cache, m);
-}
-
-static void
-xennet_rx_push_buffer(struct xennet_softc *sc, int id)
-{
- NETIF_RING_IDX ringidx;
- int nr_pfns;
-
- ringidx = sc->sc_rx->req_prod;
- nr_pfns = 0;
-
- DPRINTFN(XEDB_MEM, ("readding page va %p pa %p ma %p/%p to rx_ring "
- "at %d with id %d\n",
- (void *)sc->sc_rx_bufa[id].xb_rx.xbrx_va,
- (void *)sc->sc_rx_bufa[id].xb_rx.xbrx_pa,
- (void *)(PTE_BASE[x86_btop
- (sc->sc_rx_bufa[id].xb_rx.xbrx_va)] &
- PG_FRAME),
- (void *)xpmap_ptom(sc->sc_rx_bufa[id].xb_rx.xbrx_pa),
- ringidx, id));
-
- sc->sc_rx->ring[MASK_NETIF_RX_IDX(ringidx)].req.id = id;
-
- rx_pfn_array[nr_pfns] = xpmap_ptom(sc->sc_rx_bufa[id].xb_rx.xbrx_pa)
- >> PAGE_SHIFT;
-
- /* Remove this page from pseudo phys map before
- * passing back to Xen. */
- xpmap_phys_to_machine_mapping[(sc->sc_rx_bufa[id].xb_rx.xbrx_pa -
XPMAP_OFFSET) >> PAGE_SHIFT] =
- INVALID_P2M_ENTRY;
-
- rx_mcl[nr_pfns].op = __HYPERVISOR_update_va_mapping;
- rx_mcl[nr_pfns].args[0] = sc->sc_rx_bufa[id].xb_rx.xbrx_va;
- rx_mcl[nr_pfns].args[1] = 0;
- rx_mcl[nr_pfns].args[2] = 0;
-
- nr_pfns++;
-
- sc->sc_rx_bufs_to_notify++;
-
- ringidx++;
-
- /*
- * We may have allocated buffers which have entries
- * outstanding in the page update queue -- make sure we flush
- * those first!
- */
- xpq_flush_queue();
-
- /* After all PTEs have been zapped we blow away stale TLB entries. */
- rx_mcl[nr_pfns-1].args[2] = UVMF_TLB_FLUSH|UVMF_LOCAL;
-
- /* Give away a batch of pages. */
- rx_mcl[nr_pfns].op = __HYPERVISOR_dom_mem_op;
- rx_mcl[nr_pfns].args[0] = MEMOP_decrease_reservation;
- rx_mcl[nr_pfns].args[1] = (unsigned long)rx_pfn_array;
- rx_mcl[nr_pfns].args[2] = (unsigned long)nr_pfns;
- rx_mcl[nr_pfns].args[3] = 0;
- rx_mcl[nr_pfns].args[4] = DOMID_SELF;
-
- /* Zap PTEs and give away pages in one big multicall. */
- (void)HYPERVISOR_multicall(rx_mcl, nr_pfns+1);
-
- /* Check return status of HYPERVISOR_dom_mem_op(). */
- if ( rx_mcl[nr_pfns].result != nr_pfns )
- panic("Unable to reduce memory reservation\n");
-
- /* Above is a suitable barrier to ensure backend will see requests. */
- sc->sc_rx->req_prod = ringidx;
-}
-
-static void
-xennet_rx_mbuf_free(struct mbuf *m, caddr_t buf, size_t size, void *arg)
-{
- union xennet_bufarray *xb = (union xennet_bufarray *)arg;
- struct xennet_softc *sc = xb->xb_rx.xbrx_sc;
- int id = (xb - sc->sc_rx_bufa);
-
- DPRINTFN(XEDB_MBUF, ("xennet_rx_mbuf_free id %d, mbuf %p, buf %p, "
- "size %d\n", id, m, buf, size));
-
- xennet_rx_push_buffer(sc, id);
-
- pool_cache_put(&mbpool_cache, m);
-}
-
-static int
-xen_network_handler(void *arg)
-{
- struct xennet_softc *sc = arg;
- struct ifnet *ifp = &sc->sc_ethercom.ec_if;
- netif_rx_response_t *rx;
- paddr_t pa;
- NETIF_RING_IDX ringidx;
- mmu_update_t *mmu = rx_mmu;
- multicall_entry_t *mcl = rx_mcl;
- struct mbuf *m;
-
- network_tx_buf_gc(sc);
-
-#if NRND > 0
- rnd_add_uint32(&sc->sc_rnd_source, sc->sc_rx_resp_cons);
-#endif
-
- again:
- for (ringidx = sc->sc_rx_resp_cons;
- ringidx != sc->sc_rx->resp_prod;
- ringidx++) {
- rx = &sc->sc_rx->ring[MASK_NETIF_RX_IDX(ringidx)].resp;
-
- if (rx->status < 0)
- panic("rx->status < 0");
- /* XXXcl check rx->status for error */
-
- MGETHDR(m, M_DONTWAIT, MT_DATA);
- if (m == NULL) {
- printf("xennet: rx no mbuf\n");
- break;
- }
-
- pa = sc->sc_rx_bufa[rx->id].xb_rx.xbrx_pa;
-
- DPRINTFN(XEDB_EVENT, ("rx event %d for id %d, size %d, "
- "status %d, ma %08lx, pa %08lx\n", ringidx,
- rx->id, rx->status, rx->status, rx->addr, pa));
-
- /* Remap the page. */
- mmu->ptr = (rx->addr & PG_FRAME) | MMU_MACHPHYS_UPDATE;
- mmu->val = (pa - XPMAP_OFFSET) >> PAGE_SHIFT;
- mmu++;
- mcl->op = __HYPERVISOR_update_va_mapping;
- mcl->args[0] = sc->sc_rx_bufa[rx->id].xb_rx.xbrx_va;
- mcl->args[1] = (rx->addr & PG_FRAME) | PG_V|PG_KW;
- mcl->args[2] = UVMF_TLB_FLUSH|UVMF_LOCAL; // 0;
- mcl++;
-
- xpmap_phys_to_machine_mapping
- [(pa - XPMAP_OFFSET) >> PAGE_SHIFT] =
- rx->addr >> PAGE_SHIFT;
-
- /* Do all the remapping work, and M->P updates, in one
- * big hypercall. */
- if ((mcl - rx_mcl) != 0) {
- mcl->op = __HYPERVISOR_mmu_update;
- mcl->args[0] = (unsigned long)rx_mmu;
- mcl->args[1] = mmu - rx_mmu;
- mcl->args[2] = 0;
- mcl++;
- (void)HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
- }
- if (0)
- printf("page mapped at va %08lx -> %08x/%08lx\n",
- sc->sc_rx_bufa[rx->id].xb_rx.xbrx_va,
- PTE_BASE[x86_btop(sc->sc_rx_bufa[rx->id].xb_rx.xbrx_va)],
- rx->addr);
- mmu = rx_mmu;
- mcl = rx_mcl;
-
- DPRINTFN(XEDB_MBUF, ("rx packet mbuf %p va %p pa %p/%p "
- "ma %p\n", m,
- (void *)sc->sc_rx_bufa[rx->id].xb_rx.xbrx_va,
- (void *)(xpmap_mtop(PTE_BASE[x86_btop
-
(sc->sc_rx_bufa[rx->id].xb_rx.xbrx_va)] & PG_FRAME)), (void *)pa,
- (void *)(PTE_BASE[x86_btop
- (sc->sc_rx_bufa[rx->id].xb_rx.xbrx_va)] & PG_FRAME)));
-
- m->m_len = m->m_pkthdr.len = rx->status;
- m->m_pkthdr.rcvif = ifp;
- if (sc->sc_rx->req_prod != sc->sc_rx->resp_prod) {
- MEXTADD(m, (void *)(sc->sc_rx_bufa[rx->id].xb_rx.
- xbrx_va + (rx->addr & PAGE_MASK)), rx->status,
M_DEVBUF,
- xennet_rx_mbuf_free,
- &sc->sc_rx_bufa[rx->id]);
- } else {
- /*
- * This was our last receive buffer, allocate
- * memory, copy data and push the receive
- * buffer back to the hypervisor.
- */
- MEXTMALLOC(m, rx->status, M_DONTWAIT);
- if ((m->m_flags & M_EXT) == 0) {
- printf("xennet: rx no mbuf 2\n");
- m_free(m);
- break;
- }
- memcpy(m->m_data, (void *)(sc->sc_rx_bufa[rx->id].
- xb_rx.xbrx_va + (rx->addr & PAGE_MASK)),
rx->status);
- xennet_rx_push_buffer(sc, rx->id);
- }
-
-#ifdef XENNET_DEBUG_DUMP
- xennet_hex_dump(mtod(m, u_char *), m->m_pkthdr.len, "r",
rx->id);
-#endif
-
-#if NBPFILTER > 0
- /*
- * Pass packet to bpf if there is a listener.
- */
- if (ifp->if_bpf)
- bpf_mtap(ifp->if_bpf, m);
-#endif
-
- ifp->if_ipackets++;
-
- /* Pass the packet up. */
- (*ifp->if_input)(ifp, m);
- }
-
- sc->sc_rx_resp_cons = ringidx;
- sc->sc_rx->event = sc->sc_rx_resp_cons + 1;
-
- if (sc->sc_rx->resp_prod != ringidx)
- goto again;
-
- return 0;
-}
-
-static inline int
-get_bufarray_entry(union xennet_bufarray *a)
-{
- int idx;
-
- idx = a[0].xb_next;
- a[0].xb_next = a[idx].xb_next;
- return idx;
-}
-
-static inline void
-put_bufarray_entry(union xennet_bufarray *a, int idx)
-{
-
- a[idx].xb_next = a[0].xb_next;
- a[0].xb_next = idx;
-}
-
-static void
-network_tx_buf_gc(struct xennet_softc *sc)
-{
- struct ifnet *ifp = &sc->sc_ethercom.ec_if;
- NETIF_RING_IDX idx, prod;
-
- do {
- prod = sc->sc_tx->resp_prod;
-
- for (idx = sc->sc_tx_resp_cons; idx != prod; idx++) {
- DPRINTFN(XEDB_EVENT, ("tx event at pos %d, status: "
- "%d, id: %d, mbuf %p, buf %p\n", idx,
-
sc->sc_tx->ring[MASK_NETIF_TX_IDX(idx)].resp.status,
-
sc->sc_tx->ring[MASK_NETIF_TX_IDX(idx)].resp.id,
-
sc->sc_tx_bufa[sc->sc_tx->ring[MASK_NETIF_TX_IDX(idx)].resp.id].xb_tx.xbtx_m,
-
mtod(sc->sc_tx_bufa[sc->sc_tx->ring[MASK_NETIF_TX_IDX(idx)].resp.id].xb_tx.xbtx_m,
void *)));
-
m_freem(sc->sc_tx_bufa[sc->sc_tx->ring[MASK_NETIF_TX_IDX(idx)].resp.id].xb_tx.xbtx_m);
- put_bufarray_entry(sc->sc_tx_bufa,
- sc->sc_tx->ring[MASK_NETIF_TX_IDX(idx)].resp.id);
- sc->sc_tx_entries--; /* atomic */
- }
-
- sc->sc_tx_resp_cons = prod;
-
- /*
- * Set a new event, then check for race with update of
- * tx_cons.
- */
- sc->sc_tx->event = /* atomic */
- prod + (sc->sc_tx_entries >> 1) + 1;
- __insn_barrier();
- } while (prod != sc->sc_tx->resp_prod);
-
- if (sc->sc_tx->resp_prod == sc->sc_tx->req_prod)
- ifp->if_timer = 0;
- /* KDASSERT(sc->sc_net_idx->tx_req_prod == */
- /* TX_RING_ADD(sc->sc_net_idx->tx_resp_prod, sc->sc_tx_entries)); */
-}
-
-static void
-network_alloc_rx_buffers(struct xennet_softc *sc)
-{
- vaddr_t rxpages, va;
- paddr_t pa;
- struct vm_page *pg;
- int id, nr_pfns;
- NETIF_RING_IDX ringidx;
- int s;
-
- ringidx = sc->sc_rx->req_prod;
- if ((ringidx - sc->sc_rx_resp_cons) > (RX_MAX_ENTRIES / 2))
- return;
-
- nr_pfns = 0;
-
- rxpages = uvm_km_valloc_align(kernel_map, RX_ENTRIES * PAGE_SIZE,
- PAGE_SIZE);
-
- s = splnet();
- for (va = rxpages; va < rxpages + RX_ENTRIES * PAGE_SIZE;
- va += PAGE_SIZE) {
- pg = uvm_pagealloc(NULL, 0, NULL, 0);
- if (pg == NULL)
- panic("network_alloc_rx_buffers: no pages");
- pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg),
- VM_PROT_READ | VM_PROT_WRITE);
-
- id = get_bufarray_entry(sc->sc_rx_bufa);
- sc->sc_rx_bufa[id].xb_rx.xbrx_va = va;
- sc->sc_rx_bufa[id].xb_rx.xbrx_sc = sc;
-
- pa = VM_PAGE_TO_PHYS(pg);
- DPRINTFN(XEDB_MEM, ("adding page va %p pa %p/%p "
- "ma %p/%p to rx_ring at %d with id %d\n", (void *)va,
- (void *)(VM_PAGE_TO_PHYS(pg) & PG_FRAME), (void
*)xpmap_mtop(PTE_BASE[x86_btop(va)]),
- (void *)(PTE_BASE[x86_btop(va)] & PG_FRAME),
- (void *)xpmap_ptom(VM_PAGE_TO_PHYS(pg)),
- ringidx, id));
- sc->sc_rx_bufa[id].xb_rx.xbrx_pa = pa;
- sc->sc_rx->ring[MASK_NETIF_RX_IDX(ringidx)].req.id = id;
-
- rx_pfn_array[nr_pfns] = xpmap_ptom(pa) >> PAGE_SHIFT;
-
- /* Remove this page from pseudo phys map before
- * passing back to Xen. */
- xpmap_phys_to_machine_mapping[(pa - XPMAP_OFFSET) >>
PAGE_SHIFT] =
- INVALID_P2M_ENTRY;
-
- rx_mcl[nr_pfns].op = __HYPERVISOR_update_va_mapping;
- rx_mcl[nr_pfns].args[0] = va;
- rx_mcl[nr_pfns].args[1] = 0;
- rx_mcl[nr_pfns].args[2] = 0;
-
- nr_pfns++;
-
- sc->sc_rx_bufs_to_notify++;
-
- ringidx++;
- if ((ringidx - sc->sc_rx_resp_cons) == RX_MAX_ENTRIES)
- break;
- }
-
- if (nr_pfns == 0) {
- splx(s);
- return;
- }
-
- /*
- * We may have allocated buffers which have entries
- * outstanding in the page update queue -- make sure we flush
- * those first!
- */
- xpq_flush_queue();
-
- /* After all PTEs have been zapped we blow away stale TLB entries. */
- rx_mcl[nr_pfns-1].args[2] = UVMF_TLB_FLUSH|UVMF_LOCAL;
-
- /* Give away a batch of pages. */
- rx_mcl[nr_pfns].op = __HYPERVISOR_dom_mem_op;
- rx_mcl[nr_pfns].args[0] = MEMOP_decrease_reservation;
- rx_mcl[nr_pfns].args[1] = (unsigned long)rx_pfn_array;
- rx_mcl[nr_pfns].args[2] = (unsigned long)nr_pfns;
- rx_mcl[nr_pfns].args[3] = 0;
- rx_mcl[nr_pfns].args[4] = DOMID_SELF;
-
- /* Zap PTEs and give away pages in one big multicall. */
- (void)HYPERVISOR_multicall(rx_mcl, nr_pfns+1);
-
- /* Check return status of HYPERVISOR_dom_mem_op(). */
- if (rx_mcl[nr_pfns].result != nr_pfns)
- panic("Unable to reduce memory reservation\n");
-
- /* Above is a suitable barrier to ensure backend will see requests. */
- sc->sc_rx->req_prod = ringidx;
-
- splx(s);
-
-}
-
-static void
-network_alloc_tx_buffers(struct xennet_softc *sc)
-{
- vaddr_t txpages, va;
- struct vm_page *pg;
- struct xennet_txbuf *txbuf;
- int i;
-
- txpages = uvm_km_valloc_align(kernel_map,
- (TX_ENTRIES / TXBUF_PER_PAGE) * PAGE_SIZE, PAGE_SIZE);
- for (va = txpages;
- va < txpages + (TX_ENTRIES / TXBUF_PER_PAGE) * PAGE_SIZE;
- va += PAGE_SIZE) {
- pg = uvm_pagealloc(NULL, 0, NULL, 0);
- if (pg == NULL)
- panic("network_alloc_tx_buffers: no pages");
- pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg),
- VM_PROT_READ | VM_PROT_WRITE);
-
- for (i = 0; i < TXBUF_PER_PAGE; i++) {
- txbuf = (struct xennet_txbuf *)
- (va + i * (PAGE_SIZE / TXBUF_PER_PAGE));
- txbuf->xt_sc = sc;
- txbuf->xt_pa = VM_PAGE_TO_PHYS(pg) +
- i * (PAGE_SIZE / TXBUF_PER_PAGE) +
- sizeof(struct xennet_txbuf);
- SLIST_INSERT_HEAD(&sc->sc_tx_bufs, txbuf, xt_next);
- }
- }
-}
-
-/*
- * Called at splnet.
- */
-void
-xennet_start(struct ifnet *ifp)
-{
- struct xennet_softc *sc = ifp->if_softc;
- struct mbuf *m, *new_m;
- struct xennet_txbuf *txbuf;
- netif_tx_request_t *txreq;
- NETIF_RING_IDX idx;
- paddr_t pa;
- int bufid;
-
- DPRINTFN(XEDB_FOLLOW, ("%s: xennet_start()\n", sc->sc_dev.dv_xname));
-
-#ifdef DIAGNOSTIC
- IFQ_POLL(&ifp->if_snd, m);
- if (m == 0)
- panic("%s: No packet to start", sc->sc_dev.dv_xname);
-#endif
-
-#if NRND > 0
- rnd_add_uint32(&sc->sc_rnd_source, sc->sc_tx->req_prod);
-#endif
-
- if ((ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING)
- return;
-
- idx = sc->sc_tx->req_prod;
- while (/*CONSTCOND*/1) {
-
- IFQ_POLL(&ifp->if_snd, m);
- if (m == NULL)
- break;
-
- switch (m->m_flags & (M_EXT|M_EXT_CLUSTER)) {
- case M_EXT|M_EXT_CLUSTER:
- pa = m->m_ext.ext_paddr +
- (m->m_data - m->m_ext.ext_buf);
- break;
- default:
- case 0:
- pa = m->m_paddr + M_BUFOFFSET(m) +
- (m->m_data - M_BUFADDR(m));
- break;
- }
-
- if (m->m_pkthdr.len != m->m_len ||
- (pa ^ (pa + m->m_pkthdr.len)) & PG_FRAME) {
- txbuf = SLIST_FIRST(&sc->sc_tx_bufs);
- if (txbuf == NULL) {
- // printf("xennet: no tx bufs\n");
- break;
- }
-
- MGETHDR(new_m, M_DONTWAIT, MT_DATA);
- if (new_m == NULL) {
- printf("xennet: no mbuf\n");
- break;
- }
-
- SLIST_REMOVE_HEAD(&sc->sc_tx_bufs, xt_next);
- IFQ_DEQUEUE(&ifp->if_snd, m);
-
- KASSERT(m->m_flags & M_PKTHDR);
- M_COPY_PKTHDR(new_m, m);
- m_copydata(m, 0, m->m_pkthdr.len, txbuf->xt_buf);
- MEXTADD(new_m, txbuf->xt_buf, m->m_pkthdr.len,
- M_DEVBUF, xennet_tx_mbuf_free, txbuf);
- new_m->m_ext.ext_paddr = txbuf->xt_pa;
- new_m->m_len = new_m->m_pkthdr.len = m->m_pkthdr.len;
-
- m_freem(m);
- m = new_m;
-
- pa = m->m_ext.ext_paddr +
- (m->m_data - m->m_ext.ext_buf);
- } else
- IFQ_DEQUEUE(&ifp->if_snd, m);
-
- bufid = get_bufarray_entry(sc->sc_tx_bufa);
- sc->sc_tx_bufa[bufid].xb_tx.xbtx_m = m;
-
- DPRINTFN(XEDB_MBUF, ("xennet_start id %d, mbuf %p, buf %p/%p, "
- "size %d\n", bufid, m, mtod(m, void *),
- (void *)pa, m->m_pkthdr.len));
-#ifdef XENNET_DEBUG_DUMP
- xennet_hex_dump(mtod(m, u_char *), m->m_pkthdr.len, "s", bufid);
-#endif
-
- txreq = &sc->sc_tx->ring[MASK_NETIF_TX_IDX(idx)].req;
- txreq->id = bufid;
- txreq->addr = xpmap_ptom(pa);
- txreq->size = m->m_pkthdr.len;
-
- __insn_barrier();
- idx++;
- sc->sc_tx->req_prod = idx;
-
- sc->sc_tx_entries++; /* XXX atomic */
-
-#ifdef XENNET_DEBUG
- DPRINTFN(XEDB_MEM, ("packet addr %p/%p, physical %p/%p, "
- "m_paddr %p, len %d/%d\n", M_BUFADDR(m), mtod(m, void *),
- (void *)*kvtopte(mtod(m, vaddr_t)),
- (void *)xpmap_mtop(*kvtopte(mtod(m, vaddr_t))),
- (void *)m->m_paddr, m->m_pkthdr.len, m->m_len));
-#endif
-
-#if NBPFILTER > 0
- /*
- * Pass packet to bpf if there is a listener.
- */
- if (ifp->if_bpf)
- bpf_mtap(ifp->if_bpf, m);
-#endif
- }
-
- ifp->if_flags &= ~IFF_OACTIVE;
-
- network_tx_buf_gc(sc);
-
- __insn_barrier();
- if (sc->sc_tx->resp_prod != idx)
- hypervisor_notify_via_evtchn(sc->sc_evtchn);
-
- ifp->if_timer = 5;
-
- ifp->if_opackets++;
-
- DPRINTFN(XEDB_FOLLOW, ("%s: xennet_start() done\n",
- sc->sc_dev.dv_xname));
-}
-
-int
-xennet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
-{
- struct xennet_softc *sc = ifp->if_softc;
- struct ifaddr *ifa = (struct ifaddr *)data;
-#ifdef mediacode
- struct ifreq *ifr = (struct ifreq *)data;
-#endif
- int s, error = 0;
-
- s = splnet();
-
- DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl()\n", sc->sc_dev.dv_xname));
-
- switch(cmd) {
- case SIOCSIFADDR:
- DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl() SIOCSIFADDR\n",
- sc->sc_dev.dv_xname));
- ifp->if_flags |= IFF_UP;
- switch (ifa->ifa_addr->sa_family) {
-#ifdef INET
- case AF_INET:
- xennet_init(sc);
- arp_ifinit(ifp, ifa);
- break;
-#endif
- default:
- xennet_init(sc);
- break;
- }
- break;
-
- case SIOCSIFFLAGS:
- DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl() SIOCSIFFLAGS\n",
- sc->sc_dev.dv_xname));
- break;
-
- case SIOCADDMULTI:
- case SIOCDELMULTI:
- DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl() SIOC*MULTI\n",
- sc->sc_dev.dv_xname));
- break;
-
-#ifdef mediacode
- case SIOCGIFMEDIA:
- case SIOCSIFMEDIA:
- DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl() SIOC*IFMEDIA\n",
- sc->sc_dev.dv_xname));
- error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
- break;
-#endif
-
- default:
- DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl(0x%lx) unknown cmd\n",
- sc->sc_dev.dv_xname, cmd));
- error = EINVAL;
- break;
- }
-
- splx(s);
-
- DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl() returning %d\n",
- sc->sc_dev.dv_xname, error));
-
- return error;
-}
-
-void
-xennet_watchdog(struct ifnet *ifp)
-{
-
- panic("xennet_watchdog\n");
-}
-
-void
-xennet_init(struct xennet_softc *sc)
-{
- struct ifnet *ifp = &sc->sc_ethercom.ec_if;
-
- DPRINTFN(XEDB_FOLLOW, ("%s: xennet_init()\n", sc->sc_dev.dv_xname));
-
- if (ifp->if_flags & IFF_UP) {
- if ((ifp->if_flags & IFF_RUNNING) == 0)
- xennet_reset(sc);
-
- ifp->if_flags |= IFF_RUNNING;
- ifp->if_flags &= ~IFF_OACTIVE;
- ifp->if_timer = 0;
- } else {
- ifp->if_flags &= ~IFF_RUNNING;
- xennet_reset(sc);
- }
-}
-
-void
-xennet_reset(struct xennet_softc *sc)
-{
-
- DPRINTFN(XEDB_FOLLOW, ("%s: xennet_reset()\n", sc->sc_dev.dv_xname));
-}
-
-#ifdef mediacode
-/*
- * Media change callback.
- */
-static int
-xennet_mediachange(struct ifnet *ifp)
-{
- struct xennet_softc *sc = ifp->if_softc;
-
- switch IFM_SUBTYPE(sc->sc_media.ifm_media) {
- case IFM_AUTO:
- break;
- default:
- return (1);
- break;
- }
-
- return (0);
-}
-
-/*
- * Media status callback.
- */
-static void
-xennet_mediastatus(struct ifnet *ifp, struct ifmediareq *ifmr)
-{
- struct xennet_softc *sc = ifp->if_softc;
-
- if (IFM_SUBTYPE(ifmr->ifm_active) == IFM_AUTO)
- ifmr->ifm_active = sc->sc_media.ifm_cur->ifm_data;
-
- ifmr->ifm_status &= ~IFM_AVALID;
-}
-#endif
-
-int
-xennet_bootstatic_callback(struct nfs_diskless *nd)
-{
- struct ifnet *ifp = nd->nd_ifp;
- struct xennet_softc *sc = (struct xennet_softc *)ifp->if_softc;
- union xen_cmdline_parseinfo xcp;
- struct sockaddr_in *sin;
-
- memset(&xcp, 0, sizeof(xcp.xcp_netinfo));
- xcp.xcp_netinfo.xi_ifno = sc->sc_ifno;
- xcp.xcp_netinfo.xi_root = nd->nd_root.ndm_host;
- xen_parse_cmdline(XEN_PARSE_NETINFO, &xcp);
-
- nd->nd_myip.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[0]);
- nd->nd_gwip.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[2]);
- nd->nd_mask.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[3]);
-
- sin = (struct sockaddr_in *) &nd->nd_root.ndm_saddr;
- memset((caddr_t)sin, 0, sizeof(*sin));
- sin->sin_len = sizeof(*sin);
- sin->sin_family = AF_INET;
- sin->sin_addr.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[1]);
-
- return (NFS_BOOTSTATIC_HAS_MYIP|NFS_BOOTSTATIC_HAS_GWIP|
- NFS_BOOTSTATIC_HAS_MASK|NFS_BOOTSTATIC_HAS_SERVADDR|
- NFS_BOOTSTATIC_HAS_SERVER);
-}
-
-
-#ifdef XENNET_DEBUG_DUMP
-#define XCHR(x) "0123456789abcdef"[(x) & 0xf]
-static void
-xennet_hex_dump(unsigned char *pkt, size_t len, char *type, int id)
-{
- size_t i, j;
-
- printf("pkt %p len %d/%x type %s id %d\n", pkt, len, len, type, id);
- printf("00000000 ");
- for(i=0; i<len; i++) {
- printf("%c%c ", XCHR(pkt[i]>>4), XCHR(pkt[i]));
- if ((i+1) % 16 == 8)
- printf(" ");
- if ((i+1) % 16 == 0) {
- printf(" %c", '|');
- for(j=0; j<16; j++)
- printf("%c", pkt[i-15+j]>=32 &&
- pkt[i-15+j]<127?pkt[i-15+j]:'.');
- printf("%c\n%c%c%c%c%c%c%c%c ", '|',
- XCHR((i+1)>>28), XCHR((i+1)>>24),
- XCHR((i+1)>>20), XCHR((i+1)>>16),
- XCHR((i+1)>>12), XCHR((i+1)>>8),
- XCHR((i+1)>>4), XCHR(i+1));
- }
- }
- printf("\n");
-}
-#undef XCHR
-#endif
diff -r 64cd054aa143 -r 0255f48b757f
netbsd-2.0-xen-sparse/sys/arch/xen/xen/xbd.c
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/xen/xbd.c Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,1689 +0,0 @@
-/* $NetBSD: xbd.c,v 1.9.2.1 2004/05/22 15:59:11 he Exp $ */
-
-/*
- *
- * Copyright (c) 2004 Christian Limpach.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Christian Limpach.
- * 4. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xbd.c,v 1.9.2.1 2004/05/22 15:59:11 he Exp $");
-
-#include "xbd.h"
-#include "rnd.h"
-
-#include <sys/types.h>
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/proc.h>
-#include <sys/errno.h>
-#include <sys/buf.h>
-#include <sys/malloc.h>
-#include <sys/pool.h>
-#include <sys/ioctl.h>
-#include <sys/device.h>
-#include <sys/disk.h>
-#include <sys/disklabel.h>
-#include <sys/fcntl.h>
-#include <sys/vnode.h>
-#include <sys/lock.h>
-#include <sys/conf.h>
-#include <sys/queue.h>
-#include <sys/stat.h>
-#include <sys/sysctl.h>
-#include <sys/kernel.h>
-#include <sys/kthread.h>
-
-#include <uvm/uvm.h>
-
-#if NRND > 0
-#include <sys/rnd.h>
-#endif
-
-#include <dev/dkvar.h>
-#include <machine/xbdvar.h>
-
-#include <machine/xen.h>
-#include <machine/hypervisor.h>
-#include <machine/evtchn.h>
-#include <machine/ctrl_if.h>
-
-
-static void control_send(blkif_request_t *, blkif_response_t *);
-static void send_interface_connect(void);
-
-static void xbd_attach(struct device *, struct device *, void *);
-static int xbd_detach(struct device *, int);
-
-#if NXBD > 0
-int xbd_match(struct device *, struct cfdata *, void *);
-CFATTACH_DECL(xbd, sizeof(struct xbd_softc),
- xbd_match, xbd_attach, xbd_detach, NULL);
-
-extern struct cfdriver xbd_cd;
-#endif
-
-#if NWD > 0
-int xbd_wd_match(struct device *, struct cfdata *, void *);
-CFATTACH_DECL(wd, sizeof(struct xbd_softc),
- xbd_wd_match, xbd_attach, xbd_detach, NULL);
-
-extern struct cfdriver wd_cd;
-#endif
-
-#if NSD > 0
-int xbd_sd_match(struct device *, struct cfdata *, void *);
-CFATTACH_DECL(sd, sizeof(struct xbd_softc),
- xbd_sd_match, xbd_attach, xbd_detach, NULL);
-
-extern struct cfdriver sd_cd;
-#endif
-
-#if NCD > 0
-int xbd_cd_match(struct device *, struct cfdata *, void *);
-CFATTACH_DECL(cd, sizeof(struct xbd_softc),
- xbd_cd_match, xbd_attach, xbd_detach, NULL);
-
-extern struct cfdriver cd_cd;
-#endif
-
-
-dev_type_open(xbdopen);
-dev_type_close(xbdclose);
-dev_type_read(xbdread);
-dev_type_write(xbdwrite);
-dev_type_ioctl(xbdioctl);
-dev_type_ioctl(xbdioctl_cdev);
-dev_type_strategy(xbdstrategy);
-dev_type_dump(xbddump);
-dev_type_size(xbdsize);
-
-#if NXBD > 0
-const struct bdevsw xbd_bdevsw = {
- xbdopen, xbdclose, xbdstrategy, xbdioctl,
- xbddump, xbdsize, D_DISK
-};
-
-const struct cdevsw xbd_cdevsw = {
- xbdopen, xbdclose, xbdread, xbdwrite, xbdioctl_cdev,
- nostop, notty, nopoll, nommap, nokqfilter, D_DISK
-};
-
-static dev_t xbd_major;
-#endif
-
-#if NWD > 0
-const struct bdevsw wd_bdevsw = {
- xbdopen, xbdclose, xbdstrategy, xbdioctl,
- xbddump, xbdsize, D_DISK
-};
-
-const struct cdevsw wd_cdevsw = {
- xbdopen, xbdclose, xbdread, xbdwrite, xbdioctl_cdev,
- nostop, notty, nopoll, nommap, nokqfilter, D_DISK
-};
-
-static dev_t xbd_wd_major;
-static dev_t xbd_wd_cdev_major;
-#endif
-
-#if NSD > 0
-const struct bdevsw sd_bdevsw = {
- xbdopen, xbdclose, xbdstrategy, xbdioctl,
- xbddump, xbdsize, D_DISK
-};
-
-const struct cdevsw sd_cdevsw = {
- xbdopen, xbdclose, xbdread, xbdwrite, xbdioctl_cdev,
- nostop, notty, nopoll, nommap, nokqfilter, D_DISK
-};
-
-static dev_t xbd_sd_major;
-static dev_t xbd_sd_cdev_major;
-#endif
-
-#if NCD > 0
-const struct bdevsw cd_bdevsw = {
- xbdopen, xbdclose, xbdstrategy, xbdioctl,
- xbddump, xbdsize, D_DISK
-};
-
-const struct cdevsw cd_cdevsw = {
- xbdopen, xbdclose, xbdread, xbdwrite, xbdioctl_cdev,
- nostop, notty, nopoll, nommap, nokqfilter, D_DISK
-};
-
-static dev_t xbd_cd_major;
-static dev_t xbd_cd_cdev_major;
-#endif
-
-
-static int xbdstart(struct dk_softc *, struct buf *);
-static int xbd_response_handler(void *);
-#if 0
-static void xbd_update_create_kthread(void *);
-static void xbd_update_kthread(void *);
-static int xbd_update_handler(void *);
-#endif
-
-static int xbdinit(struct xbd_softc *, vdisk_t *, struct dk_intf *);
-
-/* Pseudo-disk Interface */
-static struct dk_intf dkintf_esdi = {
- DTYPE_ESDI,
- "Xen Virtual ESDI",
- xbdopen,
- xbdclose,
- xbdstrategy,
- xbdstart,
-};
-#if NSD > 0
-static struct dk_intf dkintf_scsi = {
- DTYPE_SCSI,
- "Xen Virtual SCSI",
- xbdopen,
- xbdclose,
- xbdstrategy,
- xbdstart,
-};
-#endif
-
-#if NXBD > 0
-static struct xbd_attach_args xbd_ata = {
- .xa_device = "xbd",
- .xa_dkintf = &dkintf_esdi,
-};
-#endif
-
-#if NWD > 0
-static struct xbd_attach_args wd_ata = {
- .xa_device = "wd",
- .xa_dkintf = &dkintf_esdi,
-};
-#endif
-
-#if NSD > 0
-static struct xbd_attach_args sd_ata = {
- .xa_device = "sd",
- .xa_dkintf = &dkintf_scsi,
-};
-#endif
-
-#if NCD > 0
-static struct xbd_attach_args cd_ata = {
- .xa_device = "cd",
- .xa_dkintf = &dkintf_esdi,
-};
-#endif
-
-static struct sysctlnode *diskcookies;
-
-
-#if defined(XBDDEBUG) && !defined(DEBUG)
-#define DEBUG
-#endif
-
-#ifdef DEBUG
-int xbddebug = 0;
-
-#define XBDB_FOLLOW 0x1
-#define XBDB_IO 0x2
-#define XBDB_SETUP 0x4
-#define XBDB_HOTPLUG 0x8
-
-#define IFDEBUG(x,y) if (xbddebug & (x)) y
-#define DPRINTF(x,y) IFDEBUG(x, printf y)
-#define DPRINTF_FOLLOW(y) DPRINTF(XBDB_FOLLOW, y)
-#define DEBUG_MARK_UNUSED(_xr) (_xr)->xr_sc = (void *)0xdeadbeef
-
-struct xbdreq *xbd_allxr;
-#else
-#define IFDEBUG(x,y)
-#define DPRINTF(x,y)
-#define DPRINTF_FOLLOW(y)
-#define DEBUG_MARK_UNUSED(_xr)
-#endif
-
-#ifdef DIAGNOSTIC
-#define DIAGPANIC(x) panic x
-#define DIAGCONDPANIC(x,y) if (x) panic y
-#else
-#define DIAGPANIC(x)
-#define DIAGCONDPANIC(x,y)
-#endif
-
-
-struct xbdreq {
- union {
- SLIST_ENTRY(xbdreq) _unused; /* ptr. to next free xbdreq */
- SIMPLEQ_ENTRY(xbdreq) _suspended;
- /* link when on suspended queue. */
- } _link;
- struct xbdreq *xr_parent; /* ptr. to parent xbdreq */
- struct buf *xr_bp; /* ptr. to original I/O buf */
- daddr_t xr_bn; /* block no. to process */
- long xr_bqueue; /* bytes left to queue */
- long xr_bdone; /* bytes left */
- vaddr_t xr_data; /* ptr. to data to be proc. */
- vaddr_t xr_aligned; /* ptr. to aligned data */
- long xr_breq; /* bytes in this req. */
- struct xbd_softc *xr_sc; /* ptr. to xbd softc */
-};
-#define xr_unused _link._unused
-#define xr_suspended _link._suspended
-
-SLIST_HEAD(,xbdreq) xbdreqs =
- SLIST_HEAD_INITIALIZER(xbdreqs);
-static SIMPLEQ_HEAD(, xbdreq) xbdr_suspended =
- SIMPLEQ_HEAD_INITIALIZER(xbdr_suspended);
-
-#define CANGET_XBDREQ() (!SLIST_EMPTY(&xbdreqs))
-
-#define GET_XBDREQ(_xr) do { \
- (_xr) = SLIST_FIRST(&xbdreqs); \
- if (__predict_true(_xr)) \
- SLIST_REMOVE_HEAD(&xbdreqs, xr_unused); \
-} while (/*CONSTCOND*/0)
-
-#define PUT_XBDREQ(_xr) do { \
- DEBUG_MARK_UNUSED(_xr); \
- SLIST_INSERT_HEAD(&xbdreqs, _xr, xr_unused); \
-} while (/*CONSTCOND*/0)
-
-static struct bufq_state bufq;
-static int bufq_users = 0;
-
-#define XEN_MAJOR(_dev) ((_dev) >> 8)
-#define XEN_MINOR(_dev) ((_dev) & 0xff)
-
-#define XEN_SCSI_DISK0_MAJOR 8
-#define XEN_SCSI_DISK1_MAJOR 65
-#define XEN_SCSI_DISK2_MAJOR 66
-#define XEN_SCSI_DISK3_MAJOR 67
-#define XEN_SCSI_DISK4_MAJOR 68
-#define XEN_SCSI_DISK5_MAJOR 69
-#define XEN_SCSI_DISK6_MAJOR 70
-#define XEN_SCSI_DISK7_MAJOR 71
-#define XEN_SCSI_DISK8_MAJOR 128
-#define XEN_SCSI_DISK9_MAJOR 129
-#define XEN_SCSI_DISK10_MAJOR 130
-#define XEN_SCSI_DISK11_MAJOR 131
-#define XEN_SCSI_DISK12_MAJOR 132
-#define XEN_SCSI_DISK13_MAJOR 133
-#define XEN_SCSI_DISK14_MAJOR 134
-#define XEN_SCSI_DISK15_MAJOR 135
-#define XEN_SCSI_CDROM_MAJOR 11
-
-#define XEN_IDE0_MAJOR 3
-#define XEN_IDE1_MAJOR 22
-#define XEN_IDE2_MAJOR 33
-#define XEN_IDE3_MAJOR 34
-#define XEN_IDE4_MAJOR 56
-#define XEN_IDE5_MAJOR 57
-#define XEN_IDE6_MAJOR 88
-#define XEN_IDE7_MAJOR 89
-#define XEN_IDE8_MAJOR 90
-#define XEN_IDE9_MAJOR 91
-
-#define XEN_BSHIFT 9 /* log2(XEN_BSIZE) */
-#define XEN_BSIZE (1 << XEN_BSHIFT)
-
-#define MAX_VBDS 64
-static int nr_vbds;
-static vdisk_t *vbd_info;
-
-static blkif_ring_t *blk_ring = NULL;
-static BLKIF_RING_IDX resp_cons; /* Response consumer for comms ring. */
-static BLKIF_RING_IDX req_prod; /* Private request producer. */
-static BLKIF_RING_IDX last_req_prod; /* Request producer at last trap. */
-
-#define STATE_CLOSED 0
-#define STATE_DISCONNECTED 1
-#define STATE_CONNECTED 2
-static unsigned int state = STATE_CLOSED;
-static unsigned int blkif_evtchn = 0;
-static unsigned int blkif_irq = 0;
-static unsigned int blkif_handle = 0;
-
-static int blkif_control_rsp_valid = 0;
-static blkif_response_t blkif_control_rsp;
-
-/** Network interface info. */
-struct xbd_ctrl {
-
- cfprint_t xc_cfprint;
- struct device *xc_parent;
-};
-
-static struct xbd_ctrl blkctrl;
-
-#define XBDUNIT(x) DISKUNIT(x)
-#define GETXBD_SOFTC(_xs, x) if (!((_xs) = getxbd_softc(x))) return ENXIO
-#define GETXBD_SOFTC_CDEV(_xs, x) do { \
- dev_t bx = devsw_chr2blk((x)); \
- if (bx == NODEV) \
- return ENXIO; \
- if (!((_xs) = getxbd_softc(bx))) \
- return ENXIO; \
-} while (/*CONSTCOND*/0)
-
-static struct xbd_softc *
-getxbd_softc(dev_t dev)
-{
- int unit = XBDUNIT(dev);
-
- DPRINTF_FOLLOW(("getxbd_softc(0x%x): major = %d unit = %d\n", dev,
- major(dev), unit));
-#if NXBD > 0
- if (major(dev) == xbd_major)
- return device_lookup(&xbd_cd, unit);
-#endif
-#if NWD > 0
- if (major(dev) == xbd_wd_major || major(dev) == xbd_wd_cdev_major)
- return device_lookup(&wd_cd, unit);
-#endif
-#if NSD > 0
- if (major(dev) == xbd_sd_major || major(dev) == xbd_sd_cdev_major)
- return device_lookup(&sd_cd, unit);
-#endif
-#if NCD > 0
- if (major(dev) == xbd_cd_major || major(dev) == xbd_cd_cdev_major)
- return device_lookup(&cd_cd, unit);
-#endif
- return NULL;
-}
-
-static int
-get_vbd_info(vdisk_t *disk_info)
-{
- vdisk_t *buf;
- int nr;
- blkif_request_t req;
- blkif_response_t rsp;
- paddr_t pa;
-
- buf = (vdisk_t *)uvm_km_kmemalloc1(kmem_map, NULL,
- PAGE_SIZE, PAGE_SIZE, UVM_UNKNOWN_OFFSET, 0);
- pmap_extract(pmap_kernel(), (vaddr_t)buf, &pa);
- /* Probe for disk information. */
- memset(&req, 0, sizeof(req));
- req.operation = BLKIF_OP_PROBE;
- req.nr_segments = 1;
- req.frame_and_sects[0] = xpmap_ptom_masked(pa) | 7;
-
- control_send(&req, &rsp);
- nr = rsp.status > MAX_VBDS ? MAX_VBDS : rsp.status;
-
- if (rsp.status < 0)
- printf("WARNING: Could not probe disks (%d)\n", rsp.status);
-
- memcpy(disk_info, buf, nr * sizeof(vdisk_t));
-
- uvm_km_free(kmem_map, (vaddr_t)buf, PAGE_SIZE);
-
- return nr;
-}
-
-static struct xbd_attach_args *
-get_xbda(vdisk_t *xd)
-{
-
- switch (XEN_MAJOR(xd->device)) {
-#if NSD > 0
- case XEN_SCSI_DISK0_MAJOR:
- case XEN_SCSI_DISK1_MAJOR ... XEN_SCSI_DISK7_MAJOR:
- case XEN_SCSI_DISK8_MAJOR ... XEN_SCSI_DISK15_MAJOR:
- if (xd->capacity == 0)
- return NULL;
- return &sd_ata;
- case XEN_SCSI_CDROM_MAJOR:
- return &cd_ata;
-#endif
-#if NWD > 0
- case XEN_IDE0_MAJOR:
- case XEN_IDE1_MAJOR:
- case XEN_IDE2_MAJOR:
- case XEN_IDE3_MAJOR:
- case XEN_IDE4_MAJOR:
- case XEN_IDE5_MAJOR:
- case XEN_IDE6_MAJOR:
- case XEN_IDE7_MAJOR:
- case XEN_IDE8_MAJOR:
- case XEN_IDE9_MAJOR:
- if (xd->info & VDISK_CDROM)
- return &cd_ata;
- if (xd->capacity == 0)
- return NULL;
- return &wd_ata;
-#endif
- default:
- if (xd->capacity == 0)
- return NULL;
- return &xbd_ata;
- }
- return NULL;
-}
-
-static void
-free_interface(void)
-{
-
- /* Prevent new requests being issued until we fix things up. */
- // simple_lock(&blkif_io_lock);
- // recovery = 1;
- state = STATE_DISCONNECTED;
- // simple_unlock(&blkif_io_lock);
-
- /* Free resources associated with old device channel. */
- if (blk_ring) {
- uvm_km_free(kmem_map, (vaddr_t)blk_ring, PAGE_SIZE);
- blk_ring = NULL;
- }
-
- if (blkif_irq) {
-#if 0
- free_irq(blkif_irq, NULL);
-#endif
- blkif_irq = 0;
- }
-
- if (blkif_evtchn) {
-#if 0
- unbind_evtchn_from_irq(blkif_evtchn);
-#endif
- blkif_evtchn = 0;
- }
-}
-
-static void
-close_interface(void){
-}
-
-static void
-disconnect_interface(void)
-{
-
- if (blk_ring == NULL)
- blk_ring = (blkif_ring_t *)uvm_km_kmemalloc1(kmem_map, NULL,
- PAGE_SIZE, PAGE_SIZE, UVM_UNKNOWN_OFFSET, 0);
- memset(blk_ring, 0, PAGE_SIZE);
- blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod =
- last_req_prod = 0;
- state = STATE_DISCONNECTED;
- send_interface_connect();
-}
-
-static void
-reset_interface(void)
-{
-
- printf("Recovering virtual block device driver\n");
- free_interface();
- disconnect_interface();
-}
-
-static void
-connect_interface(blkif_fe_interface_status_t *status)
-{
- // unsigned long flags;
- struct xbd_attach_args *xbda;
- vdisk_t *xd;
- int i;
-
- blkif_evtchn = status->evtchn;
- blkif_irq = bind_evtchn_to_irq(blkif_evtchn);
-
- event_set_handler(blkif_irq, &xbd_response_handler, NULL, IPL_BIO);
- hypervisor_enable_irq(blkif_irq);
-
- /* Transition to connected in case we need to do
- * a partition probe on a whole disk. */
- state = STATE_CONNECTED;
-
- /* Probe for discs attached to the interface. */
- // xlvbd_init();
- MALLOC(vbd_info, vdisk_t *, MAX_VBDS * sizeof(vdisk_t),
- M_DEVBUF, M_WAITOK);
- memset(vbd_info, 0, MAX_VBDS * sizeof(vdisk_t));
- nr_vbds = get_vbd_info(vbd_info);
- if (nr_vbds <= 0)
- goto out;
-
- for (i = 0; i < nr_vbds; i++) {
- xd = &vbd_info[i];
- xbda = get_xbda(xd);
- if (xbda) {
- xbda->xa_xd = xd;
- config_found(blkctrl.xc_parent, xbda,
- blkctrl.xc_cfprint);
- }
- }
-
-#if 0
- /* Kick pending requests. */
- save_and_cli(flags);
- // simple_lock(&blkif_io_lock);
- kick_pending_request_queues();
- // simple_unlock(&blkif_io_lock);
- restore_flags(flags);
-#endif
- return;
-
- out:
- FREE(vbd_info, M_DEVBUF);
- vbd_info = NULL;
- return;
-}
-
-static void
-unexpected(blkif_fe_interface_status_t *status)
-{
-
- printf("Unexpected blkif status %d in state %d\n",
- status->status, state);
-}
-
-#if 0
-static struct device *
-find_device(vdisk_t *xd)
-{
- struct device *dv;
- struct xbd_softc *xs = NULL;
-
- for (dv = alldevs.tqh_first; dv != NULL; dv = dv->dv_list.tqe_next) {
- if (dv->dv_cfattach == NULL ||
- dv->dv_cfattach->ca_attach != xbd_attach)
- continue;
- xs = (struct xbd_softc *)dv;
- if (xd == NULL || xs->sc_xd_device == xd->device)
- break;
- }
- return dv;
-}
-#endif
-
-static void
-blkif_status(blkif_fe_interface_status_t *status)
-{
-
- if (status->handle != blkif_handle) {
- printf("Invalid blkif: handle=%u", status->handle);
- return;
- }
-
- switch (status->status) {
- case BLKIF_INTERFACE_STATUS_CLOSED:
- switch (state) {
- case STATE_CLOSED:
- unexpected(status);
- break;
- case STATE_DISCONNECTED:
- case STATE_CONNECTED:
- unexpected(status);
- close_interface();
- break;
- }
- break;
-
- case BLKIF_INTERFACE_STATUS_DISCONNECTED:
- switch (state) {
- case STATE_CLOSED:
- disconnect_interface();
- break;
- case STATE_DISCONNECTED:
- case STATE_CONNECTED:
- unexpected(status);
- reset_interface();
- break;
- }
- break;
-
- case BLKIF_INTERFACE_STATUS_CONNECTED:
- switch (state) {
- case STATE_CLOSED:
- unexpected(status);
- disconnect_interface();
- connect_interface(status);
- break;
- case STATE_DISCONNECTED:
- connect_interface(status);
- break;
- case STATE_CONNECTED:
- unexpected(status);
- connect_interface(status);
- break;
- }
- break;
-
- case BLKIF_INTERFACE_STATUS_CHANGED:
- switch (state) {
- case STATE_CLOSED:
- case STATE_DISCONNECTED:
- unexpected(status);
- break;
- case STATE_CONNECTED:
-#if 0
- vbd_update();
-#endif
- break;
- }
- break;
-
- default:
- printf(" Invalid blkif status: %d\n", status->status);
- break;
- }
-}
-
-
-static void
-xbd_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
-{
- switch (msg->subtype) {
- case CMSG_BLKIF_FE_INTERFACE_STATUS:
- if (msg->length != sizeof(blkif_fe_interface_status_t))
- goto parse_error;
- blkif_status((blkif_fe_interface_status_t *)
- &msg->msg[0]);
- break;
- default:
- goto parse_error;
- }
-
- ctrl_if_send_response(msg);
- return;
-
- parse_error:
- msg->length = 0;
- ctrl_if_send_response(msg);
-}
-
-#if 0
-static void
-enable_update_events(struct device *self)
-{
-
- kthread_create(xbd_update_create_kthread, self);
- event_set_handler(_EVENT_VBD_UPD, &xbd_update_handler, self, IPL_BIO);
- hypervisor_enable_event(_EVENT_VBD_UPD);
-}
-#endif
-
-static void
-signal_requests_to_xen(void)
-{
-
- DPRINTF(XBDB_IO, ("signal_requests_to_xen: %x -> %x\n",
- blk_ring->req_prod, req_prod));
- blk_ring->req_prod = req_prod;
- last_req_prod = req_prod;
-
- hypervisor_notify_via_evtchn(blkif_evtchn);
- return;
-}
-
-static void
-control_send(blkif_request_t *req, blkif_response_t *rsp)
-{
- unsigned long flags;
- struct xbdreq *xr;
-
- retry:
- while ((req_prod - resp_cons) == BLKIF_RING_SIZE) {
- tsleep((caddr_t) &req_prod, PUSER | PCATCH,
- "blkfront", 0);
- }
-
- save_and_cli(flags);
- // simple_lock(&blkif_io_lock);
- if ((req_prod - resp_cons) == BLKIF_RING_SIZE) {
- // simple_unlock(&blkif_io_lock);
- restore_flags(flags);
- goto retry;
- }
-
- blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req = *req;
-
- GET_XBDREQ(xr);
- blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req.id = (unsigned long)xr;
- // rec_ring[id].id = (unsigned long) req;
-
- // translate_req_to_pfn( &rec_ring[id], req );
-
- req_prod++;
- signal_requests_to_xen();
-
- // simple_unlock(&blkif_io_lock);
- restore_flags(flags);
-
- while (!blkif_control_rsp_valid) {
- /* XXXcl: sleep/wakeup not ready yet - busy wait for now.
- * interrupts are still of, so we pick up the control
- * channel response on return from HYPERVISOR_yield().
- */
-#if 0
- tsleep((caddr_t)&blkif_control_rsp_valid, PUSER | PCATCH,
- "blkfront", 0);
-#else
- HYPERVISOR_yield();
-#endif
- }
-
- memcpy(rsp, &blkif_control_rsp, sizeof(*rsp));
- blkif_control_rsp_valid = 0;
-}
-
-/* Send a driver status notification to the domain controller. */
-static void
-send_driver_status(int ok)
-{
- ctrl_msg_t cmsg = {
- .type = CMSG_BLKIF_FE,
- .subtype = CMSG_BLKIF_FE_DRIVER_STATUS,
- .length = sizeof(blkif_fe_driver_status_t),
- };
- blkif_fe_driver_status_t *msg = (blkif_fe_driver_status_t *)cmsg.msg;
-
- msg->status = ok ? BLKIF_DRIVER_STATUS_UP : BLKIF_DRIVER_STATUS_DOWN;
-
- ctrl_if_send_message_block(&cmsg, NULL, 0, 0);
-}
-
-/* Tell the controller to bring up the interface. */
-static void
-send_interface_connect(void)
-{
- ctrl_msg_t cmsg = {
- .type = CMSG_BLKIF_FE,
- .subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT,
- .length = sizeof(blkif_fe_interface_connect_t),
- };
- blkif_fe_interface_connect_t *msg =
- (blkif_fe_interface_connect_t *)cmsg.msg;
- paddr_t pa;
-
- pmap_extract(pmap_kernel(), (vaddr_t)blk_ring, &pa);
-
- msg->handle = 0;
- msg->shmem_frame = xpmap_ptom_masked(pa) >> PAGE_SHIFT;
-
- ctrl_if_send_message_block(&cmsg, NULL, 0, 0);
-}
-
-static void
-setup_sysctl(void)
-{
- struct sysctlnode *pnode;
-
- sysctl_createv(NULL, 0, NULL, NULL,
- 0,
- CTLTYPE_NODE, "machdep", NULL,
- NULL, 0, NULL, 0,
- CTL_MACHDEP, CTL_EOL);
-
- sysctl_createv(NULL, 0, NULL, &pnode,
- 0,
- CTLTYPE_NODE, "domain0", NULL,
- NULL, 0, NULL, 0,
- CTL_MACHDEP, CTL_CREATE, CTL_EOL);
-
- if (pnode == NULL)
- return;
-
- sysctl_createv(NULL, 0, &pnode, &pnode,
- 0,
- CTLTYPE_NODE, "diskcookie", NULL,
- NULL, 0, NULL, 0,
- CTL_CREATE, CTL_EOL);
-
- if (pnode)
- diskcookies = pnode;
-}
-
-static int
-xbd_wait_for_interfaces(void)
-{
-
- while (state != STATE_CONNECTED)
- HYPERVISOR_yield();
- return 0;
-}
-
-int
-xbd_scan(struct device *self, struct xbd_attach_args *mainbus_xbda,
- cfprint_t print)
-{
- struct xbdreq *xr;
- int i;
-
- blkctrl.xc_parent = self;
- blkctrl.xc_cfprint = print;
-
- if (xen_start_info.flags & SIF_PRIVILEGED)
- setup_sysctl();
-
-#if NXBD > 0
- xbd_major = devsw_name2blk("xbd", NULL, 0);
-#endif
-#if NWD > 0
- xbd_wd_major = devsw_name2blk("wd", NULL, 0);
- /* XXX Also handle the cdev majors since stuff like
- * read_sector calls strategy on the cdev. This only works if
- * all the majors we care about are different.
- */
- xbd_wd_cdev_major = major(devsw_blk2chr(makedev(xbd_wd_major, 0)));
-#endif
-#if NSD > 0
- xbd_sd_major = devsw_name2blk("sd", NULL, 0);
- xbd_sd_cdev_major = major(devsw_blk2chr(makedev(xbd_sd_major, 0)));
-#endif
-#if NCD > 0
- xbd_cd_major = devsw_name2blk("cd", NULL, 0);
- xbd_cd_cdev_major = major(devsw_blk2chr(makedev(xbd_cd_major, 0)));
-#endif
-
- MALLOC(xr, struct xbdreq *, BLKIF_RING_SIZE * sizeof(struct xbdreq),
- M_DEVBUF, M_WAITOK | M_ZERO);
-#ifdef DEBUG
- xbd_allxr = xr;
-#endif
- for (i = 0; i < BLKIF_RING_SIZE - 1; i++)
- PUT_XBDREQ(&xr[i]);
-
- (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, xbd_ctrlif_rx,
- CALLBACK_IN_BLOCKING_CONTEXT);
-
- send_driver_status(1);
-
- return 0;
-}
-
-void
-xbd_scan_finish(struct device *parent)
-{
- int err;
-
- err = xbd_wait_for_interfaces();
- if (err)
- ctrl_if_unregister_receiver(CMSG_NETIF_FE, xbd_ctrlif_rx);
-}
-
-#if NXBD > 0
-int
-xbd_match(struct device *parent, struct cfdata *match, void *aux)
-{
- struct xbd_attach_args *xa = (struct xbd_attach_args *)aux;
-
- if (strcmp(xa->xa_device, "xbd") == 0)
- return 1;
- return 0;
-}
-#endif
-
-#if NWD > 0
-int
-xbd_wd_match(struct device *parent, struct cfdata *match, void *aux)
-{
- struct xbd_attach_args *xa = (struct xbd_attach_args *)aux;
-
- if (strcmp(xa->xa_device, "wd") == 0)
- return 1;
- return 0;
-}
-#endif
-
-#if NSD > 0
-int
-xbd_sd_match(struct device *parent, struct cfdata *match, void *aux)
-{
- struct xbd_attach_args *xa = (struct xbd_attach_args *)aux;
-
- if (strcmp(xa->xa_device, "sd") == 0)
- return 1;
- return 0;
-}
-#endif
-
-#if NCD > 0
-int
-xbd_cd_match(struct device *parent, struct cfdata *match, void *aux)
-{
- struct xbd_attach_args *xa = (struct xbd_attach_args *)aux;
-
- if (strcmp(xa->xa_device, "cd") == 0)
- return 1;
- return 0;
-}
-#endif
-
-static void
-xbd_attach(struct device *parent, struct device *self, void *aux)
-{
- struct xbd_attach_args *xbda = (struct xbd_attach_args *)aux;
- struct xbd_softc *xs = (struct xbd_softc *)self;
-
- aprint_normal(": Xen Virtual Block Device");
-
- simple_lock_init(&xs->sc_slock);
- dk_sc_init(&xs->sc_dksc, xs, xs->sc_dev.dv_xname);
- xbdinit(xs, xbda->xa_xd, xbda->xa_dkintf);
- if (diskcookies) {
- /* XXX beware that xs->sc_xd_device is a long */
- sysctl_createv(NULL, 0, &diskcookies, NULL,
- 0,
- CTLTYPE_INT, xs->sc_dev.dv_xname, NULL,
- NULL, 0, &xs->sc_xd_device, 0,
- CTL_CREATE, CTL_EOL);
- }
-
-#if NRND > 0
- rnd_attach_source(&xs->sc_rnd_source, xs->sc_dev.dv_xname,
- RND_TYPE_DISK, 0);
-#endif
-}
-
-static int
-xbd_detach(struct device *dv, int flags)
-{
- struct xbd_softc *xs = (struct xbd_softc *)dv;
-
- /*
- * Mark disk about to be removed (between now and when the xs
- * will be freed).
- */
- xs->sc_shutdown = 1;
-
- /* And give it some time to settle if it's busy. */
- if (xs->sc_dksc.sc_dkdev.dk_busy > 0)
- tsleep(&xs, PWAIT, "xbdetach", hz);
-
- /* Detach the disk. */
- disk_detach(&xs->sc_dksc.sc_dkdev);
-
- /* XXX decrement bufq_users and free? */
-
- /* XXX no need to remove sysctl nodes since they only exist
- * in domain0 and domain0's devices are never removed.
- */
-
- return 0;
-}
-
-int
-xbdopen(dev_t dev, int flags, int fmt, struct proc *p)
-{
- struct xbd_softc *xs;
-
- DPRINTF_FOLLOW(("xbdopen(0x%04x, %d)\n", dev, flags));
- switch (fmt) {
- case S_IFCHR:
- GETXBD_SOFTC_CDEV(xs, dev);
- break;
- case S_IFBLK:
- GETXBD_SOFTC(xs, dev);
- break;
- default:
- return ENXIO;
- }
- return dk_open(xs->sc_di, &xs->sc_dksc, dev, flags, fmt, p);
-}
-
-int
-xbdclose(dev_t dev, int flags, int fmt, struct proc *p)
-{
- struct xbd_softc *xs;
-
- DPRINTF_FOLLOW(("xbdclose(%d, %d)\n", dev, flags));
- switch (fmt) {
- case S_IFCHR:
- GETXBD_SOFTC_CDEV(xs, dev);
- break;
- case S_IFBLK:
- GETXBD_SOFTC(xs, dev);
- break;
- default:
- return ENXIO;
- }
- return dk_close(xs->sc_di, &xs->sc_dksc, dev, flags, fmt, p);
-}
-
-void
-xbdstrategy(struct buf *bp)
-{
- struct xbd_softc *xs = getxbd_softc(bp->b_dev);
-
- DPRINTF_FOLLOW(("xbdstrategy(%p): b_bcount = %ld\n", bp,
- (long)bp->b_bcount));
-
- if (xs == NULL || xs->sc_shutdown) {
- bp->b_flags |= B_ERROR;
- bp->b_error = EIO;
- biodone(bp);
- return;
- }
-
- dk_strategy(xs->sc_di, &xs->sc_dksc, bp);
- return;
-}
-
-int
-xbdsize(dev_t dev)
-{
- struct xbd_softc *xs = getxbd_softc(dev);
-
- DPRINTF_FOLLOW(("xbdsize(%d)\n", dev));
- if (xs == NULL || xs->sc_shutdown)
- return -1;
- return dk_size(xs->sc_di, &xs->sc_dksc, dev);
-}
-
-static void
-map_align(struct xbdreq *xr)
-{
- int s;
-
- s = splvm();
- xr->xr_aligned = uvm_km_kmemalloc1(kmem_map, NULL,
- xr->xr_bqueue, XEN_BSIZE, UVM_UNKNOWN_OFFSET,
- 0/* UVM_KMF_NOWAIT */);
- splx(s);
- DPRINTF(XBDB_IO, ("map_align(%p): bp %p addr %p align 0x%08lx "
- "size 0x%04lx\n", xr, xr->xr_bp, xr->xr_bp->b_data,
- xr->xr_aligned, xr->xr_bqueue));
- xr->xr_data = xr->xr_aligned;
- if ((xr->xr_bp->b_flags & B_READ) == 0)
- memcpy((void *)xr->xr_aligned, xr->xr_bp->b_data,
- xr->xr_bqueue);
-}
-
-static void
-unmap_align(struct xbdreq *xr)
-{
- int s;
-
- if (xr->xr_bp->b_flags & B_READ)
- memcpy(xr->xr_bp->b_data, (void *)xr->xr_aligned,
- xr->xr_bp->b_bcount);
- DPRINTF(XBDB_IO, ("unmap_align(%p): bp %p addr %p align 0x%08lx "
- "size 0x%04lx\n", xr, xr->xr_bp, xr->xr_bp->b_data,
- xr->xr_aligned, xr->xr_bp->b_bcount));
- s = splvm();
- uvm_km_free(kmem_map, xr->xr_aligned, xr->xr_bp->b_bcount);
- splx(s);
- xr->xr_aligned = (vaddr_t)0;
-}
-
-static void
-fill_ring(struct xbdreq *xr)
-{
- struct xbdreq *pxr = xr->xr_parent;
- paddr_t pa;
- unsigned long ma;
- vaddr_t addr, off;
- blkif_request_t *ring_req;
- int breq, nr_sectors, fsect, lsect;
-
- /* Fill out a communications ring structure. */
- ring_req = &blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req;
- ring_req->id = (unsigned long)xr;
- ring_req->operation = pxr->xr_bp->b_flags & B_READ ? BLKIF_OP_READ :
- BLKIF_OP_WRITE;
- ring_req->sector_number = pxr->xr_bn;
- ring_req->device = pxr->xr_sc->sc_xd_device;
-
- DPRINTF(XBDB_IO, ("fill_ring(%d): bp %p sector %llu pxr %p xr %p\n",
- MASK_BLKIF_IDX(req_prod), pxr->xr_bp,
- (unsigned long long)pxr->xr_bn,
- pxr, xr));
-
- xr->xr_breq = 0;
- ring_req->nr_segments = 0;
- addr = trunc_page(pxr->xr_data);
- off = pxr->xr_data - addr;
- while (pxr->xr_bqueue > 0) {
-#if 0
- pmap_extract(vm_map_pmap(&bp->b_proc->p_vmspace->vm_map),
- addr, &pa);
-#else
- pmap_extract(pmap_kernel(), addr, &pa);
-#endif
- ma = xpmap_ptom_masked(pa);
- DIAGCONDPANIC((ma & (XEN_BSIZE - 1)) != 0,
- ("xbd request ma not sector aligned"));
-
- if (pxr->xr_bqueue > PAGE_SIZE - off)
- breq = PAGE_SIZE - off;
- else
- breq = pxr->xr_bqueue;
-
- nr_sectors = breq >> XEN_BSHIFT;
- DIAGCONDPANIC(nr_sectors >= XEN_BSIZE,
- ("xbd request nr_sectors >= XEN_BSIZE"));
-
- fsect = off >> XEN_BSHIFT;
- lsect = fsect + nr_sectors - 1;
- DIAGCONDPANIC(fsect > 7, ("xbd request fsect > 7"));
- DIAGCONDPANIC(lsect > 7, ("xbd request lsect > 7"));
-
- DPRINTF(XBDB_IO, ("fill_ring(%d): va 0x%08lx pa 0x%08lx "
- "ma 0x%08lx, sectors %d, left %ld/%ld\n",
- MASK_BLKIF_IDX(req_prod), addr, pa, ma, nr_sectors,
- pxr->xr_bqueue >> XEN_BSHIFT, pxr->xr_bqueue));
-
- ring_req->frame_and_sects[ring_req->nr_segments++] =
- ma | (fsect<<3) | lsect;
- addr += PAGE_SIZE;
- pxr->xr_bqueue -= breq;
- pxr->xr_bn += nr_sectors;
- xr->xr_breq += breq;
- off = 0;
- if (ring_req->nr_segments == BLKIF_MAX_SEGMENTS_PER_REQUEST)
- break;
- }
- pxr->xr_data = addr;
-
- req_prod++;
-}
-
-static void
-xbdresume(void)
-{
- struct xbdreq *pxr, *xr;
- struct xbd_softc *xs;
- struct buf *bp;
-
- while ((pxr = SIMPLEQ_FIRST(&xbdr_suspended)) != NULL) {
- DPRINTF(XBDB_IO, ("xbdstart: resuming xbdreq %p for bp %p\n",
- pxr, pxr->xr_bp));
- bp = pxr->xr_bp;
- xs = getxbd_softc(bp->b_dev);
- if (xs == NULL || xs->sc_shutdown) {
- bp->b_flags |= B_ERROR;
- bp->b_error = EIO;
- }
- if (bp->b_flags & B_ERROR) {
- pxr->xr_bdone -= pxr->xr_bqueue;
- pxr->xr_bqueue = 0;
- if (pxr->xr_bdone == 0) {
- bp->b_resid = bp->b_bcount;
- if (pxr->xr_aligned)
- unmap_align(pxr);
- PUT_XBDREQ(pxr);
- if (xs) {
- disk_unbusy(&xs->sc_dksc.sc_dkdev,
- (bp->b_bcount - bp->b_resid),
- (bp->b_flags & B_READ));
-#if NRND > 0
- rnd_add_uint32(&xs->sc_rnd_source,
- bp->b_blkno);
-#endif
- }
- biodone(bp);
- }
- continue;
- }
- while (__predict_true(pxr->xr_bqueue > 0)) {
- GET_XBDREQ(xr);
- if (__predict_false(xr == NULL))
- goto out;
- xr->xr_parent = pxr;
- fill_ring(xr);
- }
- DPRINTF(XBDB_IO, ("xbdstart: resumed xbdreq %p for bp %p\n",
- pxr, bp));
- SIMPLEQ_REMOVE_HEAD(&xbdr_suspended, xr_suspended);
- }
-
- out:
- return;
-}
-
-static int
-xbdstart(struct dk_softc *dksc, struct buf *bp)
-{
- struct xbd_softc *xs;
- struct xbdreq *pxr, *xr;
- struct partition *pp;
- daddr_t bn;
- int ret, runqueue;
-
- DPRINTF_FOLLOW(("xbdstart(%p, %p)\n", dksc, bp));
-
- runqueue = 1;
- ret = -1;
-
- xs = getxbd_softc(bp->b_dev);
- if (xs == NULL || xs->sc_shutdown) {
- bp->b_flags |= B_ERROR;
- bp->b_error = EIO;
- biodone(bp);
- return 0;
- }
- dksc = &xs->sc_dksc;
-
- /* XXXrcd:
- * Translate partition relative blocks to absolute blocks,
- * this probably belongs (somehow) in dksubr.c, since it
- * is independant of the underlying code... This will require
- * that the interface be expanded slightly, though.
- */
- bn = bp->b_blkno;
- if (DISKPART(bp->b_dev) != RAW_PART) {
- pp = &xs->sc_dksc.sc_dkdev.dk_label->
- d_partitions[DISKPART(bp->b_dev)];
- bn += pp->p_offset;
- }
-
- DPRINTF(XBDB_IO, ("xbdstart: addr %p, sector %llu, "
- "count %ld [%s]\n", bp->b_data, (unsigned long long)bn,
- bp->b_bcount, bp->b_flags & B_READ ? "read" : "write"));
-
- GET_XBDREQ(pxr);
- if (__predict_false(pxr == NULL))
- goto out;
-
- disk_busy(&dksc->sc_dkdev); /* XXX: put in dksubr.c */
- /*
- * We have a request slot, return 0 to make dk_start remove
- * the bp from the work queue.
- */
- ret = 0;
-
- pxr->xr_bp = bp;
- pxr->xr_parent = pxr;
- pxr->xr_bn = bn;
- pxr->xr_bqueue = bp->b_bcount;
- pxr->xr_bdone = bp->b_bcount;
- pxr->xr_data = (vaddr_t)bp->b_data;
- pxr->xr_sc = xs;
-
- if (pxr->xr_data & (XEN_BSIZE - 1))
- map_align(pxr);
-
- fill_ring(pxr);
-
- while (__predict_false(pxr->xr_bqueue > 0)) {
- GET_XBDREQ(xr);
- if (__predict_false(xr == NULL))
- break;
- xr->xr_parent = pxr;
- fill_ring(xr);
- }
-
- if (__predict_false(pxr->xr_bqueue > 0)) {
- SIMPLEQ_INSERT_TAIL(&xbdr_suspended, pxr,
- xr_suspended);
- DPRINTF(XBDB_IO, ("xbdstart: suspended xbdreq %p "
- "for bp %p\n", pxr, bp));
- } else if (CANGET_XBDREQ() && BUFQ_PEEK(&bufq) != NULL) {
- /*
- * We have enough resources to start another bp and
- * there are additional bps on the queue, dk_start
- * will call us again and we'll run the queue then.
- */
- runqueue = 0;
- }
-
- out:
- if (runqueue && last_req_prod != req_prod)
- signal_requests_to_xen();
-
- return ret;
-}
-
-static int
-xbd_response_handler(void *arg)
-{
- struct buf *bp;
- struct xbd_softc *xs;
- blkif_response_t *ring_resp;
- struct xbdreq *pxr, *xr;
- BLKIF_RING_IDX i, rp;
-
- rp = blk_ring->resp_prod;
- __insn_barrier(); /* Ensure we see queued responses up to 'rp'. */
-
- for (i = resp_cons; i != rp; i++) {
- ring_resp = &blk_ring->ring[MASK_BLKIF_IDX(i)].resp;
- xr = (struct xbdreq *)ring_resp->id;
-
- switch (ring_resp->operation) {
- case BLKIF_OP_READ:
- case BLKIF_OP_WRITE:
- pxr = xr->xr_parent;
-
- DPRINTF(XBDB_IO, ("xbd_response_handler(%d): pxr %p "
- "xr %p bdone %04lx breq %04lx\n", i, pxr,
- xr, pxr->xr_bdone, xr->xr_breq));
- pxr->xr_bdone -= xr->xr_breq;
- DIAGCONDPANIC(pxr->xr_bdone < 0,
- ("xbd_response_handler: pxr->xr_bdone < 0"));
-
- if (__predict_false(ring_resp->status)) {
- pxr->xr_bp->b_flags |= B_ERROR;
- pxr->xr_bp->b_error = EIO;
- }
-
- if (xr != pxr) {
- PUT_XBDREQ(xr);
- if (!SIMPLEQ_EMPTY(&xbdr_suspended))
- xbdresume();
- }
-
- if (pxr->xr_bdone == 0) {
- bp = pxr->xr_bp;
- xs = getxbd_softc(bp->b_dev);
- if (xs == NULL) { /* don't fail bp if we're
shutdown */
- bp->b_flags |= B_ERROR;
- bp->b_error = EIO;
- }
- DPRINTF(XBDB_IO, ("xbd_response_handler(%d): "
- "completed bp %p\n", i, bp));
- if (bp->b_flags & B_ERROR)
- bp->b_resid = bp->b_bcount;
- else
- bp->b_resid = 0;
-
- if (pxr->xr_aligned)
- unmap_align(pxr);
-
- PUT_XBDREQ(pxr);
- if (xs) {
- disk_unbusy(&xs->sc_dksc.sc_dkdev,
- (bp->b_bcount - bp->b_resid),
- (bp->b_flags & B_READ));
-#if NRND > 0
- rnd_add_uint32(&xs->sc_rnd_source,
- bp->b_blkno);
-#endif
- }
- biodone(bp);
- if (!SIMPLEQ_EMPTY(&xbdr_suspended))
- xbdresume();
- /* XXX possible lockup if this was the only
- * active device and requests were held back in
- * the queue.
- */
- if (xs)
- dk_iodone(xs->sc_di, &xs->sc_dksc);
- }
- break;
- case BLKIF_OP_PROBE:
- memcpy(&blkif_control_rsp, ring_resp,
- sizeof(*ring_resp));
- blkif_control_rsp_valid = 1;
- wakeup((caddr_t)&blkif_control_rsp_valid);
- break;
- default:
- panic("unknown response");
- }
- }
- resp_cons = i;
- /* check if xbdresume queued any requests */
- if (last_req_prod != req_prod)
- signal_requests_to_xen();
- return 0;
-}
-
-#if 0
-static void
-xbd_update_create_kthread(void *arg)
-{
-
- kthread_create1(xbd_update_kthread, arg, NULL, "xbdupdate");
-}
-
-static void
-xbd_update_kthread(void *arg)
-{
- struct device *parent = arg;
- struct xbd_attach_args *xbda;
- struct device *dev;
- vdisk_t *xd;
- vdisk_t *vbd_info_update, *vbd_info_old;
- int i, j, new_nr_vbds;
- extern int hypervisor_print(void *, const char *);
-
- MALLOC(vbd_info_update, vdisk_t *, MAX_VBDS *
- sizeof(vdisk_t), M_DEVBUF, M_WAITOK);
-
- for (;;) {
- memset(vbd_info_update, 0, MAX_VBDS * sizeof(vdisk_t));
- new_nr_vbds = get_vbd_info(vbd_info_update);
-
- if (memcmp(vbd_info, vbd_info_update, MAX_VBDS *
- sizeof(vdisk_t)) == 0) {
- FREE(vbd_info_update, M_DEVBUF);
- tsleep(parent, PWAIT, "xbdupd", 0);
- MALLOC(vbd_info_update, vdisk_t *, MAX_VBDS *
- sizeof(vdisk_t), M_DEVBUF, M_WAITOK);
- continue;
- }
-
- j = 0;
- for (i = 0; i < new_nr_vbds; i++) {
- while (j < nr_vbds &&
- vbd_info[j].device < vbd_info_update[i].device) {
- DPRINTF(XBDB_HOTPLUG,
- ("delete device %x size %lx\n",
- vbd_info[j].device,
- vbd_info[j].capacity));
- xd = &vbd_info[j];
- dev = find_device(xd);
- if (dev)
- config_detach(dev, DETACH_FORCE);
- j++;
- }
- if (j < nr_vbds &&
- vbd_info[j].device == vbd_info_update[i].device) {
- DPRINTF(XBDB_HOTPLUG,
- ("update device %x size %lx size %lx\n",
- vbd_info_update[i].device,
- vbd_info[j].capacity,
- vbd_info_update[i].capacity));
- j++;
- } else {
- DPRINTF(XBDB_HOTPLUG,
- ("add device %x size %lx\n",
- vbd_info_update[i].device,
- vbd_info_update[i].capacity));
- xd = &vbd_info_update[i];
- xbda = get_xbda(xd);
- if (xbda) {
- xbda->xa_xd = xd;
- config_found(parent, xbda,
hypervisor_print);
- }
- }
- }
-
- while (j < nr_vbds) {
- DPRINTF(XBDB_HOTPLUG, ("delete device %x\n",
- vbd_info[j].device));
- xd = &vbd_info[j];
- dev = find_device(xd);
- if (dev)
- config_detach(dev, DETACH_FORCE);
- j++;
- }
-
- nr_vbds = new_nr_vbds;
-
- vbd_info_old = vbd_info;
- vbd_info = vbd_info_update;
- vbd_info_update = vbd_info_old;
- }
-}
-
-static int
-xbd_update_handler(void *arg)
-{
-
- wakeup(arg);
-
- return 0;
-}
-#endif
-
-/* XXX: we should probably put these into dksubr.c, mostly */
-int
-xbdread(dev_t dev, struct uio *uio, int flags)
-{
- struct xbd_softc *xs;
- struct dk_softc *dksc;
-
- DPRINTF_FOLLOW(("xbdread(%d, %p, %d)\n", dev, uio, flags));
- GETXBD_SOFTC_CDEV(xs, dev);
- dksc = &xs->sc_dksc;
- if ((dksc->sc_flags & DKF_INITED) == 0)
- return ENXIO;
- /* XXX see the comments about minphys in ccd.c */
- return physio(xbdstrategy, NULL, dev, B_READ, minphys, uio);
-}
-
-/* XXX: we should probably put these into dksubr.c, mostly */
-int
-xbdwrite(dev_t dev, struct uio *uio, int flags)
-{
- struct xbd_softc *xs;
- struct dk_softc *dksc;
-
- DPRINTF_FOLLOW(("xbdwrite(%d, %p, %d)\n", dev, uio, flags));
- GETXBD_SOFTC_CDEV(xs, dev);
- dksc = &xs->sc_dksc;
- if ((dksc->sc_flags & DKF_INITED) == 0)
- return ENXIO;
- /* XXX see the comments about minphys in ccd.c */
- return physio(xbdstrategy, NULL, dev, B_WRITE, minphys, uio);
-}
-
-int
-xbdioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
-{
- struct xbd_softc *xs;
- struct dk_softc *dksc;
- int ret;
-
- DPRINTF_FOLLOW(("xbdioctl(%d, %08lx, %p, %d, %p)\n",
- dev, cmd, data, flag, p));
- GETXBD_SOFTC(xs, dev);
- dksc = &xs->sc_dksc;
-
- if ((ret = lockmgr(&dksc->sc_lock, LK_EXCLUSIVE, NULL)) != 0)
- return ret;
-
- switch (cmd) {
- default:
- ret = dk_ioctl(xs->sc_di, dksc, dev, cmd, data, flag, p);
- break;
- }
-
- lockmgr(&dksc->sc_lock, LK_RELEASE, NULL);
- return ret;
-}
-
-int
-xbdioctl_cdev(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
-{
- dev_t bdev;
-
- bdev = devsw_chr2blk(dev);
- if (bdev == NODEV)
- return ENXIO;
- return xbdioctl(bdev, cmd, data, flag, p);
-}
-
-int
-xbddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size)
-{
- struct xbd_softc *xs;
-
- DPRINTF_FOLLOW(("xbddump(%d, %" PRId64 ", %p, %lu)\n", dev, blkno, va,
- (unsigned long)size));
- GETXBD_SOFTC(xs, dev);
- return dk_dump(xs->sc_di, &xs->sc_dksc, dev, blkno, va, size);
-}
-
-static int
-xbdinit(struct xbd_softc *xs, vdisk_t *xd, struct dk_intf *dkintf)
-{
- struct dk_geom *pdg;
- char buf[9];
- int ret;
-
- ret = 0;
-
- xs->sc_dksc.sc_size = xd->capacity;
- xs->sc_xd_device = xd->device;
- xs->sc_di = dkintf;
- xs->sc_shutdown = 0;
-
- /*
- * XXX here we should probe the underlying device. If we
- * are accessing a partition of type RAW_PART, then
- * we should populate our initial geometry with the
- * geometry that we discover from the device.
- */
- pdg = &xs->sc_dksc.sc_geom;
- pdg->pdg_secsize = DEV_BSIZE;
- pdg->pdg_ntracks = 1;
- pdg->pdg_nsectors = 1024 * (1024 / pdg->pdg_secsize);
- pdg->pdg_ncylinders = xs->sc_dksc.sc_size / pdg->pdg_nsectors;
-
- /*
- * We have one shared bufq for all devices because otherwise
- * requests can stall if there were no free request slots
- * available in xbdstart and this device had no requests
- * in-flight which would trigger a dk_start from the interrupt
- * handler.
- * XXX this assumes that we can just memcpy struct bufq_state
- * to share it between devices.
- * XXX we reference count the usage in case so we can de-alloc
- * the bufq if all devices are deconfigured.
- */
- if (bufq_users == 0) {
- bufq_alloc(&bufq, BUFQ_FCFS);
- bufq_users = 1;
- }
- memcpy(&xs->sc_dksc.sc_bufq, &bufq, sizeof(struct bufq_state));
-
- xs->sc_dksc.sc_flags |= DKF_INITED;
-
- /* Attach the disk. */
- disk_attach(&xs->sc_dksc.sc_dkdev);
-
- /* Try and read the disklabel. */
- dk_getdisklabel(xs->sc_di, &xs->sc_dksc, 0 /* XXX ? */);
-
- format_bytes(buf, sizeof(buf), (uint64_t)xs->sc_dksc.sc_size *
- pdg->pdg_secsize);
- printf(" %s\n", buf);
-
-/* out: */
- return ret;
-}
diff -r 64cd054aa143 -r 0255f48b757f
netbsd-2.0-xen-sparse/sys/arch/xen/xen/xen_debug.c
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/xen/xen_debug.c Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,444 +0,0 @@
-/* $NetBSD: xen_debug.c,v 1.1.2.1 2004/05/22 15:59:31 he Exp $ */
-
-/*
- *
- * Copyright (c) 2004 Christian Limpach.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Christian Limpach.
- * 4. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- *
- * Copyright (c) 2002-2003, K A Fraser & R Neugebauer
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-
-#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xen_debug.c,v 1.1.2.1 2004/05/22 15:59:31 he Exp
$");
-
-#define XENDEBUG
-
-#include <sys/param.h>
-#include <sys/systm.h>
-
-#include <machine/stdarg.h>
-#include <machine/xen.h>
-#include <machine/hypervisor.h>
-
-#ifdef XENDEBUG
-
-#define PRINTK_BUFSIZE 1024
-void
-printk(const char *fmt, ...)
-{
- va_list ap;
- int ret;
- static char buf[PRINTK_BUFSIZE];
-
- va_start(ap, fmt);
- ret = vsnprintf(buf, PRINTK_BUFSIZE - 1, fmt, ap);
- va_end(ap);
- buf[ret] = 0;
- (void)HYPERVISOR_console_io(CONSOLEIO_write, ret, buf);
-}
-
-void
-vprintk(const char *fmt, va_list ap)
-{
- int ret;
- static char buf[PRINTK_BUFSIZE];
-
- ret = vsnprintf(buf, PRINTK_BUFSIZE - 1, fmt, ap);
- buf[ret] = 0;
- (void)HYPERVISOR_console_io(CONSOLEIO_write, ret, buf);
-}
-
-#endif
-
-#ifdef XENDEBUG_LOW
-
-int xen_once = 0;
-
-void hypervisor_callback(void);
-void failsafe_callback(void);
-
-void xen_dbglow_init(void);
-void
-xen_dbglow_init()
-{
- start_info_t *si;
-#if 0
- int i;
-#endif
-
- si = &xen_start_info;
-
- HYPERVISOR_set_callbacks(
- __KERNEL_CS, (unsigned long)hypervisor_callback,
- __KERNEL_CS, (unsigned long)failsafe_callback);
-
- trap_init();
-
- /* __sti(); */
-
- /* print out some useful information */
- printk(version);
- printk("start_info: %p\n", si);
- printk(" nr_pages: %lu", si->nr_pages);
- printk(" shared_inf: %p (was %p)\n", HYPERVISOR_shared_info,
- si->shared_info);
- printk(" pt_base: %p", (void *)si->pt_base);
- printk(" mod_start: 0x%lx\n", si->mod_start);
- printk(" mod_len: %lu\n", si->mod_len);
-#if 0
- printk(" net_rings: ");
- for (i = 0; i < MAX_DOMAIN_VIFS; i++) {
- if (si->net_rings[i] == 0)
- break;
- printk(" %lx", si->net_rings[i]);
- };
- printk("\n");
- printk(" blk_ring: 0x%lx\n", si->blk_ring);
-#endif
- printk(" dom_id: %d\n", si->dom_id);
- printk(" flags: 0x%lx\n", si->flags);
- printk(" cmd_line: %s\n", si->cmd_line ?
- (const char *)si->cmd_line : "NULL");
-}
-
-
-void xen_dbg0(char *);
-void
-xen_dbg0(char *end)
-{
- struct cpu_info *ci;
-
- ci = &cpu_info_primary;
- if (xen_once)
- printk("xencpu level %d ipending %08x master %08x\n",
- ci->ci_ilevel, ci->ci_ipending,
- HYPERVISOR_shared_info->events_mask);
- /* ipending %08x imask %08x iunmask %08x */
- /* ci->ci_imask[IPL_NET], ci->ci_iunmask[IPL_NET]); */
-}
-
-void xen_dbg1(void *esp, int ss);
-void
-xen_dbg1(void *esp, int ss)
-{
-#if 1
- struct cpu_info *ci;
-
- ci = &cpu_info_primary;
- if (xen_once)
- printk("xenhighlevel %d ipending %08x master %08x events %08x\n",
- ci->ci_ilevel, ci->ci_ipending,
- HYPERVISOR_shared_info->events_mask,
HYPERVISOR_shared_info->events);
-#else
- printk("stack switch %p %d/%d, sp %p\n", esp, ss, IDXSEL(ss), &ss);
-#endif
-}
-
-void xen_dbg2(void);
-void
-xen_dbg2(void)
-{
- if (xen_once)
- printk("xen_dbg2\n");
-}
-
-void xen_dbg3(void *, void *);
-void
-xen_dbg3(void *ss, void *esp)
-{
- if (xen_once)
- printk("xen_dbg3 %p %p\n", ss, esp);
-}
-
-void xen_dbg4(void *);
-void
-xen_dbg4(void *esi)
-{
-
- printk("xen_dbg4 %p\n", esi);
- for(;;);
-}
-
-
-
-
-static void do_exit(void);
-
-/*
- * These are assembler stubs in vector.S.
- * They are the actual entry points for virtual exceptions.
- */
-void divide_error(void);
-void debug(void);
-void int3(void);
-void overflow(void);
-void bounds(void);
-void invalid_op(void);
-void device_not_available(void);
-void double_fault(void);
-void coprocessor_segment_overrun(void);
-void invalid_TSS(void);
-void segment_not_present(void);
-void stack_segment(void);
-void general_protection(void);
-void page_fault(void);
-void coprocessor_error(void);
-void simd_coprocessor_error(void);
-void alignment_check(void);
-void spurious_interrupt_bug(void);
-void machine_check(void);
-
-static void
-dump_regs(struct pt_regs *regs)
-{
- int in_kernel = 1;
- unsigned long esp;
- unsigned short ss;
-
- esp = (unsigned long) (®s->esp);
- ss = __KERNEL_DS;
- if (regs->xcs & 2) {
- in_kernel = 0;
- esp = regs->esp;
- ss = regs->xss & 0xffff;
- }
- printf("EIP: %04x:[<%08lx>]\n",
- 0xffff & regs->xcs, regs->eip);
- printf("EFLAGS: %08lx\n",regs->eflags);
- printf("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
- regs->eax, regs->ebx, regs->ecx, regs->edx);
- printf("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
- regs->esi, regs->edi, regs->ebp, esp);
- printf("ds: %04x es: %04x ss: %04x\n",
- regs->xds & 0xffff, regs->xes & 0xffff, ss);
- printf("\n");
-}
-
-
-static inline void
-dump_code(unsigned eip)
-{
- unsigned *ptr = (unsigned *)eip;
- int x;
-
- printk("Bytes at eip:\n");
- for (x = -4; x < 5; x++)
- printf("%x", ptr[x]);
-}
-
-
-/*
- * C handlers here have their parameter-list constructed by the
- * assembler stubs above. Each one gets a pointer to a list
- * of register values (to be restored at end of exception).
- * Some will also receive an error code -- this is the code that
- * was generated by the processor for the underlying real exception.
- *
- * Note that the page-fault exception is special. It also receives
- * the faulting linear address. Normally this would be found in
- * register CR2, but that is not accessible in a virtualised OS.
- */
-
-static void inline
-do_trap(int trapnr, char *str, struct pt_regs *regs, long error_code)
-{
-
- printk("FATAL: Unhandled Trap (see mini-os:traps.c)");
- printf("%d %s", trapnr, str);
- dump_regs(regs);
- dump_code(regs->eip);
-
- do_exit();
-}
-
-#define DO_ERROR(trapnr, str, name) \
-void do_##name(struct pt_regs *regs, long error_code); \
-void do_##name(struct pt_regs *regs, long error_code) \
-{ \
- do_trap(trapnr, str, regs, error_code); \
-}
-
-#define DO_ERROR_INFO(trapnr, str, name, sicode, siaddr) \
-void do_##name(struct pt_regs *regs, long error_code); \
-void do_##name(struct pt_regs *regs, long error_code) \
-{ \
- do_trap(trapnr, str, regs, error_code); \
-}
-
-DO_ERROR_INFO( 0, "divide error", divide_error, FPE_INTDIV, regs->eip)
-DO_ERROR( 3, "int3", int3)
-DO_ERROR( 4, "overflow", overflow)
-DO_ERROR( 5, "bounds", bounds)
-DO_ERROR_INFO( 6, "invalid operand", invalid_op, ILL_ILLOPN, regs->eip)
-DO_ERROR( 7, "device not available", device_not_available)
-DO_ERROR( 8, "double fault", double_fault)
-DO_ERROR( 9, "coprocessor segment overrun", coprocessor_segment_overrun)
-DO_ERROR(10, "invalid TSS", invalid_TSS)
-DO_ERROR(11, "segment not present", segment_not_present)
-DO_ERROR(12, "stack segment", stack_segment)
-DO_ERROR_INFO(17, "alignment check", alignment_check, BUS_ADRALN, 0)
-DO_ERROR(18, "machine check", machine_check)
-
-void do_page_fault(struct pt_regs *, long, unsigned long);
-void
-do_page_fault(struct pt_regs *regs, long error_code, unsigned long address)
-{
-
- printk("Page fault\n");
- printk("Address: 0x%lx", address);
- printk("Error Code: 0x%lx", error_code);
- printk("eip: \t 0x%lx", regs->eip);
- do_exit();
-}
-
-void do_general_protection(struct pt_regs *, long);
-void
-do_general_protection(struct pt_regs *regs, long error_code)
-{
-
- HYPERVISOR_shared_info->events_mask = 0;
- printk("GPF\n");
- printk("Error Code: 0x%lx", error_code);
- dump_regs(regs);
- dump_code(regs->eip);
- do_exit();
-}
-
-
-void do_debug(struct pt_regs *, long);
-void
-do_debug(struct pt_regs *regs, long error_code)
-{
-
- printk("Debug exception\n");
-#define TF_MASK 0x100
- regs->eflags &= ~TF_MASK;
- dump_regs(regs);
- do_exit();
-}
-
-
-
-void do_coprocessor_error(struct pt_regs *, long);
-void
-do_coprocessor_error(struct pt_regs *regs, long error_code)
-{
-
- printk("Copro error\n");
- dump_regs(regs);
- dump_code(regs->eip);
- do_exit();
-}
-
-void simd_math_error(void *);
-void
-simd_math_error(void *eip)
-{
-
- printk("SIMD error\n");
-}
-
-void do_simd_coprocessor_error(struct pt_regs *, long);
-void
-do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
-{
-
- printk("SIMD copro error\n");
-}
-
-void do_spurious_interrupt_bug(struct pt_regs *, long);
-void
-do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
-{
-}
-
-static void
-do_exit(void)
-{
-
- HYPERVISOR_exit();
-}
-
-/*
- * Submit a virtual IDT to teh hypervisor. This consists of tuples
- * (interrupt vector, privilege ring, CS:EIP of handler).
- * The 'privilege ring' field specifies the least-privileged ring that
- * can trap to that vector using a software-interrupt instruction (INT).
- */
-static trap_info_t trap_table[] = {
- { 0, 0, __KERNEL_CS, (unsigned long)divide_error },
- { 1, 0, __KERNEL_CS, (unsigned long)debug },
- { 3, 3, __KERNEL_CS, (unsigned long)int3 },
- { 4, 3, __KERNEL_CS, (unsigned long)overflow },
- { 5, 3, __KERNEL_CS, (unsigned long)bounds },
- { 6, 0, __KERNEL_CS, (unsigned long)invalid_op },
- { 7, 0, __KERNEL_CS, (unsigned long)device_not_available },
- { 8, 0, __KERNEL_CS, (unsigned long)double_fault },
- { 9, 0, __KERNEL_CS, (unsigned long)coprocessor_segment_overrun },
- { 10, 0, __KERNEL_CS, (unsigned long)invalid_TSS },
- { 11, 0, __KERNEL_CS, (unsigned long)segment_not_present },
- { 12, 0, __KERNEL_CS, (unsigned long)stack_segment },
- { 13, 0, __KERNEL_CS, (unsigned long)general_protection },
- { 14, 0, __KERNEL_CS, (unsigned long)page_fault },
- { 15, 0, __KERNEL_CS, (unsigned long)spurious_interrupt_bug },
- { 16, 0, __KERNEL_CS, (unsigned long)coprocessor_error },
- { 17, 0, __KERNEL_CS, (unsigned long)alignment_check },
- { 18, 0, __KERNEL_CS, (unsigned long)machine_check },
- { 19, 0, __KERNEL_CS, (unsigned long)simd_coprocessor_error },
- { 0, 0, 0, 0 }
-};
-
-void
-trap_init(void)
-{
-
- HYPERVISOR_set_trap_table(trap_table);
-}
-#endif
diff -r 64cd054aa143 -r 0255f48b757f
netbsd-2.0-xen-sparse/sys/arch/xen/xen/xencons.c
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/xen/xencons.c Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,414 +0,0 @@
-/* $NetBSD: xencons.c,v 1.1.2.1 2004/05/22 15:59:21 he Exp $ */
-
-/*
- *
- * Copyright (c) 2004 Christian Limpach.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Christian Limpach.
- * 4. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xencons.c,v 1.1.2.1 2004/05/22 15:59:21 he Exp $");
-
-#include <sys/param.h>
-#include <sys/ioctl.h>
-#include <sys/proc.h>
-#include <sys/tty.h>
-#include <sys/systm.h>
-#include <sys/device.h>
-#include <sys/conf.h>
-
-#include <machine/stdarg.h>
-#include <machine/xen.h>
-#include <machine/hypervisor.h>
-#include <machine/evtchn.h>
-#include <machine/ctrl_if.h>
-
-#include <dev/cons.h>
-
-#include <ddb/db_output.h> /* XXX for db_max_line */
-
-static int xencons_isconsole = 0;
-
-#define XENCONS_UNIT(x) (minor(x))
-#define XENCONS_BURST 128
-
-int xencons_match (struct device *, struct cfdata *, void *);
-void xencons_attach (struct device *, struct device *, void *);
-/* int xencons_intr (void *); */
-void xencons_init (void);
-
-struct xencons_softc {
- struct device sc_dev;
- struct tty *sc_tty;
-};
-
-CFATTACH_DECL(xencons, sizeof(struct xencons_softc),
- xencons_match, xencons_attach, NULL, NULL);
-
-extern struct cfdriver xencons_cd;
-
-dev_type_open(xencons_open);
-dev_type_close(xencons_close);
-dev_type_read(xencons_read);
-dev_type_write(xencons_write);
-dev_type_ioctl(xencons_ioctl);
-dev_type_stop(xencons_stop);
-dev_type_tty(xencons_tty);
-dev_type_poll(xencons_poll);
-
-const struct cdevsw xencons_cdevsw = {
- xencons_open, xencons_close, xencons_read, xencons_write,
- xencons_ioctl, xencons_stop, xencons_tty, xencons_poll,
- NULL, ttykqfilter, D_TTY
-};
-
-
-static void xencons_rx(ctrl_msg_t *, unsigned long);
-void xenconscn_attach(void);
-int xenconscn_getc(dev_t);
-void xenconscn_putc(dev_t, int);
-void xenconscn_pollc(dev_t, int);
-
-static struct consdev xencons = {
- NULL, NULL, xenconscn_getc, xenconscn_putc, xenconscn_pollc,
- NULL, NULL, NULL, NODEV, CN_NORMAL
-};
-
-void xencons_start (struct tty *);
-int xencons_param (struct tty *, struct termios *);
-
-int
-xencons_match(struct device *parent, struct cfdata *match, void *aux)
-{
- struct xencons_attach_args *xa = (struct xencons_attach_args *)aux;
-
- if (strcmp(xa->xa_device, "xencons") == 0)
- return 1;
- return 0;
-}
-
-void
-xencons_attach(struct device *parent, struct device *self, void *aux)
-{
- struct xencons_softc *sc = (void *)self;
-
- aprint_normal(": Xen Virtual Console Driver\n");
-
- if (xencons_isconsole) {
- int maj;
-
- /* Locate the major number. */
- maj = cdevsw_lookup_major(&xencons_cdevsw);
-
- /* There can be only one, but it can have any unit number. */
- cn_tab->cn_dev = makedev(maj, sc->sc_dev.dv_unit);
-
- aprint_verbose("%s: console major %d, unit %d\n",
- sc->sc_dev.dv_xname, maj, sc->sc_dev.dv_unit);
-
- /* Set db_max_line to avoid paging. */
- db_max_line = 0x7fffffff;
-
- (void)ctrl_if_register_receiver(CMSG_CONSOLE, xencons_rx, 0);
- }
-}
-
-int
-xencons_open(dev_t dev, int flag, int mode, struct proc *p)
-{
- struct xencons_softc *sc;
- int unit = XENCONS_UNIT(dev);
- struct tty *tp;
-
- sc = device_lookup(&xencons_cd, unit);
- if (sc == NULL)
- return (ENXIO);
-
- if (!sc->sc_tty) {
- tp = sc->sc_tty = ttymalloc();
- tty_attach(tp);
- } else
- tp = sc->sc_tty;
-
- tp->t_oproc = xencons_start;
- tp->t_param = xencons_param;
- tp->t_dev = dev;
- if ((tp->t_state & TS_ISOPEN) == 0) {
- ttychars(tp);
- tp->t_iflag = TTYDEF_IFLAG;
- tp->t_oflag = TTYDEF_OFLAG;
- tp->t_cflag = TTYDEF_CFLAG;
- tp->t_lflag = TTYDEF_LFLAG;
- tp->t_ispeed = tp->t_ospeed = TTYDEF_SPEED;
- xencons_param(tp, &tp->t_termios);
- ttsetwater(tp);
- } else if (tp->t_state&TS_XCLUDE && p->p_ucred->cr_uid != 0)
- return (EBUSY);
- tp->t_state |= TS_CARR_ON;
-
- return ((*tp->t_linesw->l_open)(dev, tp));
-}
-
-int
-xencons_close(dev_t dev, int flag, int mode, struct proc *p)
-{
- struct xencons_softc *sc = device_lookup(&xencons_cd,
- XENCONS_UNIT(dev));
- struct tty *tp = sc->sc_tty;
-
- if (tp == NULL)
- return (0);
- (*tp->t_linesw->l_close)(tp, flag);
- ttyclose(tp);
-#ifdef notyet /* XXX */
- ttyfree(tp);
-#endif
- return (0);
-}
-
-int
-xencons_read(dev_t dev, struct uio *uio, int flag)
-{
- struct xencons_softc *sc = device_lookup(&xencons_cd,
- XENCONS_UNIT(dev));
- struct tty *tp = sc->sc_tty;
-
- return ((*tp->t_linesw->l_read)(tp, uio, flag));
-}
-
-int
-xencons_write(dev_t dev, struct uio *uio, int flag)
-{
- struct xencons_softc *sc = device_lookup(&xencons_cd,
- XENCONS_UNIT(dev));
- struct tty *tp = sc->sc_tty;
-
- return ((*tp->t_linesw->l_write)(tp, uio, flag));
-}
-
-int
-xencons_poll(dev_t dev, int events, struct proc *p)
-{
- struct xencons_softc *sc = device_lookup(&xencons_cd,
- XENCONS_UNIT(dev));
- struct tty *tp = sc->sc_tty;
-
- return ((*tp->t_linesw->l_poll)(tp, events, p));
-}
-
-struct tty *
-xencons_tty(dev_t dev)
-{
- struct xencons_softc *sc = device_lookup(&xencons_cd,
- XENCONS_UNIT(dev));
- struct tty *tp = sc->sc_tty;
-
- return (tp);
-}
-
-int
-xencons_ioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
-{
- struct xencons_softc *sc = device_lookup(&xencons_cd,
- XENCONS_UNIT(dev));
- struct tty *tp = sc->sc_tty;
- int error;
-
- error = (*tp->t_linesw->l_ioctl)(tp, cmd, data, flag, p);
- if (error != EPASSTHROUGH)
- return (error);
-
- error = ttioctl(tp, cmd, data, flag, p);
- if (error != EPASSTHROUGH)
- return (error);
-
- switch (cmd) {
- default:
- return (EPASSTHROUGH);
- }
-
-#ifdef DIAGNOSTIC
- panic("xencons_ioctl: impossible");
-#endif
-}
-
-void
-xencons_start(struct tty *tp)
-{
- struct clist *cl;
- int s, len;
-
- s = spltty();
- if (tp->t_state & (TS_TIMEOUT | TS_BUSY | TS_TTSTOP))
- goto out;
- tp->t_state |= TS_BUSY;
- splx(s);
-
- /*
- * We need to do this outside spl since it could be fairly
- * expensive and we don't want our serial ports to overflow.
- */
- cl = &tp->t_outq;
- if (xen_start_info.flags & SIF_INITDOMAIN) {
- u_char buf[XENCONS_BURST+1];
-
- len = q_to_b(cl, buf, XENCONS_BURST);
- (void)HYPERVISOR_console_io(CONSOLEIO_write, len, buf);
- } else {
- ctrl_msg_t msg;
-
- len = q_to_b(cl, msg.msg, sizeof(msg.msg));
- msg.type = CMSG_CONSOLE;
- msg.subtype = CMSG_CONSOLE_DATA;
- msg.length = len;
- ctrl_if_send_message_noblock(&msg, NULL, 0);
- /* XXX check return value and queue wait for space
- * thread/softint */
- }
-
- s = spltty();
- tp->t_state &= ~TS_BUSY;
- if (cl->c_cc) {
- tp->t_state |= TS_TIMEOUT;
- callout_reset(&tp->t_rstrt_ch, 1, ttrstrt, tp);
- }
- if (cl->c_cc <= tp->t_lowat) {
- if (tp->t_state & TS_ASLEEP) {
- tp->t_state &= ~TS_ASLEEP;
- wakeup(cl);
- }
- selwakeup(&tp->t_wsel);
- }
-out:
- splx(s);
-}
-
-void
-xencons_stop(struct tty *tp, int flag)
-{
-
-}
-
-
-/* Non-privileged receive callback. */
-static void
-xencons_rx(ctrl_msg_t *msg, unsigned long id)
-{
- int i;
- int s;
- // unsigned long flags;
- struct xencons_softc *sc;
- struct tty *tp;
-
- sc = device_lookup(&xencons_cd, XENCONS_UNIT(cn_tab->cn_dev));
- if (sc == NULL)
- goto out;
-
- tp = sc->sc_tty;
- if (tp == NULL)
- goto out;
-
- s = spltty();
- // save_and_cli(flags);
- // simple_lock(&xencons_lock);
- for (i = 0; i < msg->length; i++)
- (*tp->t_linesw->l_rint)(msg->msg[i], tp);
- // simple_unlock(&xencons_lock);
- // restore_flags(flags);
- splx(s);
-
- out:
- msg->length = 0;
- ctrl_if_send_response(msg);
-}
-
-void
-xenconscn_attach()
-{
-
- cn_tab = &xencons;
-
- ctrl_if_early_init();
-
- xencons_isconsole = 1;
-}
-
-int
-xenconscn_getc(dev_t dev)
-{
-
- printf("\n");
- for (;;);
-}
-
-void
-xenconscn_putc(dev_t dev, int c)
-{
- extern int ctrl_if_evtchn;
-
- if (xen_start_info.flags & SIF_INITDOMAIN ||
- ctrl_if_evtchn == -1) {
- u_char buf[1];
-
- buf[0] = c;
- (void)HYPERVISOR_console_io(CONSOLEIO_write, 1, buf);
- } else {
- ctrl_msg_t msg;
-
- msg.type = CMSG_CONSOLE;
- msg.subtype = CMSG_CONSOLE_DATA;
- msg.length = 1;
- msg.msg[0] = c;
- while (ctrl_if_send_message_noblock(&msg, NULL, 0) == EAGAIN) {
- HYPERVISOR_yield();
- /* XXX check return value and queue wait for space
- * thread/softint */
- }
- }
-}
-
-void
-xenconscn_pollc(dev_t dev, int on)
-{
-
-}
-
-/*
- * Set line parameters.
- */
-int
-xencons_param(struct tty *tp, struct termios *t)
-{
-
- tp->t_ispeed = t->c_ispeed;
- tp->t_ospeed = t->c_ospeed;
- tp->t_cflag = t->c_cflag;
- return (0);
-}
-
diff -r 64cd054aa143 -r 0255f48b757f
netbsd-2.0-xen-sparse/sys/arch/xen/xen/xenkbc.c
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/xen/xenkbc.c Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,600 +0,0 @@
-/* $NetBSD: xenkbc.c,v 1.3.2.1 2004/05/22 15:57:43 he Exp $ */
-
-/*
- *
- * Copyright (c) 2004 Christian Limpach.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Christian Limpach.
- * 4. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- * Copyright (c) 2004 Ben Harris.
- * Copyright (c) 1998
- * Matthias Drochner. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xenkbc.c,v 1.3.2.1 2004/05/22 15:57:43 he Exp $");
-
-#include <sys/param.h>
-#include <sys/device.h>
-#include <sys/malloc.h>
-#include <sys/systm.h>
-
-#include <dev/pckbport/pckbportvar.h>
-#include <dev/ic/i8042reg.h>
-
-#include <machine/intr.h>
-
-#include <machine/xenkbcvar.h>
-#include <machine/xen.h>
-#include <machine/hypervisor.h>
-#include <machine/xen-public/kbd.h>
-#include <machine/evtchn.h>
-
-#define KBC_DELAY DELAY(1000)
-#define KBC_TIMEOUT 250
-
-#define XENKBC_NSLOTS 2
-
-/* data per slave device */
-struct xenkbc_slotdata {
- int xsd_polling; /* don't process data in interrupt handler */
- int xsd_poll_data; /* data read from inr handler if polling */
- int xsd_poll_stat; /* status read from inr handler if polling */
-#if NRND > 0
- rndsource_element_t xsd_rnd_source;
-#endif
-};
-
-struct xenkbc_internal {
- struct xenkbc_softc *xi_sc;
- struct pckbport_tag *xi_pt;
- struct xenkbc_slotdata *xi_slotdata[XENKBC_NSLOTS];
- int xi_flags;
- int xi_data;
- int xi_8042cmdbyte;
-};
-
-#define XI_CONSOLE_FLAG 0x01
-#define XI_HASAUX_FLAG 0x02
-
-#define XI_CONSOLE(xi) ((xi)->xi_flags & XI_CONSOLE_FLAG)
-#define XI_HASAUX(xi) ((xi)->xi_flags & XI_HASAUX_FLAG)
-
-#define XI_SETCONSOLE(xi,on) \
- ((on) ? ((xi)->xi_flags |= XI_CONSOLE_FLAG) : \
- ((xi)->xi_flags &= ~XI_CONSOLE_FLAG))
-#define XI_SETHASAUX(xi,on) \
- ((on) ? ((xi)->xi_flags |= XI_HASAUX_FLAG) : \
- ((xi)->xi_flags &= ~XI_HASAUX_FLAG))
-
-static int xenkbc_match(struct device *, struct cfdata *, void *);
-static void xenkbc_attach(struct device *, struct device *, void *);
-
-static int xenkbc_xt_translation(void *, pckbport_slot_t, int);
-static void xenkbc_init_slotdata(struct xenkbc_slotdata *);
-
-static int xenkbc_get8042cmd (struct xenkbc_internal *);
-static int xenkbc_put8042cmd (struct xenkbc_internal *);
-static int xenkbc_send_devcmd(void *, pckbport_slot_t, u_char);
-static int xenkbc_send_cmd(void *, u_char);
-static int xenkbc_send_data(void *, u_char);
-static int xenkbc_poll_data1(void *, pckbport_slot_t);
-
-static void xenkbc_slot_enable(void *, pckbport_slot_t, int);
-static void xenkbc_intr_establish(void *, pckbport_slot_t);
-static void xenkbc_set_poll(void *, pckbport_slot_t, int);
-
-static int xenkbc_intr(void *);
-
-CFATTACH_DECL(xenkbc, sizeof(struct xenkbc_softc),
- xenkbc_match, xenkbc_attach, NULL, NULL);
-
-static struct pckbport_accessops const xenkbc_ops = {
- xenkbc_xt_translation,
- xenkbc_send_devcmd,
- xenkbc_poll_data1,
- xenkbc_slot_enable,
- xenkbc_intr_establish,
- xenkbc_set_poll
-};
-
-static struct xenkbc_internal xenkbc_consdata;
-static struct xenkbc_slotdata xenkbc_cons_slotdata;
-
-/* #define XENKBCDEBUG */
-#ifdef XENKBCDEBUG
-#define DPRINTF(x) printf x
-#else
-#define DPRINTF(x)
-#endif
-
-
-static int
-xenkbc_getstatus(struct xenkbc_internal *xi)
-{
- long res;
-
- res = HYPERVISOR_kbd_op(KBD_OP_READ, 0);
- if (res < 0) {
- xi->xi_data = 0;
- return 0;
- }
- xi->xi_data = KBD_CODE_SCANCODE(res);
- return KBD_CODE_STATUS(res);
-}
-
-static int
-xenkbc_wait_output(struct xenkbc_internal *xi)
-{
- u_int i;
-
- for (i = KBC_TIMEOUT; i; i--) {
- if ((xenkbc_getstatus(xi) & KBS_IBF) == 0)
- return (1);
- KBC_DELAY;
- }
- return (0);
-}
-
-static int
-xenkbc_match(struct device *parent, struct cfdata *cf, void *aux)
-{
- struct xenkbc_attach_args *xa = aux;
-
- if ((xen_start_info.flags & SIF_PRIVILEGED) == 0)
- return 0;
-
- if (strcmp(xa->xa_device, "xenkbc"))
- return 0;
-
- return 1;
-}
-
-static int
-xenkbc_attach_slot(struct xenkbc_softc *xs, pckbport_slot_t slot)
-{
- struct xenkbc_internal *xi = xs->sc_xi;
- struct device *child;
- int alloced = 0;
-
- if (xi->xi_slotdata[slot] == NULL) {
- xi->xi_slotdata[slot] = malloc(sizeof(struct xenkbc_slotdata),
- M_DEVBUF, M_NOWAIT);
- if (xi->xi_slotdata[slot] == NULL) {
- printf("%s: no memory\n", xs->sc_dev.dv_xname);
- return 0;
- }
- xenkbc_init_slotdata(xi->xi_slotdata[slot]);
- alloced++;
- }
-
- child = pckbport_attach_slot(&xs->sc_dev, xi->xi_pt, slot);
-
- if (child == NULL && alloced) {
- free(xi->xi_slotdata[slot], M_DEVBUF);
- xi->xi_slotdata[slot] = NULL;
- }
-
-#if NRND > 0
- if (child != NULL && xi->xi_slotdata[slot] != NULL)
- rnd_attach_source(&xi->xi_slotdata[slot]->xsd_rnd_source,
- child->dv_xname, RND_TYPE_TTY, 0);
-#endif
-
- return child != NULL;
-}
-
-static void
-xenkbc_attach(struct device *parent, struct device *self, void *aux)
-{
- /* struct xenkbc_attach_args *xa = aux; */
- struct xenkbc_softc *xs = (struct xenkbc_softc *)self;
- struct xenkbc_internal *xi;
- int res;
- u_char cmdbits = 0;
-
- if (XI_CONSOLE(&xenkbc_consdata))
- xi = &xenkbc_consdata;
- else {
- xi = malloc(sizeof(struct xenkbc_internal), M_DEVBUF,
- M_NOWAIT | M_ZERO);
- if (xi == NULL) {
- aprint_error(": no memory\n");
- return;
- }
- xi->xi_8042cmdbyte = KC8_CPU;
- }
-
- aprint_normal(": Xen Keyboard/Mouse Device\n");
-
- xs->sc_xi = xi;
- xi->xi_sc = xs;
-
- event_set_handler(_EVENT_PS2, &xenkbc_intr, xi, IPL_TTY);
- hypervisor_enable_event(_EVENT_PS2);
-
- xi->xi_pt = pckbport_attach(xi, &xenkbc_ops);
-
- /* flush */
- xenkbc_poll_data1(xi, PCKBPORT_KBD_SLOT);
-
- /* set initial cmd byte */
- if (!xenkbc_put8042cmd(xi)) {
- printf("kbc: cmd word write error\n");
- return;
- }
-
- if (xenkbc_attach_slot(xs, PCKBPORT_KBD_SLOT))
- cmdbits |= KC8_KENABLE;
-
- /*
- * Check aux port ok.
- */
- if (!xenkbc_send_cmd(xi, KBC_AUXECHO)) {
- printf("kbc: aux echo error 1\n");
- goto nomouse;
- }
- if (!xenkbc_wait_output(xi)) {
- printf("kbc: aux echo error 2\n");
- goto nomouse;
- }
- XI_SETHASAUX(xi, 1);
- xenkbc_send_data(xi, 0x5a); /* a random value */
- res = xenkbc_poll_data1(xi, PCKBPORT_AUX_SLOT);
- if (res != -1) {
- /*
- * In most cases, the 0x5a gets echoed.
- * Some older controllers (Gateway 2000 circa 1993)
- * return 0xfe here.
- * We are satisfied if there is anything in the
- * aux output buffer.
- */
- if (xenkbc_attach_slot(xs, PCKBPORT_AUX_SLOT))
- cmdbits |= KC8_MENABLE;
- } else {
-#ifdef XENKBCDEBUG
- printf("kbc: aux echo test failed\n");
-#endif
- XI_SETHASAUX(xi, 0);
- }
-
- nomouse:
- /* enable needed interrupts */
- xi->xi_8042cmdbyte |= cmdbits;
- if (!xenkbc_put8042cmd(xi))
- printf("kbc: cmd word write error\n");
-}
-
-static void
-xenkbc_init_slotdata(struct xenkbc_slotdata *xsd)
-{
-
- xsd->xsd_polling = 0;
-}
-
-/*
- * Get the current command byte.
- */
-static int
-xenkbc_get8042cmd(struct xenkbc_internal *xi)
-{
- int data;
-
- if (!xenkbc_send_cmd(xi, K_RDCMDBYTE))
- return 0;
- data = xenkbc_poll_data1(xi, PCKBPORT_KBD_SLOT);
- if (data == -1)
- return 0;
- xi->xi_8042cmdbyte = data;
- return 1;
-}
-
-/*
- * Pass command byte to keyboard controller (8042).
- */
-static int
-xenkbc_put8042cmd(struct xenkbc_internal *xi)
-{
-
- if (!xenkbc_send_cmd(xi, K_LDCMDBYTE))
- return 0;
- if (!xenkbc_wait_output(xi))
- return 0;
- return xenkbc_send_data(xi, xi->xi_8042cmdbyte);
-}
-
-static int
-xenkbc_send_devcmd(void *cookie, pckbport_slot_t slot, u_char devcmd)
-{
-
- DPRINTF(("send_devcmd %x\n", devcmd));
-
- if (slot == PCKBPORT_AUX_SLOT) {
- if (!xenkbc_send_cmd(cookie, KBC_AUXWRITE)) {
- DPRINTF(("xenkbc_send_devcmd: KBC_AUXWRITE failed\n"));
- return 0;
- }
- }
- if (!xenkbc_wait_output(cookie)) {
- DPRINTF(("xenkbc_send_devcmd: wait_output failed\n"));
- return 0;
- }
- return xenkbc_send_data(cookie, devcmd);
-}
-
-static int
-xenkbc_send_cmd(void *cookie, u_char cmd)
-{
- struct xenkbc_internal *xi = cookie;
-
- DPRINTF(("send_cmd %x\n", cmd));
- xenkbc_wait_output(xi);
- return !HYPERVISOR_kbd_op(KBD_OP_WRITECOMMAND, cmd);
-}
-
-static int
-xenkbc_send_data(void *cookie, u_char output)
-{
- struct xenkbc_internal *xi = cookie;
-
- DPRINTF(("send_data %x\n", output));
- xenkbc_wait_output(xi);
- return !HYPERVISOR_kbd_op(KBD_OP_WRITEOUTPUT, output);
-}
-
-static int
-xenkbc_poll_data1(void *cookie, pckbport_slot_t slot)
-{
- struct xenkbc_internal *xi = cookie;
- struct xenkbc_slotdata *xsd = xi->xi_slotdata[slot];
- int s;
- u_char stat, c;
- int i = 1000;
-
- s = splhigh();
-
- if (xsd && xsd->xsd_polling && xsd->xsd_poll_data != -1 &&
- xsd->xsd_poll_stat != -1) {
- stat = xsd->xsd_poll_stat;
- c = xsd->xsd_poll_data;
- xsd->xsd_poll_data = -1;
- xsd->xsd_poll_stat = -1;
- goto process;
- }
-
- DELAY(10);
- for (; i; i--) {
- stat = xenkbc_getstatus(xi);
- if (stat & KBS_DIB) {
- c = xi->xi_data;
- DELAY(10);
- process:
- if (XI_HASAUX(xi) && (stat & 0x20)) { /* aux data */
- if (slot != PCKBPORT_AUX_SLOT) {
-#ifdef XENKBCDEBUG
- printf("lost aux 0x%x\n", c);
-#endif
- continue;
- }
- } else {
- if (slot == PCKBPORT_AUX_SLOT) {
-#ifdef XENKBCDEBUG
- printf("lost kbd 0x%x\n", c);
-#endif
- continue;
- }
- }
- splx(s);
- DPRINTF(("poll -> %x stat %x\n", c, stat));
- return c;
- }
- }
-
- DPRINTF(("poll failed -> -1\n"));
- splx(s);
- return -1;
-}
-
-/*
- * switch scancode translation on / off
- * return nonzero on success
- */
-static int
-xenkbc_xt_translation(void *cookie, pckbport_slot_t slot, int on)
-{
- struct xenkbc_internal *xi = cookie;
- int ison;
-
- if (slot != PCKBPORT_KBD_SLOT) {
- /* translation only for kbd slot */
- if (on)
- return 0;
- else
- return 1;
- }
-
- ison = xi->xi_8042cmdbyte & KC8_TRANS;
- if ((on && ison) || (!on && !ison))
- return 1;
-
- xi->xi_8042cmdbyte ^= KC8_TRANS;
- if (!xenkbc_put8042cmd(xi))
- return 0;
-
- /* read back to be sure */
- if (!xenkbc_get8042cmd(xi))
- return 0;
-
- ison = xi->xi_8042cmdbyte & KC8_TRANS;
- if ((on && ison) || (!on && !ison))
- return 1;
- return 0;
-}
-
-static const struct xenkbc_portcmd {
- u_char cmd_en, cmd_dis;
-} xenkbc_portcmd[2] = {
- {
- KBC_KBDENABLE, KBC_KBDDISABLE,
- }, {
- KBC_AUXENABLE, KBC_AUXDISABLE,
- }
-};
-
-static void
-xenkbc_slot_enable(void *cookie, pckbport_slot_t slot, int on)
-{
- struct xenkbc_internal *xi = cookie;
- const struct xenkbc_portcmd *cmd;
-
- cmd = &xenkbc_portcmd[slot];
-
- DPRINTF(("slot enable %d -> %d\n", slot, on));
- xenkbc_send_cmd(xi, on ? cmd->cmd_en : cmd->cmd_dis);
-}
-
-
-static void
-xenkbc_intr_establish(void *cookie, pckbport_slot_t slot)
-{
-
-}
-
-static void
-xenkbc_set_poll(void *cookie, pckbport_slot_t slot, int on)
-{
- struct xenkbc_internal *xi = cookie;
-
- DPRINTF(("xenkbc_set_poll %d -> %d\n", slot, on));
-
- xi->xi_slotdata[slot]->xsd_polling = on;
-
- if (on) {
- xi->xi_slotdata[slot]->xsd_poll_data = -1;
- xi->xi_slotdata[slot]->xsd_poll_stat = -1;
- } else {
- int s;
-
- /*
- * If disabling polling on a device that's been configured,
- * make sure there are no bytes left in the FIFO, holding up
- * the interrupt line. Otherwise we won't get any further
- * interrupts.
- */
- s = spltty();
- xenkbc_intr(xi);
- splx(s);
- }
-}
-
-static int
-xenkbc_intr(void *self)
-{
- struct xenkbc_internal *xi = self;
- u_char stat;
- pckbport_slot_t slot;
- struct xenkbc_slotdata *xsd;
- int served = 0;
-
- for (;;) {
- stat = xenkbc_getstatus(xi);
- if (!(stat & KBS_DIB))
- break;
-
- served = 1;
-
- slot = (XI_HASAUX(xi) && (stat & 0x20)) ?
- PCKBPORT_AUX_SLOT : PCKBPORT_KBD_SLOT;
- xsd = xi->xi_slotdata[slot];
-
- if (xsd == NULL)
- continue;
-
-#if NRND > 0
- rnd_add_uint32(&xsd->xsd_rnd_source,
- (stat << 8) | xi->xi_data);
-#endif
-
- if (xsd->xsd_polling) {
- xsd->xsd_poll_data = xi->xi_data;
- xsd->xsd_poll_stat = stat;
- break; /* xenkbc_poll_data() will get it */
- }
-
- pckbportintr(xi->xi_pt, slot, xi->xi_data);
- }
-
- return served;
-}
-
-int
-xenkbc_cnattach(pckbport_slot_t slot)
-{
- struct xenkbc_internal *xi = &xenkbc_consdata;
- int ret;
-
- /* flush */
- (void) xenkbc_poll_data1(xi, PCKBPORT_KBD_SLOT);
-
- /* init cmd byte, enable ports */
- xenkbc_consdata.xi_8042cmdbyte = KC8_CPU;
- if (!xenkbc_put8042cmd(xi)) {
- printf("kbc: cmd word write error\n");
- return EIO;
- }
-
- ret = pckbport_cnattach(xi, &xenkbc_ops, slot);
-
- xi->xi_slotdata[slot] = &xenkbc_cons_slotdata;
- xenkbc_init_slotdata(xi->xi_slotdata[slot]);
- XI_SETCONSOLE(xi, 1);
-
- return ret;
-}
diff -r 64cd054aa143 -r 0255f48b757f
netbsd-2.0-xen-sparse/sys/miscfs/kernfs/kernfs.h
--- a/netbsd-2.0-xen-sparse/sys/miscfs/kernfs/kernfs.h Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,205 +0,0 @@
-/* $NetBSD: kernfs.h,v 1.20.2.3 2004/05/23 10:46:05 tron Exp $ */
-
-/*
- * Copyright (c) 1992, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software donated to Berkeley by
- * Jan-Simon Pendry.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)kernfs.h 8.6 (Berkeley) 3/29/95
- */
-
-#define _PATH_KERNFS "/kern" /* Default mountpoint */
-
-#ifdef _KERNEL
-#include <sys/queue.h>
-
-/*
- * The different types of node in a kernfs filesystem
- */
-typedef enum {
- KFSkern, /* the filesystem itself (.) */
- KFSroot, /* the filesystem root (..) */
- KFSnull, /* none aplicable */
- KFStime, /* boottime */
- KFSint, /* integer */
- KFSstring, /* string */
- KFShostname, /* hostname */
- KFSavenrun, /* loadavg */
- KFSdevice, /* device file (rootdev/rrootdev) */
- KFSmsgbuf, /* msgbuf */
- KFSipsecsadir, /* ipsec security association (top dir) */
- KFSipsecspdir, /* ipsec security policy (top dir) */
- KFSipsecsa, /* ipsec security association entry */
- KFSipsecsp, /* ipsec security policy entry */
- KFSsubdir, /* directory */
- KFSlasttype, /* last used type */
- KFSmaxtype = (1<<6) - 1 /* last possible type */
-} kfstype;
-
-/*
- * Control data for the kern file system.
- */
-struct kern_target {
- u_char kt_type;
- u_char kt_namlen;
- const char *kt_name;
- void *kt_data;
- kfstype kt_tag;
- u_char kt_vtype;
- mode_t kt_mode;
-};
-
-struct dyn_kern_target {
- struct kern_target dkt_kt;
- SIMPLEQ_ENTRY(dyn_kern_target) dkt_queue;
-};
-
-struct kernfs_subdir {
- SIMPLEQ_HEAD(,dyn_kern_target) ks_entries;
- unsigned int ks_nentries;
- unsigned int ks_dirs;
- const struct kern_target *ks_parent;
-};
-
-struct kernfs_node {
- LIST_ENTRY(kernfs_node) kfs_hash; /* hash chain */
- TAILQ_ENTRY(kernfs_node) kfs_list; /* flat list */
- struct vnode *kfs_vnode; /* vnode associated with this pfsnode */
- kfstype kfs_type; /* type of procfs node */
- mode_t kfs_mode; /* mode bits for stat() */
- long kfs_fileno; /* unique file id */
- u_int32_t kfs_value; /* SA id or SP id (KFSint) */
- const struct kern_target *kfs_kt;
- void *kfs_v; /* pointer to secasvar/secpolicy/mbuf */
- long kfs_cookie; /* fileno cookie */
-};
-
-struct kernfs_mount {
- TAILQ_HEAD(, kernfs_node) nodelist;
- long fileno_cookie;
-};
-
-#define UIO_MX 32
-
-#define KERNFS_FILENO(kt, typ, cookie) \
- ((kt >= &kern_targets[0] && kt < &kern_targets[static_nkern_targets]) \
- ? 2 + ((kt) - &kern_targets[0]) \
- : (((cookie + 1) << 6) | (typ)))
-#define KERNFS_TYPE_FILENO(typ, cookie) \
- (((cookie + 1) << 6) | (typ))
-
-#define VFSTOKERNFS(mp) ((struct kernfs_mount *)((mp)->mnt_data))
-#define VTOKERN(vp) ((struct kernfs_node *)(vp)->v_data)
-#define KERNFSTOV(kfs) ((kfs)->kfs_vnode)
-
-extern const struct kern_target kern_targets[];
-extern int nkern_targets;
-extern const int static_nkern_targets;
-extern int (**kernfs_vnodeop_p) __P((void *));
-extern struct vfsops kernfs_vfsops;
-extern dev_t rrootdev;
-
-struct secasvar;
-struct secpolicy;
-
-int kernfs_root __P((struct mount *, struct vnode **));
-
-void kernfs_hashinit __P((void));
-void kernfs_hashreinit __P((void));
-void kernfs_hashdone __P((void));
-int kernfs_freevp __P((struct vnode *));
-int kernfs_allocvp __P((struct mount *, struct vnode **, kfstype,
- const struct kern_target *, u_int32_t));
-
-void kernfs_revoke_sa __P((struct secasvar *));
-void kernfs_revoke_sp __P((struct secpolicy *));
-
-/*
- * Data types for the kernfs file operations.
- */
-typedef enum {
- KERNFS_XREAD,
- KERNFS_XWRITE,
- KERNFS_FILEOP_CLOSE,
- KERNFS_FILEOP_GETATTR,
- KERNFS_FILEOP_IOCTL,
- KERNFS_FILEOP_MMAP,
- KERNFS_FILEOP_OPEN,
- KERNFS_FILEOP_READ,
- KERNFS_FILEOP_WRITE,
-} kfsfileop;
-
-struct kernfs_fileop {
- kfstype kf_type;
- kfsfileop kf_fileop;
- union {
- void *_kf_genop;
- int (*_kf_vop)(void *);
- int (*_kf_xread)
- (const struct kernfs_node *, int, char **, size_t,
- size_t *);
- int (*_kf_xwrite)
- (const struct kernfs_node *, char *, size_t);
- } _kf_opfn;
- SPLAY_ENTRY(kernfs_fileop) kf_node;
-};
-#define kf_genop _kf_opfn
-#define kf_vop _kf_opfn._kf_vop
-#define kf_xwrite _kf_opfn._kf_xwrite
-#define kf_xread _kf_opfn._kf_xread
-
-typedef struct kern_target kernfs_parentdir_t;
-typedef struct dyn_kern_target kernfs_entry_t;
-
-/*
- * Functions for adding kernfs datatypes and nodes.
- */
-kfstype kernfs_alloctype(int, const struct kernfs_fileop *);
-#define KERNFS_ALLOCTYPE(kf) kernfs_alloctype(sizeof((kf)) / \
- sizeof((kf)[0]), (kf))
-#define KERNFS_ALLOCENTRY(dkt, m_type, m_flags)
\
- dkt = (struct dyn_kern_target *)malloc( \
- sizeof(struct dyn_kern_target), (m_type), (m_flags))
-#define KERNFS_INITENTRY(dkt, type, name, data, tag, vtype, mode) do {
\
- (dkt)->dkt_kt.kt_type = (type); \
- (dkt)->dkt_kt.kt_namlen = strlen((name)); \
- (dkt)->dkt_kt.kt_name = (name); \
- (dkt)->dkt_kt.kt_data = (data); \
- (dkt)->dkt_kt.kt_tag = (tag); \
- (dkt)->dkt_kt.kt_vtype = (vtype); \
- (dkt)->dkt_kt.kt_mode = (mode); \
-} while (/*CONSTCOND*/0)
-#define KERNFS_ENTOPARENTDIR(dkt) &(dkt)->dkt_kt
-int kernfs_addentry __P((kernfs_parentdir_t *, kernfs_entry_t *));
-
-#ifdef SYSCTL_SETUP_PROTO
-SYSCTL_SETUP_PROTO(sysctl_vfs_kernfs_setup);
-#endif /* SYSCTL_SETUP_PROTO */
-
-#endif /* _KERNEL */
diff -r 64cd054aa143 -r 0255f48b757f
netbsd-2.0-xen-sparse/sys/miscfs/kernfs/kernfs_vnops.c
--- a/netbsd-2.0-xen-sparse/sys/miscfs/kernfs/kernfs_vnops.c Sun Dec 4
17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,1583 +0,0 @@
-/* $NetBSD: kernfs_vnops.c,v 1.98.2.3 2004/05/15 13:35:27 tron Exp $
*/
-
-/*
- * Copyright (c) 1992, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software donated to Berkeley by
- * Jan-Simon Pendry.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)kernfs_vnops.c 8.15 (Berkeley) 5/21/95
- */
-
-/*
- * Kernel parameter filesystem (/kern)
- */
-
-#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kernfs_vnops.c,v 1.98.2.3 2004/05/15 13:35:27 tron
Exp $");
-
-#ifdef _KERNEL_OPT
-#include "opt_ipsec.h"
-#endif
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/vmmeter.h>
-#include <sys/time.h>
-#include <sys/proc.h>
-#include <sys/vnode.h>
-#include <sys/malloc.h>
-#include <sys/file.h>
-#include <sys/stat.h>
-#include <sys/mount.h>
-#include <sys/namei.h>
-#include <sys/buf.h>
-#include <sys/dirent.h>
-#include <sys/msgbuf.h>
-
-#include <miscfs/genfs/genfs.h>
-#include <miscfs/kernfs/kernfs.h>
-
-#ifdef IPSEC
-#include <sys/mbuf.h>
-#include <net/route.h>
-#include <netinet/in.h>
-#include <netinet6/ipsec.h>
-#include <netkey/key.h>
-#endif
-
-#include <uvm/uvm_extern.h>
-
-#define KSTRING 256 /* Largest I/O available via this
filesystem */
-#define UIO_MX 32
-
-#define READ_MODE (S_IRUSR|S_IRGRP|S_IROTH)
-#define WRITE_MODE (S_IWUSR|S_IRUSR|S_IRGRP|S_IROTH)
-#define UREAD_MODE (S_IRUSR)
-#define DIR_MODE
(S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)
-#define UDIR_MODE (S_IRUSR|S_IXUSR)
-
-#define N(s) sizeof(s)-1, s
-const struct kern_target kern_targets[] = {
-/* NOTE: The name must be less than UIO_MX-16 chars in length */
- /* name data tag type ro/rw */
- { DT_DIR, N("."), 0, KFSkern, VDIR, DIR_MODE
},
- { DT_DIR, N(".."), 0, KFSroot, VDIR, DIR_MODE
},
- { DT_REG, N("boottime"), &boottime.tv_sec, KFSint, VREG, READ_MODE
},
- /* XXX cast away const */
- { DT_REG, N("copyright"), (void *)copyright,
- KFSstring, VREG, READ_MODE },
- { DT_REG, N("hostname"), 0, KFShostname, VREG, WRITE_MODE
},
- { DT_REG, N("hz"), &hz, KFSint, VREG, READ_MODE
},
-#ifdef IPSEC
- { DT_DIR, N("ipsecsa"), 0, KFSipsecsadir, VDIR, UDIR_MODE },
- { DT_DIR, N("ipsecsp"), 0, KFSipsecspdir, VDIR, UDIR_MODE },
-#endif
- { DT_REG, N("loadavg"), 0, KFSavenrun, VREG, READ_MODE
},
- { DT_REG, N("msgbuf"), 0, KFSmsgbuf, VREG, READ_MODE },
- { DT_REG, N("pagesize"), &uvmexp.pagesize, KFSint, VREG, READ_MODE
},
- { DT_REG, N("physmem"), &physmem, KFSint, VREG, READ_MODE
},
-#if 0
- { DT_DIR, N("root"), 0, KFSnull, VDIR, DIR_MODE
},
-#endif
- { DT_BLK, N("rootdev"), &rootdev, KFSdevice, VBLK, READ_MODE
},
- { DT_CHR, N("rrootdev"), &rrootdev, KFSdevice, VCHR, READ_MODE
},
- { DT_REG, N("time"), 0, KFStime, VREG, READ_MODE
},
- /* XXX cast away const */
- { DT_REG, N("version"), (void *)version,
- KFSstring, VREG, READ_MODE },
-};
-const struct kern_target subdir_targets[] = {
-/* NOTE: The name must be less than UIO_MX-16 chars in length */
- /* name data tag type ro/rw */
- { DT_DIR, N("."), 0, KFSsubdir, VDIR, DIR_MODE
},
- { DT_DIR, N(".."), 0, KFSkern, VDIR, DIR_MODE
},
-};
-#ifdef IPSEC
-const struct kern_target ipsecsa_targets[] = {
-/* NOTE: The name must be less than UIO_MX-16 chars in length */
- /* name data tag type ro/rw */
- { DT_DIR, N("."), 0, KFSipsecsadir, VDIR, DIR_MODE
},
- { DT_DIR, N(".."), 0, KFSkern, VDIR, DIR_MODE
},
-};
-const struct kern_target ipsecsp_targets[] = {
-/* NOTE: The name must be less than UIO_MX-16 chars in length */
- /* name data tag type ro/rw */
- { DT_DIR, N("."), 0, KFSipsecspdir, VDIR, DIR_MODE
},
- { DT_DIR, N(".."), 0, KFSkern, VDIR, DIR_MODE
},
-};
-const struct kern_target ipsecsa_kt =
- { DT_DIR, N(""), 0, KFSipsecsa, VREG, UREAD_MODE
};
-const struct kern_target ipsecsp_kt =
- { DT_DIR, N(""), 0, KFSipsecsp, VREG, UREAD_MODE
};
-#endif
-#undef N
-SIMPLEQ_HEAD(,dyn_kern_target) dyn_kern_targets =
- SIMPLEQ_HEAD_INITIALIZER(dyn_kern_targets);
-int nkern_targets = sizeof(kern_targets) / sizeof(kern_targets[0]);
-const int static_nkern_targets = sizeof(kern_targets) /
sizeof(kern_targets[0]);
-#ifdef IPSEC
-int nipsecsa_targets = sizeof(ipsecsa_targets) / sizeof(ipsecsa_targets[0]);
-int nipsecsp_targets = sizeof(ipsecsp_targets) / sizeof(ipsecsp_targets[0]);
-int nkern_dirs = 4; /* 2 extra subdirs */
-#else
-int nkern_dirs = 2;
-#endif
-
-int kernfs_try_fileop(kfstype, kfsfileop, void *, int);
-int kernfs_try_xread(kfstype, const struct kernfs_node *, int, char **,
- size_t, size_t *, int);
-int kernfs_try_xwrite(kfstype, const struct kernfs_node *, char *,
- size_t, int);
-
-static int kernfs_default_xread(void *v);
-static int kernfs_default_xwrite(void *v);
-static int kernfs_default_fileop_getattr(void *);
-
-/* must include all fileop's */
-const struct kernfs_fileop kernfs_default_fileops[] = {
- { .kf_fileop = KERNFS_XREAD },
- { .kf_fileop = KERNFS_XWRITE },
- { .kf_fileop = KERNFS_FILEOP_OPEN },
- { .kf_fileop = KERNFS_FILEOP_GETATTR,
- .kf_genop = {kernfs_default_fileop_getattr} },
- { .kf_fileop = KERNFS_FILEOP_IOCTL },
- { .kf_fileop = KERNFS_FILEOP_MMAP },
- { .kf_fileop = KERNFS_FILEOP_CLOSE },
- { .kf_fileop = KERNFS_FILEOP_READ, .kf_genop = {kernfs_default_xread} },
- { .kf_fileop = KERNFS_FILEOP_WRITE, .kf_genop = {kernfs_default_xwrite} },
-};
-
-int kernfs_lookup __P((void *));
-#define kernfs_create genfs_eopnotsupp
-#define kernfs_mknod genfs_eopnotsupp
-int kernfs_open __P((void *));
-int kernfs_close __P((void *));
-int kernfs_access __P((void *));
-int kernfs_getattr __P((void *));
-int kernfs_setattr __P((void *));
-int kernfs_read __P((void *));
-int kernfs_write __P((void *));
-#define kernfs_fcntl genfs_fcntl
-int kernfs_ioctl __P((void *));
-#define kernfs_poll genfs_poll
-#define kernfs_revoke genfs_revoke
-int kernfs_mmap __P((void *));
-#define kernfs_fsync genfs_nullop
-#define kernfs_seek genfs_nullop
-#define kernfs_remove genfs_eopnotsupp
-int kernfs_link __P((void *));
-#define kernfs_rename genfs_eopnotsupp
-#define kernfs_mkdir genfs_eopnotsupp
-#define kernfs_rmdir genfs_eopnotsupp
-int kernfs_symlink __P((void *));
-int kernfs_readdir __P((void *));
-#define kernfs_readlink genfs_eopnotsupp
-#define kernfs_abortop genfs_abortop
-int kernfs_inactive __P((void *));
-int kernfs_reclaim __P((void *));
-#define kernfs_lock genfs_lock
-#define kernfs_unlock genfs_unlock
-#define kernfs_bmap genfs_badop
-#define kernfs_strategy genfs_badop
-int kernfs_print __P((void *));
-#define kernfs_islocked genfs_islocked
-int kernfs_pathconf __P((void *));
-#define kernfs_advlock genfs_einval
-#define kernfs_blkatoff genfs_eopnotsupp
-#define kernfs_valloc genfs_eopnotsupp
-#define kernfs_vfree genfs_nullop
-#define kernfs_truncate genfs_eopnotsupp
-#define kernfs_update genfs_nullop
-#define kernfs_bwrite genfs_eopnotsupp
-#define kernfs_putpages genfs_putpages
-
-static int kernfs_xread __P((struct kernfs_node *, int, char **, size_t,
size_t *));
-static int kernfs_xwrite __P((const struct kernfs_node *, char *, size_t));
-
-int (**kernfs_vnodeop_p) __P((void *));
-const struct vnodeopv_entry_desc kernfs_vnodeop_entries[] = {
- { &vop_default_desc, vn_default_error },
- { &vop_lookup_desc, kernfs_lookup }, /* lookup */
- { &vop_create_desc, kernfs_create }, /* create */
- { &vop_mknod_desc, kernfs_mknod }, /* mknod */
- { &vop_open_desc, kernfs_open }, /* open */
- { &vop_close_desc, kernfs_close }, /* close */
- { &vop_access_desc, kernfs_access }, /* access */
- { &vop_getattr_desc, kernfs_getattr }, /* getattr */
- { &vop_setattr_desc, kernfs_setattr }, /* setattr */
- { &vop_read_desc, kernfs_read }, /* read */
- { &vop_write_desc, kernfs_write }, /* write */
- { &vop_fcntl_desc, kernfs_fcntl }, /* fcntl */
- { &vop_ioctl_desc, kernfs_ioctl }, /* ioctl */
- { &vop_poll_desc, kernfs_poll }, /* poll */
- { &vop_revoke_desc, kernfs_revoke }, /* revoke */
- { &vop_mmap_desc, kernfs_mmap }, /* mmap */
- { &vop_fsync_desc, kernfs_fsync }, /* fsync */
- { &vop_seek_desc, kernfs_seek }, /* seek */
- { &vop_remove_desc, kernfs_remove }, /* remove */
- { &vop_link_desc, kernfs_link }, /* link */
- { &vop_rename_desc, kernfs_rename }, /* rename */
- { &vop_mkdir_desc, kernfs_mkdir }, /* mkdir */
- { &vop_rmdir_desc, kernfs_rmdir }, /* rmdir */
- { &vop_symlink_desc, kernfs_symlink }, /* symlink */
- { &vop_readdir_desc, kernfs_readdir }, /* readdir */
- { &vop_readlink_desc, kernfs_readlink }, /* readlink */
- { &vop_abortop_desc, kernfs_abortop }, /* abortop */
- { &vop_inactive_desc, kernfs_inactive }, /* inactive */
- { &vop_reclaim_desc, kernfs_reclaim }, /* reclaim */
- { &vop_lock_desc, kernfs_lock }, /* lock */
- { &vop_unlock_desc, kernfs_unlock }, /* unlock */
- { &vop_bmap_desc, kernfs_bmap }, /* bmap */
- { &vop_strategy_desc, kernfs_strategy }, /* strategy */
- { &vop_print_desc, kernfs_print }, /* print */
- { &vop_islocked_desc, kernfs_islocked }, /* islocked */
- { &vop_pathconf_desc, kernfs_pathconf }, /* pathconf */
- { &vop_advlock_desc, kernfs_advlock }, /* advlock */
- { &vop_blkatoff_desc, kernfs_blkatoff }, /* blkatoff */
- { &vop_valloc_desc, kernfs_valloc }, /* valloc */
- { &vop_vfree_desc, kernfs_vfree }, /* vfree */
- { &vop_truncate_desc, kernfs_truncate }, /* truncate */
- { &vop_update_desc, kernfs_update }, /* update */
- { &vop_bwrite_desc, kernfs_bwrite }, /* bwrite */
- { &vop_putpages_desc, kernfs_putpages }, /* putpages */
- { NULL, NULL }
-};
-const struct vnodeopv_desc kernfs_vnodeop_opv_desc =
- { &kernfs_vnodeop_p, kernfs_vnodeop_entries };
-
-static __inline int
-kernfs_fileop_compare(struct kernfs_fileop *a, struct kernfs_fileop *b)
-{
- if (a->kf_type < b->kf_type)
- return -1;
- if (a->kf_type > b->kf_type)
- return 1;
- if (a->kf_fileop < b->kf_fileop)
- return -1;
- if (a->kf_fileop > b->kf_fileop)
- return 1;
- return (0);
-}
-
-SPLAY_HEAD(kfsfileoptree, kernfs_fileop) kfsfileoptree =
- SPLAY_INITIALIZER(kfsfileoptree);
-SPLAY_PROTOTYPE(kfsfileoptree, kernfs_fileop, kf_node, kernfs_fileop_compare);
-SPLAY_GENERATE(kfsfileoptree, kernfs_fileop, kf_node, kernfs_fileop_compare);
-
-kfstype
-kernfs_alloctype(int nkf, const struct kernfs_fileop *kf)
-{
- static u_char nextfreetype = KFSlasttype;
- struct kernfs_fileop *dkf, *fkf, skf;
- int i;
-
- /* XXX need to keep track of dkf's memory if we support
- deallocating types */
- dkf = malloc(sizeof(kernfs_default_fileops), M_TEMP, M_WAITOK);
- memcpy(dkf, kernfs_default_fileops, sizeof(kernfs_default_fileops));
-
- for (i = 0; i < sizeof(kernfs_default_fileops) /
- sizeof(kernfs_default_fileops[0]); i++) {
- dkf[i].kf_type = nextfreetype;
- SPLAY_INSERT(kfsfileoptree, &kfsfileoptree, &dkf[i]);
- }
-
- for (i = 0; i < nkf; i++) {
- skf.kf_type = nextfreetype;
- skf.kf_fileop = kf[i].kf_fileop;
- if ((fkf = SPLAY_FIND(kfsfileoptree, &kfsfileoptree, &skf)))
- fkf->kf_genop = kf[i].kf_genop;
- }
-
- return nextfreetype++;
-}
-
-int
-kernfs_try_fileop(kfstype type, kfsfileop fileop, void *v, int error)
-{
- const struct kernfs_fileop *kf;
- struct kernfs_fileop skf;
-
- skf.kf_type = type;
- skf.kf_fileop = fileop;
- kf = SPLAY_FIND(kfsfileoptree, &kfsfileoptree, &skf);
- if (kf == NULL)
- kf = &kernfs_default_fileops[fileop];
- if (kf->kf_vop)
- return kf->kf_vop(v);
- return error;
-}
-
-int
-kernfs_try_xread(kfstype type, const struct kernfs_node *kfs, int off,
- char **bufp, size_t len, size_t *wrlen, int error)
-{
- const struct kernfs_fileop *kf;
- struct kernfs_fileop skf;
-
- skf.kf_type = type;
- skf.kf_fileop = KERNFS_XREAD;
- kf = SPLAY_FIND(kfsfileoptree, &kfsfileoptree, &skf);
- if (kf == NULL)
- kf = &kernfs_default_fileops[KERNFS_XREAD];
- if (kf->kf_xread)
- return kf->kf_xread(kfs, off, bufp, len, wrlen);
- *wrlen = 0;
- return error;
-}
-
-int
-kernfs_try_xwrite(kfstype type, const struct kernfs_node *kfs, char *buf,
- size_t len, int error)
-{
- const struct kernfs_fileop *kf;
- struct kernfs_fileop skf;
-
- skf.kf_type = type;
- skf.kf_fileop = KERNFS_XWRITE;
- kf = SPLAY_FIND(kfsfileoptree, &kfsfileoptree, &skf);
- if (kf == NULL)
- kf = &kernfs_default_fileops[KERNFS_XWRITE];
- if (kf->kf_xwrite)
- return kf->kf_xwrite(kfs, buf, len);
- return error;
-}
-
-int
-kernfs_addentry(kernfs_parentdir_t *pkt, kernfs_entry_t *dkt)
-{
- struct kernfs_subdir *ks, *parent;
-
- if (pkt == NULL) {
- SIMPLEQ_INSERT_TAIL(&dyn_kern_targets, dkt, dkt_queue);
- nkern_targets++;
- if (dkt->dkt_kt.kt_vtype == VDIR)
- nkern_dirs++;
- } else {
- parent = (struct kernfs_subdir *)pkt->kt_data;
- SIMPLEQ_INSERT_TAIL(&parent->ks_entries, dkt, dkt_queue);
- parent->ks_nentries++;
- if (dkt->dkt_kt.kt_vtype == VDIR)
- parent->ks_dirs++;
- }
- if (dkt->dkt_kt.kt_vtype == VDIR && dkt->dkt_kt.kt_data == NULL) {
- ks = malloc(sizeof(struct kernfs_subdir),
- M_TEMP, M_WAITOK);
- SIMPLEQ_INIT(&ks->ks_entries);
- ks->ks_nentries = 2; /* . and .. */
- ks->ks_dirs = 2;
- ks->ks_parent = pkt ? pkt : &kern_targets[0];
- dkt->dkt_kt.kt_data = ks;
- }
- return 0;
-}
-
-static int
-kernfs_xread(kfs, off, bufp, len, wrlen)
- struct kernfs_node *kfs;
- int off;
- char **bufp;
- size_t len;
- size_t *wrlen;
-{
- const struct kern_target *kt;
-#ifdef IPSEC
- struct mbuf *m;
-#endif
-
- kt = kfs->kfs_kt;
-
- switch (kfs->kfs_type) {
- case KFStime: {
- struct timeval tv;
-
- microtime(&tv);
- snprintf(*bufp, len, "%ld %ld\n", tv.tv_sec, tv.tv_usec);
- break;
- }
-
- case KFSint: {
- int *ip = kt->kt_data;
-
- snprintf(*bufp, len, "%d\n", *ip);
- break;
- }
-
- case KFSstring: {
- char *cp = kt->kt_data;
-
- *bufp = cp;
- break;
- }
-
- case KFSmsgbuf: {
- long n;
-
- /*
- * deal with cases where the message buffer has
- * become corrupted.
- */
- if (!msgbufenabled || msgbufp->msg_magic != MSG_MAGIC) {
- msgbufenabled = 0;
- return (ENXIO);
- }
-
- /*
- * Note that reads of /kern/msgbuf won't necessarily yield
- * consistent results, if the message buffer is modified
- * while the read is in progress. The worst that can happen
- * is that incorrect data will be read. There's no way
- * that this can crash the system unless the values in the
- * message buffer header are corrupted, but that'll cause
- * the system to die anyway.
- */
- if (off >= msgbufp->msg_bufs) {
- *wrlen = 0;
- return (0);
- }
- n = msgbufp->msg_bufx + off;
- if (n >= msgbufp->msg_bufs)
- n -= msgbufp->msg_bufs;
- len = min(msgbufp->msg_bufs - n, msgbufp->msg_bufs - off);
- *bufp = msgbufp->msg_bufc + n;
- *wrlen = len;
- return (0);
- }
-
- case KFShostname: {
- char *cp = hostname;
- int xlen = hostnamelen;
-
- if (xlen >= (len - 2))
- return (EINVAL);
-
- memcpy(*bufp, cp, xlen);
- (*bufp)[xlen] = '\n';
- (*bufp)[xlen+1] = '\0';
- len = strlen(*bufp);
- break;
- }
-
- case KFSavenrun:
- averunnable.fscale = FSCALE;
- snprintf(*bufp, len, "%d %d %d %ld\n",
- averunnable.ldavg[0], averunnable.ldavg[1],
- averunnable.ldavg[2], averunnable.fscale);
- break;
-
-#ifdef IPSEC
- case KFSipsecsa:
- /*
- * Note that SA configuration could be changed during the
- * read operation, resulting in garbled output.
- */
- m = key_setdumpsa_spi(htonl(kfs->kfs_value));
- if (!m)
- return (ENOBUFS);
- if (off >= m->m_pkthdr.len) {
- *wrlen = 0;
- m_freem(m);
- return (0);
- }
- if (len > m->m_pkthdr.len - off)
- len = m->m_pkthdr.len - off;
- m_copydata(m, off, len, *bufp);
- *wrlen = len;
- m_freem(m);
- return (0);
-
- case KFSipsecsp:
- /*
- * Note that SP configuration could be changed during the
- * read operation, resulting in garbled output.
- */
- if (!kfs->kfs_v) {
- struct secpolicy *sp;
-
- sp = key_getspbyid(kfs->kfs_value);
- if (sp)
- kfs->kfs_v = sp;
- else
- return (ENOENT);
- }
- m = key_setdumpsp((struct secpolicy *)kfs->kfs_v,
- SADB_X_SPDGET, 0, 0);
- if (!m)
- return (ENOBUFS);
- if (off >= m->m_pkthdr.len) {
- *wrlen = 0;
- m_freem(m);
- return (0);
- }
- if (len > m->m_pkthdr.len - off)
- len = m->m_pkthdr.len - off;
- m_copydata(m, off, len, *bufp);
- *wrlen = len;
- m_freem(m);
- return (0);
-#endif
-
- default:
- return kernfs_try_xread(kfs->kfs_type, kfs, off, bufp, len,
- wrlen, 0);
- }
-
- len = strlen(*bufp);
- if (len <= off)
- *wrlen = 0;
- else {
- *bufp += off;
- *wrlen = len - off;
- }
- return (0);
-}
-
-static int
-kernfs_xwrite(kfs, buf, len)
- const struct kernfs_node *kfs;
- char *buf;
- size_t len;
-{
-
- switch (kfs->kfs_type) {
- case KFShostname:
- if (buf[len-1] == '\n')
- --len;
- memcpy(hostname, buf, len);
- hostname[len] = '\0';
- hostnamelen = (size_t) len;
- return (0);
-
- default:
- return kernfs_try_xwrite(kfs->kfs_type, kfs, buf, len, EIO);
- }
-}
-
-
-/*
- * vp is the current namei directory
- * ndp is the name to locate in that directory...
- */
-int
-kernfs_lookup(v)
- void *v;
-{
- struct vop_lookup_args /* {
- struct vnode * a_dvp;
- struct vnode ** a_vpp;
- struct componentname * a_cnp;
- } */ *ap = v;
- struct componentname *cnp = ap->a_cnp;
- struct vnode **vpp = ap->a_vpp;
- struct vnode *dvp = ap->a_dvp;
- const char *pname = cnp->cn_nameptr;
- const struct kernfs_node *kfs;
- const struct kern_target *kt;
- const struct dyn_kern_target *dkt;
- const struct kernfs_subdir *ks;
- int error, i, wantpunlock;
-#ifdef IPSEC
- char *ep;
- u_int32_t id;
-#endif
-
- *vpp = NULLVP;
- cnp->cn_flags &= ~PDIRUNLOCK;
-
- if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)
- return (EROFS);
-
- if (cnp->cn_namelen == 1 && *pname == '.') {
- *vpp = dvp;
- VREF(dvp);
- return (0);
- }
-
- wantpunlock = (~cnp->cn_flags & (LOCKPARENT | ISLASTCN));
- kfs = VTOKERN(dvp);
- switch (kfs->kfs_type) {
- case KFSkern:
- /*
- * Shouldn't get here with .. in the root node.
- */
- if (cnp->cn_flags & ISDOTDOT)
- return (EIO);
-
- for (i = 0; i < static_nkern_targets; i++) {
- kt = &kern_targets[i];
- if (cnp->cn_namelen == kt->kt_namlen &&
- memcmp(kt->kt_name, pname, cnp->cn_namelen) == 0)
- goto found;
- }
- SIMPLEQ_FOREACH(dkt, &dyn_kern_targets, dkt_queue) {
- if (cnp->cn_namelen == dkt->dkt_kt.kt_namlen &&
- memcmp(dkt->dkt_kt.kt_name, pname, cnp->cn_namelen)
== 0) {
- kt = &dkt->dkt_kt;
- goto found;
- }
- }
- break;
-
- found:
- error = kernfs_allocvp(dvp->v_mount, vpp, kt->kt_tag, kt, 0);
- if ((error == 0) && wantpunlock) {
- VOP_UNLOCK(dvp, 0);
- cnp->cn_flags |= PDIRUNLOCK;
- }
- return (error);
-
- case KFSsubdir:
- ks = (struct kernfs_subdir *)kfs->kfs_kt->kt_data;
- if (cnp->cn_flags & ISDOTDOT) {
- kt = ks->ks_parent;
- goto found;
- }
-
- SIMPLEQ_FOREACH(dkt, &ks->ks_entries, dkt_queue) {
- if (cnp->cn_namelen == dkt->dkt_kt.kt_namlen &&
- memcmp(dkt->dkt_kt.kt_name, pname, cnp->cn_namelen)
== 0) {
- kt = &dkt->dkt_kt;
- goto found;
- }
- }
- break;
-
-#ifdef IPSEC
- case KFSipsecsadir:
- if (cnp->cn_flags & ISDOTDOT) {
- kt = &kern_targets[0];
- goto found;
- }
-
- for (i = 2; i < nipsecsa_targets; i++) {
- kt = &ipsecsa_targets[i];
- if (cnp->cn_namelen == kt->kt_namlen &&
- memcmp(kt->kt_name, pname, cnp->cn_namelen) == 0)
- goto found;
- }
-
- ep = NULL;
- id = strtoul(pname, &ep, 10);
- if (!ep || *ep || ep == pname)
- break;
-
- error = kernfs_allocvp(dvp->v_mount, vpp, KFSipsecsa,
&ipsecsa_kt, id);
- if ((error == 0) && wantpunlock) {
- VOP_UNLOCK(dvp, 0);
- cnp->cn_flags |= PDIRUNLOCK;
- }
- return (error);
-
- case KFSipsecspdir:
- if (cnp->cn_flags & ISDOTDOT) {
- kt = &kern_targets[0];
- goto found;
- }
-
- for (i = 2; i < nipsecsp_targets; i++) {
- kt = &ipsecsp_targets[i];
- if (cnp->cn_namelen == kt->kt_namlen &&
- memcmp(kt->kt_name, pname, cnp->cn_namelen) == 0)
- goto found;
- }
-
- ep = NULL;
- id = strtoul(pname, &ep, 10);
- if (!ep || *ep || ep == pname)
- break;
-
- error = kernfs_allocvp(dvp->v_mount, vpp, KFSipsecsp,
&ipsecsp_kt, id);
- if ((error == 0) && wantpunlock) {
- VOP_UNLOCK(dvp, 0);
- cnp->cn_flags |= PDIRUNLOCK;
- }
- return (error);
-#endif
-
- default:
- return (ENOTDIR);
- }
-
- return (cnp->cn_nameiop == LOOKUP ? ENOENT : EROFS);
-}
-
-int
-kernfs_open(v)
- void *v;
-{
- struct vop_open_args /* {
- struct vnode *a_vp;
- int a_mode;
- struct ucred *a_cred;
- struct proc *a_p;
- } */ *ap = v;
- struct kernfs_node *kfs = VTOKERN(ap->a_vp);
-#ifdef IPSEC
- struct mbuf *m;
- struct secpolicy *sp;
-#endif
-
- switch (kfs->kfs_type) {
-#ifdef IPSEC
- case KFSipsecsa:
- m = key_setdumpsa_spi(htonl(kfs->kfs_value));
- if (m) {
- m_freem(m);
- return (0);
- } else
- return (ENOENT);
-
- case KFSipsecsp:
- sp = key_getspbyid(kfs->kfs_value);
- if (sp) {
- kfs->kfs_v = sp;
- return (0);
- } else
- return (ENOENT);
-#endif
-
- default:
- return kernfs_try_fileop(kfs->kfs_type, KERNFS_FILEOP_OPEN,
- v, 0);
- }
-}
-
-int
-kernfs_close(v)
- void *v;
-{
- struct vop_close_args /* {
- struct vnode *a_vp;
- int a_fflag;
- struct ucred *a_cred;
- struct proc *a_p;
- } */ *ap = v;
- struct kernfs_node *kfs = VTOKERN(ap->a_vp);
-
- switch (kfs->kfs_type) {
-#ifdef IPSEC
- case KFSipsecsp:
- key_freesp((struct secpolicy *)kfs->kfs_v);
- break;
-#endif
-
- default:
- return kernfs_try_fileop(kfs->kfs_type, KERNFS_FILEOP_CLOSE,
- v, 0);
- }
-
- return (0);
-}
-
-int
-kernfs_access(v)
- void *v;
-{
- struct vop_access_args /* {
- struct vnode *a_vp;
- int a_mode;
- struct ucred *a_cred;
- struct proc *a_p;
- } */ *ap = v;
- struct vattr va;
- int error;
-
- if ((error = VOP_GETATTR(ap->a_vp, &va, ap->a_cred, ap->a_p)) != 0)
- return (error);
-
- return (vaccess(va.va_type, va.va_mode, va.va_uid, va.va_gid,
- ap->a_mode, ap->a_cred));
-}
-
-static int
-kernfs_default_fileop_getattr(v)
- void *v;
-{
- struct vop_getattr_args /* {
- struct vnode *a_vp;
- struct vattr *a_vap;
- struct ucred *a_cred;
- struct proc *a_p;
- } */ *ap = v;
- struct vattr *vap = ap->a_vap;
-
- vap->va_nlink = 1;
- vap->va_bytes = vap->va_size = 0;
-
- return 0;
-}
-
-int
-kernfs_getattr(v)
- void *v;
-{
- struct vop_getattr_args /* {
- struct vnode *a_vp;
- struct vattr *a_vap;
- struct ucred *a_cred;
- struct proc *a_p;
- } */ *ap = v;
- struct kernfs_node *kfs = VTOKERN(ap->a_vp);
- struct kernfs_subdir *ks;
- struct vattr *vap = ap->a_vap;
- int error = 0;
- char strbuf[KSTRING], *buf;
- size_t nread, total;
-
- VATTR_NULL(vap);
- vap->va_type = ap->a_vp->v_type;
- vap->va_uid = 0;
- vap->va_gid = 0;
- vap->va_mode = kfs->kfs_mode;
- vap->va_fileid = kfs->kfs_fileno;
- vap->va_flags = 0;
- vap->va_size = 0;
- vap->va_blocksize = DEV_BSIZE;
- /*
- * Make all times be current TOD, except for the "boottime" node.
- * Avoid microtime(9), it's slow.
- * We don't guard the read from time(9) with splclock(9) since we
- * don't actually need to be THAT sure the access is atomic.
- */
- if (kfs->kfs_kt && kfs->kfs_kt->kt_namlen == 8 &&
- !memcmp(kfs->kfs_kt->kt_name, "boottime", 8)) {
- TIMEVAL_TO_TIMESPEC(&boottime, &vap->va_ctime);
- } else {
- TIMEVAL_TO_TIMESPEC(&time, &vap->va_ctime);
- }
- vap->va_atime = vap->va_mtime = vap->va_ctime;
- vap->va_gen = 0;
- vap->va_flags = 0;
- vap->va_rdev = 0;
- vap->va_bytes = 0;
-
- switch (kfs->kfs_type) {
- case KFSkern:
- vap->va_nlink = nkern_dirs;
- vap->va_bytes = vap->va_size = DEV_BSIZE;
- break;
-
- case KFSroot:
- vap->va_nlink = 1;
- vap->va_bytes = vap->va_size = DEV_BSIZE;
- break;
-
- case KFSsubdir:
- ks = (struct kernfs_subdir *)kfs->kfs_kt->kt_data;
- vap->va_nlink = ks->ks_dirs;
- vap->va_bytes = vap->va_size = DEV_BSIZE;
- break;
-
- case KFSnull:
- case KFStime:
- case KFSint:
- case KFSstring:
- case KFShostname:
- case KFSavenrun:
- case KFSdevice:
- case KFSmsgbuf:
-#ifdef IPSEC
- case KFSipsecsa:
- case KFSipsecsp:
-#endif
- vap->va_nlink = 1;
- total = 0;
- do {
- buf = strbuf;
- error = kernfs_xread(kfs, total, &buf,
- sizeof(strbuf), &nread);
- total += nread;
- } while (error == 0 && nread != 0);
- vap->va_bytes = vap->va_size = total;
- break;
-
-#ifdef IPSEC
- case KFSipsecsadir:
- case KFSipsecspdir:
- vap->va_nlink = 2;
- vap->va_bytes = vap->va_size = DEV_BSIZE;
- break;
-#endif
-
- default:
- error = kernfs_try_fileop(kfs->kfs_type,
- KERNFS_FILEOP_GETATTR, v, EINVAL);
- break;
- }
-
- return (error);
-}
-
-/*ARGSUSED*/
-int
-kernfs_setattr(v)
- void *v;
-{
-
- /*
- * Silently ignore attribute changes.
- * This allows for open with truncate to have no
- * effect until some data is written. I want to
- * do it this way because all writes are atomic.
- */
- return (0);
-}
-
-static int
-kernfs_default_xread(v)
- void *v;
-{
- struct vop_read_args /* {
- struct vnode *a_vp;
- struct uio *a_uio;
- int a_ioflag;
- struct ucred *a_cred;
- } */ *ap = v;
- struct uio *uio = ap->a_uio;
- struct kernfs_node *kfs = VTOKERN(ap->a_vp);
- char strbuf[KSTRING], *buf;
- off_t off;
- size_t len;
- int error;
-
- if (ap->a_vp->v_type == VDIR)
- return (EOPNOTSUPP);
-
- off = uio->uio_offset;
- buf = strbuf;
- if ((error = kernfs_xread(kfs, off, &buf, sizeof(strbuf), &len)) == 0)
- error = uiomove(buf, len, uio);
- return (error);
-}
-
-int
-kernfs_read(v)
- void *v;
-{
- struct vop_read_args /* {
- struct vnode *a_vp;
- struct uio *a_uio;
- int a_ioflag;
- struct ucred *a_cred;
- } */ *ap = v;
- struct kernfs_node *kfs = VTOKERN(ap->a_vp);
-
- return kernfs_try_fileop(kfs->kfs_type, KERNFS_FILEOP_READ, v, 0);
-}
-
-static int
-kernfs_default_xwrite(v)
- void *v;
-{
- struct vop_write_args /* {
- struct vnode *a_vp;
- struct uio *a_uio;
- int a_ioflag;
- struct ucred *a_cred;
- } */ *ap = v;
- struct kernfs_node *kfs = VTOKERN(ap->a_vp);
- struct uio *uio = ap->a_uio;
- int error, xlen;
- char strbuf[KSTRING];
-
- if (uio->uio_offset != 0)
- return (EINVAL);
-
- xlen = min(uio->uio_resid, KSTRING-1);
- if ((error = uiomove(strbuf, xlen, uio)) != 0)
- return (error);
-
- if (uio->uio_resid != 0)
- return (EIO);
-
- strbuf[xlen] = '\0';
- xlen = strlen(strbuf);
- return (kernfs_xwrite(kfs, strbuf, xlen));
-}
-
-int
-kernfs_write(v)
- void *v;
-{
- struct vop_write_args /* {
- struct vnode *a_vp;
- struct uio *a_uio;
- int a_ioflag;
- struct ucred *a_cred;
- } */ *ap = v;
- struct kernfs_node *kfs = VTOKERN(ap->a_vp);
-
- return kernfs_try_fileop(kfs->kfs_type, KERNFS_FILEOP_WRITE, v, 0);
-}
-
-int
-kernfs_ioctl(v)
- void *v;
-{
- struct vop_ioctl_args /* {
- const struct vnodeop_desc *a_desc;
- struct vnode *a_vp;
- u_long a_command;
- void *a_data;
- int a_fflag;
- struct ucred *a_cred;
- struct proc *a_p;
- } */ *ap = v;
- struct kernfs_node *kfs = VTOKERN(ap->a_vp);
-
- return kernfs_try_fileop(kfs->kfs_type, KERNFS_FILEOP_IOCTL, v,
- EPASSTHROUGH);
-}
-
-int
-kernfs_mmap(v)
- void *v;
-{
- struct vop_mmap_args /* {
- const struct vnodeop_desc *a_desc;
- struct vnode *a_vp;
- int a_fflags;
- struct ucred *a_cred;
- struct proc *a_p;
- } */ *ap = v;
- struct kernfs_node *kfs = VTOKERN(ap->a_vp);
-
- return kernfs_try_fileop(kfs->kfs_type, KERNFS_FILEOP_MMAP, v, 0);
-}
-
-static int
-kernfs_setdirentfileno_kt(struct dirent *d, const struct kern_target *kt,
- u_int32_t value, struct vop_readdir_args *ap)
-{
- struct kernfs_node *kfs;
- struct vnode *vp;
- int error;
-
- if ((error = kernfs_allocvp(ap->a_vp->v_mount, &vp, kt->kt_tag, kt,
- value)) != 0)
- return error;
- if (kt->kt_tag == KFSdevice) {
- struct vattr va;
- if ((error = VOP_GETATTR(vp, &va, ap->a_cred,
- ap->a_uio->uio_segflg == UIO_USERSPACE ?
- ap->a_uio->uio_procp : &proc0)) != 0)
- return (error);
- d->d_fileno = va.va_fileid;
- } else {
- kfs = VTOKERN(vp);
- d->d_fileno = kfs->kfs_fileno;
- }
- vput(vp);
- return 0;
-}
-
-static int
-kernfs_setdirentfileno(struct dirent *d, off_t entry,
- struct kernfs_node *thisdir_kfs, const struct kern_target *parent_kt,
- const struct kern_target *kt, struct vop_readdir_args *ap)
-{
- const struct kern_target *ikt;
- int error;
-
- switch (entry) {
- case 0:
- d->d_fileno = thisdir_kfs->kfs_fileno;
- return 0;
- case 1:
- ikt = parent_kt;
- break;
- default:
- ikt = kt;
- break;
- }
- if (ikt != thisdir_kfs->kfs_kt) {
- if ((error = kernfs_setdirentfileno_kt(d, ikt, 0, ap)) != 0)
- return error;
- } else
- d->d_fileno = thisdir_kfs->kfs_fileno;
- return 0;
-}
-
-int
-kernfs_readdir(v)
- void *v;
-{
- struct vop_readdir_args /* {
- struct vnode *a_vp;
- struct uio *a_uio;
- struct ucred *a_cred;
- int *a_eofflag;
- off_t **a_cookies;
- int a_*ncookies;
- } */ *ap = v;
- struct uio *uio = ap->a_uio;
- struct dirent d;
- struct kernfs_node *kfs = VTOKERN(ap->a_vp);
- const struct kern_target *kt;
- const struct dyn_kern_target *dkt = NULL;
- const struct kernfs_subdir *ks;
- off_t i, j;
- int error;
- off_t *cookies = NULL;
- int ncookies = 0, n;
-#ifdef IPSEC
- struct secasvar *sav, *sav2;
- struct secpolicy *sp;
-#endif
-
- if (uio->uio_resid < UIO_MX)
- return (EINVAL);
- if (uio->uio_offset < 0)
- return (EINVAL);
-
- error = 0;
- i = uio->uio_offset;
- memset(&d, 0, sizeof(d));
- d.d_reclen = UIO_MX;
- ncookies = uio->uio_resid / UIO_MX;
-
- switch (kfs->kfs_type) {
- case KFSkern:
- if (i >= nkern_targets)
- return (0);
-
- if (ap->a_ncookies) {
- ncookies = min(ncookies, (nkern_targets - i));
- cookies = malloc(ncookies * sizeof(off_t), M_TEMP,
- M_WAITOK);
- *ap->a_cookies = cookies;
- }
-
- n = 0;
- for (; i < nkern_targets && uio->uio_resid >= UIO_MX; i++) {
- if (i < static_nkern_targets)
- kt = &kern_targets[i];
- else {
- if (dkt == NULL) {
- dkt = SIMPLEQ_FIRST(&dyn_kern_targets);
- for (j = static_nkern_targets; j < i &&
- dkt != NULL; j++)
- dkt = SIMPLEQ_NEXT(dkt,
dkt_queue);
- if (j != i)
- break;
- } else {
- dkt = SIMPLEQ_NEXT(dkt, dkt_queue);
- if (dkt == NULL)
- break;
- }
- kt = &dkt->dkt_kt;
- }
- if (kt->kt_tag == KFSdevice) {
- dev_t *dp = kt->kt_data;
- struct vnode *fvp;
-
- if (*dp == NODEV ||
- !vfinddev(*dp, kt->kt_vtype, &fvp))
- continue;
- }
- d.d_namlen = kt->kt_namlen;
- if ((error = kernfs_setdirentfileno(&d, i, kfs,
- &kern_targets[0], kt, ap)) != 0)
- break;
- memcpy(d.d_name, kt->kt_name, kt->kt_namlen + 1);
- d.d_type = kt->kt_type;
- if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
- break;
- if (cookies)
- *cookies++ = i + 1;
- n++;
- }
- ncookies = n;
- break;
-
- case KFSroot:
- if (i >= 2)
- return 0;
-
- if (ap->a_ncookies) {
- ncookies = min(ncookies, (2 - i));
- cookies = malloc(ncookies * sizeof(off_t), M_TEMP,
- M_WAITOK);
- *ap->a_cookies = cookies;
- }
-
- n = 0;
- for (; i < 2 && uio->uio_resid >= UIO_MX; i++) {
- kt = &kern_targets[i];
- d.d_namlen = kt->kt_namlen;
- d.d_fileno = KERNFS_FILENO(kt, kt->kt_tag, 0);
- memcpy(d.d_name, kt->kt_name, kt->kt_namlen + 1);
- d.d_type = kt->kt_type;
- if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
- break;
- if (cookies)
- *cookies++ = i + 1;
- n++;
- }
- ncookies = n;
- break;
-
- case KFSsubdir:
- ks = (struct kernfs_subdir *)kfs->kfs_kt->kt_data;
- if (i >= ks->ks_nentries)
- return (0);
-
- if (ap->a_ncookies) {
- ncookies = min(ncookies, (ks->ks_nentries - i));
- cookies = malloc(ncookies * sizeof(off_t), M_TEMP,
- M_WAITOK);
- *ap->a_cookies = cookies;
- }
-
- dkt = SIMPLEQ_FIRST(&ks->ks_entries);
- for (j = 0; j < i && dkt != NULL; j++)
- dkt = SIMPLEQ_NEXT(dkt, dkt_queue);
- n = 0;
- for (; i < ks->ks_nentries && uio->uio_resid >= UIO_MX; i++) {
- if (i < 2)
- kt = &subdir_targets[i];
- else {
- /* check if ks_nentries lied to us */
- if (dkt == NULL)
- break;
- kt = &dkt->dkt_kt;
- dkt = SIMPLEQ_NEXT(dkt, dkt_queue);
- }
- if (kt->kt_tag == KFSdevice) {
- dev_t *dp = kt->kt_data;
- struct vnode *fvp;
-
- if (*dp == NODEV ||
- !vfinddev(*dp, kt->kt_vtype, &fvp))
- continue;
- }
- d.d_namlen = kt->kt_namlen;
- if ((error = kernfs_setdirentfileno(&d, i, kfs,
- ks->ks_parent, kt, ap)) != 0)
- break;
- memcpy(d.d_name, kt->kt_name, kt->kt_namlen + 1);
- d.d_type = kt->kt_type;
- if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
- break;
- if (cookies)
- *cookies++ = i + 1;
- n++;
- }
- ncookies = n;
- break;
-
-#ifdef IPSEC
- case KFSipsecsadir:
- /* count SA in the system */
- n = 0;
- TAILQ_FOREACH(sav, &satailq, tailq) {
- for (sav2 = TAILQ_FIRST(&satailq);
- sav2 != sav;
- sav2 = TAILQ_NEXT(sav2, tailq)) {
- if (sav->spi == sav2->spi) {
- /* multiple SA with same SPI */
- break;
- }
- }
- if (sav == sav2 || sav->spi != sav2->spi)
- n++;
- }
-
- if (i >= nipsecsa_targets + n)
- return (0);
-
- if (ap->a_ncookies) {
- ncookies = min(ncookies, (n - i));
- cookies = malloc(ncookies * sizeof(off_t), M_TEMP,
- M_WAITOK);
- *ap->a_cookies = cookies;
- }
-
- n = 0;
- for (; i < nipsecsa_targets && uio->uio_resid >= UIO_MX; i++) {
- kt = &ipsecsa_targets[i];
- d.d_namlen = kt->kt_namlen;
- if ((error = kernfs_setdirentfileno(&d, i, kfs,
- &kern_targets[0], kt, ap)) != 0)
- break;
- memcpy(d.d_name, kt->kt_name, kt->kt_namlen + 1);
- d.d_type = kt->kt_type;
- if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
- break;
- if (cookies)
- *cookies++ = i + 1;
- n++;
- }
- if (error) {
- ncookies = n;
- break;
- }
-
- TAILQ_FOREACH(sav, &satailq, tailq) {
- for (sav2 = TAILQ_FIRST(&satailq);
- sav2 != sav;
- sav2 = TAILQ_NEXT(sav2, tailq)) {
- if (sav->spi == sav2->spi) {
- /* multiple SA with same SPI */
- break;
- }
- }
- if (sav != sav2 && sav->spi == sav2->spi)
- continue;
- if (uio->uio_resid < UIO_MX)
- break;
- if ((error = kernfs_setdirentfileno_kt(&d, &ipsecsa_kt,
- sav->spi, ap)) != 0)
- break;
- d.d_namlen = snprintf(d.d_name, sizeof(d.d_name),
- "%u", ntohl(sav->spi));
- d.d_type = DT_REG;
- if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
- break;
- if (cookies)
- *cookies++ = i + 1;
- n++;
- i++;
- }
- ncookies = n;
- break;
-
- case KFSipsecspdir:
- /* count SP in the system */
- n = 0;
- TAILQ_FOREACH(sp, &sptailq, tailq)
- n++;
-
- if (i >= nipsecsp_targets + n)
- return (0);
-
- if (ap->a_ncookies) {
- ncookies = min(ncookies, (n - i));
- cookies = malloc(ncookies * sizeof(off_t), M_TEMP,
- M_WAITOK);
- *ap->a_cookies = cookies;
- }
-
- n = 0;
- for (; i < nipsecsp_targets && uio->uio_resid >= UIO_MX; i++) {
- kt = &ipsecsp_targets[i];
- d.d_namlen = kt->kt_namlen;
- if ((error = kernfs_setdirentfileno(&d, i, kfs,
- &kern_targets[0], kt, ap)) != 0)
- break;
- memcpy(d.d_name, kt->kt_name, kt->kt_namlen + 1);
- d.d_type = kt->kt_type;
- if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
- break;
- if (cookies)
- *cookies++ = i + 1;
- n++;
- }
- if (error) {
- ncookies = n;
- break;
- }
-
- TAILQ_FOREACH(sp, &sptailq, tailq) {
- if (uio->uio_resid < UIO_MX)
- break;
- if ((error = kernfs_setdirentfileno_kt(&d, &ipsecsp_kt,
- sp->id, ap)) != 0)
- break;
- d.d_namlen = snprintf(d.d_name, sizeof(d.d_name),
- "%u", sp->id);
- d.d_type = DT_REG;
- if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
- break;
- if (cookies)
- *cookies++ = i + 1;
- n++;
- i++;
- }
- ncookies = n;
- break;
-#endif
-
- default:
- error = ENOTDIR;
- break;
- }
-
- if (ap->a_ncookies) {
- if (error) {
- if (cookies)
- free(*ap->a_cookies, M_TEMP);
- *ap->a_ncookies = 0;
- *ap->a_cookies = NULL;
- } else
- *ap->a_ncookies = ncookies;
- }
-
- uio->uio_offset = i;
- return (error);
-}
-
-int
-kernfs_inactive(v)
- void *v;
-{
- struct vop_inactive_args /* {
- struct vnode *a_vp;
- struct proc *a_p;
- } */ *ap = v;
- struct vnode *vp = ap->a_vp;
- const struct kernfs_node *kfs = VTOKERN(ap->a_vp);
-#ifdef IPSEC
- struct mbuf *m;
- struct secpolicy *sp;
-#endif
-
- VOP_UNLOCK(vp, 0);
- switch (kfs->kfs_type) {
-#ifdef IPSEC
- case KFSipsecsa:
- m = key_setdumpsa_spi(htonl(kfs->kfs_value));
- if (m)
- m_freem(m);
- else
- vgone(vp);
- break;
- case KFSipsecsp:
- sp = key_getspbyid(kfs->kfs_value);
- if (sp)
- key_freesp(sp);
- else {
- /* should never happen as we hold a refcnt */
- vgone(vp);
- }
- break;
-#endif
- default:
- break;
- }
- return (0);
-}
-
-int
-kernfs_reclaim(v)
- void *v;
-{
- struct vop_reclaim_args /* {
- struct vnode *a_vp;
- } */ *ap = v;
-
- return (kernfs_freevp(ap->a_vp));
-}
-
-/*
- * Return POSIX pathconf information applicable to special devices.
- */
-int
-kernfs_pathconf(v)
- void *v;
-{
- struct vop_pathconf_args /* {
- struct vnode *a_vp;
- int a_name;
- register_t *a_retval;
- } */ *ap = v;
-
- switch (ap->a_name) {
- case _PC_LINK_MAX:
- *ap->a_retval = LINK_MAX;
- return (0);
- case _PC_MAX_CANON:
- *ap->a_retval = MAX_CANON;
- return (0);
- case _PC_MAX_INPUT:
- *ap->a_retval = MAX_INPUT;
- return (0);
- case _PC_PIPE_BUF:
- *ap->a_retval = PIPE_BUF;
- return (0);
- case _PC_CHOWN_RESTRICTED:
- *ap->a_retval = 1;
- return (0);
- case _PC_VDISABLE:
- *ap->a_retval = _POSIX_VDISABLE;
- return (0);
- case _PC_SYNC_IO:
- *ap->a_retval = 1;
- return (0);
- default:
- return (EINVAL);
- }
- /* NOTREACHED */
-}
-
-/*
- * Print out the contents of a /dev/fd vnode.
- */
-/* ARGSUSED */
-int
-kernfs_print(v)
- void *v;
-{
-
- printf("tag VT_KERNFS, kernfs vnode\n");
- return (0);
-}
-
-int
-kernfs_link(v)
- void *v;
-{
- struct vop_link_args /* {
- struct vnode *a_dvp;
- struct vnode *a_vp;
- struct componentname *a_cnp;
- } */ *ap = v;
-
- VOP_ABORTOP(ap->a_dvp, ap->a_cnp);
- vput(ap->a_dvp);
- return (EROFS);
-}
-
-int
-kernfs_symlink(v)
- void *v;
-{
- struct vop_symlink_args /* {
- struct vnode *a_dvp;
- struct vnode **a_vpp;
- struct componentname *a_cnp;
- struct vattr *a_vap;
- char *a_target;
- } */ *ap = v;
-
- VOP_ABORTOP(ap->a_dvp, ap->a_cnp);
- vput(ap->a_dvp);
- return (EROFS);
-}
diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/sys/nfs/files.nfs
--- a/netbsd-2.0-xen-sparse/sys/nfs/files.nfs Sun Dec 4 17:24:24 2005
+++ /dev/null Sun Dec 4 19:12:00 2005
@@ -1,34 +0,0 @@
-# $NetBSD: files.nfs,v 1.3 2004/03/11 21:48:43 cl Exp $
-
-deffs fs_nfs.h NFS
-
-defflag opt_nfs_boot.h NFS_BOOT_BOOTP NFS_BOOT_BOOTPARAM NFS_BOOT_DHCP
- NFS_BOOT_GATEWAY NFS_BOOT_TCP
- NFS_BOOT_BOOTSTATIC
-
-defparam opt_nfs_boot.h NFS_BOOT_BOOTP_REQFILE NFS_BOOT_OPTIONS
- NFS_BOOT_RWSIZE
- NFS_BOOTSTATIC_MYIP NFS_BOOTSTATIC_GWIP
- NFS_BOOTSTATIC_MASK NFS_BOOTSTATIC_SERVADDR
- NFS_BOOTSTATIC_SERVER
-
-defflag opt_nfs.h NFS_V2_ONLY
-
-defflag NFSSERVER
-
-file nfs/krpc_subr.c nfs
-file nfs/nfs_bio.c nfs
-file nfs/nfs_boot.c nfs
-file nfs/nfs_bootdhcp.c nfs & (nfs_boot_bootp | nfs_boot_dhcp)
-file nfs/nfs_bootparam.c nfs & nfs_boot_bootparam
-file nfs/nfs_bootstatic.c nfs & nfs_boot_bootstatic
-file nfs/nfs_kq.c nfs
-file nfs/nfs_node.c nfs
-file nfs/nfs_nqlease.c nfsserver | nfs
-file nfs/nfs_serv.c nfsserver
-file nfs/nfs_socket.c nfsserver | nfs
-file nfs/nfs_srvcache.c nfsserver
-file nfs/nfs_subs.c nfsserver | nfs
-file nfs/nfs_syscalls.c nfsserver | nfs
-file nfs/nfs_vfsops.c nfs
-file nfs/nfs_vnops.c nfs
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|