# HG changeset patch
# User Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
# Date 1221198460 -32400
# Node ID ec8eaab557d867dca3e8cbb3e0384d797929102a
# Parent 4ddd63b4be9be2440d213da60b10c20327e5c515
# Parent 346c073ed6a4f0debca36588039d649e2efd93c3
merge with xen-unstable.hg
---
.hgignore | 1
Config.mk | 4
docs/misc/vtd.txt | 27
docs/src/user.tex | 4
stubdom/README | 8
tools/examples/init.d/xendomains | 6
tools/examples/xend-config.sxp | 4
tools/examples/xmexample.hvm | 2
tools/examples/xmexample.hvm-stubdom | 2
tools/flask/policy/Makefile | 234 +++++
tools/flask/policy/Rules.modular | 166 +++
tools/flask/policy/Rules.monolithic | 196 ++++
tools/flask/policy/policy/constraints | 27
tools/flask/policy/policy/flask/Makefile | 41
tools/flask/policy/policy/flask/access_vectors | 166 +++
tools/flask/policy/policy/flask/initial_sids | 17
tools/flask/policy/policy/flask/mkaccess_vector.sh | 227 +++++
tools/flask/policy/policy/flask/mkflask.sh | 95 ++
tools/flask/policy/policy/flask/security_classes | 20
tools/flask/policy/policy/global_booleans | 5
tools/flask/policy/policy/global_tunables | 6
tools/flask/policy/policy/mcs | 324 +++++++
tools/flask/policy/policy/mls | 354 ++++++++
tools/flask/policy/policy/modules.conf | 21
tools/flask/policy/policy/modules/xen/xen.if | 1
tools/flask/policy/policy/modules/xen/xen.te | 135 +++
tools/flask/policy/policy/support/loadable_module.spt | 166 +++
tools/flask/policy/policy/support/misc_macros.spt | 32
tools/flask/policy/policy/systemuser | 19
tools/flask/policy/policy/users | 39
tools/ioemu/hw/cirrus_vga.c | 3
tools/ioemu/hw/pass-through.c | 146 +++
tools/ioemu/hw/pass-through.h | 15
tools/ioemu/hw/pci.c | 5
tools/ioemu/hw/pt-msi.c | 2
tools/ioemu/hw/vga.c | 8
tools/ioemu/hw/xen_machine_fv.c | 4
tools/ioemu/vl.h | 2
tools/libxc/ia64/xc_ia64_linux_save.c | 6
tools/libxc/xc_domain_save.c | 65 -
tools/libxc/xc_evtchn.c | 15
tools/libxc/xc_private.c | 10
tools/libxc/xenctrl.h | 6
tools/libxc/xenguest.h | 2
tools/python/Makefile | 26
tools/python/xen/util/xsconstants.py | 6
tools/python/xen/util/xsm/flask/flask.py | 8
tools/python/xen/util/xsm/xsm.py | 20
tools/python/xen/xend/XendConfig.py | 2
tools/python/xen/xend/XendDomainInfo.py | 6
tools/python/xen/xend/XendOptions.py | 8
tools/python/xen/xend/server/blkif.py | 2
tools/python/xen/xend/server/netif.py | 2
tools/python/xen/xend/server/pciif.py | 2
tools/python/xen/xm/create.py | 6
tools/python/xen/xm/main.py | 2
tools/xcutils/lsevtchn.c | 48 -
tools/xcutils/xc_save.c | 117 +-
tools/xenstore/xs.c | 7
tools/xentrace/formats | 149 ++-
tools/xentrace/xentrace.c | 399 ++++++++-
xen/arch/x86/acpi/Makefile | 2
xen/arch/x86/acpi/cpu_idle.c | 434 ++-------
xen/arch/x86/acpi/cpufreq/cpufreq.c | 26
xen/arch/x86/acpi/cpufreq/powernow.c | 4
xen/arch/x86/acpi/cpuidle_menu.c | 132 +++
xen/arch/x86/domain.c | 24
xen/arch/x86/domain_build.c | 1
xen/arch/x86/domctl.c | 47 -
xen/arch/x86/hpet.c | 30
xen/arch/x86/hvm/hvm.c | 5
xen/arch/x86/hvm/svm/intr.c | 4
xen/arch/x86/hvm/svm/svm.c | 36
xen/arch/x86/hvm/vmx/intr.c | 2
xen/arch/x86/hvm/vmx/vmx.c | 49 -
xen/arch/x86/io_apic.c | 13
xen/arch/x86/irq.c | 23
xen/arch/x86/mm.c | 783 +++++++++++-------
xen/arch/x86/mm/hap/hap.c | 1
xen/arch/x86/mm/shadow/common.c | 71 +
xen/arch/x86/mm/shadow/multi.c | 210 ++++
xen/arch/x86/mm/shadow/private.h | 43
xen/arch/x86/physdev.c | 80 -
xen/arch/x86/platform_hypercall.c | 16
xen/arch/x86/smpboot.c | 40
xen/arch/x86/time.c | 7
xen/arch/x86/traps.c | 45 +
xen/common/domain.c | 4
xen/common/domctl.c | 19
xen/common/event_channel.c | 21
xen/common/rangeset.c | 9
xen/common/sched_credit.c | 5
xen/common/schedule.c | 123 ++
xen/common/sysctl.c | 12
xen/common/trace.c | 45 -
xen/drivers/acpi/hwregs.c | 2
xen/drivers/passthrough/iommu.c | 4
xen/drivers/passthrough/vtd/iommu.c | 22
xen/include/asm-ia64/shadow.h | 2
xen/include/asm-x86/bitops.h | 4
xen/include/asm-x86/guest_access.h | 6
xen/include/asm-x86/hvm/trace.h | 49 -
xen/include/asm-x86/io_apic.h | 2
xen/include/asm-x86/mm.h | 38
xen/include/asm-x86/msr-index.h | 12
xen/include/asm-x86/shadow.h | 2
xen/include/public/trace.h | 51 -
xen/include/xen/cpuidle.h | 82 +
xen/include/xen/iommu.h | 1
xen/include/xen/sched.h | 22
xen/include/xen/trace.h | 2
xen/include/xsm/xsm.h | 148 ++-
xen/xsm/dummy.c | 130 ++
xen/xsm/flask/hooks.c | 318 ++++++-
xen/xsm/flask/include/av_perm_to_string.h | 21
xen/xsm/flask/include/av_permissions.h | 63 -
xen/xsm/flask/include/flask.h | 11
xen/xsm/flask/include/initial_sid_to_string.h | 3
xen/xsm/flask/include/security.h | 6
xen/xsm/flask/ss/policydb.h | 13
xen/xsm/flask/ss/services.c | 40
121 files changed, 5439 insertions(+), 1429 deletions(-)
diff -r 4ddd63b4be9b -r ec8eaab557d8 .hgignore
--- a/.hgignore Fri Sep 12 14:32:45 2008 +0900
+++ b/.hgignore Fri Sep 12 14:47:40 2008 +0900
@@ -185,7 +185,6 @@
^tools/misc/xenperf$
^tools/pygrub/build/.*$
^tools/python/build/.*$
-^tools/python/xen/util/xsm/xsm\.py$
^tools/security/secpol_tool$
^tools/security/xen/.*$
^tools/security/xensec_tool$
diff -r 4ddd63b4be9b -r ec8eaab557d8 Config.mk
--- a/Config.mk Fri Sep 12 14:32:45 2008 +0900
+++ b/Config.mk Fri Sep 12 14:47:40 2008 +0900
@@ -86,11 +86,7 @@ QEMU_REMOTE=http://xenbits.xensource.com
# Mercurial in-tree version, or a local directory, or a git URL.
# CONFIG_QEMU ?= ioemu
# CONFIG_QEMU ?= ../qemu-xen.git
-ifeq ($(XEN_TARGET_ARCH),ia64)
-CONFIG_QEMU ?= ioemu
-else
CONFIG_QEMU ?= $(QEMU_REMOTE)
-endif
# Optional components
XENSTAT_XENTOP ?= y
diff -r 4ddd63b4be9b -r ec8eaab557d8 docs/misc/vtd.txt
--- a/docs/misc/vtd.txt Fri Sep 12 14:32:45 2008 +0900
+++ b/docs/misc/vtd.txt Fri Sep 12 14:47:40 2008 +0900
@@ -1,8 +1,9 @@ Title : How to do PCI Passthrough with
Title : How to do PCI Passthrough with VT-d
Authors : Allen Kay <allen.m.kay@xxxxxxxxx>
Weidong Han <weidong.han@xxxxxxxxx>
+ Yuji Shimada <shimada-yxb@xxxxxxxxxxxxxxx>
Created : October-24-2007
-Updated : August-06-2008
+Updated : September-09-2008
How to turn on VT-d in Xen
--------------------------
@@ -106,3 +107,27 @@ http://h10010.www1.hp.com/wwpc/us/en/en/
For more information, please refer to http://wiki.xensource.com/xenwiki/VTdHowTo.
+
+Assigning devices to HVM domains
+--------------------------------
+
+Most device types such as NIC, HBA, EHCI and UHCI can be assigned to
+an HVM domain.
+
+But some devices have design features which make them unsuitable for
+assignment to an HVM domain. Examples include:
+
+  * The device has an internal resource, such as private memory,
+    which is mapped into the memory address space through a BAR
+    (Base Address Register).
+  * The driver submits commands containing pointers to buffers
+    within that internal resource; the device decodes each pointer
+    (address) and accesses the buffer directly.
+
+In an HVM domain the BAR is virtualized, so the host-BAR value and
+the guest-BAR value differ. The device and the driver therefore see
+the internal resource, and any buffer within it, at different
+addresses. As a result, the device cannot access the buffer
+specified by the driver.
+
+Such devices currently do not work when assigned to an HVM domain.
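A device's BARs can be listed from dom0 with lspci; this shows what the
device maps, though not whether it dereferences driver-written pointers
into those regions (that is a design property of the device). The BDF
08:00.0 below is illustrative:

    # List the memory/IO regions (BARs) of a candidate device.
    lspci -v -s 08:00.0 | grep -E 'Memory at|I/O ports at'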
diff -r 4ddd63b4be9b -r ec8eaab557d8 docs/src/user.tex
--- a/docs/src/user.tex Fri Sep 12 14:32:45 2008 +0900
+++ b/docs/src/user.tex Fri Sep 12 14:47:40 2008 +0900
@@ -4252,7 +4252,7 @@ directory of the Xen source distribution
\section{Online References}
The official Xen web site can be found at:
-\begin{quote} {\tt http://www.xensource.com}
+\begin{quote} {\tt http://www.xen.org}
\end{quote}
This contains links to the latest versions of all online
@@ -4282,7 +4282,7 @@ mailing lists and subscription informati
Subscribe at: \\
{\small {\tt http://lists.xensource.com/xen-announce}}
\item[xen-changelog@xxxxxxxxxxxxxxxxxxx] Changelog feed
- from the unstable and 2.0 trees - developer oriented. Subscribe at: \\
+ from the unstable and 3.x trees - developer oriented. Subscribe at: \\
{\small {\tt http://lists.xensource.com/xen-changelog}}
\end{description}
diff -r 4ddd63b4be9b -r ec8eaab557d8 stubdom/README
--- a/stubdom/README Fri Sep 12 14:32:45 2008 +0900
+++ b/stubdom/README Fri Sep 12 14:47:40 2008 +0900
@@ -27,7 +27,7 @@ device_model = '/usr/lib/xen/bin/stubdom
- disable anything related to dom0, like pty serial assignments
-Create /etc/xen/stubdom-hvmconfig (where "hvmconfig" is the name of your HVM
+Create /etc/xen/hvmconfig-dm (where "hvmconfig" is the name of your HVM
guest) with
kernel = "/usr/lib/xen/boot/ioemu-stubdom.gz"
@@ -52,7 +52,7 @@ vnc = 0
vnc = 0
sdl = 0
- - In stubdom-hvmconfig, set an sdl vfb:
+ - In hvmconfig-dm, set an sdl vfb:
vfb = [ 'type=sdl' ]
@@ -65,7 +65,7 @@ vnc = 1
vnc = 1
vnclisten = "172.30.206.1"
- - In stubdom-hvmconfig, fill the reserved vif with the same IP, for instance:
+ - In hvmconfig-dm, fill the reserved vif with the same IP, for instance:
vif = [ 'ip=172.30.206.1', 'ip=10.0.1.1,mac=aa:00:00:12:23:34']
@@ -76,7 +76,7 @@ vnc = 0
vnc = 0
sdl = 0
- - In stubdom-hvmconfig, set a vnc vfb:
+ - In hvmconfig-dm, set a vnc vfb:
vfb = [ 'type=vnc' ]
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/examples/init.d/xendomains
--- a/tools/examples/init.d/xendomains Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/examples/init.d/xendomains Fri Sep 12 14:47:40 2008 +0900
@@ -327,15 +327,17 @@ stop()
if test $id = 0; then continue; fi
echo -n " $name"
if test "$XENDOMAINS_AUTO_ONLY" = "true"; then
- case $name in
+ eval "
+ case \"\$name\" in
($NAMES)
# nothing
;;
(*)
- echo -n "(skip)"
+ echo -n '(skip)'
continue
;;
esac
+ "
fi
	    # XENDOMAINS_SYSRQ could be something like just "s"
# or "s e i u" or even "s e s i u o"
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/examples/xend-config.sxp
--- a/tools/examples/xend-config.sxp Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/examples/xend-config.sxp Fri Sep 12 14:47:40 2008 +0900
@@ -14,6 +14,10 @@
#(logfile /var/log/xen/xend.log)
#(loglevel DEBUG)
+# Uncomment the line below. Set the value to flask, acm, or dummy to
+# select a security module.
+
+#(xsm_module_name dummy)
# The Xen-API server configuration.
#
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/examples/xmexample.hvm
--- a/tools/examples/xmexample.hvm Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/examples/xmexample.hvm Fri Sep 12 14:47:40 2008 +0900
@@ -220,7 +220,7 @@ serial='pty'
# Configure guest CPUID responses:
#
#cpuid=[ '1:ecx=xxxxxxxxxxx00xxxxxxxxxxxxxxxxxxx,
-# eax=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' ]
+# eax=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' ]
# - Unset the SSE4 features (CPUID.1[ECX][20-19])
# - Default behaviour for all other bits in ECX and EAX registers.
#
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/examples/xmexample.hvm-stubdom
--- a/tools/examples/xmexample.hvm-stubdom Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/examples/xmexample.hvm-stubdom Fri Sep 12 14:47:40 2008 +0900
@@ -236,7 +236,7 @@ stdvga=0
# Configure guest CPUID responses:
#
#cpuid=[ '1:ecx=xxxxxxxxxxx00xxxxxxxxxxxxxxxxxxx,
-# eax=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' ]
+# eax=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' ]
# - Unset the SSE4 features (CPUID.1[ECX][20-19])
# - Default behaviour for all other bits in ECX and EAX registers.
#
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/flask/policy/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/Makefile Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,234 @@
+#
+# Makefile for the security policy.
+#
+# Targets:
+#
+# install - compile and install the policy configuration, and context files.
+# load - compile, install, and load the policy configuration.
+# reload - compile, install, and load/reload the policy configuration.
+# policy - compile the policy configuration locally for testing/development.
+#
+# The default target is 'policy'.
+#
+
+########################################
+#
+# Configurable portions of the Makefile
+#
+
+# Policy version
+# By default, checkpolicy will create the highest
+# version policy it supports. Setting this will
+# override the version.
+OUTPUT_POLICY = 20
+
+# Policy Type
+# strict, targeted,
+# strict-mls, targeted-mls,
+# strict-mcs, targeted-mcs
+TYPE = strict
+
+# Policy Name
+# If set, this will be used as the policy
+# name. Otherwise the policy type will be
+# used for the name.
+NAME = xenrefpolicy
+
+# Distribution
+# Some distributions have portions of policy
+# for programs or configurations specific to the
+# distribution. Setting this will enable options
+# for the distribution.
+# redhat, gentoo, debian, and suse are current options.
+# Fedora users should enable redhat.
+#DISTRO =
+
+# Build monolithic policy. Putting n here
+# will build a loadable module policy.
+MONOLITHIC=y
+
+# Uncomment this to disable command echoing
+#QUIET:=@
+
+########################################
+#
+# NO OPTIONS BELOW HERE
+#
+
+# executable paths
+PREFIX := /usr
+BINDIR := $(PREFIX)/bin
+SBINDIR := $(PREFIX)/sbin
+CHECKPOLICY := $(BINDIR)/checkpolicy
+CHECKMODULE := $(BINDIR)/checkmodule
+SEMOD_PKG := $(BINDIR)/semodule_package
+LOADPOLICY := $(SBINDIR)/flask-loadpolicy
+
+CFLAGS := -Wall
+
+# policy source layout
+POLDIR := policy
+MODDIR := $(POLDIR)/modules
+FLASKDIR := $(POLDIR)/flask
+SECCLASS := $(FLASKDIR)/security_classes
+ISIDS := $(FLASKDIR)/initial_sids
+AVS := $(FLASKDIR)/access_vectors
+
+#policy building support tools
+SUPPORT := support
+FCSORT := tmp/fc_sort
+
+# config file paths
+GLOBALTUN := $(POLDIR)/global_tunables
+GLOBALBOOL := $(POLDIR)/global_booleans
+MOD_CONF := $(POLDIR)/modules.conf
+TUNABLES := $(POLDIR)/tunables.conf
+BOOLEANS := $(POLDIR)/booleans.conf
+
+# install paths
+TOPDIR = $(DESTDIR)/etc/xen/
+INSTALLDIR = $(TOPDIR)/$(NAME)
+SRCPATH = $(INSTALLDIR)/src
+USERPATH = $(INSTALLDIR)/users
+CONTEXTPATH = $(INSTALLDIR)/contexts
+
+# enable MLS if requested.
+ifneq ($(findstring -mls,$(TYPE)),)
+ override M4PARAM += -D enable_mls
+ CHECKPOLICY += -M
+ CHECKMODULE += -M
+endif
+
+# enable MLS if MCS requested.
+ifneq ($(findstring -mcs,$(TYPE)),)
+ override M4PARAM += -D enable_mcs
+ CHECKPOLICY += -M
+ CHECKMODULE += -M
+endif
+
+# compile targeted policy if requested.
+ifneq ($(findstring targeted,$(TYPE)),)
+ override M4PARAM += -D targeted_policy
+endif
+
+# enable distribution-specific policy
+ifneq ($(DISTRO),)
+ override M4PARAM += -D distro_$(DISTRO)
+endif
+
+ifneq ($(OUTPUT_POLICY),)
+ CHECKPOLICY += -c $(OUTPUT_POLICY)
+endif
+
+ifeq ($(NAME),)
+ NAME := $(TYPE)
+endif
+
+# determine the policy version and current kernel version if possible
+PV := $(shell $(CHECKPOLICY) -V |cut -f 1 -d ' ')
+KV := $(shell cat /selinux/policyvers)
+
+# don't print version warnings if we are unable to determine
+# the currently running kernel's policy version
+ifeq ($(KV),)
+ KV := $(PV)
+endif
+
+FC := file_contexts
+POLVER := policy.$(PV)
+
+M4SUPPORT = $(wildcard $(POLDIR)/support/*.spt)
+
+APPCONF := config/appconfig-$(TYPE)
+APPDIR := $(CONTEXTPATH)
+APPFILES := $(INSTALLDIR)/booleans
+CONTEXTFILES += $(wildcard $(APPCONF)/*_context*) $(APPCONF)/media
+USER_FILES := $(POLDIR)/systemuser $(POLDIR)/users
+
+ALL_LAYERS := $(filter-out $(MODDIR)/CVS,$(shell find $(wildcard $(MODDIR)/*) -maxdepth 0 -type d))
+
+GENERATED_TE := $(basename $(foreach dir,$(ALL_LAYERS),$(wildcard $(dir)/*.te.in)))
+GENERATED_IF := $(basename $(foreach dir,$(ALL_LAYERS),$(wildcard $(dir)/*.if.in)))
+GENERATED_FC := $(basename $(foreach dir,$(ALL_LAYERS),$(wildcard $(dir)/*.fc.in)))
+
+# sort here since it removes duplicates, which can happen
+# when a generated file is already generated
+DETECTED_MODS := $(sort $(foreach dir,$(ALL_LAYERS),$(wildcard $(dir)/*.te)) $(GENERATED_TE))
+
+# modules.conf setting for base module
+MODBASE := base
+
+# modules.conf setting for module
+MODMOD := module
+
+# extract settings from modules.conf
+BASE_MODS := $(foreach mod,$(shell awk '/^[[:blank:]]*[[:alpha:]]/{ if ($$3 == "$(MODBASE)") print $$1 }' $(MOD_CONF) 2> /dev/null),$(subst ./,,$(shell find -iname $(mod).te)))
+MOD_MODS := $(foreach mod,$(shell awk '/^[[:blank:]]*[[:alpha:]]/{ if ($$3 == "$(MODMOD)") print $$1 }' $(MOD_CONF) 2> /dev/null),$(subst ./,,$(shell find -iname $(mod).te)))
+
+HOMEDIR_TEMPLATE = tmp/homedir_template
+
+########################################
+#
+# Load appropriate rules
+#
+
+ifeq ($(MONOLITHIC),y)
+ include Rules.monolithic
+else
+ include Rules.modular
+endif
+
+########################################
+#
+# Create config files
+#
+conf: $(MOD_CONF) $(BOOLEANS) $(GENERATED_TE) $(GENERATED_IF) $(GENERATED_FC)
+
+$(MOD_CONF) $(BOOLEANS): $(POLXML)
+ @echo "Updating $(MOD_CONF) and $(BOOLEANS)"
+	$(QUIET) cd $(DOCS) && ../$(GENDOC) -t ../$(BOOLEANS) -m ../$(MOD_CONF) -x ../$(POLXML)
+
+########################################
+#
+# Appconfig files
+#
+install-appconfig: $(APPFILES)
+
+$(INSTALLDIR)/booleans: $(BOOLEANS)
+ @mkdir -p $(INSTALLDIR)
+ $(QUIET) egrep '^[[:blank:]]*[[:alpha:]]' $(BOOLEANS) \
+ | sed -e 's/false/0/g' -e 's/true/1/g' > tmp/booleans
+ $(QUIET) install -m 644 tmp/booleans $@
+
+########################################
+#
+# Install policy sources
+#
+install-src:
+ rm -rf $(SRCPATH)/policy.old
+ -mv $(SRCPATH)/policy $(SRCPATH)/policy.old
+ mkdir -p $(SRCPATH)/policy
+ cp -R . $(SRCPATH)/policy
+
+########################################
+#
+# Clean everything
+#
+bare: clean
+ rm -f $(POLXML)
+ rm -f $(SUPPORT)/*.pyc
+ rm -f $(FCSORT)
+ rm -f $(MOD_CONF)
+ rm -f $(BOOLEANS)
+ rm -fR $(HTMLDIR)
+ifneq ($(GENERATED_TE),)
+ rm -f $(GENERATED_TE)
+endif
+ifneq ($(GENERATED_IF),)
+ rm -f $(GENERATED_IF)
+endif
+ifneq ($(GENERATED_FC),)
+ rm -f $(GENERATED_FC)
+endif
+
+.PHONY: install-src install-appconfig conf html bare
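With the knobs above, a typical invocation might be (a sketch, assuming
checkpolicy and friends are installed under /usr as the variables above
expect):

    # Build a strict MCS policy locally without installing it.
    make -C tools/flask/policy TYPE=strict-mcs MONOLITHIC=y policy

    # Compile, install under /etc/xen/xenrefpolicy, and load it.
    make -C tools/flask/policy load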
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/flask/policy/Rules.modular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/Rules.modular Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,166 @@
+########################################
+#
+# Rules and Targets for building modular policies
+#
+
+ALL_MODULES := $(filter $(BASE_MODS) $(MOD_MODS),$(DETECTED_MODS))
+ALL_INTERFACES := $(ALL_MODULES:.te=.if)
+
+BASE_PKG := base.pp
+BASE_FC := base.fc
+
+BASE_SECTIONS := tmp/pre_te_files.conf tmp/generated_definitions.conf tmp/all_interfaces.conf tmp/all_attrs_types.conf $(GLOBALBOOL) $(GLOBALTUN) tmp/only_te_rules.conf tmp/all_post.conf
+
+BASE_PRE_TE_FILES := $(SECCLASS) $(ISIDS) $(AVS) $(M4SUPPORT) $(POLDIR)/mls $(POLDIR)/mcs
+BASE_TE_FILES := $(BASE_MODS)
+BASE_POST_TE_FILES := $(POLDIR)/systemuser $(POLDIR)/constraints
+BASE_FC_FILES := $(BASE_MODS:.te=.fc)
+
+MOD_MODULES := $(MOD_MODS:.te=.mod)
+MOD_PKGS := $(notdir $(MOD_MODS:.te=.pp))
+
+# search layer dirs for source files
+vpath %.te $(ALL_LAYERS)
+vpath %.if $(ALL_LAYERS)
+vpath %.fc $(ALL_LAYERS)
+
+########################################
+#
+# default action: create all module packages
+#
+default: base
+
+base: $(BASE_PKG)
+
+modules: $(MOD_PKGS)
+
+#policy: $(POLVER)
+#install: $(LOADPATH) $(FCPATH) $(APPFILES) $(USERPATH)/local.users
+#load: tmp/load
+
+########################################
+#
+# Create a base module package
+#
+$(BASE_PKG): tmp/base.mod $(BASE_FC)
+ @echo "Creating $(NAME) base module package"
+ $(QUIET) $(SEMOD_PKG) $@ $^
+
+########################################
+#
+# Compile a base module
+#
+tmp/base.mod: base.conf
+ @echo "Compiling $(NAME) base module"
+ $(QUIET) $(CHECKMODULE) $^ -o $@
+
+########################################
+#
+# Construct a base module policy.conf
+#
+base.conf: $(BASE_SECTIONS)
+ @echo "Creating $(NAME) base module policy.conf"
+# checkpolicy can use the #line directives provided by -s for error reporting:
+ $(QUIET) m4 -D self_contained_policy $(M4PARAM) -s $^ > tmp/$@.tmp
+	$(QUIET) sed -e /^portcon/d -e /^nodecon/d -e /^netifcon/d < tmp/$@.tmp > $@
+# the ordering of these ocontexts matters:
+ $(QUIET) grep ^portcon tmp/$@.tmp >> $@ || true
+ $(QUIET) grep ^netifcon tmp/$@.tmp >> $@ || true
+ $(QUIET) grep ^nodecon tmp/$@.tmp >> $@ || true
+
+tmp/pre_te_files.conf: $(BASE_PRE_TE_FILES)
+ @test -d tmp || mkdir -p tmp
+ $(QUIET) cat $^ > $@
+
+tmp/generated_definitions.conf: $(ALL_LAYERS) $(BASE_TE_FILES)
+ @test -d tmp || mkdir -p tmp
+# define all available object classes
+ $(QUIET) $(GENPERM) $(AVS) $(SECCLASS) > $@
+# per-userdomain templates
+ $(QUIET) echo "define(\`per_userdomain_templates',\`" >> $@
+ $(QUIET) for i in $(patsubst %.te,%,$(notdir $(ALL_MODULES))); do \
+		echo "ifdef(\`""$$i""_per_userdomain_template',\`""$$i""_per_userdomain_template("'$$*'")')" \
+ >> $@ ;\
+ done
+ $(QUIET) echo "')" >> $@
+# define foo.te
+ $(QUIET) for i in $(notdir $(BASE_TE_FILES)); do \
+ echo "define(\`$$i')" >> $@ ;\
+ done
+ $(QUIET) $(SETTUN) $(BOOLEANS) >> $@
+
+tmp/all_interfaces.conf: $(M4SUPPORT) $(ALL_INTERFACES)
+ifeq ($(ALL_INTERFACES),)
+	$(error No enabled modules! $(notdir $(MOD_CONF)) may need to be generated by using "make conf")
+endif
+ @test -d tmp || mkdir -p tmp
+ $(QUIET) m4 $^ | sed -e s/dollarsstar/\$$\*/g > $@
+
+tmp/all_te_files.conf: $(BASE_TE_FILES)
+ifeq ($(BASE_TE_FILES),)
+	$(error No enabled modules! $(notdir $(MOD_CONF)) may need to be generated by using "make conf")
+endif
+ @test -d tmp || mkdir -p tmp
+ $(QUIET) cat $^ > $@
+
+tmp/post_te_files.conf: $(BASE_POST_TE_FILES)
+ @test -d tmp || mkdir -p tmp
+ $(QUIET) cat $^ > $@
+
+# extract attributes and put them first. extract post te stuff
+# like genfscon and put last. portcon, nodecon, and netifcon
+# are delayed since they are generated by m4
+tmp/all_attrs_types.conf tmp/only_te_rules.conf tmp/all_post.conf: tmp/all_te_files.conf tmp/post_te_files.conf
+	$(QUIET) grep ^attribute tmp/all_te_files.conf > tmp/all_attrs_types.conf || true
+	$(QUIET) grep '^type ' tmp/all_te_files.conf >> tmp/all_attrs_types.conf
+	$(QUIET) cat tmp/post_te_files.conf > tmp/all_post.conf
+	$(QUIET) grep '^sid ' tmp/all_te_files.conf >> tmp/all_post.conf || true
+	$(QUIET) egrep '^fs_use_(xattr|task|trans)' tmp/all_te_files.conf >> tmp/all_post.conf || true
+	$(QUIET) grep ^genfscon tmp/all_te_files.conf >> tmp/all_post.conf || true
+ $(QUIET) sed -r -e /^attribute/d -e '/^type /d' -e /^genfscon/d \
+ -e '/^sid /d' -e '/^fs_use_(xattr|task|trans)/d' \
+ < tmp/all_te_files.conf > tmp/only_te_rules.conf
+
+########################################
+#
+# Construct base module file contexts
+#
+$(BASE_FC): $(M4SUPPORT) tmp/generated_definitions.conf $(BASE_FC_FILES) $(FCSORT)
+ifeq ($(BASE_FC_FILES),)
+	$(error No enabled modules! $(notdir $(MOD_CONF)) may need to be generated by using "make conf")
+endif
+ @echo "Creating $(NAME) base module file contexts."
+ @test -d tmp || mkdir -p tmp
+	$(QUIET) m4 $(M4PARAM) $(M4SUPPORT) tmp/generated_definitions.conf $(BASE_FC_FILES) > tmp/$@.tmp
+ $(QUIET) grep -e HOME -e ROLE tmp/$@.tmp > $(HOMEDIR_TEMPLATE)
+ $(QUIET) sed -i -e /HOME/d -e /ROLE/d tmp/$@.tmp
+ $(QUIET) $(FCSORT) tmp/$@.tmp $@
+
+########################################
+#
+# Build module packages
+#
+tmp/%.mod: $(M4SUPPORT) tmp/generated_definitions.conf tmp/all_interfaces.conf %.te
+	@if test -z "$(filter $^,$(MOD_MODS))"; then \
+		echo "The $(notdir $(basename $@)) module is not configured to be compiled as a loadable module." ;\
+		false ;\
+	fi
+	@echo "Compiling $(NAME) $(@F) module"
+ $(QUIET) m4 $(M4PARAM) -s $^ > $(@:.mod=.tmp)
+ $(QUIET) $(CHECKMODULE) -m $(@:.mod=.tmp) -o $@
+
+%.pp: tmp/%.mod %.fc
+ @echo "Creating $(NAME) $(@F) policy package"
+ $(QUIET) $(SEMOD_PKG) $@ $^
+
+########################################
+#
+# Clean the sources
+#
+clean:
+ rm -fR tmp
+ rm -f base.conf
+ rm -f *.pp
+ rm -f $(BASE_FC)
+
+.PHONY: default base modules clean
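Stripped of the Makefile variables, the per-module pipeline above
reduces to roughly the following, mirroring the recipes' positional
semodule_package invocation (foo is a hypothetical module name):

    # expand the module source with the support macros ...
    m4 -s policy/support/*.spt tmp/generated_definitions.conf \
        tmp/all_interfaces.conf policy/modules/xen/foo.te > tmp/foo.tmp
    # ... compile it into a loadable module ...
    checkmodule -m tmp/foo.tmp -o tmp/foo.mod
    # ... and package it together with its file contexts
    semodule_package foo.pp tmp/foo.mod foo.fc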
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/flask/policy/Rules.monolithic
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/Rules.monolithic Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,196 @@
+########################################
+#
+# Rules and Targets for building monolithic policies
+#
+
+# install paths
+POLICYPATH = $(INSTALLDIR)/policy
+LOADPATH = $(POLICYPATH)/$(POLVER)
+FCPATH = $(CONTEXTPATH)/files/file_contexts
+HOMEDIRPATH = $(CONTEXTPATH)/files/homedir_template
+
+# for monolithic policy use all base and module to create policy
+ENABLEMOD := $(BASE_MODS) $(MOD_MODS)
+
+ALL_MODULES := $(filter $(ENABLEMOD),$(DETECTED_MODS))
+
+ALL_INTERFACES := $(ALL_MODULES:.te=.if)
+ALL_TE_FILES := $(ALL_MODULES)
+ALL_FC_FILES := $(ALL_MODULES:.te=.fc)
+
+PRE_TE_FILES := $(SECCLASS) $(ISIDS) $(AVS) $(M4SUPPORT) $(POLDIR)/mls $(POLDIR)/mcs
+POST_TE_FILES := $(POLDIR)/systemuser $(POLDIR)/users $(POLDIR)/constraints
+
+POLICY_SECTIONS := tmp/pre_te_files.conf tmp/generated_definitions.conf tmp/all_interfaces.conf tmp/all_attrs_types.conf $(GLOBALBOOL) $(GLOBALTUN) tmp/only_te_rules.conf tmp/all_post.conf
+
+########################################
+#
+# default action: build policy locally
+#
+default: policy
+
+policy: $(POLVER)
+
+install: $(LOADPATH) $(FCPATH) $(APPFILES) $(USERPATH)/local.users
+
+load: tmp/load
+
+########################################
+#
+# Build a binary policy locally
+#
+$(POLVER): policy.conf
+ @echo "Compiling $(NAME) $(POLVER)"
+ifneq ($(PV),$(KV))
+ @echo
+	@echo "WARNING: Policy version mismatch! Is your OUTPUT_POLICY set correctly?"
+ @echo
+endif
+ $(QUIET) $(CHECKPOLICY) $^ -o $@
+
+########################################
+#
+# Install a binary policy
+#
+$(LOADPATH): policy.conf
+ @mkdir -p $(POLICYPATH)
+ @echo "Compiling and installing $(NAME) $(LOADPATH)"
+ifneq ($(PV),$(KV))
+ @echo
+	@echo "WARNING: Policy version mismatch! Is your OUTPUT_POLICY set correctly?"
+ @echo
+endif
+ $(QUIET) $(CHECKPOLICY) $^ -o $@
+
+########################################
+#
+# Load the binary policy
+#
+reload tmp/load: $(LOADPATH) $(FCPATH)
+ @echo "Loading $(NAME) $(LOADPATH)"
+ $(QUIET) $(LOADPOLICY) -q $(LOADPATH)
+ @touch tmp/load
+
+########################################
+#
+# Construct a monolithic policy.conf
+#
+policy.conf: $(POLICY_SECTIONS)
+ @echo "Creating $(NAME) policy.conf"
+# checkpolicy can use the #line directives provided by -s for error reporting:
+ $(QUIET) m4 -D self_contained_policy $(M4PARAM) -s $^ > tmp/$@.tmp
+	$(QUIET) sed -e /^portcon/d -e /^nodecon/d -e /^netifcon/d < tmp/$@.tmp > $@
+
+tmp/pre_te_files.conf: $(PRE_TE_FILES)
+ @test -d tmp || mkdir -p tmp
+ $(QUIET) cat $^ > $@
+
+tmp/generated_definitions.conf: $(ALL_LAYERS) $(ALL_TE_FILES)
+# per-userdomain templates:
+ @test -d tmp || mkdir -p tmp
+ $(QUIET) echo "define(\`per_userdomain_templates',\`" > $@
+ $(QUIET) for i in $(patsubst %.te,%,$(notdir $(ALL_MODULES))); do \
+		echo "ifdef(\`""$$i""_per_userdomain_template',\`""$$i""_per_userdomain_template("'$$*'")')" \
+ >> $@ ;\
+ done
+ $(QUIET) echo "')" >> $@
+# define foo.te
+ $(QUIET) for i in $(notdir $(ALL_MODULES)); do \
+ echo "define(\`$$i')" >> $@ ;\
+ done
+# $(QUIET) $(SETTUN) $(BOOLEANS) >> $@
+
+tmp/all_interfaces.conf: $(M4SUPPORT) $(ALL_INTERFACES)
+ifeq ($(ALL_INTERFACES),)
+	$(error No enabled modules! $(notdir $(MOD_CONF)) may need to be generated by using "make conf")
+endif
+ @test -d tmp || mkdir -p tmp
+ $(QUIET) m4 $^ | sed -e s/dollarsstar/\$$\*/g > $@
+
+tmp/all_te_files.conf: $(ALL_TE_FILES)
+ifeq ($(ALL_TE_FILES),)
+	$(error No enabled modules! $(notdir $(MOD_CONF)) may need to be generated by using "make conf")
+endif
+ @test -d tmp || mkdir -p tmp
+ $(QUIET) cat $^ > $@
+
+tmp/post_te_files.conf: $(POST_TE_FILES)
+ @test -d tmp || mkdir -p tmp
+ $(QUIET) cat $^ > $@
+
+# extract attributes and put them first. extract post te stuff
+# like genfscon and put last. portcon, nodecon, and netifcon
+# are delayed since they are generated by m4
+tmp/all_attrs_types.conf tmp/only_te_rules.conf tmp/all_post.conf: tmp/all_te_files.conf tmp/post_te_files.conf
+	$(QUIET) grep ^attribute tmp/all_te_files.conf > tmp/all_attrs_types.conf || true
+	$(QUIET) grep '^type ' tmp/all_te_files.conf >> tmp/all_attrs_types.conf
+	$(QUIET) cat tmp/post_te_files.conf > tmp/all_post.conf
+	$(QUIET) grep '^sid ' tmp/all_te_files.conf >> tmp/all_post.conf || true
+	$(QUIET) egrep '^fs_use_(xattr|task|trans)' tmp/all_te_files.conf >> tmp/all_post.conf || true
+	$(QUIET) grep ^genfscon tmp/all_te_files.conf >> tmp/all_post.conf || true
+ $(QUIET) sed -r -e /^attribute/d -e '/^type /d' -e /^genfscon/d \
+ -e '/^sid /d' -e '/^fs_use_(xattr|task|trans)/d' \
+ < tmp/all_te_files.conf > tmp/only_te_rules.conf
+
+########################################
+#
+# Remove the dontaudit rules from the policy.conf
+#
+enableaudit: policy.conf
+ @test -d tmp || mkdir -p tmp
+ @echo "Removing dontaudit rules from policy.conf"
+ $(QUIET) grep -v dontaudit policy.conf > tmp/policy.audit
+ $(QUIET) mv tmp/policy.audit policy.conf
+
+########################################
+#
+# Construct file_contexts
+#
+$(FC): $(M4SUPPORT) tmp/generated_definitions.conf $(ALL_FC_FILES)
+ifeq ($(ALL_FC_FILES),)
+	$(error No enabled modules! $(notdir $(MOD_CONF)) may need to be generated by using "make conf")
+endif
+ @echo "Creating $(NAME) file_contexts."
+ @test -d tmp || mkdir -p tmp
+	$(QUIET) m4 $(M4PARAM) $(M4SUPPORT) tmp/generated_definitions.conf $(ALL_FC_FILES) > tmp/$@.tmp
+# $(QUIET) grep -e HOME -e ROLE tmp/$@.tmp > $(HOMEDIR_TEMPLATE)
+# $(QUIET) sed -i -e /HOME/d -e /ROLE/d tmp/$@.tmp
+# $(QUIET) $(FCSORT) tmp/$@.tmp $@
+ $(QUIET) touch $(HOMEDIR_TEMPLATE)
+ $(QUIET) touch $@
+
+########################################
+#
+# Install file_contexts
+#
+$(FCPATH): $(FC) $(LOADPATH) $(USERPATH)/system.users
+ @echo "Validating $(NAME) file_contexts."
+# $(QUIET) $(SETFILES) -q -c $(LOADPATH) $(FC)
+ @echo "Installing file_contexts."
+ @mkdir -p $(CONTEXTPATH)/files
+ $(QUIET) install -m 644 $(FC) $(FCPATH)
+ $(QUIET) install -m 644 $(HOMEDIR_TEMPLATE) $(HOMEDIRPATH)
+# $(QUIET) $(GENHOMEDIRCON) -d $(TOPDIR) -t $(NAME) $(USEPWD)
+
+########################################
+#
+# Run policy source checks
+#
+check: policy.conf $(FC)
+	$(SECHECK) -s --profile=development --policy=policy.conf --fcfile=$(FC) > $@.res
+
+longcheck: policy.conf $(FC)
+ $(SECHECK) -s --profile=all --policy=policy.conf --fcfile=$(FC) > $@.res
+
+########################################
+#
+# Clean the sources
+#
+clean:
+ rm -fR tmp
+ rm -f policy.conf
+ rm -f policy.$(PV)
+ rm -f $(FC)
+ rm -f *.res
+
+.PHONY: default policy install load reload enableaudit checklabels restorelabels relabel check longcheck clean
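In the monolithic case the whole build boils down to two commands (a
sketch using the defaults above: OUTPUT_POLICY = 20, NAME =
xenrefpolicy):

    # policy.conf is assembled by m4 from the section files, then:
    checkpolicy -c 20 policy.conf -o policy.20
    # "make load" installs the result and loads it:
    flask-loadpolicy -q /etc/xen/xenrefpolicy/policy/policy.20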
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/flask/policy/policy/constraints
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/policy/constraints Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,27 @@
+
+#
+# Define the constraints
+#
+# constrain class_set perm_set expression ;
+#
+# expression : ( expression )
+# | not expression
+# | expression and expression
+# | expression or expression
+# | u1 op u2
+# | r1 role_op r2
+# | t1 op t2
+# | u1 op names
+# | u2 op names
+# | r1 op names
+# | r2 op names
+# | t1 op names
+# | t2 op names
+#
+# op : == | !=
+# role_op : == | != | eq | dom | domby | incomp
+#
+# names : name | { name_list }
+# name_list : name | name_list name
+#
+
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/flask/policy/policy/flask/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/policy/flask/Makefile Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,41 @@
+# flask needs to know where to export the libselinux headers.
+LIBSEL ?= ../../libselinux
+
+# flask needs to know where to export the kernel headers.
+LINUXDIR ?= ../../../linux-2.6
+
+AWK = awk
+
+CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \
+ else if [ -x /bin/bash ]; then echo /bin/bash; \
+ else echo sh; fi ; fi)
+
+FLASK_H_DEPEND = security_classes initial_sids
+AV_H_DEPEND = access_vectors
+
+FLASK_H_FILES = class_to_string.h flask.h initial_sid_to_string.h
+AV_H_FILES = av_inherit.h common_perm_to_string.h av_perm_to_string.h av_permissions.h
+ALL_H_FILES = $(FLASK_H_FILES) $(AV_H_FILES)
+
+all: $(ALL_H_FILES)
+
+$(FLASK_H_FILES): $(FLASK_H_DEPEND)
+ $(CONFIG_SHELL) mkflask.sh $(AWK) $(FLASK_H_DEPEND)
+
+$(AV_H_FILES): $(AV_H_DEPEND)
+ $(CONFIG_SHELL) mkaccess_vector.sh $(AWK) $(AV_H_DEPEND)
+
+tolib: all
+ install -m 644 flask.h av_permissions.h $(LIBSEL)/include/selinux
+	install -m 644 class_to_string.h av_inherit.h common_perm_to_string.h av_perm_to_string.h $(LIBSEL)/src
+
+tokern: all
+ install -m 644 $(ALL_H_FILES) $(LINUXDIR)/security/selinux/include
+
+install: all
+
+relabel:
+
+clean:
+ rm -f $(FLASK_H_FILES)
+ rm -f $(AV_H_FILES)
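The headers are regenerated in place whenever the definition files
change, e.g.:

    cd tools/flask/policy/policy/flask
    make        # runs mkflask.sh and mkaccess_vector.sh through awk
    ls flask.h av_permissions.h av_perm_to_string.h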
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/flask/policy/policy/flask/access_vectors
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/policy/flask/access_vectors	Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,166 @@
+#
+# Define common prefixes for access vectors
+#
+# common common_name { permission_name ... }
+
+#
+# Define a common prefix for file access vectors.
+#
+
+
+#
+# Define the access vectors.
+#
+# class class_name [ inherits common_name ] { permission_name ... }
+
+
+#
+# Define the access vector interpretation for file-related objects.
+#
+
+class xen
+{
+ scheduler
+ settime
+ tbufcontrol
+ readconsole
+ clearconsole
+ perfcontrol
+ mtrr_add
+ mtrr_del
+ mtrr_read
+ microcode
+ physinfo
+ quirk
+ writeconsole
+ readapic
+ writeapic
+ privprofile
+ nonprivprofile
+ kexec
+ firmware
+ sleep
+ frequency
+ getidle
+ debug
+ getcpuinfo
+ heap
+}
+
+class domain
+{
+ setvcpucontext
+ pause
+ unpause
+ resume
+ create
+ transition
+ max_vcpus
+ destroy
+ setvcpuaffinity
+ getvcpuaffinity
+ scheduler
+ getdomaininfo
+ getvcpuinfo
+ getvcpucontext
+ setdomainmaxmem
+ setdomainhandle
+ setdebugging
+ hypercall
+ settime
+ set_target
+ shutdown
+ setaddrsize
+ getaddrsize
+ trigger
+ getextvcpucontext
+ setextvcpucontext
+}
+
+class hvm
+{
+ sethvmc
+ gethvmc
+ setparam
+ getparam
+ pcilevel
+ irqlevel
+ pciroute
+ bind_irq
+ cacheattr
+}
+
+class event
+{
+ bind
+ send
+ status
+ notify
+ create
+ vector
+ reset
+}
+
+class grant
+{
+ map_read
+ map_write
+ unmap
+ transfer
+ setup
+ copy
+ query
+}
+
+class mmu
+{
+ map_read
+ map_write
+ pageinfo
+ pagelist
+ adjust
+ stat
+ translategp
+ updatemp
+ physmap
+ pinpage
+ mfnlist
+ memorymap
+}
+
+class shadow
+{
+ disable
+ enable
+ logdirty
+}
+
+class resource
+{
+ add
+ remove
+ use
+ add_irq
+ remove_irq
+ add_ioport
+ remove_ioport
+ add_iomem
+ remove_iomem
+ stat_device
+ add_device
+ remove_device
+}
+
+class security
+{
+ compute_av
+ compute_create
+ compute_member
+ check_context
+ load_policy
+ compute_relabel
+ compute_user
+ setenforce
+ setbool
+ setsecparam
+}
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/flask/policy/policy/flask/initial_sids
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/policy/flask/initial_sids	Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,17 @@
+# FLASK
+
+#
+# Define initial security identifiers
+#
+sid xen
+sid dom0
+sid domU
+sid domio
+sid domxen
+sid unlabeled
+sid security
+sid ioport
+sid iomem
+sid pirq
+sid device
+# FLASK
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/flask/policy/policy/flask/mkaccess_vector.sh
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/policy/flask/mkaccess_vector.sh	Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,227 @@
+#!/bin/sh -
+#
+
+# FLASK
+
+set -e
+
+awk=$1
+shift
+
+# output files
+av_permissions="av_permissions.h"
+av_inherit="av_inherit.h"
+common_perm_to_string="common_perm_to_string.h"
+av_perm_to_string="av_perm_to_string.h"
+
+cat $* | $awk "
+BEGIN {
+ outfile = \"$av_permissions\"
+ inheritfile = \"$av_inherit\"
+ cpermfile = \"$common_perm_to_string\"
+ avpermfile = \"$av_perm_to_string\"
+ "'
+ nextstate = "COMMON_OR_AV";
+	printf("/* This file is automatically generated. Do not edit. */\n") > outfile;
+	printf("/* This file is automatically generated. Do not edit. */\n") > inheritfile;
+	printf("/* This file is automatically generated. Do not edit. */\n") > cpermfile;
+	printf("/* This file is automatically generated. Do not edit. */\n") > avpermfile;
+;
+ }
+/^[ \t]*#/ {
+ next;
+ }
+$1 == "common" {
+ if (nextstate != "COMMON_OR_AV")
+ {
+			printf("Parse error: Unexpected COMMON definition on line %d\n", NR);
+ next;
+ }
+
+ if ($2 in common_defined)
+ {
+			printf("Duplicate COMMON definition for %s on line %d.\n", $2, NR);
+ next;
+ }
+ common_defined[$2] = 1;
+
+ tclass = $2;
+ common_name = $2;
+ permission = 1;
+
+		printf("TB_(common_%s_perm_to_string)\n", $2) > cpermfile;
+
+ nextstate = "COMMON-OPENBRACKET";
+ next;
+ }
+$1 == "class" {
+ if (nextstate != "COMMON_OR_AV" &&
+ nextstate != "CLASS_OR_CLASS-OPENBRACKET")
+ {
+			printf("Parse error: Unexpected class definition on line %d\n", NR);
+ next;
+ }
+
+ tclass = $2;
+
+ if (tclass in av_defined)
+ {
+			printf("Duplicate access vector definition for %s on line %d\n", tclass, NR);
+ next;
+ }
+ av_defined[tclass] = 1;
+
+ inherits = "";
+ permission = 1;
+
+ nextstate = "INHERITS_OR_CLASS-OPENBRACKET";
+ next;
+ }
+$1 == "inherits" {
+ if (nextstate != "INHERITS_OR_CLASS-OPENBRACKET")
+ {
+			printf("Parse error: Unexpected INHERITS definition on line %d\n", NR);
+ next;
+ }
+
+ if (!($2 in common_defined))
+ {
+			printf("COMMON %s is not defined (line %d).\n", $2, NR);
+ next;
+ }
+
+ inherits = $2;
+ permission = common_base[$2];
+
+ for (combined in common_perms)
+ {
+ split(combined,separate, SUBSEP);
+ if (separate[1] == inherits)
+ {
+				inherited_perms[common_perms[combined]] = separate[2];
+ }
+ }
+
+ j = 1;
+ for (i in inherited_perms) {
+ ind[j] = i + 0;
+ j++;
+ }
+ n = asort(ind);
+ for (i = 1; i <= n; i++) {
+ perm = inherited_perms[ind[i]];
+			printf("#define %s__%s", toupper(tclass), toupper(perm)) > outfile;
+ spaces = 40 - (length(perm) + length(tclass));
+ if (spaces < 1)
+ spaces = 1;
+ for (j = 0; j < spaces; j++)
+ printf(" ") > outfile;
+ printf("0x%08xUL\n", ind[i]) > outfile;
+ }
+ printf("\n") > outfile;
+ for (i in ind) delete ind[i];
+ for (i in inherited_perms) delete inherited_perms[i];
+
+		printf(" S_(SECCLASS_%s, %s, 0x%08xUL)\n", toupper(tclass), inherits, permission) > inheritfile;
+
+ nextstate = "CLASS_OR_CLASS-OPENBRACKET";
+ next;
+ }
+$1 == "{" {
+ if (nextstate != "INHERITS_OR_CLASS-OPENBRACKET" &&
+ nextstate != "CLASS_OR_CLASS-OPENBRACKET" &&
+ nextstate != "COMMON-OPENBRACKET")
+ {
+			printf("Parse error: Unexpected { on line %d\n", NR);
+ next;
+ }
+
+ if (nextstate == "INHERITS_OR_CLASS-OPENBRACKET")
+ nextstate = "CLASS-CLOSEBRACKET";
+
+ if (nextstate == "CLASS_OR_CLASS-OPENBRACKET")
+ nextstate = "CLASS-CLOSEBRACKET";
+
+ if (nextstate == "COMMON-OPENBRACKET")
+ nextstate = "COMMON-CLOSEBRACKET";
+ }
+/[a-z][a-z_]*/ {
+ if (nextstate != "COMMON-CLOSEBRACKET" &&
+ nextstate != "CLASS-CLOSEBRACKET")
+ {
+			printf("Parse error: Unexpected symbol %s on line %d\n", $1, NR);
+ next;
+ }
+
+ if (nextstate == "COMMON-CLOSEBRACKET")
+ {
+ if ((common_name,$1) in common_perms)
+ {
+				printf("Duplicate permission %s for common %s on line %d.\n", $1, common_name, NR);
+ next;
+ }
+
+ common_perms[common_name,$1] = permission;
+
+			printf("#define COMMON_%s__%s", toupper(common_name), toupper($1)) > outfile;
+
+ printf(" S_(\"%s\")\n", $1) > cpermfile;
+ }
+ else
+ {
+ if ((tclass,$1) in av_perms)
+ {
+				printf("Duplicate permission %s for %s on line %d.\n", $1, tclass, NR);
+ next;
+ }
+
+ av_perms[tclass,$1] = permission;
+
+ if (inherits != "")
+ {
+ if ((inherits,$1) in common_perms)
+ {
+					printf("Permission %s in %s on line %d conflicts with common permission.\n", $1, tclass, inherits, NR);
+ next;
+ }
+ }
+
+			printf("#define %s__%s", toupper(tclass), toupper($1)) > outfile;
+
+			printf(" S_(SECCLASS_%s, %s__%s, \"%s\")\n", toupper(tclass), toupper(tclass), toupper($1), $1) > avpermfile;
+ }
+
+ spaces = 40 - (length($1) + length(tclass));
+ if (spaces < 1)
+ spaces = 1;
+
+ for (i = 0; i < spaces; i++)
+ printf(" ") > outfile;
+ printf("0x%08xUL\n", permission) > outfile;
+ permission = permission * 2;
+ }
+$1 == "}" {
+ if (nextstate != "CLASS-CLOSEBRACKET" &&
+ nextstate != "COMMON-CLOSEBRACKET")
+ {
+			printf("Parse error: Unexpected } on line %d\n", NR);
+ next;
+ }
+
+ if (nextstate == "COMMON-CLOSEBRACKET")
+ {
+ common_base[common_name] = permission;
+			printf("TE_(common_%s_perm_to_string)\n\n", common_name) > cpermfile;
+ }
+
+ printf("\n") > outfile;
+
+ nextstate = "COMMON_OR_AV";
+ }
+END {
+	if (nextstate != "COMMON_OR_AV" && nextstate != "CLASS_OR_CLASS-OPENBRACKET")
+ printf("Parse error: Unexpected end of file\n");
+
+ }'
+
+# FLASK
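Run as the Makefile above runs it, the script turns access_vectors into
C headers; for the xen class, whose permission bits start at 0x1 and
double per entry, av_permissions.h comes out shaped like this
(illustrative excerpt):

    sh mkaccess_vector.sh awk access_vectors
    # av_permissions.h:
    #   #define XEN__SCHEDULER    0x00000001UL
    #   #define XEN__SETTIME      0x00000002UL
    #   #define XEN__TBUFCONTROL  0x00000004UL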
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/flask/policy/policy/flask/mkflask.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/policy/flask/mkflask.sh	Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,95 @@
+#!/bin/sh -
+#
+
+# FLASK
+
+set -e
+
+awk=$1
+shift 1
+
+# output file
+output_file="flask.h"
+debug_file="class_to_string.h"
+debug_file2="initial_sid_to_string.h"
+
+cat $* | $awk "
+BEGIN {
+ outfile = \"$output_file\"
+ debugfile = \"$debug_file\"
+ debugfile2 = \"$debug_file2\"
+ "'
+ nextstate = "CLASS";
+
+	printf("/* This file is automatically generated. Do not edit. */\n") > outfile;
+
+	printf("#ifndef _SELINUX_FLASK_H_\n") > outfile;
+	printf("#define _SELINUX_FLASK_H_\n") > outfile;
+	printf("\n/*\n * Security object class definitions\n */\n") > outfile;
+	printf("/* This file is automatically generated. Do not edit. */\n") > debugfile;
+	printf("/*\n * Security object class definitions\n */\n") > debugfile;
+	printf("    S_(\"null\")\n") > debugfile;
+	printf("/* This file is automatically generated. Do not edit. */\n") > debugfile2;
+	printf("static char *initial_sid_to_string[] =\n{\n") > debugfile2;
+ printf(" \"null\",\n") > debugfile2;
+ }
+/^[ \t]*#/ {
+ next;
+ }
+$1 == "class" {
+ if (nextstate != "CLASS")
+ {
+			printf("Parse error: Unexpected class definition on line %d\n", NR);
+ next;
+ }
+
+ if ($2 in class_found)
+ {
+			printf("Duplicate class definition for %s on line %d.\n", $2, NR);
+ next;
+ }
+ class_found[$2] = 1;
+
+ class_value++;
+
+ printf("#define SECCLASS_%s", toupper($2)) > outfile;
+ for (i = 0; i < 40 - length($2); i++)
+ printf(" ") > outfile;
+ printf("%d\n", class_value) > outfile;
+
+ printf(" S_(\"%s\")\n", $2) > debugfile;
+ }
+$1 == "sid" {
+ if (nextstate == "CLASS")
+ {
+ nextstate = "SID";
+		printf("\n/*\n * Security identifier indices for initial entities\n */\n") > outfile;
+ }
+
+ if ($2 in sid_found)
+ {
+			printf("Duplicate SID definition for %s on line %d.\n", $2, NR);
+ next;
+ }
+ sid_found[$2] = 1;
+ sid_value++;
+
+ printf("#define SECINITSID_%s", toupper($2)) > outfile;
+ for (i = 0; i < 37 - length($2); i++)
+ printf(" ") > outfile;
+ printf("%d\n", sid_value) > outfile;
+ printf(" \"%s\",\n", $2) > debugfile2;
+ }
+END {
+ if (nextstate != "SID")
+ printf("Parse error: Unexpected end of file\n");
+
+ printf("\n#define SECINITSID_NUM") > outfile;
+ for (i = 0; i < 34; i++)
+ printf(" ") > outfile;
+ printf("%d\n", sid_value) > outfile;
+ printf("\n#endif\n") > outfile;
+ printf("};\n\n") > debugfile2;
+ }'
+
+# FLASK
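Likewise, running mkflask.sh over the class and SID lists yields
consecutively numbered constants in flask.h, both starting from 1
(illustrative excerpt):

    sh mkflask.sh awk security_classes initial_sids
    # flask.h:
    #   #define SECCLASS_XEN      1
    #   #define SECCLASS_DOMAIN   2
    #   #define SECINITSID_XEN    1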
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/flask/policy/policy/flask/security_classes
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/policy/flask/security_classes	Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,20 @@
+# FLASK
+
+#
+# Define the security object classes
+#
+
+# Classes marked as userspace are classes
+# for userspace object managers
+
+class xen
+class domain
+class hvm
+class mmu
+class resource
+class shadow
+class event
+class grant
+class security
+
+# FLASK
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/flask/policy/policy/global_booleans
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/policy/global_booleans Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,5 @@
+#
+# This file is for the declaration of global booleans.
+# To change the default value at build time, the booleans.conf
+# file should be used.
+#
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/flask/policy/policy/global_tunables
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/policy/global_tunables Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,6 @@
+#
+# This file is for the declaration of global tunables.
+# To change the default value at build time, the booleans.conf
+# file should be used.
+#
+
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/flask/policy/policy/mcs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/policy/mcs Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,324 @@
+ifdef(`enable_mcs',`
+#
+# Define sensitivities
+#
+# Each sensitivity has a name and zero or more aliases.
+#
+# MCS is single-sensitivity.
+#
+sensitivity s0;
+
+#
+# Define the ordering of the sensitivity levels (least to greatest)
+#
+dominance { s0 }
+
+
+#
+# Define the categories
+#
+# Each category has a name and zero or more aliases.
+#
+category c0;
+category c1;
+category c2;
+category c3;
+category c4;
+category c5;
+category c6;
+category c7;
+category c8;
+category c9;
+category c10;
+category c11;
+category c12;
+category c13;
+category c14;
+category c15;
+category c16;
+category c17;
+category c18;
+category c19;
+category c20;
+category c21;
+category c22;
+category c23;
+category c24;
+category c25;
+category c26;
+category c27;
+category c28;
+category c29;
+category c30;
+category c31;
+category c32;
+category c33;
+category c34;
+category c35;
+category c36;
+category c37;
+category c38;
+category c39;
+category c40;
+category c41;
+category c42;
+category c43;
+category c44;
+category c45;
+category c46;
+category c47;
+category c48;
+category c49;
+category c50;
+category c51;
+category c52;
+category c53;
+category c54;
+category c55;
+category c56;
+category c57;
+category c58;
+category c59;
+category c60;
+category c61;
+category c62;
+category c63;
+category c64;
+category c65;
+category c66;
+category c67;
+category c68;
+category c69;
+category c70;
+category c71;
+category c72;
+category c73;
+category c74;
+category c75;
+category c76;
+category c77;
+category c78;
+category c79;
+category c80;
+category c81;
+category c82;
+category c83;
+category c84;
+category c85;
+category c86;
+category c87;
+category c88;
+category c89;
+category c90;
+category c91;
+category c92;
+category c93;
+category c94;
+category c95;
+category c96;
+category c97;
+category c98;
+category c99;
+category c100;
+category c101;
+category c102;
+category c103;
+category c104;
+category c105;
+category c106;
+category c107;
+category c108;
+category c109;
+category c110;
+category c111;
+category c112;
+category c113;
+category c114;
+category c115;
+category c116;
+category c117;
+category c118;
+category c119;
+category c120;
+category c121;
+category c122;
+category c123;
+category c124;
+category c125;
+category c126;
+category c127;
+category c128;
+category c129;
+category c130;
+category c131;
+category c132;
+category c133;
+category c134;
+category c135;
+category c136;
+category c137;
+category c138;
+category c139;
+category c140;
+category c141;
+category c142;
+category c143;
+category c144;
+category c145;
+category c146;
+category c147;
+category c148;
+category c149;
+category c150;
+category c151;
+category c152;
+category c153;
+category c154;
+category c155;
+category c156;
+category c157;
+category c158;
+category c159;
+category c160;
+category c161;
+category c162;
+category c163;
+category c164;
+category c165;
+category c166;
+category c167;
+category c168;
+category c169;
+category c170;
+category c171;
+category c172;
+category c173;
+category c174;
+category c175;
+category c176;
+category c177;
+category c178;
+category c179;
+category c180;
+category c181;
+category c182;
+category c183;
+category c184;
+category c185;
+category c186;
+category c187;
+category c188;
+category c189;
+category c190;
+category c191;
+category c192;
+category c193;
+category c194;
+category c195;
+category c196;
+category c197;
+category c198;
+category c199;
+category c200;
+category c201;
+category c202;
+category c203;
+category c204;
+category c205;
+category c206;
+category c207;
+category c208;
+category c209;
+category c210;
+category c211;
+category c212;
+category c213;
+category c214;
+category c215;
+category c216;
+category c217;
+category c218;
+category c219;
+category c220;
+category c221;
+category c222;
+category c223;
+category c224;
+category c225;
+category c226;
+category c227;
+category c228;
+category c229;
+category c230;
+category c231;
+category c232;
+category c233;
+category c234;
+category c235;
+category c236;
+category c237;
+category c238;
+category c239;
+category c240;
+category c241;
+category c242;
+category c243;
+category c244;
+category c245;
+category c246;
+category c247;
+category c248;
+category c249;
+category c250;
+category c251;
+category c252;
+category c253;
+category c254;
+category c255;
+
+
+#
+# Each MCS level specifies a sensitivity and zero or more categories which may
+# be associated with that sensitivity.
+#
+level s0:c0.c255;
+
+#
+# Define the MCS policy
+#
+# mlsconstrain class_set perm_set expression ;
+#
+# mlsvalidatetrans class_set expression ;
+#
+# expression : ( expression )
+# | not expression
+# | expression and expression
+# | expression or expression
+# | u1 op u2
+# | r1 role_mls_op r2
+# | t1 op t2
+# | l1 role_mls_op l2
+# | l1 role_mls_op h2
+# | h1 role_mls_op l2
+# | h1 role_mls_op h2
+# | l1 role_mls_op h1
+# | l2 role_mls_op h2
+# | u1 op names
+# | u2 op names
+# | r1 op names
+# | r2 op names
+# | t1 op names
+# | t2 op names
+# | u3 op names (NOTE: this is only available for mlsvalidatetrans)
+# | r3 op names (NOTE: this is only available for mlsvalidatetrans)
+# | t3 op names (NOTE: this is only available for mlsvalidatetrans)
+#
+# op : == | !=
+# role_mls_op : == | != | eq | dom | domby | incomp
+#
+# names : name | { name_list }
+# name_list : name | name_list name
+#
+
+
+') dnl end enable_mcs
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/flask/policy/policy/mls
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/policy/mls Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,354 @@
+
+ifdef(`enable_mls',`
+#
+# Define sensitivities
+#
+# Each sensitivity has a name and zero or more aliases.
+#
+sensitivity s0;
+sensitivity s1;
+sensitivity s2;
+sensitivity s3;
+sensitivity s4;
+sensitivity s5;
+sensitivity s6;
+sensitivity s7;
+sensitivity s8;
+sensitivity s9;
+sensitivity s10;
+sensitivity s11;
+sensitivity s12;
+sensitivity s13;
+sensitivity s14;
+sensitivity s15;
+
+#
+# Define the ordering of the sensitivity levels (least to greatest)
+#
+dominance { s0 s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 s12 s13 s14 s15 }
+
+
+#
+# Define the categories
+#
+# Each category has a name and zero or more aliases.
+#
+category c0;
+category c1;
+category c2;
+category c3;
+category c4;
+category c5;
+category c6;
+category c7;
+category c8;
+category c9;
+category c10;
+category c11;
+category c12;
+category c13;
+category c14;
+category c15;
+category c16;
+category c17;
+category c18;
+category c19;
+category c20;
+category c21;
+category c22;
+category c23;
+category c24;
+category c25;
+category c26;
+category c27;
+category c28;
+category c29;
+category c30;
+category c31;
+category c32;
+category c33;
+category c34;
+category c35;
+category c36;
+category c37;
+category c38;
+category c39;
+category c40;
+category c41;
+category c42;
+category c43;
+category c44;
+category c45;
+category c46;
+category c47;
+category c48;
+category c49;
+category c50;
+category c51;
+category c52;
+category c53;
+category c54;
+category c55;
+category c56;
+category c57;
+category c58;
+category c59;
+category c60;
+category c61;
+category c62;
+category c63;
+category c64;
+category c65;
+category c66;
+category c67;
+category c68;
+category c69;
+category c70;
+category c71;
+category c72;
+category c73;
+category c74;
+category c75;
+category c76;
+category c77;
+category c78;
+category c79;
+category c80;
+category c81;
+category c82;
+category c83;
+category c84;
+category c85;
+category c86;
+category c87;
+category c88;
+category c89;
+category c90;
+category c91;
+category c92;
+category c93;
+category c94;
+category c95;
+category c96;
+category c97;
+category c98;
+category c99;
+category c100;
+category c101;
+category c102;
+category c103;
+category c104;
+category c105;
+category c106;
+category c107;
+category c108;
+category c109;
+category c110;
+category c111;
+category c112;
+category c113;
+category c114;
+category c115;
+category c116;
+category c117;
+category c118;
+category c119;
+category c120;
+category c121;
+category c122;
+category c123;
+category c124;
+category c125;
+category c126;
+category c127;
+category c128;
+category c129;
+category c130;
+category c131;
+category c132;
+category c133;
+category c134;
+category c135;
+category c136;
+category c137;
+category c138;
+category c139;
+category c140;
+category c141;
+category c142;
+category c143;
+category c144;
+category c145;
+category c146;
+category c147;
+category c148;
+category c149;
+category c150;
+category c151;
+category c152;
+category c153;
+category c154;
+category c155;
+category c156;
+category c157;
+category c158;
+category c159;
+category c160;
+category c161;
+category c162;
+category c163;
+category c164;
+category c165;
+category c166;
+category c167;
+category c168;
+category c169;
+category c170;
+category c171;
+category c172;
+category c173;
+category c174;
+category c175;
+category c176;
+category c177;
+category c178;
+category c179;
+category c180;
+category c181;
+category c182;
+category c183;
+category c184;
+category c185;
+category c186;
+category c187;
+category c188;
+category c189;
+category c190;
+category c191;
+category c192;
+category c193;
+category c194;
+category c195;
+category c196;
+category c197;
+category c198;
+category c199;
+category c200;
+category c201;
+category c202;
+category c203;
+category c204;
+category c205;
+category c206;
+category c207;
+category c208;
+category c209;
+category c210;
+category c211;
+category c212;
+category c213;
+category c214;
+category c215;
+category c216;
+category c217;
+category c218;
+category c219;
+category c220;
+category c221;
+category c222;
+category c223;
+category c224;
+category c225;
+category c226;
+category c227;
+category c228;
+category c229;
+category c230;
+category c231;
+category c232;
+category c233;
+category c234;
+category c235;
+category c236;
+category c237;
+category c238;
+category c239;
+category c240;
+category c241;
+category c242;
+category c243;
+category c244;
+category c245;
+category c246;
+category c247;
+category c248;
+category c249;
+category c250;
+category c251;
+category c252;
+category c253;
+category c254;
+category c255;
+
+
+#
+# Each MLS level specifies a sensitivity and zero or more categories which may
+# be associated with that sensitivity.
+#
+level s0:c0.c255;
+level s1:c0.c255;
+level s2:c0.c255;
+level s3:c0.c255;
+level s4:c0.c255;
+level s5:c0.c255;
+level s6:c0.c255;
+level s7:c0.c255;
+level s8:c0.c255;
+level s9:c0.c255;
+level s10:c0.c255;
+level s11:c0.c255;
+level s12:c0.c255;
+level s13:c0.c255;
+level s14:c0.c255;
+level s15:c0.c255;
+
+
+#
+# Define the MLS policy
+#
+# mlsconstrain class_set perm_set expression ;
+#
+# mlsvalidatetrans class_set expression ;
+#
+# expression : ( expression )
+# | not expression
+# | expression and expression
+# | expression or expression
+# | u1 op u2
+# | r1 role_mls_op r2
+# | t1 op t2
+# | l1 role_mls_op l2
+# | l1 role_mls_op h2
+# | h1 role_mls_op l2
+# | h1 role_mls_op h2
+# | l1 role_mls_op h1
+# | l2 role_mls_op h2
+# | u1 op names
+# | u2 op names
+# | r1 op names
+# | r2 op names
+# | t1 op names
+# | t2 op names
+# | u3 op names (NOTE: this is only available for mlsvalidatetrans)
+# | r3 op names (NOTE: this is only available for mlsvalidatetrans)
+# | t3 op names (NOTE: this is only available for mlsvalidatetrans)
+#
+# op : == | !=
+# role_mls_op : == | != | eq | dom | domby | incomp
+#
+# names : name | { name_list }
+# name_list : name | name_list name
+#
+
+
+') dnl end enable_mls
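The long category and level lists in the mcs and mls files above are
mechanical; a sketch of how to regenerate them rather than edit 256
declarations by hand:

    for i in $(seq 0 255); do echo "category c$i;"; done
    for s in $(seq 0 15);  do echo "level s$s:c0.c255;"; done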
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/flask/policy/policy/modules.conf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/policy/modules.conf Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,21 @@
+#
+# This file contains a listing of available modules.
+# To prevent a module from being used in policy
+# creation, set the module name to "off".
+#
+# For monolithic policies, modules set to "base" and "module"
+# will be built into the policy.
+#
+# For modular policies, modules set to "base" will be
+# included in the base module. "module" will be compiled
+# as individual loadable modules.
+#
+
+# Layer: xen
+# Module: xen
+# Required in base
+#
+# Policy for xen.
+#
+xen = base
+
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/flask/policy/policy/modules/xen/xen.if
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/policy/modules/xen/xen.if	Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,1 @@
+#
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/flask/policy/policy/modules/xen/xen.te
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/policy/modules/xen/xen.te	Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,135 @@
+attribute xen_type;
+attribute domain_type;
+attribute resource_type;
+attribute event_type;
+
+type xen_t, xen_type, domain_type;
+
+type dom0_t, domain_type;
+
+type domio_t, domain_type;
+
+type domxen_t, domain_type;
+
+type unlabeled_t, domain_type;
+
+type security_t, domain_type;
+
+type pirq_t, resource_type;
+type ioport_t, resource_type;
+type iomem_t, resource_type;
+type device_t, resource_type;
+
+################################################################################
+#
+# create_domain(priv_dom, domain, channel)
+#
+################################################################################
+define(`create_domain', `
+ type $2, domain_type;
+ allow $1 $2:domain {create max_vcpus setdomainmaxmem
+ setaddrsize getdomaininfo hypercall
+ setvcpucontext scheduler unpause
+ getvcpuinfo getaddrsize getvcpuaffinity};
+ allow $1 $2:shadow {enable};
+ allow $1 $2:mmu {map_read map_write memorymap adjust pinpage};
+ allow $2 $2:mmu {map_read map_write pinpage};
+ allow $2 domio_t:mmu {map_read};
+ allow $2 $2:grant {query setup};
+ allow $1 $2:grant {map_read unmap};
+ allow $1 $3:event {create};
+')
+
+################################################################################
+#
+# manage_domain(priv_dom, domain)
+#
+################################################################################
+define(`manage_domain', `
+ allow $1 $2:domain {pause destroy};
+')
+
+################################################################################
+#
+# create_channel(caller, peer, channel)
+#
+################################################################################
+define(`create_channel', `
+ type $3, event_type;
+ type_transition $1 $2:event $3;
+ allow $1 $3:event {create};
+ allow $3 $2:event {bind};
+')
+
+################################################################################
+#
+# Boot the hypervisor and dom0
+#
+################################################################################
+allow dom0_t xen_t:xen {kexec readapic writeapic mtrr_read mtrr_add mtrr_del
+scheduler physinfo heap quirk readconsole writeconsole settime microcode};
+
+allow dom0_t domio_t:mmu {map_read map_write};
+allow dom0_t iomem_t:mmu {map_read map_write};
+allow dom0_t pirq_t:event {vector};
+allow dom0_t xen_t:mmu {memorymap};
+
+allow dom0_t dom0_t:mmu {pinpage map_read map_write adjust};
+allow dom0_t dom0_t:grant {query setup};
+allow dom0_t dom0_t:domain {scheduler getdomaininfo getvcpuinfo getvcpuaffinity};
+
+allow xen_t dom0_t:domain {create};
+allow xen_t dom0_t:resource {add remove};
+allow xen_t ioport_t:resource {add_ioport remove_ioport};
+allow dom0_t ioport_t:resource {use};
+allow xen_t iomem_t:resource {add_iomem remove_iomem};
+allow dom0_t iomem_t:resource {use};
+allow xen_t pirq_t:resource {add_irq remove_irq};
+allow dom0_t pirq_t:resource {use};
+
+allow dom0_t security_t:security {compute_av compute_create compute_member
+check_context load_policy compute_relabel compute_user setenforce setbool
+setsecparam};
+
+create_channel(dom0_t, dom0_t, evchn0-0_t)
+allow dom0_t evchn0-0_t:event {send};
+
+################################################################################
+#
+# Create and manage a domU w/ dom0 IO
+#
+################################################################################
+create_domain(dom0_t, domU_t, evchnU-0_t)
+
+create_channel(domU_t, domU_t, evchnU-U_t)
+allow domU_t evchnU-U_t:event {send};
+
+create_channel(dom0_t, domU_t, evchn0-U_t)
+allow dom0_t evchn0-U_t:event {send};
+
+create_channel(domU_t, dom0_t, evchnU-0_t)
+allow domU_t evchnU-0_t:event {send};
+
+manage_domain(dom0_t, domU_t)
+
+################################################################################
+#
+#
+#
+################################################################################
+sid xen gen_context(system_u:system_r:xen_t,s0)
+sid dom0 gen_context(system_u:system_r:dom0_t,s0)
+sid domU gen_context(system_u:system_r:domU_t,s0)
+sid domxen gen_context(system_u:system_r:domxen_t,s0)
+sid domio gen_context(system_u:system_r:domio_t,s0)
+sid unlabeled gen_context(system_u:system_r:unlabeled_t,s0)
+sid security gen_context(system_u:system_r:security_t,s0)
+sid pirq gen_context(system_u:object_r:pirq_t,s0)
+sid iomem gen_context(system_u:object_r:iomem_t,s0)
+sid ioport gen_context(system_u:object_r:ioport_t,s0)
+sid device gen_context(system_u:object_r:device_t,s0)
+
+role system_r types { xen_type domain_type };
+role user_r types { xen_type domain_type };
+role sysadm_r types { xen_type domain_type };
+role staff_r types { xen_type domain_type };
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/flask/policy/policy/support/loadable_module.spt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/policy/support/loadable_module.spt Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,166 @@
+########################################
+#
+# Macros for switching between source policy
+# and loadable policy module support
+#
+
+##############################
+#
+# For adding the module statement
+#
+define(`policy_module',`
+ ifdef(`self_contained_policy',`',`
+ module $1 $2;
+
+ require {
+ role system_r;
+ all_kernel_class_perms
+ }
+ ')
+')
+
+##############################
+#
+# For use in interfaces, to optionally insert a require block
+#
+define(`gen_require',`
+ ifdef(`self_contained_policy',`',`
+ define(`in_gen_require_block')
+ require {
+ $1
+ }
+ undefine(`in_gen_require_block')
+ ')
+')
+
+##############################
+#
+# In the future interfaces should be in loadable modules
+#
+# template(name,rules)
+#
+define(`template',`
+ `define(`$1',`
+##### begin $1(dollarsstar)
+ $2
+##### end $1(dollarsstar)
+ '')
+')
+
+# helper function, since m4 won't expand macros
+# if a line is a comment (#):
+define(`policy_m4_comment',`dnl
+##### $2 depth: $1
+')dnl
+
+##############################
+#
+# In the future interfaces should be in loadable modules
+#
+# interface(name,rules)
+#
+define(`interface',`
+ `define(`$1',`
+
+ define(`policy_temp',incr(policy_call_depth))
+ pushdef(`policy_call_depth',policy_temp)
+ undefine(`policy_temp')
+
+ policy_m4_comment(policy_call_depth,begin `$1'(dollarsstar))
+
+ $2
+
+ define(`policy_temp',decr(policy_call_depth))
+ pushdef(`policy_call_depth',policy_temp)
+ undefine(`policy_temp')
+
+ policy_m4_comment(policy_call_depth,end `$1'(dollarsstar))
+
+ '')
+')
+
+define(`policy_call_depth',0)
+
+##############################
+#
+# Optional policy handling
+#
+define(`optional_policy',`
+ ifdef(`self_contained_policy',`
+ ifdef(`$1',`$2',`$3')
+ ',`
+ optional {
+ $2
+ ifelse(`$3',`',`',`
+ } else {
+ $3
+ ')
+ }
+ ')
+')
+
+##############################
+#
+# Determine if we should use the default
+# tunable value as specified by the policy
+# or if the override value should be used
+#
+define(`dflt_or_overr',`ifdef(`$1',$1,$2)')
+
+##############################
+#
+# Extract booleans out of an expression.
+# This needs to be reworked so expressions
+# with parentheses can work.
+
+define(`declare_required_symbols',`
+ifelse(regexp($1, `\w'), -1, `', `dnl
+bool regexp($1, `\(\w+\)', `\1');
+declare_required_symbols(regexp($1, `\w+\(.*\)', `\1'))dnl
+') dnl
+')
+
+##############################
+#
+# Tunable declaration
+#
+define(`gen_tunable',`
+ ifdef(`self_contained_policy',`
+ bool $1 dflt_or_overr(`$1'_conf,$2);
+ ',`
+ # loadable module tunable
+ # declaration will go here
+ # instead of bool when
+ # loadable modules support
+ # tunables
+ bool $1 dflt_or_overr(`$1'_conf,$2);
+ ')
+')
+
+##############################
+#
+# Tunable policy handling
+#
+define(`tunable_policy',`
+ ifdef(`self_contained_policy',`
+ if (`$1') {
+ $2
+ } else {
+ $3
+ }
+ ',`
+ # structure for tunables
+ # will go here instead of a
+ # conditional when loadable
+ # modules support tunables
+ gen_require(`
+		declare_required_symbols(`$1')
+ ')
+
+ if (`$1') {
+ $2
+ } else {
+ $3
+ }
+ ')
+')
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/flask/policy/policy/support/misc_macros.spt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/policy/support/misc_macros.spt Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,32 @@
+
+########################################
+#
+# Helper macros
+#
+
+#
+# shiftn(num,list...)
+#
+# shift the list num times
+#
+define(`shiftn',`ifelse($1,0,`shift($*)',`shiftn(decr($1),shift(shift($*)))')')
+
+########################################
+#
+# gen_user(username, role_set, mls_defaultlevel, mls_range, [mcs_categories])
+#
+define(`gen_user',`user $1 roles { $2 }`'ifdef(`enable_mls', ` level $3 range $4')`'ifdef(`enable_mcs',` level s0 range s0`'ifelse(`$5',,,` - s0:$5')');')
+
+########################################
+#
+# gen_context(context,mls_sensitivity,[mcs_categories])
+#
+define(`gen_context',`$1`'ifdef(`enable_mls',`:$2')`'ifdef(`enable_mcs',`:s0`'ifelse(`$3',,,`:$3')')') dnl
+
+########################################
+#
+# gen_bool(name,default_value)
+#
+define(`gen_bool',`
+ bool $1 dflt_or_overr(`$1'_conf,$2);
+')
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/flask/policy/policy/systemuser
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/policy/systemuser Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,19 @@
+##################################
+#
+# System User configuration.
+#
+
+#
+# gen_user(username, role_set, mls_defaultlevel, mls_range, [mcs_categories])
+#
+
+#
+# system_u is the user identity for system processes and objects.
+# There should be no corresponding Unix user identity for system,
+# and a user process should never be assigned the system user
+# identity.
+#
+gen_user(system_u, system_r, s0, s0 - s9:c0.c127, c0.c127)
+
+# Normal users should not be added to this file,
+# but instead added to the users file.
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/flask/policy/policy/users
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/policy/users Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,39 @@
+
+##################################
+#
+# Core User configuration.
+#
+
+#
+# gen_user(username, role_set, mls_defaultlevel, mls_range, [mcs_categories])
+#
+
+#
+# user_u is a generic user identity for Linux users who have no
+# SELinux user identity defined. The modified daemons will use
+# this user identity in the security context if there is no matching
+# SELinux user identity for a Linux user. If you do not want to
+# permit any access to such users, then remove this entry.
+#
+ifdef(`targeted_policy',`
+gen_user(user_u, user_r sysadm_r system_r, s0, s0 - s9:c0.c127)
+',`
+gen_user(user_u, user_r, s0, s0 - s9:c0.c127)
+')
+
+#
+# The following users correspond to Unix identities.
+# These identities are typically assigned as the user attribute
+# when login starts the user shell. Users with access to the sysadm_r
+# role should use the staff_r role instead of the user_r role when
+# not in the sysadm_r.
+#
+ifdef(`targeted_policy',`
+ gen_user(root, user_r sysadm_r system_r, s0, s0 - s9:c0.c127, c0.c127)
+',`
+ ifdef(`direct_sysadm_daemon',`
+		gen_user(root, sysadm_r staff_r system_r, s0, s0 - s9:c0.c127, c0.c127)
+ ',`
+ gen_user(root, sysadm_r staff_r, s0, s0 - s9:c0.c127, c0.c127)
+ ')
+')
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/ioemu/hw/cirrus_vga.c
--- a/tools/ioemu/hw/cirrus_vga.c Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/ioemu/hw/cirrus_vga.c Fri Sep 12 14:47:40 2008 +0900
@@ -2554,6 +2554,9 @@ static void set_vram_mapping(CirrusVGASt
fprintf(logfile,"mapping vram to %lx - %lx\n", begin, end);
+ if (!s->vram_mfns)
+ return;
+
xatp.domid = domid;
xatp.space = XENMAPSPACE_mfn;
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/ioemu/hw/pass-through.c
--- a/tools/ioemu/hw/pass-through.c Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/ioemu/hw/pass-through.c Fri Sep 12 14:47:40 2008 +0900
@@ -57,6 +57,10 @@ static uint32_t pt_irqpin_reg_init(struc
struct pt_reg_info_tbl *reg, uint32_t real_offset);
static uint32_t pt_bar_reg_init(struct pt_dev *ptdev,
struct pt_reg_info_tbl *reg, uint32_t real_offset);
+static uint32_t pt_linkctrl_reg_init(struct pt_dev *ptdev,
+ struct pt_reg_info_tbl *reg, uint32_t real_offset);
+static uint32_t pt_devctrl2_reg_init(struct pt_dev *ptdev,
+ struct pt_reg_info_tbl *reg, uint32_t real_offset);
static uint32_t pt_linkctrl2_reg_init(struct pt_dev *ptdev,
struct pt_reg_info_tbl *reg, uint32_t real_offset);
static uint32_t pt_msgctrl_reg_init(struct pt_dev *ptdev,
@@ -76,6 +80,8 @@ static uint8_t pt_msix_size_init(struct
static uint8_t pt_msix_size_init(struct pt_dev *ptdev,
struct pt_reg_grp_info_tbl *grp_reg, uint32_t base_offset);
static uint8_t pt_vendor_size_init(struct pt_dev *ptdev,
+ struct pt_reg_grp_info_tbl *grp_reg, uint32_t base_offset);
+static uint8_t pt_pcie_size_init(struct pt_dev *ptdev,
struct pt_reg_grp_info_tbl *grp_reg, uint32_t base_offset);
static int pt_byte_reg_read(struct pt_dev *ptdev,
struct pt_reg_tbl *cfg_entry,
@@ -438,7 +444,7 @@ static struct pt_reg_info_tbl pt_emu_reg
.init_val = 0x0000,
.ro_mask = 0x0000,
.emu_mask = 0xFFFF,
- .init = pt_common_reg_init,
+ .init = pt_linkctrl_reg_init,
.u.w.read = pt_word_reg_read,
.u.w.write = pt_linkctrl_reg_write,
},
@@ -449,7 +455,7 @@ static struct pt_reg_info_tbl pt_emu_reg
.init_val = 0x0000,
.ro_mask = 0x0000,
.emu_mask = 0xFFFF,
- .init = pt_common_reg_init,
+ .init = pt_devctrl2_reg_init,
.u.w.read = pt_word_reg_read,
.u.w.write = pt_devctrl2_reg_write,
},
@@ -666,8 +672,8 @@ static const struct pt_reg_grp_info_tbl
{
.grp_id = PCI_CAP_ID_EXP,
.grp_type = GRP_TYPE_EMU,
- .grp_size = 0x3C,
- .size_init = pt_reg_grp_size_init,
+ .grp_size = 0xFF,
+ .size_init = pt_pcie_size_init,
.emu_reg_tbl= pt_emu_reg_pcie_tbl,
},
/* MSI-X Capability Structure reg group */
@@ -1869,12 +1875,57 @@ static uint32_t pt_bar_reg_init(struct p
return reg_field;
}
+/* initialize Link Control register */
+static uint32_t pt_linkctrl_reg_init(struct pt_dev *ptdev,
+ struct pt_reg_info_tbl *reg, uint32_t real_offset)
+{
+ uint8_t cap_ver = 0;
+ uint8_t dev_type = 0;
+
+ cap_ver = (ptdev->dev.config[(real_offset - reg->offset) + PCI_EXP_FLAGS] &
+ (uint8_t)PCI_EXP_FLAGS_VERS);
+    dev_type = (ptdev->dev.config[(real_offset - reg->offset) + PCI_EXP_FLAGS] &
+ (uint8_t)PCI_EXP_FLAGS_TYPE) >> 4;
+
+ /* no need to initialize in case of Root Complex Integrated Endpoint
+ * with cap_ver 1.x
+ */
+ if ((dev_type == PCI_EXP_TYPE_ROOT_INT_EP) && (cap_ver == 1))
+ return PT_INVALID_REG;
+
+ return reg->init_val;
+}
+
+/* initialize Device Control 2 register */
+static uint32_t pt_devctrl2_reg_init(struct pt_dev *ptdev,
+ struct pt_reg_info_tbl *reg, uint32_t real_offset)
+{
+ uint8_t cap_ver = 0;
+
+ cap_ver = (ptdev->dev.config[(real_offset - reg->offset) + PCI_EXP_FLAGS] &
+ (uint8_t)PCI_EXP_FLAGS_VERS);
+
+ /* no need to initialize in case of cap_ver 1.x */
+ if (cap_ver == 1)
+ return PT_INVALID_REG;
+
+ return reg->init_val;
+}
+
/* initialize Link Control 2 register */
static uint32_t pt_linkctrl2_reg_init(struct pt_dev *ptdev,
struct pt_reg_info_tbl *reg, uint32_t real_offset)
{
int reg_field = 0;
-
+ uint8_t cap_ver = 0;
+
+ cap_ver = (ptdev->dev.config[(real_offset - reg->offset) + PCI_EXP_FLAGS] &
+ (uint8_t)PCI_EXP_FLAGS_VERS);
+
+ /* no need to initialize in case of cap_ver 1.x */
+ if (cap_ver == 1)
+ return PT_INVALID_REG;
+
/* set Supported Link Speed */
reg_field |=
(0x0F &
@@ -2034,6 +2085,91 @@ static uint8_t pt_vendor_size_init(struc
struct pt_reg_grp_info_tbl *grp_reg, uint32_t base_offset)
{
return ptdev->dev.config[base_offset + 0x02];
+}
+
+/* get PCI Express Capability Structure register group size */
+static uint8_t pt_pcie_size_init(struct pt_dev *ptdev,
+ struct pt_reg_grp_info_tbl *grp_reg, uint32_t base_offset)
+{
+ PCIDevice *d = &ptdev->dev;
+ uint16_t exp_flag = 0;
+ uint16_t type = 0;
+ uint16_t vers = 0;
+ uint8_t pcie_size = 0;
+
+ exp_flag = *((uint16_t*)(d->config + (base_offset + PCI_EXP_FLAGS)));
+ type = (exp_flag & PCI_EXP_FLAGS_TYPE) >> 4;
+ vers = (exp_flag & PCI_EXP_FLAGS_VERS);
+
+    /* calculate size depending on capability version and device/port type */
+ /* in case of PCI Express Base Specification Rev 1.x */
+ if (vers == 1)
+ {
+ /* The PCI Express Capabilities, Device Capabilities, and Device
+ * Status/Control registers are required for all PCI Express devices.
+ * The Link Capabilities and Link Status/Control are required for all
+ * Endpoints that are not Root Complex Integrated Endpoints. Endpoints
+ * are not required to implement registers other than those listed
+ * above and terminate the capability structure.
+ */
+ switch (type) {
+ case PCI_EXP_TYPE_ENDPOINT:
+ case PCI_EXP_TYPE_LEG_END:
+ pcie_size = 0x14;
+ break;
+ case PCI_EXP_TYPE_ROOT_INT_EP:
+ /* has no link */
+ pcie_size = 0x0C;
+ break;
+ /* only EndPoint passthrough is supported */
+ case PCI_EXP_TYPE_ROOT_PORT:
+ case PCI_EXP_TYPE_UPSTREAM:
+ case PCI_EXP_TYPE_DOWNSTREAM:
+ case PCI_EXP_TYPE_PCI_BRIDGE:
+ case PCI_EXP_TYPE_PCIE_BRIDGE:
+ case PCI_EXP_TYPE_ROOT_EC:
+ default:
+ /* exit I/O emulator */
+ PT_LOG("Internal error: Unsupported device/port type[%d]. "
+ "I/O emulator exit.\n", type);
+ exit(1);
+ }
+ }
+ /* in case of PCI Express Base Specification Rev 2.0 */
+ else if (vers == 2)
+ {
+ switch (type) {
+ case PCI_EXP_TYPE_ENDPOINT:
+ case PCI_EXP_TYPE_LEG_END:
+ case PCI_EXP_TYPE_ROOT_INT_EP:
+ /* For Functions that do not implement the registers,
+ * these spaces must be hardwired to 0b.
+ */
+ pcie_size = 0x3C;
+ break;
+ /* only EndPoint passthrough is supported */
+ case PCI_EXP_TYPE_ROOT_PORT:
+ case PCI_EXP_TYPE_UPSTREAM:
+ case PCI_EXP_TYPE_DOWNSTREAM:
+ case PCI_EXP_TYPE_PCI_BRIDGE:
+ case PCI_EXP_TYPE_PCIE_BRIDGE:
+ case PCI_EXP_TYPE_ROOT_EC:
+ default:
+ /* exit I/O emulator */
+ PT_LOG("Internal error: Unsupported device/port type[%d]. "
+ "I/O emulator exit.\n", type);
+ exit(1);
+ }
+ }
+ else
+ {
+ /* exit I/O emulator */
+ PT_LOG("Internal error: Unsupported capability version[%d]. "
+ "I/O emulator exit.\n", vers);
+ exit(1);
+ }
+
+ return pcie_size;
}
/* read byte size emulate register */
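The sizing logic above hinges on two fields of the PCI Express Capabilities
register. As a stand-alone illustration, here is a minimal sketch assuming the
standard PCI_EXP_FLAGS_VERS/PCI_EXP_FLAGS_TYPE mask values from Linux's
pci_regs.h; the flags word itself is made up:

    #include <stdint.h>
    #include <stdio.h>

    #define PCI_EXP_FLAGS_VERS 0x000f  /* capability version, bits 3:0 */
    #define PCI_EXP_FLAGS_TYPE 0x00f0  /* device/port type, bits 7:4 */

    int main(void)
    {
        uint16_t exp_flags = 0x0052;  /* hypothetical: version 2, type 0x5 */
        uint16_t vers = exp_flags & PCI_EXP_FLAGS_VERS;
        uint16_t type = (exp_flags & PCI_EXP_FLAGS_TYPE) >> 4;

        /* vers selects the Rev 1.x vs Rev 2.0 sizing rules above; type then
         * selects the endpoint branch (anything else aborts the emulator). */
        printf("cap version %u, device/port type 0x%x\n", vers, type);
        return 0;
    }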
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/ioemu/hw/pass-through.h
--- a/tools/ioemu/hw/pass-through.h Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/ioemu/hw/pass-through.h Fri Sep 12 14:47:40 2008 +0900
@@ -60,6 +60,21 @@
#ifndef PCI_MSI_FLAGS_MASK_BIT
/* interrupt masking & reporting supported */
#define PCI_MSI_FLAGS_MASK_BIT 0x0100
+#endif
+
+#ifndef PCI_EXP_TYPE_PCIE_BRIDGE
+/* PCI/PCI-X to PCIE Bridge */
+#define PCI_EXP_TYPE_PCIE_BRIDGE 0x8
+#endif
+
+#ifndef PCI_EXP_TYPE_ROOT_INT_EP
+/* Root Complex Integrated Endpoint */
+#define PCI_EXP_TYPE_ROOT_INT_EP 0x9
+#endif
+
+#ifndef PCI_EXP_TYPE_ROOT_EC
+/* Root Complex Event Collector */
+#define PCI_EXP_TYPE_ROOT_EC 0xa
#endif
#define PT_INVALID_REG 0xFFFFFFFF /* invalid register value */
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/ioemu/hw/pci.c
--- a/tools/ioemu/hw/pci.c Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/ioemu/hw/pci.c Fri Sep 12 14:47:40 2008 +0900
@@ -45,7 +45,6 @@ static void pci_update_mappings(PCIDevic
static void pci_update_mappings(PCIDevice *d);
target_phys_addr_t pci_mem_base;
-static int pci_irq_index;
static PCIBus *first_bus;
PCIBus *pci_register_bus(pci_set_irq_fn set_irq, pci_map_irq_fn map_irq,
@@ -114,9 +113,6 @@ PCIDevice *pci_register_device(PCIBus *b
{
PCIDevice *pci_dev;
- if (pci_irq_index >= PCI_DEVICES_MAX)
- return NULL;
-
if (devfn < 0) {
for(devfn = bus->devfn_min ; devfn < 256; devfn += 8) {
if ( !bus->devices[devfn] &&
@@ -140,7 +136,6 @@ PCIDevice *pci_register_device(PCIBus *b
config_write = pci_default_write_config;
pci_dev->config_read = config_read;
pci_dev->config_write = config_write;
- pci_dev->irq_index = pci_irq_index++;
bus->devices[devfn] = pci_dev;
return pci_dev;
}
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/ioemu/hw/pt-msi.c
--- a/tools/ioemu/hw/pt-msi.c Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/ioemu/hw/pt-msi.c Fri Sep 12 14:47:40 2008 +0900
@@ -313,7 +313,7 @@ int pt_msix_init(struct pt_dev *dev, int
table_off = pci_read_long(pd, pos + PCI_MSIX_TABLE);
bar_index = dev->msix->bar_index = table_off & PCI_MSIX_BIR;
- table_off &= table_off & ~PCI_MSIX_BIR;
+ table_off = dev->msix->table_off = table_off & ~PCI_MSIX_BIR;
dev->msix->table_base = dev->pci_dev->base_addr[bar_index];
PT_LOG("get MSI-X table bar base %llx\n",
(unsigned long long)dev->msix->table_base);
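The corrected line stores the masked value instead of discarding it. A minimal
sketch of the decode it implements, assuming PCI_MSIX_BIR is the usual
low-bits BAR-index mask; the register value here is made up:

    #include <stdint.h>
    #include <stdio.h>

    #define PCI_MSIX_BIR 0x7  /* BAR Indicator Register field */

    int main(void)
    {
        uint32_t dword = 0x00002003;                /* hypothetical Table Offset/BIR */
        uint32_t bar_index = dword & PCI_MSIX_BIR;  /* -> 3 */
        uint32_t table_off = dword & ~PCI_MSIX_BIR; /* -> 0x2000 */

        printf("MSI-X table in BAR %u at offset 0x%x\n", bar_index, table_off);
        return 0;
    }

The old expression computed the same mask but never recorded the offset in
dev->msix->table_off, which the fix now does.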
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/ioemu/hw/vga.c
--- a/tools/ioemu/hw/vga.c Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/ioemu/hw/vga.c Fri Sep 12 14:47:40 2008 +0900
@@ -2080,7 +2080,13 @@ void xen_vga_vram_map(uint64_t vram_addr
if (copy)
memcpy(vram, xen_vga_state->vram_ptr, VGA_RAM_SIZE);
- qemu_free(xen_vga_state->vram_ptr);
+ if (xen_vga_state->vram_mfns) {
+ /* In case this function is called more than once */
+ free(xen_vga_state->vram_mfns);
+ munmap(xen_vga_state->vram_ptr, VGA_RAM_SIZE);
+ } else {
+ qemu_free(xen_vga_state->vram_ptr);
+ }
xen_vga_state->vram_ptr = vram;
xen_vga_state->vram_mfns = pfn_list;
#ifdef CONFIG_STUBDOM
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/ioemu/hw/xen_machine_fv.c
--- a/tools/ioemu/hw/xen_machine_fv.c Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/ioemu/hw/xen_machine_fv.c Fri Sep 12 14:47:40 2008 +0900
@@ -139,8 +139,10 @@ uint8_t *qemu_map_cache(target_phys_addr
!test_bit(address_offset>>XC_PAGE_SHIFT, entry->valid_mapping))
qemu_remap_bucket(entry, address_index);
- if (!test_bit(address_offset>>XC_PAGE_SHIFT, entry->valid_mapping))
+ if (!test_bit(address_offset>>XC_PAGE_SHIFT, entry->valid_mapping)) {
+ last_address_index = ~0UL;
return NULL;
+ }
last_address_index = address_index;
last_address_vaddr = entry->vaddr_base;
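The extra assignment matters because qemu_map_cache() keeps a one-entry cache
keyed on last_address_index. A minimal sketch of the failure-path
invalidation, with made-up names:

    /* One-entry lookup cache: ~0UL means "no cached translation". */
    static unsigned long last_index = ~0UL;

    static void *cached_lookup(unsigned long index, void *vaddr, int valid)
    {
        if (!valid) {
            last_index = ~0UL;  /* poison the tag, as the hunk above does */
            return NULL;
        }
        last_index = index;     /* record the hit for the fast path */
        return vaddr;
    }

Without the poisoning, a later access with the same index would take the fast
path and reuse the known-bad mapping.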
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/ioemu/vl.h
--- a/tools/ioemu/vl.h Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/ioemu/vl.h Fri Sep 12 14:47:40 2008 +0900
@@ -812,8 +812,6 @@ struct PCIDevice {
/* do not access the following fields */
PCIConfigReadFunc *config_read;
PCIConfigWriteFunc *config_write;
- /* ??? This is a PC-specific hack, and should be removed. */
- int irq_index;
/* Current IRQ levels. Used internally by the generic PCI code. */
int irq_state[4];
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/libxc/ia64/xc_ia64_linux_save.c
--- a/tools/libxc/ia64/xc_ia64_linux_save.c Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/libxc/ia64/xc_ia64_linux_save.c Fri Sep 12 14:47:40 2008 +0900
@@ -53,12 +53,12 @@ static inline void set_bit(int nr, volat
}
static int
-suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
+suspend_and_state(int (*suspend)(void), int xc_handle, int io_fd,
int dom, xc_dominfo_t *info)
{
int i = 0;
- if (!(*suspend)(dom)) {
+ if (!(*suspend)()) {
ERROR("Suspend request failed");
return -1;
}
@@ -406,7 +406,7 @@ out:
int
xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
- uint32_t max_factor, uint32_t flags, int (*suspend)(int),
+ uint32_t max_factor, uint32_t flags, int (*suspend)(void),
int hvm, void *(*init_qemu_maps)(int, unsigned),
void (*qemu_flip_buffer)(int, int))
{
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/libxc/xc_domain_save.c
--- a/tools/libxc/xc_domain_save.c Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/libxc/xc_domain_save.c Fri Sep 12 14:47:40 2008 +0900
@@ -338,72 +338,23 @@ static int analysis_phase(int xc_handle,
}
-static int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
+static int suspend_and_state(int (*suspend)(void), int xc_handle, int io_fd,
int dom, xc_dominfo_t *info)
{
- int i = 0;
-
- if ( !(*suspend)(dom) )
+ if ( !(*suspend)() )
{
ERROR("Suspend request failed");
return -1;
}
- retry:
-
- if ( xc_domain_getinfo(xc_handle, dom, 1, info) != 1 )
- {
- ERROR("Could not get domain info");
+ if ( (xc_domain_getinfo(xc_handle, dom, 1, info) != 1) ||
+ !info->shutdown || (info->shutdown_reason != SHUTDOWN_suspend) )
+ {
+ ERROR("Domain not in suspended state");
return -1;
}
- if ( info->dying )
- {
- ERROR("domain is dying");
- return -1;
- }
-
- if ( info->crashed )
- {
- ERROR("domain has crashed");
- return -1;
- }
-
- if ( info->shutdown )
- {
- switch ( info->shutdown_reason )
- {
- case SHUTDOWN_poweroff:
- case SHUTDOWN_reboot:
- ERROR("domain has shut down");
- return -1;
- case SHUTDOWN_suspend:
- return 0;
- case SHUTDOWN_crash:
- ERROR("domain has crashed");
- return -1;
- }
- }
-
- if ( info->paused )
- {
- /* Try unpausing domain, wait, and retest. */
- xc_domain_unpause( xc_handle, dom );
- ERROR("Domain was paused. Wait and re-test.");
- usleep(10000); /* 10ms */
- goto retry;
- }
-
- if ( ++i < 100 )
- {
- ERROR("Retry suspend domain");
- usleep(10000); /* 10ms */
- goto retry;
- }
-
- ERROR("Unable to suspend domain.");
-
- return -1;
+ return 0;
}
/*
@@ -796,7 +747,7 @@ static xen_pfn_t *map_and_save_p2m_table
int xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
- uint32_t max_factor, uint32_t flags, int (*suspend)(int),
+ uint32_t max_factor, uint32_t flags, int (*suspend)(void),
int hvm, void *(*init_qemu_maps)(int, unsigned),
void (*qemu_flip_buffer)(int, int))
{
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/libxc/xc_evtchn.c
--- a/tools/libxc/xc_evtchn.c Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/libxc/xc_evtchn.c Fri Sep 12 14:47:40 2008 +0900
@@ -59,17 +59,8 @@ int xc_evtchn_reset(int xc_handle,
return do_evtchn_op(xc_handle, EVTCHNOP_reset, &arg, sizeof(arg), 0);
}
-int xc_evtchn_status(int xc_handle,
- uint32_t dom,
- uint32_t port)
+int xc_evtchn_status(int xc_handle, xc_evtchn_status_t *status)
{
- int rc;
- struct evtchn_status arg = { .dom = (domid_t)dom,
- .port = (evtchn_port_t)port };
-
- rc = do_evtchn_op(xc_handle, EVTCHNOP_status, &arg, sizeof(arg), 1);
- if ( rc == 0 )
- rc = arg.status;
-
- return rc;
+ return do_evtchn_op(xc_handle, EVTCHNOP_status, status,
+ sizeof(*status), 1);
}
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/libxc/xc_private.c
--- a/tools/libxc/xc_private.c Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/libxc/xc_private.c Fri Sep 12 14:47:40 2008 +0900
@@ -307,6 +307,13 @@ int xc_memory_op(int xc_handle,
goto out1;
}
break;
+ case XENMEM_remove_from_physmap:
+ if ( lock_pages(arg, sizeof(struct xen_remove_from_physmap)) )
+ {
+ PERROR("Could not lock");
+ goto out1;
+ }
+ break;
case XENMEM_current_reservation:
case XENMEM_maximum_reservation:
case XENMEM_maximum_gpfn:
@@ -339,6 +346,9 @@ int xc_memory_op(int xc_handle,
break;
case XENMEM_add_to_physmap:
unlock_pages(arg, sizeof(struct xen_add_to_physmap));
+ break;
+ case XENMEM_remove_from_physmap:
+ unlock_pages(arg, sizeof(struct xen_remove_from_physmap));
break;
case XENMEM_current_reservation:
case XENMEM_maximum_reservation:
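The two hunks pair up: an argument buffer locked before the memory op must be
unlocked on every exit path. A minimal sketch of the pattern for the new case;
lock_pages()/unlock_pages() are xc_private.c internals, and the field values
are made up:

    /* struct xen_remove_from_physmap carries a domid and a gpfn. */
    struct xen_remove_from_physmap xrfp = {
        .domid = 0,      /* hypothetical target domain */
        .gpfn  = 0x1000, /* hypothetical guest pfn */
    };

    if ( lock_pages(&xrfp, sizeof(xrfp)) )
        return -1;       /* could not pin the argument buffer */
    /* ... issue the XENMEM_remove_from_physmap memory op here ... */
    unlock_pages(&xrfp, sizeof(xrfp));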
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/libxc/xenctrl.h Fri Sep 12 14:47:40 2008 +0900
@@ -502,9 +502,9 @@ xc_evtchn_alloc_unbound(int xc_handle,
int xc_evtchn_reset(int xc_handle,
uint32_t dom);
-int xc_evtchn_status(int xc_handle,
- uint32_t dom,
- uint32_t port);
+
+typedef struct evtchn_status xc_evtchn_status_t;
+int xc_evtchn_status(int xc_handle, xc_evtchn_status_t *status);
/*
* Return a handle to the event channel driver, or -1 on failure, in which case
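Callers of the new interface fill in the request fields (dom, port) and read
the results back from the same structure, as the reworked lsevtchn below does.
A minimal sketch against the patched libxenctrl; domain and port are chosen
arbitrarily:

    #include <stdio.h>
    #include <xenctrl.h>

    int main(void)
    {
        xc_evtchn_status_t status = { .dom = 0, .port = 1 };
        int xc = xc_interface_open();

        if ( xc < 0 )
            return 1;
        /* returns the hypercall result: 0 on success, negative on error */
        if ( xc_evtchn_status(xc, &status) == 0 )
            printf("port %u: status %u on vcpu %u\n",
                   status.port, status.status, status.vcpu);
        xc_interface_close(xc);
        return 0;
    }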
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/libxc/xenguest.h
--- a/tools/libxc/xenguest.h Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/libxc/xenguest.h Fri Sep 12 14:47:40 2008 +0900
@@ -25,7 +25,7 @@
*/
int xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */,
- int (*suspend)(int domid), int hvm,
+ int (*suspend)(void), int hvm,
void *(*init_qemu_maps)(int, unsigned), /* HVM only */
void (*qemu_flip_buffer)(int, int)); /* HVM only */
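With the narrowed int (*suspend)(void) signature, a caller that used to
receive the domid as a parameter keeps it in its own state instead, as
xc_save.c's suspendinfo struct does. A minimal sketch with hypothetical names:

    /* Hypothetical caller state standing in for the old domid parameter. */
    static int my_domid;

    /* Matches the new callback contract: non-zero means success. */
    static int my_suspend(void)
    {
        /* ... ask domain my_domid to suspend, e.g. over its dedicated
         * suspend event channel or via the toolstack ... */
        return 1;
    }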
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/python/Makefile
--- a/tools/python/Makefile Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/python/Makefile Fri Sep 12 14:47:40 2008 +0900
@@ -1,13 +1,5 @@ XEN_ROOT = ../..
XEN_ROOT = ../..
include $(XEN_ROOT)/tools/Rules.mk
-
-XEN_SECURITY_MODULE = dummy
-ifeq ($(FLASK_ENABLE),y)
-XEN_SECURITY_MODULE = flask
-endif
-ifeq ($(ACM_SECURITY),y)
-XEN_SECURITY_MODULE = acm
-endif
.PHONY: all
all: build
@@ -23,8 +15,8 @@ NLSDIR = /usr/share/locale
NLSDIR = /usr/share/locale
.PHONY: build buildpy
-buildpy: xsm.py
-	CC="$(CC)" CFLAGS="$(CFLAGS)" XEN_SECURITY_MODULE="$(XEN_SECURITY_MODULE)" python setup.py build
+buildpy:
+ CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py build
build: buildpy refresh-pot refresh-po $(CATALOGS)
@@ -61,18 +53,6 @@ refresh-po: $(POTFILE)
%.mo: %.po
$(MSGFMT) -c -o $@ $<
-xsm.py:
- @(set -e; \
- echo "XEN_SECURITY_MODULE = \""$(XEN_SECURITY_MODULE)"\""; \
- echo "from xsm_core import *"; \
- echo ""; \
-	  echo "import xen.util.xsm."$(XEN_SECURITY_MODULE)"."$(XEN_SECURITY_MODULE)" as xsm_module"; \
- echo ""; \
- echo "xsm_init(xsm_module)"; \
-	  echo "from xen.util.xsm."$(XEN_SECURITY_MODULE)"."$(XEN_SECURITY_MODULE)" import *"; \
- echo "del xsm_module"; \
- echo "") >xen/util/xsm/$@
-
.PHONY: install
ifndef XEN_PYTHON_NATIVE_INSTALL
install: LIBPATH=$(shell PYTHONPATH=xen/util python -c "import auxbin; print auxbin.libpath()")
@@ -104,4 +84,4 @@ test:
.PHONY: clean
clean:
-	rm -rf build *.pyc *.pyo *.o *.a *~ $(CATALOGS) xen/util/xsm/xsm.py xen/util/auxbin.pyc
+ rm -rf build *.pyc *.pyo *.o *.a *~ $(CATALOGS) xen/util/auxbin.pyc
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/python/xen/util/xsconstants.py
--- a/tools/python/xen/util/xsconstants.py Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/python/xen/util/xsconstants.py Fri Sep 12 14:47:40 2008 +0900
@@ -20,8 +20,10 @@ XS_INST_BOOT = (1 << 0)
XS_INST_BOOT = (1 << 0)
XS_INST_LOAD = (1 << 1)
-XS_POLICY_NONE = 0
XS_POLICY_ACM = (1 << 0)
+XS_POLICY_FLASK = (1 << 1)
+XS_POLICY_DUMMY = (1 << 2)
+XS_POLICY_USE = 0
# Some internal variables used by the Xen-API
ACM_LABEL_VM = (1 << 0)
@@ -107,6 +109,6 @@ ACM_POLICY_ID = 'ACM'
INVALID_POLICY_PREFIX = 'INV_'
-INVALID_SSIDREF = 0xFFFFFFFF
+INVALID_SSIDREF = 0xFFFFFFFFL
XS_INACCESSIBLE_LABEL = '__INACCESSIBLE__'
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/python/xen/util/xsm/flask/flask.py
--- a/tools/python/xen/util/xsm/flask/flask.py Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/python/xen/util/xsm/flask/flask.py Fri Sep 12 14:47:40 2008 +0900
@@ -1,5 +1,6 @@ import sys
import sys
from xen.lowlevel import flask
+from xen.util import xsconstants
from xen.xend import sxp
#Functions exported through XML-RPC
@@ -12,7 +13,7 @@ def err(msg):
raise XSMError(msg)
def on():
- return 0 #xsconstants.XS_POLICY_FLASK
+ return xsconstants.XS_POLICY_FLASK
def ssidref2label(ssidref):
try:
@@ -37,8 +38,9 @@ def set_security_label(policy, label):
return label
def ssidref2security_label(ssidref):
- return ssidref2label(ssidref)
+ label = ssidref2label(ssidref)
+ return label
def get_security_label(self, xspol=None):
- label = self.info.get('security_label', '')
+ label = self.info['security_label']
return label
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/python/xen/util/xsm/xsm.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/python/xen/util/xsm/xsm.py Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,20 @@
+import sys
+import string
+from xen.xend import XendOptions
+from xen.util import xsconstants
+from xsm_core import xsm_init
+
+xoptions = XendOptions.instance()
+xsm_module_name = xoptions.get_xsm_module_name()
+
+xsconstants.XS_POLICY_USE = eval("xsconstants.XS_POLICY_" +
+ string.upper(xsm_module_name))
+
+xsm_module_path = "xen.util.xsm." + xsm_module_name + "." + xsm_module_name
+xsm_module = __import__(xsm_module_path, globals(), locals(), ['*'])
+
+xsm_init(xsm_module)
+
+for op in dir(xsm_module):
+ if not hasattr(sys.modules[__name__], op):
+ setattr(sys.modules[__name__], op, getattr(xsm_module, op, None))
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/python/xen/xend/XendConfig.py
--- a/tools/python/xen/xend/XendConfig.py Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/python/xen/xend/XendConfig.py Fri Sep 12 14:47:40 2008 +0900
@@ -729,7 +729,7 @@ class XendConfig(dict):
self.parse_cpuid(cfg, 'cpuid_check')
import xen.util.xsm.xsm as security
- if security.on() == xsconstants.XS_POLICY_ACM:
+ if security.on() == xsconstants.XS_POLICY_USE:
from xen.util.acmpolicy import ACM_LABEL_UNLABELED
if not 'security' in cfg and sxp.child_value(sxp_cfg, 'security'):
cfg['security'] = sxp.child_value(sxp_cfg, 'security')
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/python/xen/xend/XendDomainInfo.py Fri Sep 12 14:47:40 2008 +0900
@@ -2069,7 +2069,7 @@ class XendDomainInfo:
balloon.free(2*1024) # 2MB should be plenty
ssidref = 0
- if security.on() == xsconstants.XS_POLICY_ACM:
+ if security.on() == xsconstants.XS_POLICY_USE:
ssidref = security.calc_dom_ssidref_from_info(self.info)
if security.has_authorization(ssidref) == False:
raise VmError("VM is not authorized to run.")
@@ -2855,10 +2855,6 @@ class XendDomainInfo:
info["maxmem_kb"] = XendNode.instance() \
.physinfo_dict()['total_memory'] * 1024
- #ssidref field not used any longer
- if 'ssidref' in info:
- info.pop('ssidref')
-
# make sure state is reset for info
# TODO: we should eventually get rid of old_dom_states
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/python/xen/xend/XendOptions.py
--- a/tools/python/xen/xend/XendOptions.py Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/python/xen/xend/XendOptions.py Fri Sep 12 14:47:40 2008 +0900
@@ -131,6 +131,9 @@ class XendOptions:
"""Default script to configure a backend network interface"""
vif_script = osdep.vif_script
+
+ """Default Xen Security Module"""
+ xsm_module_default = 'dummy'
"""Default rotation count of qemu-dm log file."""
qemu_dm_logrotate_count = 10
@@ -427,6 +430,11 @@ class XendOptionsFile(XendOptions):
return self.get_config_value('xen-api-server',
self.xen_api_server_default)
+ def get_xsm_module_name(self):
+ """Get the Xen Security Module name.
+ """
+        return self.get_config_string('xsm_module_name', self.xsm_module_default)
+
if os.uname()[0] == 'SunOS':
class XendOptionsSMF(XendOptions):
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/python/xen/xend/server/blkif.py
--- a/tools/python/xen/xend/server/blkif.py Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/python/xen/xend/server/blkif.py Fri Sep 12 14:47:40 2008 +0900
@@ -78,7 +78,7 @@ class BlkifController(DevController):
if uuid:
back['uuid'] = uuid
- if security.on() == xsconstants.XS_POLICY_ACM:
+ if security.on() == xsconstants.XS_POLICY_USE:
self.do_access_control(config, uname)
(device_path, devid) = blkif.blkdev_name_to_number(dev)
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/python/xen/xend/server/netif.py
--- a/tools/python/xen/xend/server/netif.py Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/python/xen/xend/server/netif.py Fri Sep 12 14:47:40 2008 +0900
@@ -156,7 +156,7 @@ class NetifController(DevController):
front = { 'handle' : "%i" % devid,
'mac' : mac }
- if security.on() == xsconstants.XS_POLICY_ACM:
+ if security.on() == xsconstants.XS_POLICY_USE:
self.do_access_control(config)
return (devid, back, front)
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/python/xen/xend/server/pciif.py
--- a/tools/python/xen/xend/server/pciif.py Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/python/xen/xend/server/pciif.py Fri Sep 12 14:47:40 2008 +0900
@@ -286,7 +286,7 @@ class PciController(DevController):
)%(dev.name))
if dev.has_non_page_aligned_bar and arch.type != "ia64":
-            raise VmError("pci: %: non-page-aligned MMIO BAR found." % dev.name)
+            raise VmError("pci: %s: non-page-aligned MMIO BAR found." % dev.name)
self.CheckSiblingDevices(fe_domid, dev)
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/python/xen/xm/create.py Fri Sep 12 14:47:40 2008 +0900
@@ -566,11 +566,11 @@ gopts.var('hap', val='HAP',
use="""Hap status (0=hap is disabled;
1=hap is enabled.""")
-gopts.var('cpuid', val="IN[,SIN]:eax=EAX,ebx=EBX,exc=ECX,edx=EDX",
+gopts.var('cpuid', val="IN[,SIN]:eax=EAX,ebx=EBX,ecx=ECX,edx=EDX",
fn=append_value, default=[],
use="""Cpuid description.""")
-gopts.var('cpuid_check', val="IN[,SIN]:eax=EAX,ebx=EBX,exc=ECX,edx=EDX",
+gopts.var('cpuid_check', val="IN[,SIN]:eax=EAX,ebx=EBX,ecx=ECX,edx=EDX",
fn=append_value, default=[],
use="""Cpuid check description.""")
@@ -971,7 +971,7 @@ def preprocess_cpuid(vals, attr_name):
"of the register %s for input %s\n"
% (res['reg'], input) )
cpuid[input][res['reg']] = res['val'] # new register
- setattr(vals, attr_name, cpuid)
+ setattr(vals, attr_name, cpuid)
def preprocess_pci(vals):
if not vals.pci: return
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/python/xen/xm/main.py Fri Sep 12 14:47:40 2008 +0900
@@ -1812,7 +1812,7 @@ def domain_name_to_domid(domain_name):
else:
dom = server.xend.domain(domain_name)
domid = int(sxp.child_value(dom, 'domid', '-1'))
- return domid
+ return int(domid)
def xm_vncviewer(args):
autopass = False;
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/xcutils/lsevtchn.c
--- a/tools/xcutils/lsevtchn.c Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/xcutils/lsevtchn.c Fri Sep 12 14:47:40 2008 +0900
@@ -8,49 +8,55 @@
#include <xenctrl.h>
#include <xenguest.h>
-int
-main(int argc, char **argv)
+int main(int argc, char **argv)
{
- int xc_fd;
- int domid = 0, port = 0, status;
- const char *msg;
+ int xc_fd, domid, port, rc;
+ xc_evtchn_status_t status;
- if ( argc > 1 )
- domid = strtol(argv[1], NULL, 10);
+ domid = (argc > 1) ? strtol(argv[1], NULL, 10) : 0;
xc_fd = xc_interface_open();
if ( xc_fd < 0 )
errx(1, "failed to open control interface");
- while ( (status = xc_evtchn_status(xc_fd, domid, port)) >= 0 )
+ for ( port = 0; ; port++ )
{
- switch ( status )
+ status.dom = domid;
+ status.port = port;
+ rc = xc_evtchn_status(xc_fd, &status);
+ if ( rc < 0 )
+ break;
+
+ if ( status.status == EVTCHNSTAT_closed )
+ continue;
+
+ printf("%4d: VCPU %u: ", port, status.vcpu);
+
+ switch ( status.status )
{
- case EVTCHNSTAT_closed:
- msg = "Channel is not in use.";
- break;
case EVTCHNSTAT_unbound:
- msg = "Channel is waiting interdom connection.";
+ printf("Interdomain (Waiting connection) - Remote Domain %u",
+ status.u.unbound.dom);
break;
case EVTCHNSTAT_interdomain:
- msg = "Channel is connected to remote domain.";
+ printf("Interdomain (Connected) - Remote Domain %u, Port %u",
+ status.u.interdomain.dom, status.u.interdomain.port);
break;
case EVTCHNSTAT_pirq:
- msg = "Channel is bound to a phys IRQ line.";
+ printf("Physical IRQ %u", status.u.pirq);
break;
case EVTCHNSTAT_virq:
- msg = "Channel is bound to a virtual IRQ line.";
+ printf("Virtual IRQ %u", status.u.virq);
break;
case EVTCHNSTAT_ipi:
- msg = "Channel is bound to a virtual IPI line.";
+ printf("IPI");
break;
default:
- msg = "Unknown.";
+ printf("Unknown");
break;
+ }
- }
- printf("%03d: %d: %s\n", port, status, msg);
- port++;
+ printf("\n");
}
xc_interface_close(xc_fd);
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/xcutils/xc_save.c
--- a/tools/xcutils/xc_save.c Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/xcutils/xc_save.c Fri Sep 12 14:47:40 2008 +0900
@@ -32,7 +32,7 @@ static struct suspendinfo {
* Issue a suspend request through stdout, and receive the acknowledgement
* from stdin. This is handled by XendCheckpoint in the Python layer.
*/
-static int compat_suspend(int domid)
+static int compat_suspend(void)
{
char ans[30];
@@ -43,16 +43,35 @@ static int compat_suspend(int domid)
!strncmp(ans, "done\n", 5));
}
-static int suspend_evtchn_release(int xc, int domid)
+static int suspend_evtchn_release(void)
{
if (si.suspend_evtchn >= 0) {
- xc_evtchn_unbind(si.xce, si.suspend_evtchn);
- si.suspend_evtchn = -1;
+ xc_evtchn_unbind(si.xce, si.suspend_evtchn);
+ si.suspend_evtchn = -1;
}
if (si.xce >= 0) {
- xc_evtchn_close(si.xce);
- si.xce = -1;
- }
+ xc_evtchn_close(si.xce);
+ si.xce = -1;
+ }
+
+ return 0;
+}
+
+static int await_suspend(void)
+{
+ int rc;
+
+ do {
+ rc = xc_evtchn_pending(si.xce);
+ if (rc < 0) {
+ warnx("error polling suspend notification channel: %d", rc);
+ return -1;
+ }
+ } while (rc != si.suspend_evtchn);
+
+ /* harmless for one-off suspend */
+ if (xc_evtchn_unmask(si.xce, si.suspend_evtchn) < 0)
+ warnx("failed to unmask suspend notification channel: %d", rc);
return 0;
}
@@ -71,16 +90,16 @@ static int suspend_evtchn_init(int xc, i
xs = xs_daemon_open();
if (!xs) {
- errx(1, "failed to get xenstore handle");
- return -1;
+ warnx("failed to get xenstore handle");
+ return -1;
}
sprintf(path, "/local/domain/%d/device/suspend/event-channel", domid);
portstr = xs_read(xs, XBT_NULL, path, &plen);
xs_daemon_close(xs);
if (!portstr || !plen) {
- warnx("could not read suspend event channel");
- return -1;
+ warnx("could not read suspend event channel");
+ return -1;
}
port = atoi(portstr);
@@ -88,27 +107,29 @@ static int suspend_evtchn_init(int xc, i
si.xce = xc_evtchn_open();
if (si.xce < 0) {
- errx(1, "failed to open event channel handle");
- goto cleanup;
+ warnx("failed to open event channel handle");
+ goto cleanup;
}
si.suspend_evtchn = xc_evtchn_bind_interdomain(si.xce, domid, port);
if (si.suspend_evtchn < 0) {
- errx(1, "failed to bind suspend event channel: %d",
- si.suspend_evtchn);
- goto cleanup;
+ warnx("failed to bind suspend event channel: %d", si.suspend_evtchn);
+ goto cleanup;
}
rc = xc_domain_subscribe_for_suspend(xc, domid, port);
if (rc < 0) {
- errx(1, "failed to subscribe to domain: %d", rc);
- goto cleanup;
- }
+ warnx("failed to subscribe to domain: %d", rc);
+ goto cleanup;
+ }
+
+ /* event channel is pending immediately after binding */
+ await_suspend();
return 0;
cleanup:
- suspend_evtchn_release(xc, domid);
+ suspend_evtchn_release();
return -1;
}
@@ -116,29 +137,20 @@ static int suspend_evtchn_init(int xc, i
/**
* Issue a suspend request to a dedicated event channel in the guest, and
* receive the acknowledgement from the subscribe event channel. */
-static int evtchn_suspend(int domid)
-{
- int xcefd;
+static int evtchn_suspend(void)
+{
int rc;
rc = xc_evtchn_notify(si.xce, si.suspend_evtchn);
if (rc < 0) {
- errx(1, "failed to notify suspend request channel: %d", rc);
- return 0;
- }
-
- xcefd = xc_evtchn_fd(si.xce);
- do {
- rc = xc_evtchn_pending(si.xce);
- if (rc < 0) {
- errx(1, "error polling suspend notification channel: %d", rc);
- return 0;
- }
- } while (rc != si.suspend_evtchn);
-
- /* harmless for one-off suspend */
- if (xc_evtchn_unmask(si.xce, si.suspend_evtchn) < 0)
- errx(1, "failed to unmask suspend notification channel: %d", rc);
+ warnx("failed to notify suspend request channel: %d", rc);
+ return 0;
+ }
+
+ if (await_suspend() < 0) {
+ warnx("suspend failed");
+ return 0;
+ }
/* notify xend that it can do device migration */
printf("suspended\n");
@@ -147,12 +159,12 @@ static int evtchn_suspend(int domid)
return 1;
}
-static int suspend(int domid)
+static int suspend(void)
{
if (si.suspend_evtchn >= 0)
- return evtchn_suspend(domid);
-
- return compat_suspend(domid);
+ return evtchn_suspend();
+
+ return compat_suspend();
}
/* For HVM guests, there are two sources of dirty pages: the Xen shadow
@@ -195,11 +207,9 @@ static void qemu_flip_buffer(int domid,
/* Tell qemu that we want it to start writing log-dirty bits to the
* other buffer */
- if (!xs_write(xs, XBT_NULL, qemu_next_active_path, &digit, 1)) {
+ if (!xs_write(xs, XBT_NULL, qemu_next_active_path, &digit, 1))
errx(1, "can't write next-active to store path (%s)\n",
- qemu_next_active_path);
- exit(1);
- }
+ qemu_next_active_path);
/* Wait a while for qemu to signal that it has switched to the new
* active buffer */
@@ -208,10 +218,8 @@ static void qemu_flip_buffer(int domid,
tv.tv_usec = 0;
FD_ZERO(&fdset);
FD_SET(xs_fileno(xs), &fdset);
- if ((select(xs_fileno(xs) + 1, &fdset, NULL, NULL, &tv)) != 1) {
+ if ((select(xs_fileno(xs) + 1, &fdset, NULL, NULL, &tv)) != 1)
errx(1, "timed out waiting for qemu to switch buffers\n");
- exit(1);
- }
watch = xs_read_watch(xs, &len);
free(watch);
@@ -221,7 +229,7 @@ static void qemu_flip_buffer(int domid,
goto read_again;
}
-static void * init_qemu_maps(int domid, unsigned int bitmap_size)
+static void *init_qemu_maps(int domid, unsigned int bitmap_size)
{
key_t key;
char key_ascii[17] = {0,};
@@ -293,7 +301,7 @@ main(int argc, char **argv)
int ret;
if (argc != 6)
- errx(1, "usage: %s iofd domid maxit maxf flags", argv[0]);
+ errx(1, "usage: %s iofd domid maxit maxf flags", argv[0]);
xc_fd = xc_interface_open();
if (xc_fd < 0)
@@ -305,13 +313,14 @@ main(int argc, char **argv)
max_f = atoi(argv[4]);
flags = atoi(argv[5]);
- suspend_evtchn_init(xc_fd, domid);
+ if (suspend_evtchn_init(xc_fd, domid) < 0)
+ warnx("suspend event channel initialization failed, using slow path");
ret = xc_domain_save(xc_fd, io_fd, domid, maxit, max_f, flags,
&suspend, !!(flags & XCFLAGS_HVM),
&init_qemu_maps, &qemu_flip_buffer);
- suspend_evtchn_release(xc_fd, domid);
+ suspend_evtchn_release();
xc_interface_close(xc_fd);
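The net effect of the refactoring: the notify/poll/unmask dance now lives in
await_suspend(), shared by the initial binding and evtchn_suspend(). A minimal
sketch of the same loop using the libxenctrl calls this file already relies
on; the wrapper name is made up:

    #include <xenctrl.h>

    /* Returns 0 once the suspend port has fired and been unmasked. */
    static int kick_and_await(int xce, int suspend_evtchn)
    {
        int rc;

        if ( xc_evtchn_notify(xce, suspend_evtchn) < 0 )
            return -1;
        do {
            rc = xc_evtchn_pending(xce);   /* blocks until some port fires */
            if ( rc < 0 )
                return -1;
        } while ( rc != suspend_evtchn );  /* ignore unrelated ports */

        /* harmless for a one-off suspend */
        return xc_evtchn_unmask(xce, suspend_evtchn);
    }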
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/xenstore/xs.c
--- a/tools/xenstore/xs.c Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/xenstore/xs.c Fri Sep 12 14:47:40 2008 +0900
@@ -795,8 +795,11 @@ char *xs_get_domain_path(struct xs_handl
bool xs_is_domain_introduced(struct xs_handle *h, unsigned int domid)
{
- return strcmp("F",
- single_with_domid(h, XS_IS_DOMAIN_INTRODUCED, domid));
+ char *domain = single_with_domid(h, XS_IS_DOMAIN_INTRODUCED, domid);
+ int rc = strcmp("F", domain);
+
+ free(domain);
+ return rc;
}
/* Only useful for DEBUG versions */
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/xentrace/formats
--- a/tools/xentrace/formats Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/xentrace/formats Fri Sep 12 14:47:40 2008 +0900
@@ -4,56 +4,69 @@ 0x0001f002 CPU%(cpu)d %(tsc)d (+%(relt
0x0001f002 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) wrap_buffer 0x%(1)08x
0x0001f003 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) cpu_change 0x%(1)08x
-0x0002f001 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) sched_add_domain [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
-0x0002f002 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) sched_rem_domain [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
-0x0002f003 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) domain_sleep [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
-0x0002f004 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) domain_wake [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
-0x0002f005 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) do_yield [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
-0x0002f006 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) do_block [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
-0x0002f007 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) domain_shutdown [ domid = 0x%(1)08x, edomid = 0x%(2)08x, reason = 0x%(3)08x ]
-0x0002f008 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) sched_ctl
-0x0002f009 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) sched_adjdom [ domid = 0x%(1)08x ]
-0x0002f00a CPU%(cpu)d %(tsc)d (+%(reltsc)8d) __enter_scheduler [ prev<domid:edomid> = 0x%(1)08x : 0x%(2)08x, next<domid:edomid> = 0x%(3)08x : 0x%(4)08x ]
-0x0002f00B CPU%(cpu)d %(tsc)d (+%(reltsc)8d) s_timer_fn
-0x0002f00c CPU%(cpu)d %(tsc)d (+%(reltsc)8d) t_timer_fn
-0x0002f00d CPU%(cpu)d %(tsc)d (+%(reltsc)8d) dom_timer_fn
-0x0002f00e CPU%(cpu)d %(tsc)d (+%(reltsc)8d) switch_infprev [ old_domid = 0x%(1)08x, runtime = %(2)d ]
-0x0002f00f CPU%(cpu)d %(tsc)d (+%(reltsc)8d) switch_infnext [ new_domid = 0x%(1)08x, time = %(2)d, r_time = %(3)d ]
+0x00021011 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) running_to_runnable [ dom:vcpu = 0x%(1)08x ]
+0x00021021 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) running_to_blocked [ dom:vcpu = 0x%(1)08x ]
+0x00021031 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) running_to_offline [ dom:vcpu = 0x%(1)08x ]
+0x00021101 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) runnable_to_running [ dom:vcpu = 0x%(1)08x ]
+0x00021121 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) runnable_to_blocked [ dom:vcpu = 0x%(1)08x ]
+0x00021131 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) runnable_to_offline [ dom:vcpu = 0x%(1)08x ]
+0x00021201 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) blocked_to_running [ dom:vcpu = 0x%(1)08x ]
+0x00021211 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) blocked_to_runnable [ dom:vcpu = 0x%(1)08x ]
+0x00021231 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) blocked_to_offline [ dom:vcpu = 0x%(1)08x ]
+0x00021301 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) offline_to_running [ dom:vcpu = 0x%(1)08x ]
+0x00021311 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) offline_to_runnable [ dom:vcpu = 0x%(1)08x ]
+0x00021321 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) offline_to_blocked [ dom:vcpu = 0x%(1)08x ]
-0x00081001 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) VMENTRY [ dom:vcpu = 0x%(1)08x ]
-0x00081002 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) VMEXIT [ dom:vcpu = 0x%(1)08x, exitcode = 0x%(2)08x, rIP = 0x%(3)08x ]
-0x00081102 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) VMEXIT [ dom:vcpu = 0x%(1)08x, exitcode = 0x%(2)08x, rIP = 0x%(3)016x ]
-0x00082001 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) PF_XEN [ dom:vcpu = 0x%(1)08x, errorcode = 0x%(2)02x, virt = 0x%(3)08x ]
-0x00082101 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) PF_XEN [ dom:vcpu = 0x%(1)08x, errorcode = 0x%(2)02x, virt = 0x%(3)016x ]
-0x00082002 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) PF_INJECT [ dom:vcpu = 0x%(1)08x, errorcode = 0x%(2)02x, virt = 0x%(3)08x ]
-0x00082102 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) PF_INJECT [ dom:vcpu = 0x%(1)08x, errorcode = 0x%(2)02x, virt = 0x%(3)016x ]
-0x00082003 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) INJ_EXC [ dom:vcpu = 0x%(1)08x, vector = 0x%(2)02x, errorcode = 0x%(3)04x ]
-0x00082004 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) INJ_VIRQ [ dom:vcpu = 0x%(1)08x, vector = 0x%(2)02x, fake = %(3)d ]
-0x00082005 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) REINJ_VIRQ [ dom:vcpu = 0x%(1)08x, vector = 0x%(2)02x ]
-0x00082006 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) IO_READ [ dom:vcpu = 0x%(1)08x, port = 0x%(2)04x, size = %(3)d ]
-0x00082007 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) IO_WRITE [ dom:vcpu = 0x%(1)08x, port = 0x%(2)04x, size = %(3)d ]
-0x00082008 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) CR_READ [ dom:vcpu = 0x%(1)08x, CR# = %(2)d, value = 0x%(3)08x ]
-0x00082108 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) CR_READ [ dom:vcpu = 0x%(1)08x, CR# = %(2)d, value = 0x%(3)016x ]
-0x00082009 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) CR_WRITE [ dom:vcpu = 0x%(1)08x, CR# = %(2)d, value = 0x%(3)08x ]
-0x00082109 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) CR_WRITE [ dom:vcpu = 0x%(1)08x, CR# = %(2)d, value = 0x%(3)016x ]
-0x0008200A CPU%(cpu)d %(tsc)d (+%(reltsc)8d) DR_READ [ dom:vcpu = 0x%(1)08x ]
-0x0008200B CPU%(cpu)d %(tsc)d (+%(reltsc)8d) DR_WRITE [ dom:vcpu = 0x%(1)08x ]
-0x0008200C CPU%(cpu)d %(tsc)d (+%(reltsc)8d) MSR_READ [ dom:vcpu = 0x%(1)08x, MSR# = 0x%(2)08x, value = 0x%(3)016x ]
-0x0008200D CPU%(cpu)d %(tsc)d (+%(reltsc)8d) MSR_WRITE [ dom:vcpu = 0x%(1)08x, MSR# = 0x%(2)08x, value = 0x%(3)016x ]
-0x0008200E CPU%(cpu)d %(tsc)d (+%(reltsc)8d) CPUID [ dom:vcpu = 0x%(1)08x, func = 0x%(2)08x, eax = 0x%(3)08x, ebx = 0x%(4)08x, ecx=0x%(5)08x, edx = 0x%(6)08x ]
-0x0008200F CPU%(cpu)d %(tsc)d (+%(reltsc)8d) INTR [ dom:vcpu = 0x%(1)08x, vector = 0x%(2)02x ]
-0x00082010 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) NMI [ dom:vcpu = 0x%(1)08x ]
-0x00082011 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) SMI [ dom:vcpu = 0x%(1)08x ]
-0x00082012 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) VMMCALL [ dom:vcpu = 0x%(1)08x, func = 0x%(2)08x ]
-0x00082013 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) HLT [ dom:vcpu = 0x%(1)08x, intpending = %(2)d ]
-0x00082014 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) INVLPG [ dom:vcpu = 0x%(1)08x, is invlpga? = %(2)d, virt = 0x%(3)08x ]
-0x00082114 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) INVLPG [ dom:vcpu = 0x%(1)08x, is invlpga? = %(2)d, virt = 0x%(3)016x ]
-0x00082015 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) MCE [ dom:vcpu = 0x%(1)08x ]
-0x00082016 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) IO_ASSIST [ dom:vcpu = 0x%(1)08x, data = 0x%(2)04x ]
-0x00082017 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) MMIO_ASSIST [ dom:vcpu = 0x%(1)08x, data = 0x%(2)04x ]
-0x00082018 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) CLTS [ dom:vcpu = 0x%(1)08x ]
-0x00082019 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) LMSW [ dom:vcpu = 0x%(1)08x, value = 0x%(2)08x ]
-0x00082119 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) LMSW [ dom:vcpu = 0x%(1)08x, value = 0x%(2)016x ]
+0x00028001 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) sched_add_domain [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
+0x00028002 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) sched_rem_domain [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
+0x00028003 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) domain_sleep [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
+0x00028004 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) domain_wake [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
+0x00028005 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) do_yield [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
+0x00028006 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) do_block [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
+0x00028007 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) domain_shutdown [ domid = 0x%(1)08x, edomid = 0x%(2)08x, reason = 0x%(3)08x ]
+0x00028008 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) sched_ctl
+0x00028009 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) sched_adjdom [ domid = 0x%(1)08x ]
+0x0002800a CPU%(cpu)d %(tsc)d (+%(reltsc)8d) __enter_scheduler [ prev<domid:edomid> = 0x%(1)08x : 0x%(2)08x, next<domid:edomid> = 0x%(3)08x : 0x%(4)08x ]
+0x0002800b CPU%(cpu)d %(tsc)d (+%(reltsc)8d) s_timer_fn
+0x0002800c CPU%(cpu)d %(tsc)d (+%(reltsc)8d) t_timer_fn
+0x0002800d CPU%(cpu)d %(tsc)d (+%(reltsc)8d) dom_timer_fn
+0x0002800e CPU%(cpu)d %(tsc)d (+%(reltsc)8d) switch_infprev [ old_domid = 0x%(1)08x, runtime = %(2)d ]
+0x0002800f CPU%(cpu)d %(tsc)d (+%(reltsc)8d) switch_infnext [ new_domid = 0x%(1)08x, time = %(2)d, r_time = %(3)d ]
+
+0x00081001 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) VMENTRY
+0x00081002 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) VMEXIT [ exitcode = 0x%(1)08x, rIP = 0x%(2)08x ]
+0x00081102 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) VMEXIT [ exitcode = 0x%(1)08x, rIP = 0x%(2)016x ]
+0x00082001 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) PF_XEN [ errorcode = 0x%(2)02x, virt = 0x%(1)08x ]
+0x00082101 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) PF_XEN [ errorcode = 0x%(2)02x, virt = 0x%(1)016x ]
+0x00082002 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) PF_INJECT [ errorcode = 0x%(1)02x, virt = 0x%(2)08x ]
+0x00082102 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) PF_INJECT [ errorcode = 0x%(1)02x, virt = 0x%(2)016x ]
+0x00082003 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) INJ_EXC [ vector = 0x%(1)02x, errorcode = 0x%(2)04x ]
+0x00082004 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) INJ_VIRQ [ vector = 0x%(1)02x, fake = %(2)d ]
+0x00082005 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) REINJ_VIRQ [ vector = 0x%(1)02x ]
+0x00082006 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) IO_READ [ port = 0x%(1)04x, size = %(2)d ]
+0x00082007 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) IO_WRITE [ port = 0x%(1)04x, size = %(2)d ]
+0x00082008 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) CR_READ [ CR# = %(1)d, value = 0x%(2)08x ]
+0x00082108 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) CR_READ [ CR# = %(1)d, value = 0x%(2)016x ]
+0x00082009 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) CR_WRITE [ CR# = %(1)d, value = 0x%(2)08x ]
+0x00082109 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) CR_WRITE [ CR# = %(1)d, value = 0x%(2)016x ]
+0x0008200A CPU%(cpu)d %(tsc)d (+%(reltsc)8d) DR_READ
+0x0008200B CPU%(cpu)d %(tsc)d (+%(reltsc)8d) DR_WRITE
+0x0008200C CPU%(cpu)d %(tsc)d (+%(reltsc)8d) MSR_READ [ MSR# = 0x%(1)08x, value = 0x%(2)016x ]
+0x0008200D CPU%(cpu)d %(tsc)d (+%(reltsc)8d) MSR_WRITE [ MSR# = 0x%(1)08x, value = 0x%(2)016x ]
+0x0008200E CPU%(cpu)d %(tsc)d (+%(reltsc)8d) CPUID [ func = 0x%(1)08x, eax = 0x%(2)08x, ebx = 0x%(3)08x, ecx=0x%(4)08x, edx = 0x%(5)08x ]
+0x0008200F CPU%(cpu)d %(tsc)d (+%(reltsc)8d) INTR [ vector = 0x%(1)02x ]
+0x00082010 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) NMI
+0x00082011 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) SMI
+0x00082012 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) VMMCALL [ func = 0x%(1)08x ]
+0x00082013 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) HLT [ intpending = %(1)d ]
+0x00082014 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) INVLPG [ is invlpga? = %(1)d, virt = 0x%(2)08x ]
+0x00082114 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) INVLPG [ is invlpga? = %(1)d, virt = 0x%(2)016x ]
+0x00082015 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) MCE
+0x00082016 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) IO_ASSIST [ data = 0x%(1)04x ]
+0x00082017 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) MMIO_ASSIST [ data = 0x%(1)04x ]
+0x00082018 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) CLTS
+0x00082019 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) LMSW [ value = 0x%(1)08x ]
+0x00082119 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) LMSW [ value = 0x%(1)016x ]
0x0010f001 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) page_grant_map [ domid = %(1)d ]
0x0010f002 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) page_grant_unmap [ domid = %(1)d ]
@@ -65,3 +78,41 @@ 0x0020f103 CPU%(cpu)d %(tsc)d (+%(relt
0x0020f103 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) trap [ rip = 0x%(1)016x, trapnr:error = 0x%(2)08x ]
0x0020f004 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) page_fault [ eip = 0x%(1)08x, addr = 0x%(2)08x, error = 0x%(3)08x ]
0x0020f104 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) page_fault [ rip = 0x%(1)16x, addr = 0x%(3)16x, error = 0x%(5)08x ]
+
+0x0020f006 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) emulate_privop [ eip = 0x%(1)08x ]
+0x0020f106 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) emulate_privop [ rip = 0x%(1)16x ]
+0x0020f007 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) emulate_4G [ eip = 0x%(1)08x ]
+0x0020f107 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) emulate_4G [ rip = 0x%(1)16x ]
+0x0020f00c CPU%(cpu)d %(tsc)d (+%(reltsc)8d) ptwr_emulation_pae [ addr = 0x%(2)08x, eip = 0x%(1)08x, npte = 0x%(1)16x ]
+0x0020f10c CPU%(cpu)d %(tsc)d (+%(reltsc)8d) ptwr_emulation_pae [ addr = 0x%(2)16x, rip = 0x%(1)16x, npte = 0x%(1)16x ]
+
+0x0040f001 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) shadow_not_shadow
[ gl1e = 0x%(1)16x, va = 0x%(2)08x, flags = 0x%(3)08x ]
+0x0040f101 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) shadow_not_shadow
[ gl1e = 0x%(1)16x, va = 0x%(2)16x, flags = 0x%(3)08x ]
+0x0040f002 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) shadow_fast_propagate
[ va = 0x%(1)08x ]
+0x0040f102 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) shadow_fast_propagate
[ va = 0x%(1)16x ]
+0x0040f003 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) shadow_fast_mmio
[ va = 0x%(1)08x ]
+0x0040f103 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) shadow_fast_mmio
[ va = 0x%(1)16x ]
+0x0040f004 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) shadow_false_fast_path
[ va = 0x%(1)08x ]
+0x0040f104 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) shadow_false_fast_path
[ va = 0x%(1)16x ]
+0x0040f005 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) shadow_mmio
[ va = 0x%(1)08x ]
+0x0040f105 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) shadow_mmio
[ va = 0x%(1)16x ]
+0x0040f006 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) shadow_fixup
[ gl1e = 0x%(1)08x, va = 0x%(2)08x, flags = 0x%(3)08x ]
+0x0040f106 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) shadow_fixup
[ gl1e = 0x%(1)16x, va = 0x%(2)16x, flags = 0x%(3)08x ]
+0x0040f007 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) shadow_domf_dying
[ va = 0x%(1)08x ]
+0x0040f107 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) shadow_domf_dying
[ va = 0x%(1)16x ]
+0x0040f008 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) shadow_emulate
[ gl1e = 0x%(1)08x, write_val = 0x%(2)08x, va = 0x%(3)08x, flags =
0x%(4)08x, emulation_count = 0x%(5)08x]
+0x0040f108 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) shadow_emulate
[ gl1e = 0x%(1)16x, write_val = 0x%(2)16x, va = 0x%(3)16x, flags =
0x%(4)08x, emulation_count = 0x%(5)08x]
+0x0040f009 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) shadow_emulate_unshadow_user
[ va = 0x%(1)08x, gfn = 0x%(2)08x ]
+0x0040f109 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) shadow_emulate_unshadow_user
[ va = 0x%(1)16x, gfn = 0x%(2)16x ]
+0x0040f00a CPU%(cpu)d %(tsc)d (+%(reltsc)8d) shadow_emulate_unshadow_evtinj
[ va = 0x%(1)08x, gfn = 0x%(2)08x ]
+0x0040f10a CPU%(cpu)d %(tsc)d (+%(reltsc)8d) shadow_emulate_unshadow_evtinj
[ va = 0x%(1)16x, gfn = 0x%(2)16x ]
+0x0040f00b CPU%(cpu)d %(tsc)d (+%(reltsc)8d)
shadow_emulate_unshadow_unhandled [ va = 0x%(1)08x, gfn = 0x%(2)08x ]
+0x0040f10b CPU%(cpu)d %(tsc)d (+%(reltsc)8d)
shadow_emulate_unshadow_unhandled [ va = 0x%(1)16x, gfn = 0x%(2)16x ]
+0x0040f00c CPU%(cpu)d %(tsc)d (+%(reltsc)8d) shadow_emulate_wrmap_bf
[ gfn = 0x%(1)08x ]
+0x0040f10c CPU%(cpu)d %(tsc)d (+%(reltsc)8d) shadow_emulate_wrmap_bf
[ gfn = 0x%(1)16x ]
+0x0040f00d CPU%(cpu)d %(tsc)d (+%(reltsc)8d) shadow_emulate_prealloc_unpin
[ gfn = 0x%(1)08x ]
+0x0040f10d CPU%(cpu)d %(tsc)d (+%(reltsc)8d) shadow_emulate_prealloc_unpin
[ gfn = 0x%(1)16x ]
+0x0040f00e CPU%(cpu)d %(tsc)d (+%(reltsc)8d) shadow_emulate_resync_full
[ gfn = 0x%(1)08x ]
+0x0040f10e CPU%(cpu)d %(tsc)d (+%(reltsc)8d) shadow_emulate_resync_full
[ gfn = 0x%(1)16x ]
+0x0040f00f CPU%(cpu)d %(tsc)d (+%(reltsc)8d) shadow_emulate_resync_only
[ gfn = 0x%(1)08x ]
+0x0040f10f CPU%(cpu)d %(tsc)d (+%(reltsc)8d) shadow_emulate_resync_only
[ gfn = 0x%(1)16x ]
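
Each event ID in the table above packs the trace class and subclass into
its upper 16 bits and the event number into the lower 16 bits (so the
0x0008xxxx records are HVM events and the 0x0040xxxx records are shadow
events). A minimal standalone sketch of that split; the mask names here
are illustrative, not Xen's own:

    #include <stdint.h>
    #include <stdio.h>

    #define CLS_MASK 0xffff0000u   /* trace class + subclass */
    #define EVT_MASK 0x0000ffffu   /* event number within the class */

    int main(void)
    {
        uint32_t id = 0x00082013;  /* the HVM HLT record above */
        printf("class 0x%04x, event 0x%04x\n",
               (id & CLS_MASK) >> 16, id & EVT_MASK);
        return 0;   /* prints: class 0x0008, event 0x2013 */
    }
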
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/xentrace/xentrace.c
--- a/tools/xentrace/xentrace.c Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/xentrace/xentrace.c Fri Sep 12 14:47:40 2008 +0900
@@ -56,6 +56,7 @@ typedef struct settings_st {
unsigned long tbuf_size;
unsigned long disk_rsvd;
unsigned long timeout;
+ unsigned long memory_buffer;
uint8_t discard:1,
disable_tracing:1;
} settings_t;
@@ -67,10 +68,243 @@ static int xc_handle = -1;
static int xc_handle = -1;
static int event_fd = -1;
static int virq_port = -1;
+static int outfd = 1;
static void close_handler(int signal)
{
interrupted = 1;
+}
+
+static struct {
+ char * buf;
+ unsigned long prod, cons, size;
+ unsigned long pending_size, pending_prod;
+} membuf = { 0 };
+
+#define MEMBUF_INDEX_RESET_THRESHOLD (1<<29)
+
+/* FIXME -- make a power of 2 so we can mask instead. */
+#define MEMBUF_POINTER(_i) (membuf.buf + ((_i) % membuf.size))
+#define MEMBUF_CONS_INCREMENT(_n) \
+ do { \
+ membuf.cons += (_n); \
+ } while(0)
+#define MEMBUF_PROD_SET(_x) \
+ do { \
+ if ( (_x) < membuf.prod ) { \
+        fprintf(stderr, "%s: INTERNAL_ERROR: prod %lu, trying to set to %lu!\n", \
+ __func__, membuf.prod, (unsigned long)(_x)); \
+ exit(1); \
+ } \
+ membuf.prod = (_x); \
+ if ( (_x) > MEMBUF_INDEX_RESET_THRESHOLD ) \
+ { \
+ membuf.prod %= membuf.size; \
+ membuf.cons %= membuf.size; \
+ if( membuf.prod < membuf.cons ) \
+ membuf.prod += membuf.size; \
+ } \
+ } while(0)
+
+struct cpu_change_record {
+ uint32_t header;
+ struct {
+ int cpu;
+ unsigned window_size;
+ } data;
+};
+
+#define CPU_CHANGE_HEADER \
+ (TRC_TRACE_CPU_CHANGE \
+ | (((sizeof(struct cpu_change_record)/sizeof(uint32_t)) - 1) \
+ << TRACE_EXTRA_SHIFT) )
+
+void membuf_alloc(unsigned long size)
+{
+ membuf.buf = malloc(size);
+
+ if(!membuf.buf)
+ {
+ fprintf(stderr, "%s: Couldn't malloc %lu bytes!\n",
+ __func__, size);
+ exit(1);
+ }
+
+ membuf.prod = membuf.cons = 0;
+ membuf.size = size;
+}
+
+/*
+ * Reserve a new window in the buffer. Move the 'consumer' forward size
+ * bytes, re-adjusting the cpu window sizes as necessary, and insert a
+ * cpu_change record.
+ */
+void membuf_reserve_window(unsigned cpu, unsigned long window_size)
+{
+ struct cpu_change_record *rec;
+ long need_to_consume, free, freed;
+
+ if ( membuf.pending_size > 0 )
+ {
+ fprintf(stderr, "%s: INTERNAL_ERROR: pending_size %lu\n",
+ __func__, membuf.pending_size);
+ exit(1);
+ }
+
+ need_to_consume = window_size + sizeof(*rec);
+
+ if ( window_size > membuf.size )
+ {
+ fprintf(stderr, "%s: reserve size %lu larger than buffer size %lu!\n",
+ __func__, window_size, membuf.size);
+ exit(1);
+ }
+
+ /* Subtract free space already in buffer. */
+ free = membuf.size - (membuf.prod - membuf.cons);
+ if( need_to_consume < free)
+ goto start_window;
+
+ need_to_consume -= free;
+
+ /*
+ * "Free" up full windows until we have enough for this window.
+ * It's a bit wasteful to throw away partial buffers, but the only
+     * other option is to scan through the buffer headers. Since the
+ * common case is that it's going to be thrown away next anyway, I
+ * think minimizing the overall impact is more important.
+ */
+ do {
+ rec = (struct cpu_change_record *)MEMBUF_POINTER(membuf.cons);
+ if( rec->header != CPU_CHANGE_HEADER )
+ {
+            fprintf(stderr, "%s: INTERNAL ERROR: no cpu_change record at consumer!\n",
+ __func__);
+ exit(EXIT_FAILURE);
+ }
+
+ freed = sizeof(*rec) + rec->data.window_size;
+
+ if ( need_to_consume > 0 )
+ {
+ MEMBUF_CONS_INCREMENT(freed);
+ need_to_consume -= freed;
+ }
+ } while( need_to_consume > 0 );
+
+start_window:
+ /*
+ * Start writing "pending" data. Update prod once all this data is
+ * written.
+ */
+ membuf.pending_prod = membuf.prod;
+ membuf.pending_size = window_size;
+
+ rec = (struct cpu_change_record *)MEMBUF_POINTER(membuf.pending_prod);
+
+ rec->header = CPU_CHANGE_HEADER;
+ rec->data.cpu = cpu;
+ rec->data.window_size = window_size;
+
+ membuf.pending_prod += sizeof(*rec);
+}
+
+void membuf_write(void *start, unsigned long size) {
+ char * p;
+ unsigned long wsize;
+
+ if( (membuf.size - (membuf.prod - membuf.cons)) < size )
+ {
+ fprintf(stderr, "%s: INTERNAL ERROR: need %lu bytes, only have %lu!\n",
+                __func__, size, membuf.size - (membuf.prod - membuf.cons));
+ exit(1);
+ }
+
+ if( size > membuf.pending_size )
+ {
+ fprintf(stderr, "%s: INTERNAL ERROR: size %lu, pending %lu!\n",
+ __func__, size, membuf.pending_size);
+ exit(1);
+ }
+
+ wsize = size;
+ p = MEMBUF_POINTER(membuf.pending_prod);
+
+ /* If the buffer overlaps the "wrap", do an extra write */
+ if ( p + size > membuf.buf + membuf.size )
+ {
+ int usize = ( membuf.buf + membuf.size ) - p;
+
+ memcpy(p, start, usize);
+
+ start += usize;
+ wsize -= usize;
+ p = membuf.buf;
+ }
+
+ memcpy(p, start, wsize);
+
+ membuf.pending_prod += size;
+ membuf.pending_size -= size;
+
+ if ( membuf.pending_size == 0 )
+ {
+ MEMBUF_PROD_SET(membuf.pending_prod);
+ }
+}
+
+void membuf_dump(void) {
+ /* Dump circular memory buffer */
+ int cons, prod, wsize, written;
+ char * wstart;
+
+ fprintf(stderr, "Dumping memory buffer.\n");
+
+ cons = membuf.cons % membuf.size;
+ prod = membuf.prod % membuf.size;
+
+ if(prod > cons)
+ {
+ /* Write in one go */
+ wstart = membuf.buf + cons;
+ wsize = prod - cons;
+
+ written = write(outfd, wstart, wsize);
+ if ( written != wsize )
+ goto fail;
+ }
+ else
+ {
+ /* Write in two pieces: cons->end, beginning->prod. */
+ wstart = membuf.buf + cons;
+ wsize = membuf.size - cons;
+
+ written = write(outfd, wstart, wsize);
+ if ( written != wsize )
+ {
+ fprintf(stderr, "Write failed! (size %d, returned %d)\n",
+ wsize, written);
+ goto fail;
+ }
+
+ wstart = membuf.buf;
+ wsize = prod;
+
+ written = write(outfd, wstart, wsize);
+ if ( written != wsize )
+ {
+ fprintf(stderr, "Write failed! (size %d, returned %d)\n",
+ wsize, written);
+ goto fail;
+ }
+ }
+
+ membuf.cons = membuf.prod = 0;
+
+ return;
+fail:
+ exit(1);
}
/**
@@ -85,20 +319,20 @@ static void close_handler(int signal)
* of the buffer write.
*/
static void write_buffer(unsigned int cpu, unsigned char *start, int size,
- int total_size, int outfd)
+ int total_size)
{
struct statvfs stat;
size_t written = 0;
- if ( opts.disk_rsvd != 0 )
+ if ( opts.memory_buffer == 0 && opts.disk_rsvd != 0 )
{
unsigned long long freespace;
/* Check that filesystem has enough space. */
if ( fstatvfs (outfd, &stat) )
{
- fprintf(stderr, "Statfs failed!\n");
- goto fail;
+ fprintf(stderr, "Statfs failed!\n");
+ goto fail;
}
freespace = stat.f_frsize * (unsigned long long)stat.f_bfree;
@@ -112,8 +346,8 @@ static void write_buffer(unsigned int cp
if ( freespace <= opts.disk_rsvd )
{
-            fprintf(stderr, "Disk space limit reached (free space: %lluMB, limit: %luMB).\n", freespace, opts.disk_rsvd);
- exit (EXIT_FAILURE);
+        fprintf(stderr, "Disk space limit reached (free space: %lluMB, limit: %luMB).\n", freespace, opts.disk_rsvd);
+ exit (EXIT_FAILURE);
}
}
@@ -122,40 +356,46 @@ static void write_buffer(unsigned int cp
* first write. */
if ( total_size != 0 )
{
- struct {
- uint32_t header;
- struct {
- unsigned cpu;
- unsigned byte_count;
- } extra;
- } rec;
-
- rec.header = TRC_TRACE_CPU_CHANGE
- | ((sizeof(rec.extra)/sizeof(uint32_t)) << TRACE_EXTRA_SHIFT);
- rec.extra.cpu = cpu;
- rec.extra.byte_count = total_size;
-
- written = write(outfd, &rec, sizeof(rec));
-
- if ( written != sizeof(rec) )
- {
- fprintf(stderr, "Cannot write cpu change (write returned %zd)\n",
- written);
+ if ( opts.memory_buffer )
+ {
+ membuf_reserve_window(cpu, total_size);
+ }
+ else
+ {
+ struct cpu_change_record rec;
+
+ rec.header = CPU_CHANGE_HEADER;
+ rec.data.cpu = cpu;
+ rec.data.window_size = total_size;
+
+ written = write(outfd, &rec, sizeof(rec));
+ if ( written != sizeof(rec) )
+ {
+                fprintf(stderr, "Cannot write cpu change (write returned %zd)\n",
+ written);
+ goto fail;
+ }
+ }
+ }
+
+ if ( opts.memory_buffer )
+ {
+ membuf_write(start, size);
+ }
+ else
+ {
+ written = write(outfd, start, size);
+ if ( written != size )
+ {
+ fprintf(stderr, "Write failed! (size %d, returned %zd)\n",
+ size, written);
goto fail;
}
}
- written = write(outfd, start, size);
- if ( written != size )
- {
- fprintf(stderr, "Write failed! (size %d, returned %zd)\n",
- size, written);
- goto fail;
- }
-
return;
- fail:
+fail:
PERROR("Failed to write trace data");
exit(EXIT_FAILURE);
}
@@ -394,7 +634,7 @@ static void wait_for_event_or_timeout(un
* monitor_tbufs - monitor the contents of tbufs and output to a file
* @logfile: the FILE * representing the file to log to
*/
-static int monitor_tbufs(int outfd)
+static int monitor_tbufs(void)
{
int i;
@@ -429,9 +669,9 @@ static int monitor_tbufs(int outfd)
meta[i]->cons = meta[i]->prod;
/* now, scan buffers for events */
- while ( !interrupted )
- {
- for ( i = 0; (i < num) && !interrupted; i++ )
+ while ( 1 )
+ {
+ for ( i = 0; i < num; i++ )
{
unsigned long start_offset, end_offset, window_size, cons, prod;
@@ -463,8 +703,7 @@ static int monitor_tbufs(int outfd)
/* If window does not wrap, write in one big chunk */
write_buffer(i, data[i]+start_offset,
window_size,
- window_size,
- outfd);
+ window_size);
}
else
{
@@ -474,23 +713,28 @@ static int monitor_tbufs(int outfd)
*/
write_buffer(i, data[i] + start_offset,
data_size - start_offset,
- window_size,
- outfd);
+ window_size);
write_buffer(i, data[i],
end_offset,
- 0,
- outfd);
+ 0);
}
xen_mb(); /* read buffer, then update cons. */
meta[i]->cons = prod;
- }
+
+ }
+
+ if ( interrupted )
+ break;
wait_for_event_or_timeout(opts.poll_sleep);
}
- if(opts.disable_tracing)
+ if ( opts.disable_tracing )
disable_tbufs();
+
+ if ( opts.memory_buffer )
+ membuf_dump();
/* cleanup */
free(meta);
@@ -538,6 +782,8 @@ static void usage(void)
" -T --time-interval=s Run xentrace for s seconds and quit.\n" \
" -?, --help Show this message\n" \
" -V, --version Print program version\n" \
+" -M, --memory-buffer=b Copy trace records to a circular memory buffer.\n" \
+" Dump to file on exit.\n" \
"\n" \
"This tool is used to capture trace buffer data from Xen. The\n" \
"data is output in a binary format, in the following order:\n" \
@@ -551,6 +797,53 @@ static void usage(void)
printf("\nReport bugs to %s\n", program_bug_address);
exit(EXIT_FAILURE);
+}
+
+/* convert the argument string pointed to by arg to a long int representation,
+ * including suffixes such as 'M' and 'k'. */
+#define MB (1024*1024)
+#define KB (1024)
+long sargtol(const char *restrict arg, int base)
+{
+ char *endp;
+ long val;
+
+ errno = 0;
+ val = strtol(arg, &endp, base);
+
+ if ( errno != 0 )
+ {
+ fprintf(stderr, "Invalid option argument: %s\n", arg);
+ fprintf(stderr, "Error: %s\n\n", strerror(errno));
+ usage();
+ }
+ else if (endp == arg)
+ {
+ goto invalid;
+ }
+
+ switch(*endp)
+ {
+ case '\0':
+ break;
+ case 'M':
+ val *= MB;
+ break;
+ case 'K':
+ case 'k':
+ val *= KB;
+ break;
+ default:
+ fprintf(stderr, "Unknown suffix %c\n", *endp);
+ exit(1);
+ }
+
+ return val;
+invalid:
+    fprintf(stderr, "Invalid option argument: %s\n\n", arg);
+    usage();
+    return 0;
}
/* convert the argument string pointed to by arg to a long int representation
*/
@@ -606,6 +899,7 @@ static void parse_args(int argc, char **
{ "trace-buf-size", required_argument, 0, 'S' },
{ "reserve-disk-space", required_argument, 0, 'r' },
{ "time-interval", required_argument, 0, 'T' },
+ { "memory-buffer", required_argument, 0, 'M' },
{ "discard-buffers", no_argument, 0, 'D' },
{ "dont-disable-tracing", no_argument, 0, 'x' },
{ "help", no_argument, 0, '?' },
@@ -613,7 +907,7 @@ static void parse_args(int argc, char **
{ 0, 0, 0, 0 }
};
- while ( (option = getopt_long(argc, argv, "c:e:s:S:t:?V",
+ while ( (option = getopt_long(argc, argv, "t:s:c:e:S:r:T:M:Dx?V",
long_options, NULL)) != -1)
{
switch ( option )
@@ -653,6 +947,10 @@ static void parse_args(int argc, char **
case 'T':
opts.timeout = argtol(optarg, 0);
+ break;
+
+ case 'M':
+ opts.memory_buffer = sargtol(optarg, 0);
break;
default:
@@ -674,7 +972,7 @@ static void parse_args(int argc, char **
int main(int argc, char **argv)
{
- int outfd = 1, ret;
+ int ret;
struct sigaction act;
opts.outfile = 0;
@@ -719,6 +1017,9 @@ int main(int argc, char **argv)
fprintf(stderr, "Cannot output to a TTY, specify a log file.\n");
exit(EXIT_FAILURE);
}
+
+ if ( opts.memory_buffer > 0 )
+ membuf_alloc(opts.memory_buffer);
/* ensure that if we get a signal, we'll do cleanup, then exit */
act.sa_handler = close_handler;
@@ -729,7 +1030,7 @@ int main(int argc, char **argv)
sigaction(SIGINT, &act, NULL);
sigaction(SIGALRM, &act, NULL);
- ret = monitor_tbufs(outfd);
+ ret = monitor_tbufs();
return ret;
}
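
A note on the --memory-buffer machinery added above: membuf.prod and
membuf.cons grow monotonically (reset only past MEMBUF_INDEX_RESET_THRESHOLD)
and MEMBUF_POINTER() applies the modulo on access, so prod - cons is always
the number of live bytes. A minimal standalone sketch of that index scheme
(the real code frees whole per-cpu windows rather than single bytes, and
the sizes here are made up):

    #include <stdio.h>

    #define RING_SIZE 16                 /* xentrace sizes this from -M */
    static char ring[RING_SIZE];
    static unsigned long prod, cons;     /* wrapped only on access */

    static void ring_put(const char *data, unsigned long len)
    {
        unsigned long i;
        for ( i = 0; i < len; i++ )
            ring[(prod + i) % RING_SIZE] = data[i];
        prod += len;
        if ( prod - cons > RING_SIZE )   /* overwrite: drop oldest bytes */
            cons = prod - RING_SIZE;
    }

    int main(void)
    {
        ring_put("0123456789", 10);
        ring_put("abcdefghij", 10);
        printf("buffered %lu bytes\n", prod - cons);   /* prints 16 */
        return 0;
    }
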
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/acpi/Makefile
--- a/xen/arch/x86/acpi/Makefile Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/acpi/Makefile Fri Sep 12 14:47:40 2008 +0900
@@ -1,5 +1,5 @@ subdir-y += cpufreq
subdir-y += cpufreq
obj-y += boot.o
-obj-y += power.o suspend.o wakeup_prot.o cpu_idle.o
+obj-y += power.o suspend.o wakeup_prot.o cpu_idle.o cpuidle_menu.o
obj-y += pmstat.o
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/acpi/cpu_idle.c
--- a/xen/arch/x86/acpi/cpu_idle.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/acpi/cpu_idle.c Fri Sep 12 14:47:40 2008 +0900
@@ -39,6 +39,7 @@
#include <xen/smp.h>
#include <xen/guest_access.h>
#include <xen/keyhandler.h>
+#include <xen/cpuidle.h>
#include <asm/cache.h>
#include <asm/io.h>
#include <asm/hpet.h>
@@ -49,12 +50,9 @@
#define DEBUG_PM_CX
#define US_TO_PM_TIMER_TICKS(t) ((t * (PM_TIMER_FREQUENCY/1000)) / 1000)
+#define PM_TIMER_TICKS_TO_US(t) ((t * 1000) / (PM_TIMER_FREQUENCY / 1000))
#define C2_OVERHEAD 4 /* 1us (3.579 ticks per us) */
#define C3_OVERHEAD 4 /* 1us (3.579 ticks per us) */
-
-#define ACPI_PROCESSOR_MAX_POWER 8
-#define ACPI_PROCESSOR_MAX_C2_LATENCY 100
-#define ACPI_PROCESSOR_MAX_C3_LATENCY 1000
static void (*lapic_timer_off)(void);
static void (*lapic_timer_on)(void);
@@ -65,66 +63,6 @@ static void (*pm_idle_save) (void) __rea
static void (*pm_idle_save) (void) __read_mostly;
unsigned int max_cstate __read_mostly = 2;
integer_param("max_cstate", max_cstate);
-/*
- * bm_history -- bit-mask with a bit per jiffy of bus-master activity
- * 1000 HZ: 0xFFFFFFFF: 32 jiffies = 32ms
- * 800 HZ: 0xFFFFFFFF: 32 jiffies = 40ms
- * 100 HZ: 0x0000000F: 4 jiffies = 40ms
- * reduce history for more aggressive entry into C3
- */
-unsigned int bm_history __read_mostly =
- (HZ >= 800 ? 0xFFFFFFFF : ((1U << (HZ / 25)) - 1));
-integer_param("bm_history", bm_history);
-
-struct acpi_processor_cx;
-
-struct acpi_processor_cx_policy
-{
- u32 count;
- struct acpi_processor_cx *state;
- struct
- {
- u32 time;
- u32 ticks;
- u32 count;
- u32 bm;
- } threshold;
-};
-
-struct acpi_processor_cx
-{
- u8 valid;
- u8 type;
- u32 address;
- u8 space_id;
- u32 latency;
- u32 latency_ticks;
- u32 power;
- u32 usage;
- u64 time;
- struct acpi_processor_cx_policy promotion;
- struct acpi_processor_cx_policy demotion;
-};
-
-struct acpi_processor_flags
-{
- u8 bm_control:1;
- u8 bm_check:1;
- u8 has_cst:1;
- u8 power_setup_done:1;
- u8 bm_rld_set:1;
-};
-
-struct acpi_processor_power
-{
- struct acpi_processor_flags flags;
- struct acpi_processor_cx *state;
- s_time_t bm_check_timestamp;
- u32 default_state;
- u32 bm_activity;
- u32 count;
- struct acpi_processor_cx states[ACPI_PROCESSOR_MAX_POWER];
-};
static struct acpi_processor_power processor_powers[NR_CPUS];
@@ -133,26 +71,21 @@ static void print_acpi_power(uint32_t cp
uint32_t i;
printk("==cpu%d==\n", cpu);
- printk("active state:\t\tC%d\n", (power->state)?power->state->type:-1);
+ printk("active state:\t\tC%d\n",
+ (power->last_state) ? power->last_state->type : -1);
printk("max_cstate:\t\tC%d\n", max_cstate);
- printk("bus master activity:\t%08x\n", power->bm_activity);
printk("states:\n");
for ( i = 1; i < power->count; i++ )
{
-        printk((power->states[i].type == power->state->type) ? " *" : "   ");
+ if ( power->last_state &&
+ power->states[i].type == power->last_state->type )
+ printk(" *");
+ else
+ printk(" ");
printk("C%d:\t\t", i);
printk("type[C%d] ", power->states[i].type);
- if ( power->states[i].promotion.state )
- printk("promotion[C%d] ", power->states[i].promotion.state->type);
- else
- printk("promotion[--] ");
- if ( power->states[i].demotion.state )
- printk("demotion[C%d] ", power->states[i].demotion.state->type);
- else
- printk("demotion[--] ");
- printk("latency[%03d]\n ", power->states[i].latency);
- printk("\t\t\t");
+ printk("latency[%03d] ", power->states[i].latency);
printk("usage[%08d] ", power->states[i].usage);
printk("duration[%"PRId64"]\n", power->states[i].time);
}
@@ -180,48 +113,6 @@ static inline u32 ticks_elapsed(u32 t1,
return (((0x00FFFFFF - t1) + t2) & 0x00FFFFFF);
else
return ((0xFFFFFFFF - t1) + t2);
-}
-
-static void acpi_processor_power_activate(struct acpi_processor_power *power,
- struct acpi_processor_cx *new)
-{
- struct acpi_processor_cx *old;
-
- if ( !power || !new )
- return;
-
- old = power->state;
-
- if ( old )
- old->promotion.count = 0;
- new->demotion.count = 0;
-
- /* Cleanup from old state. */
- if ( old )
- {
- switch ( old->type )
- {
- case ACPI_STATE_C3:
- /* Disable bus master reload */
- if ( new->type != ACPI_STATE_C3 && power->flags.bm_check )
- acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
- break;
- }
- }
-
- /* Prepare to use new state. */
- switch ( new->type )
- {
- case ACPI_STATE_C3:
- /* Enable bus master reload */
- if ( old->type != ACPI_STATE_C3 && power->flags.bm_check )
- acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
- break;
- }
-
- power->state = new;
-
- return;
}
static void acpi_safe_halt(void)
@@ -263,13 +154,50 @@ static void acpi_idle_do_entry(struct ac
}
}
-static atomic_t c3_cpu_count;
+static inline void acpi_idle_update_bm_rld(struct acpi_processor_power *power,
+ struct acpi_processor_cx *target)
+{
+ if ( !power->flags.bm_check )
+ return;
+
+ if ( power->flags.bm_rld_set && target->type != ACPI_STATE_C3 )
+ {
+ acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
+ power->flags.bm_rld_set = 0;
+ }
+
+ if ( !power->flags.bm_rld_set && target->type == ACPI_STATE_C3 )
+ {
+ acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
+ power->flags.bm_rld_set = 1;
+ }
+}
+
+static int acpi_idle_bm_check(void)
+{
+ u32 bm_status = 0;
+
+ acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status);
+ if ( bm_status )
+ acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
+ /*
+ * TBD: PIIX4 Erratum #18: Note that BM_STS doesn't always reflect
+ * the true state of bus mastering activity; forcing us to
+ * manually check the BMIDEA bit of each IDE channel.
+ */
+ return bm_status;
+}
+
+static struct {
+ spinlock_t lock;
+ unsigned int count;
+} c3_cpu_status = { .lock = SPIN_LOCK_UNLOCKED };
static void acpi_processor_idle(void)
{
struct acpi_processor_power *power = NULL;
struct acpi_processor_cx *cx = NULL;
- struct acpi_processor_cx *next_state = NULL;
+ int next_state;
int sleep_ticks = 0;
u32 t1, t2 = 0;
@@ -287,7 +215,16 @@ static void acpi_processor_idle(void)
return;
}
- cx = power->state;
+ next_state = cpuidle_current_governor->select(power);
+ if ( next_state > 0 )
+ {
+ cx = &power->states[next_state];
+ if ( power->flags.bm_check && acpi_idle_bm_check()
+ && cx->type == ACPI_STATE_C3 )
+ cx = power->safe_state;
+ if ( cx->type > max_cstate )
+ cx = &power->states[max_cstate];
+ }
if ( !cx )
{
if ( pm_idle_save )
@@ -303,69 +240,14 @@ static void acpi_processor_idle(void)
return;
}
- /*
- * Check BM Activity
- * -----------------
- * Check for bus mastering activity (if required), record, and check
- * for demotion.
- */
- if ( power->flags.bm_check )
- {
- u32 bm_status = 0;
- unsigned long diff = (NOW() - power->bm_check_timestamp) >> 23;
-
- if ( diff > 31 )
- diff = 31;
-
- power->bm_activity <<= diff;
-
- acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status);
- if ( bm_status )
- {
- power->bm_activity |= 0x1;
- acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
- }
- /*
- * PIIX4 Erratum #18: Note that BM_STS doesn't always reflect
- * the true state of bus mastering activity; forcing us to
- * manually check the BMIDEA bit of each IDE channel.
- */
- /*else if ( errata.piix4.bmisx )
- {
- if ( (inb_p(errata.piix4.bmisx + 0x02) & 0x01)
- || (inb_p(errata.piix4.bmisx + 0x0A) & 0x01) )
- pr->power.bm_activity |= 0x1;
- }*/
-
- power->bm_check_timestamp = NOW();
-
- /*
- * If bus mastering is or was active this jiffy, demote
- * to avoid a faulty transition. Note that the processor
- * won't enter a low-power state during this call (to this
- * function) but should upon the next.
- *
- * TBD: A better policy might be to fallback to the demotion
- * state (use it for this quantum only) istead of
- * demoting -- and rely on duration as our sole demotion
- * qualification. This may, however, introduce DMA
- * issues (e.g. floppy DMA transfer overrun/underrun).
- */
- if ( (power->bm_activity & 0x1) && cx->demotion.threshold.bm )
- {
- local_irq_enable();
- next_state = cx->demotion.state;
- goto end;
- }
- }
+ power->last_state = cx;
/*
* Sleep:
* ------
* Invoke the current Cx state to put the processor to sleep.
*/
- if ( cx->type == ACPI_STATE_C2 || cx->type == ACPI_STATE_C3 )
- smp_mb__after_clear_bit();
+ acpi_idle_update_bm_rld(power, cx);
switch ( cx->type )
{
@@ -399,8 +281,7 @@ static void acpi_processor_idle(void)
/* Re-enable interrupts */
local_irq_enable();
/* Compute time (ticks) that we were actually asleep */
- sleep_ticks =
- ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD;
+ sleep_ticks = ticks_elapsed(t1, t2);
break;
case ACPI_STATE_C3:
@@ -416,8 +297,8 @@ static void acpi_processor_idle(void)
*/
if ( power->flags.bm_check && power->flags.bm_control )
{
- atomic_inc(&c3_cpu_count);
- if ( atomic_read(&c3_cpu_count) == num_online_cpus() )
+ spin_lock(&c3_cpu_status.lock);
+ if ( ++c3_cpu_status.count == num_online_cpus() )
{
/*
* All CPUs are trying to go to C3
@@ -425,6 +306,7 @@ static void acpi_processor_idle(void)
*/
acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
}
+ spin_unlock(&c3_cpu_status.lock);
}
else if ( !power->flags.bm_check )
{
@@ -455,8 +337,10 @@ static void acpi_processor_idle(void)
if ( power->flags.bm_check && power->flags.bm_control )
{
/* Enable bus master arbitration */
- atomic_dec(&c3_cpu_count);
- acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
+ spin_lock(&c3_cpu_status.lock);
+ if ( c3_cpu_status.count-- == num_online_cpus() )
+ acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
+ spin_unlock(&c3_cpu_status.lock);
}
/* Re-enable interrupts */
@@ -465,8 +349,6 @@ static void acpi_processor_idle(void)
lapic_timer_on();
/* Compute time (ticks) that we were actually asleep */
sleep_ticks = ticks_elapsed(t1, t2);
- /* Do not account our idle-switching overhead: */
- sleep_ticks -= cx->latency_ticks + C3_OVERHEAD;
break;
@@ -476,163 +358,14 @@ static void acpi_processor_idle(void)
}
cx->usage++;
- if ( (cx->type != ACPI_STATE_C1) && (sleep_ticks > 0) )
+ if ( sleep_ticks > 0 )
+ {
+ power->last_residency = PM_TIMER_TICKS_TO_US(sleep_ticks);
cx->time += sleep_ticks;
-
- next_state = power->state;
-
- /*
- * Promotion?
- * ----------
- * Track the number of longs (time asleep is greater than threshold)
- * and promote when the count threshold is reached. Note that bus
- * mastering activity may prevent promotions.
- * Do not promote above max_cstate.
- */
- if ( cx->promotion.state &&
- ((cx->promotion.state - power->states) <= max_cstate) )
- {
- if ( sleep_ticks > cx->promotion.threshold.ticks )
- {
- cx->promotion.count++;
- cx->demotion.count = 0;
- if ( cx->promotion.count >= cx->promotion.threshold.count )
- {
- if ( power->flags.bm_check )
- {
- if ( !(power->bm_activity & cx->promotion.threshold.bm) )
- {
- next_state = cx->promotion.state;
- goto end;
- }
- }
- else
- {
- next_state = cx->promotion.state;
- goto end;
- }
- }
- }
- }
-
- /*
- * Demotion?
- * ---------
- * Track the number of shorts (time asleep is less than time threshold)
- * and demote when the usage threshold is reached.
- */
- if ( cx->demotion.state )
- {
- if ( sleep_ticks < cx->demotion.threshold.ticks )
- {
- cx->demotion.count++;
- cx->promotion.count = 0;
- if ( cx->demotion.count >= cx->demotion.threshold.count )
- {
- next_state = cx->demotion.state;
- goto end;
- }
- }
- }
-
-end:
- /*
- * Demote if current state exceeds max_cstate
- */
- if ( (power->state - power->states) > max_cstate )
- {
- if ( cx->demotion.state )
- next_state = cx->demotion.state;
- }
-
- /*
- * New Cx State?
- * -------------
- * If we're going to start using a new Cx state we must clean up
- * from the previous and prepare to use the new.
- */
- if ( next_state != power->state )
- acpi_processor_power_activate(power, next_state);
-}
-
-static int acpi_processor_set_power_policy(struct acpi_processor_power *power)
-{
- unsigned int i;
- unsigned int state_is_set = 0;
- struct acpi_processor_cx *lower = NULL;
- struct acpi_processor_cx *higher = NULL;
- struct acpi_processor_cx *cx;
-
- if ( !power )
- return -EINVAL;
-
- /*
- * This function sets the default Cx state policy (OS idle handler).
- * Our scheme is to promote quickly to C2 but more conservatively
- * to C3. We're favoring C2 for its characteristics of low latency
- * (quick response), good power savings, and ability to allow bus
- * mastering activity. Note that the Cx state policy is completely
- * customizable and can be altered dynamically.
- */
-
- /* startup state */
- for ( i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++ )
- {
- cx = &power->states[i];
- if ( !cx->valid )
- continue;
-
- if ( !state_is_set )
- power->state = cx;
- state_is_set++;
- break;
- }
-
- if ( !state_is_set )
- return -ENODEV;
-
- /* demotion */
- for ( i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++ )
- {
- cx = &power->states[i];
- if ( !cx->valid )
- continue;
-
- if ( lower )
- {
- cx->demotion.state = lower;
- cx->demotion.threshold.ticks = cx->latency_ticks;
- cx->demotion.threshold.count = 1;
- if ( cx->type == ACPI_STATE_C3 )
- cx->demotion.threshold.bm = bm_history;
- }
-
- lower = cx;
- }
-
- /* promotion */
- for ( i = (ACPI_PROCESSOR_MAX_POWER - 1); i > 0; i-- )
- {
- cx = &power->states[i];
- if ( !cx->valid )
- continue;
-
- if ( higher )
- {
- cx->promotion.state = higher;
- cx->promotion.threshold.ticks = cx->latency_ticks;
- if ( cx->type >= ACPI_STATE_C2 )
- cx->promotion.threshold.count = 4;
- else
- cx->promotion.threshold.count = 10;
- if ( higher->type == ACPI_STATE_C3 )
- cx->promotion.threshold.bm = bm_history;
- }
-
- higher = cx;
- }
-
- return 0;
+ }
+
+ if ( cpuidle_current_governor->reflect )
+ cpuidle_current_governor->reflect(power);
}
static int init_cx_pminfo(struct acpi_processor_power *acpi_power)
@@ -821,6 +554,8 @@ static int check_cx(struct acpi_processo
return 0;
}
+static unsigned int latency_factor = 2;
+
static void set_cx(
struct acpi_processor_power *acpi_power,
xen_processor_cx_t *xen_cx)
@@ -842,6 +577,9 @@ static void set_cx(
cx->power = xen_cx->power;
cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
+ cx->target_residency = cx->latency * latency_factor;
+ if ( cx->type == ACPI_STATE_C1 || cx->type == ACPI_STATE_C2 )
+ acpi_power->safe_state = cx;
}
int get_cpu_id(u8 acpi_id)
@@ -936,6 +674,7 @@ long set_cx_pminfo(uint32_t cpu, struct
init_cx_pminfo(acpi_power);
+ acpi_power->cpu = cpu_id;
acpi_power->flags.bm_check = power->flags.bm_check;
acpi_power->flags.bm_control = power->flags.bm_control;
acpi_power->flags.has_cst = power->flags.has_cst;
@@ -950,10 +689,11 @@ long set_cx_pminfo(uint32_t cpu, struct
set_cx(acpi_power, &xen_cx);
}
+ if ( cpuidle_current_governor->enable &&
+ cpuidle_current_governor->enable(acpi_power) )
+ return -EFAULT;
+
/* FIXME: C-state dependency is not supported by far */
-
- /* initialize default policy */
- acpi_processor_set_power_policy(acpi_power);
print_acpi_power(cpu_id, acpi_power);
@@ -978,7 +718,7 @@ int pmstat_get_cx_stat(uint32_t cpuid, s
uint64_t usage;
int i;
- stat->last = (power->state) ? power->state->type : 0;
+ stat->last = (power->last_state) ? power->last_state->type : 0;
stat->nr = processor_powers[cpuid].count;
stat->idle_time = v->runstate.time[RUNSTATE_running];
if ( v->is_running )
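
The PM_TIMER_TICKS_TO_US() macro introduced near the top of this file's
diff is the inverse of US_TO_PM_TIMER_TICKS(): the ACPI PM timer ticks at
3.579545 MHz, so PM_TIMER_FREQUENCY / 1000 is 3579 ticks per millisecond.
A quick standalone check of the arithmetic:

    #include <stdio.h>

    #define PM_TIMER_FREQUENCY 3579545
    #define PM_TIMER_TICKS_TO_US(t) (((t) * 1000) / (PM_TIMER_FREQUENCY / 1000))

    int main(void)
    {
        printf("%d us\n", PM_TIMER_TICKS_TO_US(3579));   /* prints 1000 us */
        return 0;
    }
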
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/acpi/cpufreq/cpufreq.c
--- a/xen/arch/x86/acpi/cpufreq/cpufreq.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c Fri Sep 12 14:47:40 2008 +0900
@@ -48,7 +48,7 @@ struct cpufreq_policy xen_px_policy[NR_C
struct cpufreq_policy xen_px_policy[NR_CPUS];
static cpumask_t *cpufreq_dom_pt;
-static cpumask_t cpufreq_dom_mask;
+static unsigned long *cpufreq_dom_mask;
static unsigned int cpufreq_dom_max;
enum {
@@ -562,7 +562,8 @@ void cpufreq_dom_exit(void)
void cpufreq_dom_exit(void)
{
cpufreq_dom_max = 0;
- cpus_clear(cpufreq_dom_mask);
+ if (cpufreq_dom_mask)
+ xfree(cpufreq_dom_mask);
if (cpufreq_dom_pt)
xfree(cpufreq_dom_pt);
}
@@ -572,22 +573,28 @@ int cpufreq_dom_init(void)
unsigned int i;
cpufreq_dom_max = 0;
- cpus_clear(cpufreq_dom_mask);
for_each_online_cpu(i) {
- cpu_set(processor_pminfo[i].perf.domain_info.domain, cpufreq_dom_mask);
if (cpufreq_dom_max < processor_pminfo[i].perf.domain_info.domain)
cpufreq_dom_max = processor_pminfo[i].perf.domain_info.domain;
}
cpufreq_dom_max++;
+
+ cpufreq_dom_mask = xmalloc_array(unsigned long,
+ BITS_TO_LONGS(cpufreq_dom_max));
+ if (!cpufreq_dom_mask)
+ return -ENOMEM;
+ bitmap_zero(cpufreq_dom_mask, cpufreq_dom_max);
cpufreq_dom_pt = xmalloc_array(cpumask_t, cpufreq_dom_max);
if (!cpufreq_dom_pt)
return -ENOMEM;
memset(cpufreq_dom_pt, 0, cpufreq_dom_max * sizeof(cpumask_t));
- for_each_online_cpu(i)
+ for_each_online_cpu(i) {
+        __set_bit(processor_pminfo[i].perf.domain_info.domain, cpufreq_dom_mask);
cpu_set(i, cpufreq_dom_pt[processor_pminfo[i].perf.domain_info.domain]);
+ }
for_each_online_cpu(i)
processor_pminfo[i].perf.shared_cpu_map =
@@ -616,10 +623,11 @@ static int cpufreq_cpu_init(void)
int cpufreq_dom_dbs(unsigned int event)
{
- int cpu, dom, ret = 0;
-
- for (dom=0; dom<cpufreq_dom_max; dom++) {
- if (!cpu_isset(dom, cpufreq_dom_mask))
+ unsigned int cpu, dom;
+ int ret = 0;
+
+ for (dom = 0; dom < cpufreq_dom_max; dom++) {
+ if (!test_bit(dom, cpufreq_dom_mask))
continue;
cpu = first_cpu(cpufreq_dom_pt[dom]);
ret = cpufreq_governor_dbs(&xen_px_policy[cpu], event);
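
The switch from a fixed cpumask_t to an xmalloc'd bitmap sized by
BITS_TO_LONGS(cpufreq_dom_max) is presumably because the _PSD domain
identifiers come from ACPI and need not be smaller than NR_CPUS. A
standalone sketch of the sizing arithmetic (dom_max is hypothetical):

    #include <limits.h>
    #include <stdio.h>

    #define BITS_PER_LONG    (sizeof(unsigned long) * CHAR_BIT)
    #define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

    int main(void)
    {
        unsigned int dom_max = 70;   /* hypothetical largest domain id + 1 */
        printf("%u bits -> %zu longs\n", dom_max,
               (size_t)BITS_TO_LONGS(dom_max));   /* 2 longs on LP64 */
        return 0;
    }
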
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/acpi/cpufreq/powernow.c
--- a/xen/arch/x86/acpi/cpufreq/powernow.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/acpi/cpufreq/powernow.c Fri Sep 12 14:47:40 2008 +0900
@@ -197,8 +197,8 @@ static int powernow_cpufreq_cpu_init(str
data->max_freq = perf->states[0].core_frequency * 1000;
/* table init */
- for (i=0; i<perf->state_count && i<max_hw_pstate; i++) {
- if (i>0 && perf->states[i].core_frequency >=
+ for (i = 0; i < perf->state_count && i <= max_hw_pstate; i++) {
+ if (i > 0 && perf->states[i].core_frequency >=
data->freq_table[valid_states-1].frequency / 1000)
continue;
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/acpi/cpuidle_menu.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/acpi/cpuidle_menu.c Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,132 @@
+/*
+ * cpuidle_menu - menu governor for cpu idle; the main idea comes from Linux:
+ * drivers/cpuidle/governors/menu.c
+ *
+ * Copyright (C) 2006-2007 Adam Belay <abelay@xxxxxxxxxx>
+ * Copyright (C) 2007, 2008 Intel Corporation
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <xen/config.h>
+#include <xen/errno.h>
+#include <xen/lib.h>
+#include <xen/types.h>
+#include <xen/acpi.h>
+#include <xen/timer.h>
+#include <xen/cpuidle.h>
+
+#define BREAK_FUZZ 4 /* 4 us */
+#define USEC_PER_SEC 1000000
+
+struct menu_device
+{
+ int last_state_idx;
+ unsigned int expected_us;
+ unsigned int predicted_us;
+ unsigned int last_measured_us;
+ unsigned int elapsed_us;
+};
+
+static DEFINE_PER_CPU(struct menu_device, menu_devices);
+
+static s_time_t get_sleep_length_ns(void)
+{
+ return per_cpu(timer_deadline, smp_processor_id()) - NOW();
+}
+
+static int menu_select(struct acpi_processor_power *power)
+{
+ struct menu_device *data = &__get_cpu_var(menu_devices);
+ int i;
+
+ /* determine the expected residency time */
+    data->expected_us = (u32)(get_sleep_length_ns() / 1000);
+
+ /* find the deepest idle state that satisfies our constraints */
+ for ( i = 1; i < power->count; i++ )
+ {
+ struct acpi_processor_cx *s = &power->states[i];
+
+ if ( s->target_residency > data->expected_us + s->latency )
+ break;
+ if ( s->target_residency > data->predicted_us )
+ break;
+        /* TBD: we need to check the QoS requirement in the future */
+ }
+
+ data->last_state_idx = i - 1;
+ return i - 1;
+}
+
+static void menu_reflect(struct acpi_processor_power *power)
+{
+ struct menu_device *data = &__get_cpu_var(menu_devices);
+ struct acpi_processor_cx *target = &power->states[data->last_state_idx];
+ unsigned int last_residency;
+ unsigned int measured_us;
+
+ /*
+ * Ugh, this idle state doesn't support residency measurements, so we
+ * are basically lost in the dark. As a compromise, assume we slept
+ * for one full standard timer tick. However, be aware that this
+ * could potentially result in a suboptimal state transition.
+ */
+ if ( target->type == ACPI_STATE_C1 )
+ last_residency = USEC_PER_SEC / HZ;
+ else
+ last_residency = power->last_residency;
+
+ measured_us = last_residency + data->elapsed_us;
+
+ /* if wrapping, set to max uint (-1) */
+ measured_us = data->elapsed_us <= measured_us ? measured_us : -1;
+
+ /* Predict time remaining until next break event */
+ data->predicted_us = max(measured_us, data->last_measured_us);
+
+ /* Distinguish between expected & non-expected events */
+ if ( last_residency + BREAK_FUZZ
+ < data->expected_us + target->latency )
+ {
+ data->last_measured_us = measured_us;
+ data->elapsed_us = 0;
+ }
+ else
+ data->elapsed_us = measured_us;
+}
+
+static int menu_enable_device(struct acpi_processor_power *power)
+{
+ struct menu_device *data = &per_cpu(menu_devices, power->cpu);
+
+ memset(data, 0, sizeof(struct menu_device));
+
+ return 0;
+}
+
+static struct cpuidle_governor menu_governor =
+{
+ .name = "menu",
+ .rating = 20,
+ .enable = menu_enable_device,
+ .select = menu_select,
+ .reflect = menu_reflect,
+};
+
+struct cpuidle_governor *cpuidle_current_governor = &menu_governor;
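
A worked example of menu_select() above, as a standalone sketch with
made-up latency and target_residency values (this is not Xen code; the
real table is built from the ACPI _CST data):

    #include <stdio.h>

    struct cx { unsigned int latency, target_residency; };

    int main(void)
    {
        /* index 0 is unused, mirroring power->states[] */
        struct cx states[] = { {0, 0}, {1, 2}, {20, 40}, {100, 200} };
        unsigned int expected_us = 150, predicted_us = 80;
        int i;

        for ( i = 1; i < 4; i++ )
        {
            if ( states[i].target_residency > expected_us + states[i].latency )
                break;
            if ( states[i].target_residency > predicted_us )
                break;
        }
        printf("selected C%d\n", i - 1);   /* prints: selected C2 */
        return 0;
    }
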
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/domain.c Fri Sep 12 14:47:40 2008 +0900
@@ -31,6 +31,7 @@
#include <xen/compat.h>
#include <xen/acpi.h>
#include <xen/pci.h>
+#include <xen/paging.h>
#include <asm/regs.h>
#include <asm/mc146818rtc.h>
#include <asm/system.h>
@@ -40,7 +41,6 @@
#include <asm/i387.h>
#include <asm/mpspec.h>
#include <asm/ldt.h>
-#include <asm/paging.h>
#include <asm/hypercall.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/support.h>
@@ -302,7 +302,8 @@ int vcpu_initialise(struct vcpu *v)
else
{
/* PV guests by default have a 100Hz ticker. */
- v->periodic_period = MILLISECS(10);
+ if ( !is_idle_domain(d) )
+ v->periodic_period = MILLISECS(10);
/* PV guests get an emulated PIT too for video BIOSes to use. */
if ( !is_idle_domain(d) && (v->vcpu_id == 0) )
@@ -1645,23 +1646,26 @@ static int relinquish_memory(
/*
* Forcibly invalidate top-most, still valid page tables at this point
- * to break circular 'linear page table' references. This is okay
- * because MMU structures are not shared across domains and this domain
- * is now dead. Thus top-most valid tables are not in use so a non-zero
- * count means circular reference.
+ * to break circular 'linear page table' references as well as clean up
+ * partially validated pages. This is okay because MMU structures are
+ * not shared across domains and this domain is now dead. Thus top-most
+ * valid tables are not in use so a non-zero count means circular
+ * reference or partially validated.
*/
y = page->u.inuse.type_info;
for ( ; ; )
{
x = y;
- if ( likely((x & (PGT_type_mask|PGT_validated)) !=
- (type|PGT_validated)) )
+ if ( likely((x & PGT_type_mask) != type) ||
+ likely(!(x & (PGT_validated|PGT_partial))) )
break;
- y = cmpxchg(&page->u.inuse.type_info, x, x & ~PGT_validated);
+ y = cmpxchg(&page->u.inuse.type_info, x,
+ x & ~(PGT_validated|PGT_partial));
if ( likely(y == x) )
{
- free_page_type(page, type);
+ if ( free_page_type(page, x, 0) != 0 )
+ BUG();
break;
}
}
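
The type_info loop in relinquish_memory() above is the usual lock-free
read-modify-write pattern: snapshot, compute, cmpxchg, retry on a race.
A minimal standalone sketch of the same idiom, using GCC's __sync builtin
in place of Xen's cmpxchg() and made-up flag bits:

    #include <stdio.h>

    static unsigned long type_info = 0x3;   /* hypothetical flag bits */

    int main(void)
    {
        unsigned long x, y = type_info;
        do {
            x = y;                           /* snapshot */
            y = __sync_val_compare_and_swap(&type_info, x, x & ~0x1UL);
        } while ( y != x );                  /* retry if someone raced us */
        printf("0x%lx\n", type_info);        /* prints 0x2 */
        return 0;
    }
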
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/domain_build.c Fri Sep 12 14:47:40 2008 +0900
@@ -26,6 +26,7 @@
#include <asm/desc.h>
#include <asm/i387.h>
#include <asm/paging.h>
+#include <asm/p2m.h>
#include <asm/e820.h>
#include <public/version.h>
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/domctl.c Fri Sep 12 14:47:40 2008 +0900
@@ -20,7 +20,7 @@
#include <xen/trace.h>
#include <xen/console.h>
#include <xen/iocap.h>
-#include <asm/paging.h>
+#include <xen/paging.h>
#include <asm/irq.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/support.h>
@@ -67,14 +67,6 @@ long arch_do_domctl(
ret = -ESRCH;
if ( unlikely((d = rcu_lock_domain_by_id(domctl->domain)) == NULL) )
break;
-
- ret = xsm_ioport_permission(d, fp,
- domctl->u.ioport_permission.allow_access);
- if ( ret )
- {
- rcu_unlock_domain(d);
- break;
- }
if ( np == 0 )
ret = 0;
@@ -550,6 +542,10 @@ long arch_do_domctl(
if ( (d = rcu_lock_domain_by_id(domctl->domain)) == NULL )
break;
+ ret = xsm_sendtrigger(d);
+ if ( ret )
+ goto sendtrigger_out;
+
ret = -EINVAL;
if ( domctl->u.sendtrigger.vcpu >= MAX_VIRT_CPUS )
goto sendtrigger_out;
@@ -628,6 +624,10 @@ long arch_do_domctl(
bus = (domctl->u.assign_device.machine_bdf >> 16) & 0xff;
devfn = (domctl->u.assign_device.machine_bdf >> 8) & 0xff;
+ ret = xsm_test_assign_device(domctl->u.assign_device.machine_bdf);
+ if ( ret )
+ break;
+
if ( device_assigned(bus, devfn) )
{
gdprintk(XENLOG_ERR, "XEN_DOMCTL_test_assign_device: "
@@ -655,6 +655,11 @@ long arch_do_domctl(
"XEN_DOMCTL_assign_device: get_domain_by_id() failed\n");
break;
}
+
+ ret = xsm_assign_device(d, domctl->u.assign_device.machine_bdf);
+ if ( ret )
+ goto assign_device_out;
+
bus = (domctl->u.assign_device.machine_bdf >> 16) & 0xff;
devfn = (domctl->u.assign_device.machine_bdf >> 8) & 0xff;
@@ -680,6 +685,7 @@ long arch_do_domctl(
"assign device (%x:%x:%x) failed\n",
bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+ assign_device_out:
put_domain(d);
}
break;
@@ -700,6 +706,11 @@ long arch_do_domctl(
"XEN_DOMCTL_deassign_device: get_domain_by_id() failed\n");
break;
}
+
+ ret = xsm_assign_device(d, domctl->u.assign_device.machine_bdf);
+ if ( ret )
+ goto deassign_device_out;
+
bus = (domctl->u.assign_device.machine_bdf >> 16) & 0xff;
devfn = (domctl->u.assign_device.machine_bdf >> 8) & 0xff;
@@ -720,6 +731,8 @@ long arch_do_domctl(
deassign_device(d, bus, devfn);
gdprintk(XENLOG_INFO, "XEN_DOMCTL_deassign_device: bdf = %x:%x:%x\n",
bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+
+ deassign_device_out:
put_domain(d);
}
break;
@@ -733,10 +746,17 @@ long arch_do_domctl(
if ( (d = rcu_lock_domain_by_id(domctl->domain)) == NULL )
break;
bind = &(domctl->u.bind_pt_irq);
+
+ ret = xsm_bind_pt_irq(d, bind);
+ if ( ret )
+ goto bind_out;
+
if ( iommu_enabled )
ret = pt_irq_create_bind_vtd(d, bind);
if ( ret < 0 )
gdprintk(XENLOG_ERR, "pt_irq_create_bind failed!\n");
+
+ bind_out:
rcu_unlock_domain(d);
}
break;
@@ -877,11 +897,16 @@ long arch_do_domctl(
if ( d == NULL )
break;
+ ret = xsm_pin_mem_cacheattr(d);
+ if ( ret )
+ goto pin_out;
+
ret = hvm_set_mem_pinned_cacheattr(
d, domctl->u.pin_mem_cacheattr.start,
domctl->u.pin_mem_cacheattr.end,
domctl->u.pin_mem_cacheattr.type);
+ pin_out:
rcu_unlock_domain(d);
}
break;
@@ -899,6 +924,10 @@ long arch_do_domctl(
d = rcu_lock_domain_by_id(domctl->domain);
if ( d == NULL )
break;
+
+ ret = xsm_ext_vcpucontext(d, domctl->cmd);
+ if ( ret )
+ goto ext_vcpucontext_out;
ret = -ESRCH;
if ( (evc->vcpu >= MAX_VIRT_CPUS) ||
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/hpet.c
--- a/xen/arch/x86/hpet.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/hpet.c Fri Sep 12 14:47:40 2008 +0900
@@ -100,6 +100,13 @@ static int reprogram_hpet_evt_channel(
ch->next_event = expire;
+ if ( expire == STIME_MAX )
+ {
+ /* We assume it will take a long time for the timer to wrap. */
+ hpet_write32(0, HPET_T0_CMP);
+ return 0;
+ }
+
delta = min_t(int64_t, delta, MAX_DELTA_NS);
delta = max_t(int64_t, delta, MIN_DELTA_NS);
delta = ns2ticks(delta, ch->shift, ch->mult);
@@ -206,9 +213,11 @@ void hpet_broadcast_enter(void)
{
struct hpet_event_channel *ch = &hpet_event;
+ spin_lock(&ch->lock);
+
+ disable_APIC_timer();
+
cpu_set(smp_processor_id(), ch->cpumask);
-
- spin_lock(&ch->lock);
/* reprogram if current cpu expire time is nearer */
if ( this_cpu(timer_deadline) < ch->next_event )
@@ -222,8 +231,23 @@ void hpet_broadcast_exit(void)
struct hpet_event_channel *ch = &hpet_event;
int cpu = smp_processor_id();
+ spin_lock_irq(&ch->lock);
+
if ( cpu_test_and_clear(cpu, ch->cpumask) )
- reprogram_timer(per_cpu(timer_deadline, cpu));
+ {
+ /* Cancel any outstanding LAPIC event and re-enable interrupts. */
+ reprogram_timer(0);
+ enable_APIC_timer();
+
+ /* Reprogram the deadline; trigger timer work now if it has passed. */
+ if ( !reprogram_timer(per_cpu(timer_deadline, cpu)) )
+ raise_softirq(TIMER_SOFTIRQ);
+
+ if ( cpus_empty(ch->cpumask) && ch->next_event != STIME_MAX )
+ reprogram_hpet_evt_channel(ch, STIME_MAX, 0, 0);
+ }
+
+ spin_unlock_irq(&ch->lock);
}
int hpet_broadcast_is_available(void)
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/hvm/hvm.c Fri Sep 12 14:47:40 2008 +0900
@@ -31,10 +31,11 @@
#include <xen/hypercall.h>
#include <xen/guest_access.h>
#include <xen/event.h>
+#include <xen/paging.h>
+#include <asm/shadow.h>
#include <asm/current.h>
#include <asm/e820.h>
#include <asm/io.h>
-#include <asm/paging.h>
#include <asm/regs.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
@@ -772,7 +773,7 @@ void hvm_hlt(unsigned long rflags)
do_sched_op_compat(SCHEDOP_block, 0);
- HVMTRACE_1D(HLT, curr, /* pending = */ vcpu_runnable(curr));
+ HVMTRACE_1D(HLT, /* pending = */ vcpu_runnable(curr));
}
void hvm_triple_fault(void)
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/hvm/svm/intr.c
--- a/xen/arch/x86/hvm/svm/intr.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/hvm/svm/intr.c Fri Sep 12 14:47:40 2008 +0900
@@ -80,7 +80,7 @@ static void enable_intr_window(struct vc
ASSERT(intack.source != hvm_intsrc_none);
- HVMTRACE_2D(INJ_VIRQ, v, 0x0, /*fake=*/ 1);
+ HVMTRACE_2D(INJ_VIRQ, 0x0, /*fake=*/ 1);
/*
* Create a dummy virtual interrupt to intercept as soon as the
@@ -199,7 +199,7 @@ asmlinkage void svm_intr_assist(void)
}
else
{
- HVMTRACE_2D(INJ_VIRQ, v, intack.vector, /*fake=*/ 0);
+ HVMTRACE_2D(INJ_VIRQ, intack.vector, /*fake=*/ 0);
svm_inject_extint(v, intack.vector);
pt_intr_post(v, intack);
}
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/hvm/svm/svm.c Fri Sep 12 14:47:40 2008 +0900
@@ -759,11 +759,11 @@ static void svm_inject_exception(
if ( trapnr == TRAP_page_fault )
{
vmcb->cr2 = curr->arch.hvm_vcpu.guest_cr[2] = cr2;
- HVMTRACE_LONG_2D(PF_INJECT, curr, errcode, TRC_PAR_LONG(cr2));
+ HVMTRACE_LONG_2D(PF_INJECT, errcode, TRC_PAR_LONG(cr2));
}
else
{
- HVMTRACE_2D(INJ_EXC, curr, trapnr, errcode);
+ HVMTRACE_2D(INJ_EXC, trapnr, errcode);
}
if ( (trapnr == TRAP_debug) &&
@@ -919,7 +919,7 @@ static void svm_cpuid_intercept(
__clear_bit(X86_FEATURE_APIC & 31, edx);
}
- HVMTRACE_5D (CPUID, v, input, *eax, *ebx, *ecx, *edx);
+ HVMTRACE_5D (CPUID, input, *eax, *ebx, *ecx, *edx);
}
static void svm_vmexit_do_cpuid(struct cpu_user_regs *regs)
@@ -946,7 +946,7 @@ static void svm_vmexit_do_cpuid(struct c
static void svm_dr_access(struct vcpu *v, struct cpu_user_regs *regs)
{
- HVMTRACE_0D(DR_WRITE, v);
+ HVMTRACE_0D(DR_WRITE);
__restore_debug_registers(v);
}
@@ -1018,7 +1018,7 @@ static int svm_msr_read_intercept(struct
regs->edx = msr_content >> 32;
done:
- HVMTRACE_3D (MSR_READ, v, ecx, regs->eax, regs->edx);
+ HVMTRACE_3D (MSR_READ, ecx, regs->eax, regs->edx);
HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, eax=%lx, edx=%lx",
ecx, (unsigned long)regs->eax, (unsigned long)regs->edx);
return X86EMUL_OKAY;
@@ -1037,7 +1037,7 @@ static int svm_msr_write_intercept(struc
msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
- HVMTRACE_3D (MSR_WRITE, v, ecx, regs->eax, regs->edx);
+ HVMTRACE_3D (MSR_WRITE, ecx, regs->eax, regs->edx);
switch ( ecx )
{
@@ -1168,7 +1168,7 @@ static void svm_invlpg_intercept(unsigne
static void svm_invlpg_intercept(unsigned long vaddr)
{
struct vcpu *curr = current;
- HVMTRACE_LONG_2D(INVLPG, curr, 0, TRC_PAR_LONG(vaddr));
+ HVMTRACE_LONG_2D(INVLPG, 0, TRC_PAR_LONG(vaddr));
paging_invlpg(curr, vaddr);
svm_asid_g_invlpg(curr, vaddr);
}
@@ -1191,7 +1191,7 @@ asmlinkage void svm_vmexit_handler(struc
exit_reason = vmcb->exitcode;
- HVMTRACE_ND(VMEXIT64, 1/*cycles*/, v, 3, exit_reason,
+ HVMTRACE_ND(VMEXIT64, 1/*cycles*/, 3, exit_reason,
(uint32_t)regs->eip, (uint32_t)((uint64_t)regs->eip >> 32),
0, 0, 0);
@@ -1216,17 +1216,17 @@ asmlinkage void svm_vmexit_handler(struc
{
case VMEXIT_INTR:
/* Asynchronous event, handled when we STGI'd after the VMEXIT. */
- HVMTRACE_0D(INTR, v);
+ HVMTRACE_0D(INTR);
break;
case VMEXIT_NMI:
/* Asynchronous event, handled when we STGI'd after the VMEXIT. */
- HVMTRACE_0D(NMI, v);
+ HVMTRACE_0D(NMI);
break;
case VMEXIT_SMI:
/* Asynchronous event, handled when we STGI'd after the VMEXIT. */
- HVMTRACE_0D(SMI, v);
+ HVMTRACE_0D(SMI);
break;
case VMEXIT_EXCEPTION_DB:
@@ -1261,10 +1261,12 @@ asmlinkage void svm_vmexit_handler(struc
if ( paging_fault(va, regs) )
{
- if (hvm_long_mode_enabled(v))
-                HVMTRACE_LONG_2D(PF_XEN, v, regs->error_code, TRC_PAR_LONG(va));
+ if ( trace_will_trace_event(TRC_SHADOW) )
+ break;
+ if ( hvm_long_mode_enabled(v) )
+ HVMTRACE_LONG_2D(PF_XEN, regs->error_code, TRC_PAR_LONG(va));
else
- HVMTRACE_2D(PF_XEN, v, regs->error_code, va);
+ HVMTRACE_2D(PF_XEN, regs->error_code, va);
break;
}
@@ -1274,7 +1276,7 @@ asmlinkage void svm_vmexit_handler(struc
/* Asynchronous event, handled when we STGI'd after the VMEXIT. */
case VMEXIT_EXCEPTION_MC:
- HVMTRACE_0D(MCE, v);
+ HVMTRACE_0D(MCE);
break;
case VMEXIT_VINTR:
@@ -1331,7 +1333,7 @@ asmlinkage void svm_vmexit_handler(struc
case VMEXIT_VMMCALL:
if ( (inst_len = __get_instruction_length(v, INSTR_VMCALL)) == 0 )
break;
- HVMTRACE_1D(VMMCALL, v, regs->eax);
+ HVMTRACE_1D(VMMCALL, regs->eax);
rc = hvm_do_hypercall(regs);
if ( rc != HVM_HCALL_preempted )
{
@@ -1406,7 +1408,7 @@ asmlinkage void svm_vmexit_handler(struc
asmlinkage void svm_trace_vmentry(void)
{
- HVMTRACE_ND (VMENTRY, 1/*cycles*/, current, 0, 0, 0, 0, 0, 0, 0);
+ HVMTRACE_ND (VMENTRY, 1/*cycles*/, 0, 0, 0, 0, 0, 0, 0);
}
/*
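
Across this file and the vmx files below, every HVMTRACE_nD() caller drops
its explicit vcpu argument; presumably the macros now resolve 'current'
themselves. The mechanical change at each call site, sketched:

    /* before */  HVMTRACE_1D(INTR, v, vector);
    /* after  */  HVMTRACE_1D(INTR, vector);
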
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/hvm/vmx/intr.c
--- a/xen/arch/x86/hvm/vmx/intr.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/hvm/vmx/intr.c Fri Sep 12 14:47:40 2008 +0900
@@ -198,7 +198,7 @@ asmlinkage void vmx_intr_assist(void)
}
else
{
- HVMTRACE_2D(INJ_VIRQ, v, intack.vector, /*fake=*/ 0);
+ HVMTRACE_2D(INJ_VIRQ, intack.vector, /*fake=*/ 0);
vmx_inject_extint(v, intack.vector);
pt_intr_post(v, intack);
}
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/hvm/vmx/vmx.c Fri Sep 12 14:47:40 2008 +0900
@@ -1114,10 +1114,10 @@ static void __vmx_inject_exception(
__vmwrite(VM_ENTRY_INTR_INFO, intr_fields);
if ( trap == TRAP_page_fault )
- HVMTRACE_LONG_2D(PF_INJECT, v, error_code,
+ HVMTRACE_LONG_2D(PF_INJECT, error_code,
TRC_PAR_LONG(v->arch.hvm_vcpu.guest_cr[2]));
else
- HVMTRACE_2D(INJ_EXC, v, trap, error_code);
+ HVMTRACE_2D(INJ_EXC, trap, error_code);
}
void vmx_inject_hw_exception(struct vcpu *v, int trap, int error_code)
@@ -1345,7 +1345,7 @@ static void vmx_cpuid_intercept(
break;
}
- HVMTRACE_5D (CPUID, current, input, *eax, *ebx, *ecx, *edx);
+ HVMTRACE_5D (CPUID, input, *eax, *ebx, *ecx, *edx);
}
static void vmx_do_cpuid(struct cpu_user_regs *regs)
@@ -1370,7 +1370,7 @@ static void vmx_dr_access(unsigned long
{
struct vcpu *v = current;
- HVMTRACE_0D(DR_WRITE, v);
+ HVMTRACE_0D(DR_WRITE);
if ( !v->arch.hvm_vcpu.flag_dr_dirty )
__restore_debug_registers(v);
@@ -1383,7 +1383,7 @@ static void vmx_invlpg_intercept(unsigne
static void vmx_invlpg_intercept(unsigned long vaddr)
{
struct vcpu *curr = current;
- HVMTRACE_LONG_2D(INVLPG, curr, /*invlpga=*/ 0, TRC_PAR_LONG(vaddr));
+ HVMTRACE_LONG_2D(INVLPG, /*invlpga=*/ 0, TRC_PAR_LONG(vaddr));
if ( paging_invlpg(curr, vaddr) )
vpid_sync_vcpu_gva(curr, vaddr);
}
@@ -1434,7 +1434,7 @@ static int mov_to_cr(int gp, int cr, str
goto exit_and_crash;
}
- HVMTRACE_LONG_2D(CR_WRITE, v, cr, TRC_PAR_LONG(value));
+ HVMTRACE_LONG_2D(CR_WRITE, cr, TRC_PAR_LONG(value));
HVM_DBG_LOG(DBG_LEVEL_1, "CR%d, value = %lx", cr, value);
@@ -1505,7 +1505,7 @@ static void mov_from_cr(int cr, int gp,
break;
}
- HVMTRACE_LONG_2D(CR_READ, v, cr, TRC_PAR_LONG(value));
+ HVMTRACE_LONG_2D(CR_READ, cr, TRC_PAR_LONG(value));
HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR%d, value = %lx", cr, value);
}
@@ -1531,13 +1531,13 @@ static int vmx_cr_access(unsigned long e
case VMX_CONTROL_REG_ACCESS_TYPE_CLTS:
v->arch.hvm_vcpu.guest_cr[0] &= ~X86_CR0_TS;
vmx_update_guest_cr(v, 0);
- HVMTRACE_0D(CLTS, current);
+ HVMTRACE_0D(CLTS);
break;
case VMX_CONTROL_REG_ACCESS_TYPE_LMSW:
value = v->arch.hvm_vcpu.guest_cr[0];
/* LMSW can: (1) set bits 0-3; (2) clear bits 1-3. */
value = (value & ~0xe) | ((exit_qualification >> 16) & 0xf);
- HVMTRACE_LONG_1D(LMSW, current, value);
+ HVMTRACE_LONG_1D(LMSW, value);
return !hvm_set_cr0(value);
default:
BUG();
@@ -1692,7 +1692,7 @@ static int vmx_msr_read_intercept(struct
regs->edx = (uint32_t)(msr_content >> 32);
done:
- HVMTRACE_3D (MSR_READ, v, ecx, regs->eax, regs->edx);
+ HVMTRACE_3D (MSR_READ, ecx, regs->eax, regs->edx);
HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, eax=%lx, edx=%lx",
ecx, (unsigned long)regs->eax,
(unsigned long)regs->edx);
@@ -1803,7 +1803,7 @@ static int vmx_msr_write_intercept(struc
msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
- HVMTRACE_3D (MSR_WRITE, v, ecx, regs->eax, regs->edx);
+ HVMTRACE_3D (MSR_WRITE, ecx, regs->eax, regs->edx);
switch ( ecx )
{
@@ -1894,7 +1894,7 @@ static void vmx_do_extint(struct cpu_use
BUG_ON(!(vector & INTR_INFO_VALID_MASK));
vector &= INTR_INFO_VECTOR_MASK;
- HVMTRACE_1D(INTR, current, vector);
+ HVMTRACE_1D(INTR, vector);
switch ( vector )
{
@@ -2010,7 +2010,7 @@ static void vmx_failed_vmentry(unsigned
break;
case EXIT_REASON_MACHINE_CHECK:
printk("caused by machine check.\n");
- HVMTRACE_0D(MCE, curr);
+ HVMTRACE_0D(MCE);
do_machine_check(regs);
break;
default:
@@ -2037,7 +2037,7 @@ asmlinkage void vmx_vmexit_handler(struc
exit_reason = __vmread(VM_EXIT_REASON);
- HVMTRACE_ND(VMEXIT64, 1/*cycles*/, v, 3, exit_reason,
+ HVMTRACE_ND(VMEXIT64, 1/*cycles*/, 3, exit_reason,
(uint32_t)regs->eip, (uint32_t)((uint64_t)regs->eip >> 32),
0, 0, 0);
@@ -2101,7 +2101,8 @@ asmlinkage void vmx_vmexit_handler(struc
!(__vmread(IDT_VECTORING_INFO) & INTR_INFO_VALID_MASK) &&
(vector != TRAP_double_fault) )
__vmwrite(GUEST_INTERRUPTIBILITY_INFO,
- __vmread(GUEST_INTERRUPTIBILITY_INFO)|VMX_INTR_SHADOW_NMI);
+ __vmread(GUEST_INTERRUPTIBILITY_INFO)
+ | VMX_INTR_SHADOW_NMI);
perfc_incra(cause_vector, vector);
@@ -2128,12 +2129,14 @@ asmlinkage void vmx_vmexit_handler(struc
if ( paging_fault(exit_qualification, regs) )
{
+ if ( trace_will_trace_event(TRC_SHADOW) )
+ break;
if ( hvm_long_mode_enabled(v) )
- HVMTRACE_LONG_2D (PF_XEN, v, regs->error_code,
- TRC_PAR_LONG(exit_qualification) );
+ HVMTRACE_LONG_2D(PF_XEN, regs->error_code,
+ TRC_PAR_LONG(exit_qualification) );
else
- HVMTRACE_2D (PF_XEN, v,
- regs->error_code, exit_qualification );
+ HVMTRACE_2D(PF_XEN,
+ regs->error_code, exit_qualification );
break;
}
@@ -2144,11 +2147,11 @@ asmlinkage void vmx_vmexit_handler(struc
if ( (intr_info & INTR_INFO_INTR_TYPE_MASK) !=
(X86_EVENTTYPE_NMI << 8) )
goto exit_and_crash;
- HVMTRACE_0D(NMI, v);
+ HVMTRACE_0D(NMI);
do_nmi(regs); /* Real NMI, vector 2: normal processing. */
break;
case TRAP_machine_check:
- HVMTRACE_0D(MCE, v);
+ HVMTRACE_0D(MCE);
do_machine_check(regs);
break;
default:
@@ -2213,7 +2216,7 @@ asmlinkage void vmx_vmexit_handler(struc
case EXIT_REASON_VMCALL:
{
int rc;
- HVMTRACE_1D(VMMCALL, v, regs->eax);
+ HVMTRACE_1D(VMMCALL, regs->eax);
inst_len = __get_instruction_length(); /* Safe: VMCALL */
rc = hvm_do_hypercall(regs);
if ( rc != HVM_HCALL_preempted )
@@ -2300,7 +2303,7 @@ asmlinkage void vmx_vmexit_handler(struc
asmlinkage void vmx_trace_vmentry(void)
{
- HVMTRACE_ND (VMENTRY, 1/*cycles*/, current, 0, 0, 0, 0, 0, 0, 0);
+    HVMTRACE_ND(VMENTRY, 1/*cycles*/, 0, 0, 0, 0, 0, 0, 0);
}
/*
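
Every HVMTRACE_* call site in the hunks above drops its vcpu argument; the
macros are assumed to resolve the subject vcpu internally now, presumably via
'current'. Before/after shape of a call site, condensed from the diff:

    /* old: the vcpu was passed explicitly */
    HVMTRACE_1D(INTR, current, vector);
    /* new: the macro picks up the vcpu itself */
    HVMTRACE_1D(INTR, vector);
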
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/io_apic.c
--- a/xen/arch/x86/io_apic.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/io_apic.c Fri Sep 12 14:47:40 2008 +0900
@@ -45,23 +45,14 @@ int (*ioapic_renumber_irq)(int ioapic, i
int (*ioapic_renumber_irq)(int ioapic, int irq);
atomic_t irq_mis_count;
-int msi_enable = 0;
-boolean_param("msi", msi_enable);
-
int domain_irq_to_vector(struct domain *d, int irq)
{
- if ( !msi_enable )
- return irq_to_vector(irq);
- else
- return d->arch.pirq_vector[irq];
+ return d->arch.pirq_vector[irq];
}
int domain_vector_to_irq(struct domain *d, int vector)
{
- if ( !msi_enable )
- return vector_to_irq(vector);
- else
- return d->arch.vector_pirq[vector];
+ return d->arch.vector_pirq[vector];
}
/* Where if anywhere is the i8259 connect in external int mode */
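
With the experimental "msi" boot option gone, the per-domain tables are the
only pirq<->vector mapping; there is no global irq_to_vector() fallback any
more. A hedged caller-side sketch (the helper name is hypothetical; the
zero-means-unmapped convention follows unmap_domain_pirq() in physdev.c
below):

    static int pirq_to_vector_checked(struct domain *d, int pirq)
    {
        int vector = domain_irq_to_vector(d, pirq);

        if ( !vector )
            return -EINVAL;  /* pirq was never mapped via PHYSDEVOP_map_pirq */
        return vector;
    }
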
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/irq.c Fri Sep 12 14:47:40 2008 +0900
@@ -737,9 +737,12 @@ __initcall(setup_dump_irqs);
void fixup_irqs(cpumask_t map)
{
- unsigned int irq;
+ unsigned int irq, sp;
static int warned;
-
+ irq_guest_action_t *action;
+ struct pending_eoi *peoi;
+
+ /* Direct all future interrupts away from this CPU. */
for ( irq = 0; irq < NR_IRQS; irq++ )
{
cpumask_t mask;
@@ -758,8 +761,24 @@ void fixup_irqs(cpumask_t map)
printk("Cannot set affinity for irq %i\n", irq);
}
+ /* Service any interrupts that beat us in the re-direction race. */
local_irq_enable();
mdelay(1);
local_irq_disable();
+
+ /* Clean up cpu_eoi_map of every interrupt to exclude this CPU. */
+ for ( irq = 0; irq < NR_IRQS; irq++ )
+ {
+ if ( !(irq_desc[irq].status & IRQ_GUEST) )
+ continue;
+ action = (irq_guest_action_t *)irq_desc[irq].action;
+ cpu_clear(smp_processor_id(), action->cpu_eoi_map);
+ }
+
+ /* Flush the interrupt EOI stack. */
+ peoi = this_cpu(pending_eoi);
+ for ( sp = 0; sp < pending_eoi_sp(peoi); sp++ )
+ peoi[sp].ready = 1;
+ flush_ready_eoi(NULL);
}
#endif
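
fixup_irqs() now evacuates the dying CPU in three steps: re-route every IRQ's
affinity, briefly re-enable interrupts to service anything that raced the
re-route, then drop the CPU from each guest IRQ's cpu_eoi_map and flush the
per-CPU pending-EOI stack so no acknowledgement stays owed. A minimal model
of the final drain ('ready' matches the code above; the 'vector' field name
is an assumption):

    struct pending_eoi { uint8_t vector, ready; };

    static void drain_eoi_stack(struct pending_eoi *peoi, unsigned int sp)
    {
        unsigned int i;

        /* Mark every stacked, unacknowledged interrupt as due... */
        for ( i = 0; i < sp; i++ )
            peoi[i].ready = 1;
        /* ...so flush_ready_eoi() can pop them all, issuing the EOIs. */
    }
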
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/mm.c Fri Sep 12 14:47:40 2008 +0900
@@ -507,11 +507,11 @@ static int alloc_segdesc_page(struct pag
goto fail;
unmap_domain_page(descs);
- return 1;
+ return 0;
fail:
unmap_domain_page(descs);
- return 0;
+ return -EINVAL;
}
@@ -565,20 +565,23 @@ static int get_page_from_pagenr(unsigned
static int get_page_and_type_from_pagenr(unsigned long page_nr,
unsigned long type,
- struct domain *d)
+ struct domain *d,
+ int preemptible)
{
struct page_info *page = mfn_to_page(page_nr);
+ int rc;
if ( unlikely(!get_page_from_pagenr(page_nr, d)) )
- return 0;
-
- if ( unlikely(!get_page_type(page, type)) )
- {
+ return -EINVAL;
+
+ rc = (preemptible ?
+ get_page_type_preemptible(page, type) :
+ (get_page_type(page, type) ? 0 : -EINVAL));
+
+ if ( rc )
put_page(page);
- return 0;
- }
-
- return 1;
+
+ return rc;
}
/*
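
The return convention flips here from boolean success to 0/-errno, and
preemptible callers may additionally see -EAGAIN (validation parked with
PGT_partial set) or -EINTR (an event is pending before any progress was
made). A hedged sketch of the resulting caller pattern (the function is
illustrative; the -EINTR-to--EAGAIN folding mirrors do_mmuext_op() below):

    static int pin_l3_example(unsigned long mfn, struct domain *d)
    {
        int rc = get_page_and_type_from_pagenr(mfn, PGT_l3_page_table, d, 1);

        if ( rc == -EINTR )
            rc = -EAGAIN;     /* nothing done yet: retry the whole sub-op */
        if ( rc == -EAGAIN )
            return rc;        /* caller sets up a hypercall continuation */
        if ( rc )
            MEM_LOG("Type validation failed for mfn %lx (%d)", mfn, rc);
        return rc;
    }
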
@@ -754,22 +757,22 @@ get_page_from_l2e(
if ( unlikely((l2e_get_flags(l2e) & L2_DISALLOW_MASK)) )
{
MEM_LOG("Bad L2 flags %x", l2e_get_flags(l2e) & L2_DISALLOW_MASK);
- return 0;
- }
-
- rc = get_page_and_type_from_pagenr(l2e_get_pfn(l2e), PGT_l1_page_table, d);
- if ( unlikely(!rc) )
- rc = get_l2_linear_pagetable(l2e, pfn, d);
+ return -EINVAL;
+ }
+
+ rc = get_page_and_type_from_pagenr(
+ l2e_get_pfn(l2e), PGT_l1_page_table, d, 0);
+ if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
+ rc = 0;
return rc;
}
-#if CONFIG_PAGING_LEVELS >= 3
define_get_linear_pagetable(l3);
static int
get_page_from_l3e(
- l3_pgentry_t l3e, unsigned long pfn, struct domain *d)
+ l3_pgentry_t l3e, unsigned long pfn, struct domain *d, int preemptible)
{
int rc;
@@ -779,22 +782,22 @@ get_page_from_l3e(
if ( unlikely((l3e_get_flags(l3e) & l3_disallow_mask(d))) )
{
MEM_LOG("Bad L3 flags %x", l3e_get_flags(l3e) & l3_disallow_mask(d));
- return 0;
- }
-
- rc = get_page_and_type_from_pagenr(l3e_get_pfn(l3e), PGT_l2_page_table, d);
- if ( unlikely(!rc) )
- rc = get_l3_linear_pagetable(l3e, pfn, d);
+ return -EINVAL;
+ }
+
+ rc = get_page_and_type_from_pagenr(
+ l3e_get_pfn(l3e), PGT_l2_page_table, d, preemptible);
+ if ( unlikely(rc == -EINVAL) && get_l3_linear_pagetable(l3e, pfn, d) )
+ rc = 0;
return rc;
}
-#endif /* 3 level */
#if CONFIG_PAGING_LEVELS >= 4
define_get_linear_pagetable(l4);
static int
get_page_from_l4e(
- l4_pgentry_t l4e, unsigned long pfn, struct domain *d)
+ l4_pgentry_t l4e, unsigned long pfn, struct domain *d, int preemptible)
{
int rc;
@@ -804,12 +807,13 @@ get_page_from_l4e(
if ( unlikely((l4e_get_flags(l4e) & L4_DISALLOW_MASK)) )
{
MEM_LOG("Bad L4 flags %x", l4e_get_flags(l4e) & L4_DISALLOW_MASK);
- return 0;
- }
-
- rc = get_page_and_type_from_pagenr(l4e_get_pfn(l4e), PGT_l3_page_table, d);
- if ( unlikely(!rc) )
- rc = get_l4_linear_pagetable(l4e, pfn, d);
+ return -EINVAL;
+ }
+
+ rc = get_page_and_type_from_pagenr(
+ l4e_get_pfn(l4e), PGT_l3_page_table, d, preemptible);
+ if ( unlikely(rc == -EINVAL) && get_l4_linear_pagetable(l4e, pfn, d) )
+ rc = 0;
return rc;
}
@@ -946,29 +950,35 @@ void put_page_from_l1e(l1_pgentry_t l1e,
* NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'.
* Note also that this automatically deals correctly with linear p.t.'s.
*/
-static void put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
+static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
{
if ( (l2e_get_flags(l2e) & _PAGE_PRESENT) &&
(l2e_get_pfn(l2e) != pfn) )
+ {
put_page_and_type(l2e_get_page(l2e));
-}
-
-
-#if CONFIG_PAGING_LEVELS >= 3
-static void put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn)
+ return 0;
+ }
+ return 1;
+}
+
+
+static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
+ int preemptible)
{
if ( (l3e_get_flags(l3e) & _PAGE_PRESENT) &&
(l3e_get_pfn(l3e) != pfn) )
- put_page_and_type(l3e_get_page(l3e));
-}
-#endif
+ return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible);
+ return 1;
+}
#if CONFIG_PAGING_LEVELS >= 4
-static void put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn)
+static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
+ int preemptible)
{
if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) &&
(l4e_get_pfn(l4e) != pfn) )
- put_page_and_type(l4e_get_page(l4e));
+ return put_page_and_type_preemptible(l4e_get_page(l4e), preemptible);
+ return 1;
}
#endif
@@ -977,7 +987,7 @@ static int alloc_l1_table(struct page_in
struct domain *d = page_get_owner(page);
unsigned long pfn = page_to_mfn(page);
l1_pgentry_t *pl1e;
- int i;
+ unsigned int i;
pl1e = map_domain_page(pfn);
@@ -991,7 +1001,7 @@ static int alloc_l1_table(struct page_in
}
unmap_domain_page(pl1e);
- return 1;
+ return 0;
fail:
MEM_LOG("Failure in alloc_l1_table: entry %d", i);
@@ -1000,7 +1010,7 @@ static int alloc_l1_table(struct page_in
put_page_from_l1e(pl1e[i], d);
unmap_domain_page(pl1e);
- return 0;
+ return -EINVAL;
}
static int create_pae_xen_mappings(struct domain *d, l3_pgentry_t *pl3e)
@@ -1128,47 +1138,53 @@ static void pae_flush_pgd(
# define pae_flush_pgd(mfn, idx, nl3e) ((void)0)
#endif
-static int alloc_l2_table(struct page_info *page, unsigned long type)
+static int alloc_l2_table(struct page_info *page, unsigned long type,
+ int preemptible)
{
struct domain *d = page_get_owner(page);
unsigned long pfn = page_to_mfn(page);
l2_pgentry_t *pl2e;
- int i;
+ unsigned int i;
+ int rc = 0;
pl2e = map_domain_page(pfn);
- for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
- {
- if ( !is_guest_l2_slot(d, type, i) )
+ for ( i = page->nr_validated_ptes; i < L2_PAGETABLE_ENTRIES; i++ )
+ {
+ if ( preemptible && i && hypercall_preempt_check() )
+ {
+ page->nr_validated_ptes = i;
+ rc = -EAGAIN;
+ break;
+ }
+
+ if ( !is_guest_l2_slot(d, type, i) ||
+ (rc = get_page_from_l2e(pl2e[i], pfn, d)) > 0 )
continue;
- if ( unlikely(!get_page_from_l2e(pl2e[i], pfn, d)) )
- goto fail;
-
+ if ( rc < 0 )
+ {
+ MEM_LOG("Failure in alloc_l2_table: entry %d", i);
+ while ( i-- > 0 )
+ if ( is_guest_l2_slot(d, type, i) )
+ put_page_from_l2e(pl2e[i], pfn);
+ break;
+ }
+
adjust_guest_l2e(pl2e[i], d);
}
unmap_domain_page(pl2e);
- return 1;
-
- fail:
- MEM_LOG("Failure in alloc_l2_table: entry %d", i);
- while ( i-- > 0 )
- if ( is_guest_l2_slot(d, type, i) )
- put_page_from_l2e(pl2e[i], pfn);
-
- unmap_domain_page(pl2e);
- return 0;
-}
-
-
-#if CONFIG_PAGING_LEVELS >= 3
-static int alloc_l3_table(struct page_info *page)
+ return rc > 0 ? 0 : rc;
+}
+
+static int alloc_l3_table(struct page_info *page, int preemptible)
{
struct domain *d = page_get_owner(page);
unsigned long pfn = page_to_mfn(page);
l3_pgentry_t *pl3e;
- int i;
+ unsigned int i;
+ int rc = 0;
#if CONFIG_PAGING_LEVELS == 3
/*
@@ -1181,7 +1197,7 @@ static int alloc_l3_table(struct page_in
d->vcpu[0] && d->vcpu[0]->is_initialised )
{
MEM_LOG("PAE pgd must be below 4GB (0x%lx >= 0x100000)", pfn);
- return 0;
+ return -EINVAL;
}
#endif
@@ -1197,64 +1213,96 @@ static int alloc_l3_table(struct page_in
if ( is_pv_32on64_domain(d) )
memset(pl3e + 4, 0, (L3_PAGETABLE_ENTRIES - 4) * sizeof(*pl3e));
- for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
+ for ( i = page->nr_validated_ptes; i < L3_PAGETABLE_ENTRIES; i++ )
{
if ( is_pv_32bit_domain(d) && (i == 3) )
{
if ( !(l3e_get_flags(pl3e[i]) & _PAGE_PRESENT) ||
- (l3e_get_flags(pl3e[i]) & l3_disallow_mask(d)) ||
- !get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]),
- PGT_l2_page_table |
- PGT_pae_xen_l2,
- d) )
- goto fail;
- }
- else if ( !is_guest_l3_slot(i) )
+ (l3e_get_flags(pl3e[i]) & l3_disallow_mask(d)) )
+ rc = -EINVAL;
+ else
+ rc = get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]),
+ PGT_l2_page_table |
+ PGT_pae_xen_l2,
+ d, preemptible);
+ }
+ else if ( !is_guest_l3_slot(i) ||
+ (rc = get_page_from_l3e(pl3e[i], pfn, d, preemptible)) > 0 )
continue;
- else if ( unlikely(!get_page_from_l3e(pl3e[i], pfn, d)) )
- goto fail;
+
+ if ( rc == -EAGAIN )
+ {
+ page->nr_validated_ptes = i;
+ page->partial_pte = 1;
+ }
+ else if ( rc == -EINTR && i )
+ {
+ page->nr_validated_ptes = i;
+ page->partial_pte = 0;
+ rc = -EAGAIN;
+ }
+ if ( rc < 0 )
+ break;
adjust_guest_l3e(pl3e[i], d);
}
- if ( !create_pae_xen_mappings(d, pl3e) )
- goto fail;
+ if ( rc >= 0 && !create_pae_xen_mappings(d, pl3e) )
+ rc = -EINVAL;
+ if ( rc < 0 && rc != -EAGAIN && rc != -EINTR )
+ {
+ MEM_LOG("Failure in alloc_l3_table: entry %d", i);
+ while ( i-- > 0 )
+ {
+ if ( !is_guest_l3_slot(i) )
+ continue;
+ unadjust_guest_l3e(pl3e[i], d);
+ put_page_from_l3e(pl3e[i], pfn, 0);
+ }
+ }
unmap_domain_page(pl3e);
- return 1;
-
- fail:
- MEM_LOG("Failure in alloc_l3_table: entry %d", i);
- while ( i-- > 0 )
- {
- if ( !is_guest_l3_slot(i) )
- continue;
- unadjust_guest_l3e(pl3e[i], d);
- put_page_from_l3e(pl3e[i], pfn);
- }
-
- unmap_domain_page(pl3e);
- return 0;
-}
-#else
-#define alloc_l3_table(page) (0)
-#endif
+ return rc > 0 ? 0 : rc;
+}
#if CONFIG_PAGING_LEVELS >= 4
-static int alloc_l4_table(struct page_info *page)
+static int alloc_l4_table(struct page_info *page, int preemptible)
{
struct domain *d = page_get_owner(page);
unsigned long pfn = page_to_mfn(page);
l4_pgentry_t *pl4e = page_to_virt(page);
- int i;
-
- for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
- {
- if ( !is_guest_l4_slot(d, i) )
+ unsigned int i;
+ int rc = 0;
+
+ for ( i = page->nr_validated_ptes; i < L4_PAGETABLE_ENTRIES; i++ )
+ {
+ if ( !is_guest_l4_slot(d, i) ||
+ (rc = get_page_from_l4e(pl4e[i], pfn, d, preemptible)) > 0 )
continue;
- if ( unlikely(!get_page_from_l4e(pl4e[i], pfn, d)) )
- goto fail;
+ if ( rc == -EAGAIN )
+ {
+ page->nr_validated_ptes = i;
+ page->partial_pte = 1;
+ }
+ else if ( rc == -EINTR )
+ {
+ if ( i )
+ {
+ page->nr_validated_ptes = i;
+ page->partial_pte = 0;
+ rc = -EAGAIN;
+ }
+ }
+ else if ( rc < 0 )
+ {
+ MEM_LOG("Failure in alloc_l4_table: entry %d", i);
+ while ( i-- > 0 )
+ if ( is_guest_l4_slot(d, i) )
+ put_page_from_l4e(pl4e[i], pfn, 0);
+ }
+ if ( rc < 0 )
+ return rc;
adjust_guest_l4e(pl4e[i], d);
}
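
The alloc_l2/l3/l4_table() variants now share one checkpoint scheme: each
loop resumes at page->nr_validated_ptes, and partial_pte records whether that
entry already holds a partially-acquired type reference. Schematic of the
shared loop ('lN' stands in for the paging level; this outlines the code
above rather than adding behaviour):

    for ( i = page->nr_validated_ptes; i < ENTRIES; i++ )
    {
        rc = get_page_from_lNe(plNe[i], pfn, d, preemptible);
        if ( rc == -EAGAIN )            /* entry i parked mid-validation */
        {
            page->nr_validated_ptes = i;
            page->partial_pte = 1;
        }
        else if ( rc == -EINTR && i )   /* clean stop before entry i */
        {
            page->nr_validated_ptes = i;
            page->partial_pte = 0;
            rc = -EAGAIN;               /* report upward as "continue" */
        }
        if ( rc < 0 )                   /* bare -EINTR (i == 0) propagates */
            break;
        adjust_guest_lNe(plNe[i], d);
    }
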
@@ -1269,18 +1317,10 @@ static int alloc_l4_table(struct page_in
l4e_from_page(virt_to_page(d->arch.mm_perdomain_l3),
__PAGE_HYPERVISOR);
- return 1;
-
- fail:
- MEM_LOG("Failure in alloc_l4_table: entry %d", i);
- while ( i-- > 0 )
- if ( is_guest_l4_slot(d, i) )
- put_page_from_l4e(pl4e[i], pfn);
-
- return 0;
+ return rc > 0 ? 0 : rc;
}
#else
-#define alloc_l4_table(page) (0)
+#define alloc_l4_table(page, preemptible) (-EINVAL)
#endif
@@ -1289,7 +1329,7 @@ static void free_l1_table(struct page_in
struct domain *d = page_get_owner(page);
unsigned long pfn = page_to_mfn(page);
l1_pgentry_t *pl1e;
- int i;
+ unsigned int i;
pl1e = map_domain_page(pfn);
@@ -1301,74 +1341,114 @@ static void free_l1_table(struct page_in
}
-static void free_l2_table(struct page_info *page)
+static int free_l2_table(struct page_info *page, int preemptible)
{
#ifdef CONFIG_COMPAT
struct domain *d = page_get_owner(page);
#endif
unsigned long pfn = page_to_mfn(page);
l2_pgentry_t *pl2e;
- int i;
+ unsigned int i = page->nr_validated_ptes - 1;
+ int err = 0;
pl2e = map_domain_page(pfn);
- for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
- if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) )
- put_page_from_l2e(pl2e[i], pfn);
+ ASSERT(page->nr_validated_ptes);
+ do {
+ if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) &&
+ put_page_from_l2e(pl2e[i], pfn) == 0 &&
+ preemptible && i && hypercall_preempt_check() )
+ {
+ page->nr_validated_ptes = i;
+ err = -EAGAIN;
+ }
+ } while ( !err && i-- );
unmap_domain_page(pl2e);
- page->u.inuse.type_info &= ~PGT_pae_xen_l2;
-}
-
-
-#if CONFIG_PAGING_LEVELS >= 3
-
-static void free_l3_table(struct page_info *page)
+ if ( !err )
+ page->u.inuse.type_info &= ~PGT_pae_xen_l2;
+
+ return err;
+}
+
+static int free_l3_table(struct page_info *page, int preemptible)
{
struct domain *d = page_get_owner(page);
unsigned long pfn = page_to_mfn(page);
l3_pgentry_t *pl3e;
- int i;
+ unsigned int i = page->nr_validated_ptes - !page->partial_pte;
+ int rc = 0;
#ifdef DOMAIN_DESTRUCT_AVOID_RECURSION
if ( d->arch.relmem == RELMEM_l3 )
- return;
+ return 0;
#endif
pl3e = map_domain_page(pfn);
- for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
+ do {
if ( is_guest_l3_slot(i) )
{
- put_page_from_l3e(pl3e[i], pfn);
+ rc = put_page_from_l3e(pl3e[i], pfn, preemptible);
+ if ( rc > 0 )
+ continue;
+ if ( rc )
+ break;
unadjust_guest_l3e(pl3e[i], d);
}
+ } while ( i-- );
unmap_domain_page(pl3e);
-}
-
-#endif
+
+ if ( rc == -EAGAIN )
+ {
+ page->nr_validated_ptes = i;
+ page->partial_pte = 1;
+ }
+ else if ( rc == -EINTR && i < L3_PAGETABLE_ENTRIES - 1 )
+ {
+ page->nr_validated_ptes = i + 1;
+ page->partial_pte = 0;
+ rc = -EAGAIN;
+ }
+ return rc > 0 ? 0 : rc;
+}
#if CONFIG_PAGING_LEVELS >= 4
-
-static void free_l4_table(struct page_info *page)
+static int free_l4_table(struct page_info *page, int preemptible)
{
struct domain *d = page_get_owner(page);
unsigned long pfn = page_to_mfn(page);
l4_pgentry_t *pl4e = page_to_virt(page);
- int i;
+ unsigned int i = page->nr_validated_ptes - !page->partial_pte;
+ int rc = 0;
#ifdef DOMAIN_DESTRUCT_AVOID_RECURSION
if ( d->arch.relmem == RELMEM_l4 )
- return;
+ return 0;
#endif
- for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
+ do {
if ( is_guest_l4_slot(d, i) )
- put_page_from_l4e(pl4e[i], pfn);
-}
-
+ rc = put_page_from_l4e(pl4e[i], pfn, preemptible);
+ } while ( rc >= 0 && i-- );
+
+ if ( rc == -EAGAIN )
+ {
+ page->nr_validated_ptes = i;
+ page->partial_pte = 1;
+ }
+ else if ( rc == -EINTR && i < L4_PAGETABLE_ENTRIES - 1 )
+ {
+ page->nr_validated_ptes = i + 1;
+ page->partial_pte = 0;
+ rc = -EAGAIN;
+ }
+ return rc > 0 ? 0 : rc;
+}
+#else
+#define free_l4_table(page, preemptible) (-EINVAL)
#endif
static void page_lock(struct page_info *page)
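
The free side walks downward from the same checkpoint; note the asymmetric
resume index nr_validated_ptes - !partial_pte: with partial_pte set, entry
nr_validated_ptes itself still holds a reference that must be dropped.
Schematic of the shared scan (again an outline of the code above):

    i = page->nr_validated_ptes - !page->partial_pte;   /* resume point */
    do {
        rc = put_page_from_lNe(plNe[i], pfn, preemptible);
        if ( rc > 0 )        /* slot held no reference; keep scanning */
            continue;
        if ( rc )            /* -EAGAIN/-EINTR: checkpoint and stop */
            break;
    } while ( i-- );
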
@@ -1560,7 +1640,7 @@ static int mod_l2_entry(l2_pgentry_t *pl
return rc;
}
- if ( unlikely(!get_page_from_l2e(nl2e, pfn, d)) )
+ if ( unlikely(get_page_from_l2e(nl2e, pfn, d) < 0) )
return page_unlock(l2pg), 0;
adjust_guest_l2e(nl2e, d);
@@ -1582,25 +1662,24 @@ static int mod_l2_entry(l2_pgentry_t *pl
put_page_from_l2e(ol2e, pfn);
return rc;
}
-
-#if CONFIG_PAGING_LEVELS >= 3
/* Update the L3 entry at pl3e to new value nl3e. pl3e is within frame pfn. */
static int mod_l3_entry(l3_pgentry_t *pl3e,
l3_pgentry_t nl3e,
unsigned long pfn,
- int preserve_ad)
+ int preserve_ad,
+ int preemptible)
{
l3_pgentry_t ol3e;
struct vcpu *curr = current;
struct domain *d = curr->domain;
struct page_info *l3pg = mfn_to_page(pfn);
- int rc = 1;
+ int rc = 0;
if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) )
{
MEM_LOG("Illegal L3 update attempt in Xen-private area %p", pl3e);
- return 0;
+ return -EINVAL;
}
/*
@@ -1608,12 +1687,12 @@ static int mod_l3_entry(l3_pgentry_t *pl
* would be a pain to ensure they remain continuously valid throughout.
*/
if ( is_pv_32bit_domain(d) && (pgentry_ptr_to_slot(pl3e) >= 3) )
- return 0;
+ return -EINVAL;
page_lock(l3pg);
if ( unlikely(__copy_from_user(&ol3e, pl3e, sizeof(ol3e)) != 0) )
- return page_unlock(l3pg), 0;
+ return page_unlock(l3pg), -EFAULT;
if ( l3e_get_flags(nl3e) & _PAGE_PRESENT )
{
@@ -1622,7 +1701,7 @@ static int mod_l3_entry(l3_pgentry_t *pl
page_unlock(l3pg);
MEM_LOG("Bad L3 flags %x",
l3e_get_flags(nl3e) & l3_disallow_mask(d));
- return 0;
+ return -EINVAL;
}
/* Fast path for identical mapping and presence. */
@@ -1631,28 +1710,30 @@ static int mod_l3_entry(l3_pgentry_t *pl
adjust_guest_l3e(nl3e, d);
rc = UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr, preserve_ad);
page_unlock(l3pg);
- return rc;
- }
-
- if ( unlikely(!get_page_from_l3e(nl3e, pfn, d)) )
- return page_unlock(l3pg), 0;
+ return rc ? 0 : -EFAULT;
+ }
+
+ rc = get_page_from_l3e(nl3e, pfn, d, preemptible);
+ if ( unlikely(rc < 0) )
+ return page_unlock(l3pg), rc;
+ rc = 0;
adjust_guest_l3e(nl3e, d);
if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr,
preserve_ad)) )
{
ol3e = nl3e;
- rc = 0;
+ rc = -EFAULT;
}
}
else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr,
preserve_ad)) )
{
page_unlock(l3pg);
- return 0;
- }
-
- if ( likely(rc) )
+ return -EFAULT;
+ }
+
+ if ( likely(rc == 0) )
{
if ( !create_pae_xen_mappings(d, pl3e) )
BUG();
@@ -1661,11 +1742,9 @@ static int mod_l3_entry(l3_pgentry_t *pl
}
page_unlock(l3pg);
- put_page_from_l3e(ol3e, pfn);
+ put_page_from_l3e(ol3e, pfn, 0);
return rc;
}
-
-#endif
#if CONFIG_PAGING_LEVELS >= 4
@@ -1673,24 +1752,25 @@ static int mod_l4_entry(l4_pgentry_t *pl
static int mod_l4_entry(l4_pgentry_t *pl4e,
l4_pgentry_t nl4e,
unsigned long pfn,
- int preserve_ad)
+ int preserve_ad,
+ int preemptible)
{
struct vcpu *curr = current;
struct domain *d = curr->domain;
l4_pgentry_t ol4e;
struct page_info *l4pg = mfn_to_page(pfn);
- int rc = 1;
+ int rc = 0;
if ( unlikely(!is_guest_l4_slot(d, pgentry_ptr_to_slot(pl4e))) )
{
MEM_LOG("Illegal L4 update attempt in Xen-private area %p", pl4e);
- return 0;
+ return -EINVAL;
}
page_lock(l4pg);
if ( unlikely(__copy_from_user(&ol4e, pl4e, sizeof(ol4e)) != 0) )
- return page_unlock(l4pg), 0;
+ return page_unlock(l4pg), -EFAULT;
if ( l4e_get_flags(nl4e) & _PAGE_PRESENT )
{
@@ -1699,7 +1779,7 @@ static int mod_l4_entry(l4_pgentry_t *pl
page_unlock(l4pg);
MEM_LOG("Bad L4 flags %x",
l4e_get_flags(nl4e) & L4_DISALLOW_MASK);
- return 0;
+ return -EINVAL;
}
/* Fast path for identical mapping and presence. */
@@ -1708,29 +1788,31 @@ static int mod_l4_entry(l4_pgentry_t *pl
adjust_guest_l4e(nl4e, d);
rc = UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr, preserve_ad);
page_unlock(l4pg);
- return rc;
- }
-
- if ( unlikely(!get_page_from_l4e(nl4e, pfn, d)) )
- return page_unlock(l4pg), 0;
+ return rc ? 0 : -EFAULT;
+ }
+
+ rc = get_page_from_l4e(nl4e, pfn, d, preemptible);
+ if ( unlikely(rc < 0) )
+ return page_unlock(l4pg), rc;
+ rc = 0;
adjust_guest_l4e(nl4e, d);
if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr,
preserve_ad)) )
{
ol4e = nl4e;
- rc = 0;
+ rc = -EFAULT;
}
}
else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr,
preserve_ad)) )
{
page_unlock(l4pg);
- return 0;
+ return -EFAULT;
}
page_unlock(l4pg);
- put_page_from_l4e(ol4e, pfn);
+ put_page_from_l4e(ol4e, pfn, 0);
return rc;
}
@@ -1788,9 +1870,11 @@ int get_page(struct page_info *page, str
}
-static int alloc_page_type(struct page_info *page, unsigned long type)
+static int alloc_page_type(struct page_info *page, unsigned long type,
+ int preemptible)
{
struct domain *owner = page_get_owner(page);
+ int rc;
/* A page table is dirtied when its type count becomes non-zero. */
if ( likely(owner != NULL) )
@@ -1799,30 +1883,65 @@ static int alloc_page_type(struct page_i
switch ( type & PGT_type_mask )
{
case PGT_l1_page_table:
- return alloc_l1_table(page);
+ alloc_l1_table(page);
+ rc = 0;
+ break;
case PGT_l2_page_table:
- return alloc_l2_table(page, type);
+ rc = alloc_l2_table(page, type, preemptible);
+ break;
case PGT_l3_page_table:
- return alloc_l3_table(page);
+ rc = alloc_l3_table(page, preemptible);
+ break;
case PGT_l4_page_table:
- return alloc_l4_table(page);
+ rc = alloc_l4_table(page, preemptible);
+ break;
case PGT_seg_desc_page:
- return alloc_segdesc_page(page);
+ rc = alloc_segdesc_page(page);
+ break;
default:
printk("Bad type in alloc_page_type %lx t=%" PRtype_info " c=%x\n",
type, page->u.inuse.type_info,
page->count_info);
+ rc = -EINVAL;
BUG();
}
- return 0;
-}
-
-
-void free_page_type(struct page_info *page, unsigned long type)
+    /* No need for atomic update of type_info here: no one else updates it. */
+ wmb();
+ if ( rc == -EAGAIN )
+ {
+ page->u.inuse.type_info |= PGT_partial;
+ }
+ else if ( rc == -EINTR )
+ {
+ ASSERT((page->u.inuse.type_info &
+ (PGT_count_mask|PGT_validated|PGT_partial)) == 1);
+ page->u.inuse.type_info &= ~PGT_count_mask;
+ }
+ else if ( rc )
+ {
+ ASSERT(rc < 0);
+ MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %"
+ PRtype_info ": caf=%08x taf=%" PRtype_info,
+ page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)),
+ type, page->count_info, page->u.inuse.type_info);
+ page->u.inuse.type_info = 0;
+ }
+ else
+ {
+ page->u.inuse.type_info |= PGT_validated;
+ }
+
+ return rc;
+}
+
+
+int free_page_type(struct page_info *page, unsigned long type,
+ int preemptible)
{
struct domain *owner = page_get_owner(page);
unsigned long gmfn;
+ int rc;
if ( likely(owner != NULL) )
{
@@ -1842,7 +1961,7 @@ void free_page_type(struct page_info *pa
paging_mark_dirty(owner, page_to_mfn(page));
if ( shadow_mode_refcounts(owner) )
- return;
+ return 0;
gmfn = mfn_to_gmfn(owner, page_to_mfn(page));
ASSERT(VALID_M2P(gmfn));
@@ -1850,42 +1969,80 @@ void free_page_type(struct page_info *pa
}
}
+ if ( !(type & PGT_partial) )
+ {
+ page->nr_validated_ptes = 1U << PAGETABLE_ORDER;
+ page->partial_pte = 0;
+ }
switch ( type & PGT_type_mask )
{
case PGT_l1_page_table:
free_l1_table(page);
+ rc = 0;
break;
-
case PGT_l2_page_table:
- free_l2_table(page);
+ rc = free_l2_table(page, preemptible);
break;
-
-#if CONFIG_PAGING_LEVELS >= 3
case PGT_l3_page_table:
- free_l3_table(page);
+#if CONFIG_PAGING_LEVELS == 3
+ if ( !(type & PGT_partial) )
+ page->nr_validated_ptes = L3_PAGETABLE_ENTRIES;
+#endif
+ rc = free_l3_table(page, preemptible);
break;
-#endif
-
-#if CONFIG_PAGING_LEVELS >= 4
case PGT_l4_page_table:
- free_l4_table(page);
+ rc = free_l4_table(page, preemptible);
break;
-#endif
-
default:
- printk("%s: type %lx pfn %lx\n",__FUNCTION__,
- type, page_to_mfn(page));
+        MEM_LOG("type %lx pfn %lx", type, page_to_mfn(page));
+ rc = -EINVAL;
BUG();
}
-}
-
-
-void put_page_type(struct page_info *page)
+
+    /* No need for atomic update of type_info here: no one else updates it. */
+ if ( rc == 0 )
+ {
+ /*
+ * Record TLB information for flush later. We do not stamp page tables
+ * when running in shadow mode:
+ * 1. Pointless, since it's the shadow pt's which must be tracked.
+ * 2. Shadow mode reuses this field for shadowed page tables to
+ * store flags info -- we don't want to conflict with that.
+ */
+ if ( !(shadow_mode_enabled(page_get_owner(page)) &&
+ (page->count_info & PGC_page_table)) )
+ page->tlbflush_timestamp = tlbflush_current_time();
+ wmb();
+ page->u.inuse.type_info--;
+ }
+ else if ( rc == -EINTR )
+ {
+ ASSERT(!(page->u.inuse.type_info &
+ (PGT_count_mask|PGT_validated|PGT_partial)));
+ if ( !(shadow_mode_enabled(page_get_owner(page)) &&
+ (page->count_info & PGC_page_table)) )
+ page->tlbflush_timestamp = tlbflush_current_time();
+ wmb();
+ page->u.inuse.type_info |= PGT_validated;
+ }
+ else
+ {
+ BUG_ON(rc != -EAGAIN);
+ wmb();
+ page->u.inuse.type_info |= PGT_partial;
+ }
+
+ return rc;
+}
+
+
+static int __put_page_type(struct page_info *page,
+ int preemptible)
{
unsigned long nx, x, y = page->u.inuse.type_info;
- again:
- do {
+ for ( ; ; )
+ {
x = y;
nx = x - 1;
@@ -1894,21 +2051,19 @@ void put_page_type(struct page_info *pag
if ( unlikely((nx & PGT_count_mask) == 0) )
{
if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) &&
- likely(nx & PGT_validated) )
+ likely(nx & (PGT_validated|PGT_partial)) )
{
/*
* Page-table pages must be unvalidated when count is zero. The
* 'free' is safe because the refcnt is non-zero and validated
* bit is clear => other ops will spin or fail.
*/
- if ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x,
- x & ~PGT_validated)) != x) )
- goto again;
+ nx = x & ~(PGT_validated|PGT_partial);
+ if ( unlikely((y = cmpxchg(&page->u.inuse.type_info,
+ x, nx)) != x) )
+ continue;
/* We cleared the 'valid bit' so we do the clean up. */
- free_page_type(page, x);
- /* Carry on, but with the 'valid bit' now clear. */
- x &= ~PGT_validated;
- nx &= ~PGT_validated;
+ return free_page_type(page, x, preemptible);
}
/*
@@ -1922,25 +2077,33 @@ void put_page_type(struct page_info *pag
(page->count_info & PGC_page_table)) )
page->tlbflush_timestamp = tlbflush_current_time();
}
- }
- while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
-}
-
-
-int get_page_type(struct page_info *page, unsigned long type)
+
+ if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) )
+ break;
+
+ if ( preemptible && hypercall_preempt_check() )
+ return -EINTR;
+ }
+
+ return 0;
+}
+
+
+static int __get_page_type(struct page_info *page, unsigned long type,
+ int preemptible)
{
unsigned long nx, x, y = page->u.inuse.type_info;
ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2)));
- again:
- do {
+ for ( ; ; )
+ {
x = y;
nx = x + 1;
if ( unlikely((nx & PGT_count_mask) == 0) )
{
MEM_LOG("Type count overflow on pfn %lx", page_to_mfn(page));
- return 0;
+ return -EINVAL;
}
else if ( unlikely((x & PGT_count_mask) == 0) )
{
@@ -1993,28 +2156,43 @@ int get_page_type(struct page_info *page
/* Don't log failure if it could be a recursive-mapping attempt. */
if ( ((x & PGT_type_mask) == PGT_l2_page_table) &&
(type == PGT_l1_page_table) )
- return 0;
+ return -EINVAL;
if ( ((x & PGT_type_mask) == PGT_l3_page_table) &&
(type == PGT_l2_page_table) )
- return 0;
+ return -EINVAL;
if ( ((x & PGT_type_mask) == PGT_l4_page_table) &&
(type == PGT_l3_page_table) )
- return 0;
+ return -EINVAL;
MEM_LOG("Bad type (saw %" PRtype_info " != exp %" PRtype_info ") "
"for mfn %lx (pfn %lx)",
x, type, page_to_mfn(page),
get_gpfn_from_mfn(page_to_mfn(page)));
- return 0;
+ return -EINVAL;
}
else if ( unlikely(!(x & PGT_validated)) )
{
- /* Someone else is updating validation of this page. Wait... */
- while ( (y = page->u.inuse.type_info) == x )
- cpu_relax();
- goto again;
- }
- }
- while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
+ if ( !(x & PGT_partial) )
+ {
+ /* Someone else is updating validation of this page. Wait... */
+ while ( (y = page->u.inuse.type_info) == x )
+ {
+ if ( preemptible && hypercall_preempt_check() )
+ return -EINTR;
+ cpu_relax();
+ }
+ continue;
+ }
+ /* Type ref count was left at 1 when PGT_partial got set. */
+ ASSERT((x & PGT_count_mask) == 1);
+ nx = x & ~PGT_partial;
+ }
+
+ if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) )
+ break;
+
+ if ( preemptible && hypercall_preempt_check() )
+ return -EINTR;
+ }
if ( unlikely((x & PGT_type_mask) != type) )
{
@@ -2032,25 +2210,42 @@ int get_page_type(struct page_info *page
if ( unlikely(!(nx & PGT_validated)) )
{
- /* Try to validate page type; drop the new reference on failure. */
- if ( unlikely(!alloc_page_type(page, type)) )
- {
- MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %"
- PRtype_info ": caf=%08x taf=%" PRtype_info,
- page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)),
- type, page->count_info, page->u.inuse.type_info);
- /* Noone else can get a reference. We hold the only ref. */
- page->u.inuse.type_info = 0;
- return 0;
- }
-
- /* Noone else is updating simultaneously. */
- __set_bit(_PGT_validated, &page->u.inuse.type_info);
- }
-
- return 1;
-}
-
+ if ( !(x & PGT_partial) )
+ {
+ page->nr_validated_ptes = 0;
+ page->partial_pte = 0;
+ }
+ return alloc_page_type(page, type, preemptible);
+ }
+
+ return 0;
+}
+
+void put_page_type(struct page_info *page)
+{
+ int rc = __put_page_type(page, 0);
+ ASSERT(rc == 0);
+ (void)rc;
+}
+
+int get_page_type(struct page_info *page, unsigned long type)
+{
+ int rc = __get_page_type(page, type, 0);
+ if ( likely(rc == 0) )
+ return 1;
+ ASSERT(rc == -EINVAL);
+ return 0;
+}
+
+int put_page_type_preemptible(struct page_info *page)
+{
+ return __put_page_type(page, 1);
+}
+
+int get_page_type_preemptible(struct page_info *page, unsigned long type)
+{
+ return __get_page_type(page, type, 1);
+}
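
The public API keeps its old boolean shape through thin wrappers, while
preemption-aware code switches to the -errno variants. A hedged usage
fragment contrasting the two (callers above fold -EINTR into -EAGAIN the
same way):

    /* Legacy boolean interface, unchanged for existing callers: */
    if ( !get_page_type(page, type) )
        return -EINVAL;

    /* New preemptible interface: */
    switch ( get_page_type_preemptible(page, type) )
    {
    case 0:
        break;              /* fully validated */
    case -EAGAIN:           /* parked as PGT_partial */
    case -EINTR:            /* no progress made yet */
        return -EAGAIN;     /* arrange a hypercall continuation */
    default:
        return -EINVAL;
    }
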
void cleanup_page_cacheattr(struct page_info *page)
{
@@ -2087,7 +2282,7 @@ int new_guest_cr3(unsigned long mfn)
l4e_from_pfn(
mfn,
(_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)),
- pagetable_get_pfn(v->arch.guest_table), 0);
+ pagetable_get_pfn(v->arch.guest_table), 0, 0) == 0;
if ( unlikely(!okay) )
{
MEM_LOG("Error while installing new compat baseptr %lx", mfn);
@@ -2102,7 +2297,7 @@ int new_guest_cr3(unsigned long mfn)
#endif
okay = paging_mode_refcounts(d)
? get_page_from_pagenr(mfn, d)
- : get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d);
+ : !get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0);
if ( unlikely(!okay) )
{
MEM_LOG("Error while installing new baseptr %lx", mfn);
@@ -2276,9 +2471,7 @@ int do_mmuext_op(
{
if ( hypercall_preempt_check() )
{
- rc = hypercall_create_continuation(
- __HYPERVISOR_mmuext_op, "hihi",
- uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
+ rc = -EAGAIN;
break;
}
@@ -2325,10 +2518,14 @@ int do_mmuext_op(
if ( paging_mode_refcounts(FOREIGNDOM) )
break;
- okay = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM);
+ rc = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM, 1);
+ okay = !rc;
if ( unlikely(!okay) )
{
- MEM_LOG("Error while pinning mfn %lx", mfn);
+ if ( rc == -EINTR )
+ rc = -EAGAIN;
+ else if ( rc != -EAGAIN )
+ MEM_LOG("Error while pinning mfn %lx", mfn);
break;
}
@@ -2373,8 +2570,11 @@ int do_mmuext_op(
{
put_page_and_type(page);
put_page(page);
- /* A page is dirtied when its pin status is cleared. */
- paging_mark_dirty(d, mfn);
+ if ( !rc )
+ {
+ /* A page is dirtied when its pin status is cleared. */
+ paging_mark_dirty(d, mfn);
+ }
}
else
{
@@ -2398,8 +2598,8 @@ int do_mmuext_op(
if ( paging_mode_refcounts(d) )
okay = get_page_from_pagenr(mfn, d);
else
- okay = get_page_and_type_from_pagenr(
- mfn, PGT_root_page_table, d);
+ okay = !get_page_and_type_from_pagenr(
+ mfn, PGT_root_page_table, d, 0);
if ( unlikely(!okay) )
{
MEM_LOG("Error while installing new mfn %lx", mfn);
@@ -2517,6 +2717,11 @@ int do_mmuext_op(
guest_handle_add_offset(uops, 1);
}
+ if ( rc == -EAGAIN )
+ rc = hypercall_create_continuation(
+ __HYPERVISOR_mmuext_op, "hihi",
+ uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
+
process_deferred_ops();
perfc_add(num_mmuext_ops, i);
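
Preemption handling is restructured: inside the loop a pending preemption
only records rc = -EAGAIN and breaks, and the continuation is created once,
after the loop, so the MMU_UPDATE_PREEMPTED encoding of the remaining count
is built in exactly one place. The pattern, condensed from the hunks above:

    for ( i = 0; i < count; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            rc = -EAGAIN;            /* defer; don't build it here */
            break;
        }
        /* ... process operation i ... */
    }
    if ( rc == -EAGAIN )
        rc = hypercall_create_continuation(
            __HYPERVISOR_mmuext_op, "hihi",
            uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
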
@@ -2576,9 +2781,7 @@ int do_mmu_update(
{
if ( hypercall_preempt_check() )
{
- rc = hypercall_create_continuation(
- __HYPERVISOR_mmu_update, "hihi",
- ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
+ rc = -EAGAIN;
break;
}
@@ -2601,7 +2804,7 @@ int do_mmu_update(
*/
case MMU_NORMAL_PT_UPDATE:
case MMU_PT_UPDATE_PRESERVE_AD:
- rc = xsm_mmu_normal_update(d, req.val);
+ rc = xsm_mmu_normal_update(d, FOREIGNDOM, req.val);
if ( rc )
break;
@@ -2653,27 +2856,29 @@ int do_mmu_update(
cmd == MMU_PT_UPDATE_PRESERVE_AD);
}
break;
-#if CONFIG_PAGING_LEVELS >= 3
case PGT_l3_page_table:
{
l3_pgentry_t l3e = l3e_from_intpte(req.val);
- okay = mod_l3_entry(va, l3e, mfn,
- cmd == MMU_PT_UPDATE_PRESERVE_AD);
+ rc = mod_l3_entry(va, l3e, mfn,
+ cmd == MMU_PT_UPDATE_PRESERVE_AD, 1);
+ okay = !rc;
}
break;
-#endif
#if CONFIG_PAGING_LEVELS >= 4
case PGT_l4_page_table:
{
l4_pgentry_t l4e = l4e_from_intpte(req.val);
- okay = mod_l4_entry(va, l4e, mfn,
- cmd == MMU_PT_UPDATE_PRESERVE_AD);
+ rc = mod_l4_entry(va, l4e, mfn,
+ cmd == MMU_PT_UPDATE_PRESERVE_AD, 1);
+ okay = !rc;
}
break;
#endif
}
put_page_type(page);
+ if ( rc == -EINTR )
+ rc = -EAGAIN;
}
break;
@@ -2741,6 +2946,11 @@ int do_mmu_update(
guest_handle_add_offset(ureqs, 1);
}
+
+ if ( rc == -EAGAIN )
+ rc = hypercall_create_continuation(
+ __HYPERVISOR_mmu_update, "hihi",
+ ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
process_deferred_ops();
@@ -3111,7 +3321,7 @@ int do_update_va_mapping(unsigned long v
if ( unlikely(!access_ok(va, 1) && !paging_mode_external(d)) )
return -EINVAL;
- rc = xsm_update_va_mapping(d, val);
+ rc = xsm_update_va_mapping(d, FOREIGNDOM, val);
if ( rc )
return rc;
@@ -3695,9 +3905,8 @@ static int ptwr_emulated_update(
nl1e = l1e_from_intpte(val);
if ( unlikely(!get_page_from_l1e(nl1e, d)) )
{
- if ( (CONFIG_PAGING_LEVELS >= 3) && is_pv_32bit_domain(d) &&
- (bytes == 4) && (unaligned_addr & 4) && !do_cmpxchg &&
- (l1e_get_flags(nl1e) & _PAGE_PRESENT) )
+ if ( is_pv_32bit_domain(d) && (bytes == 4) && (unaligned_addr & 4) &&
+ !do_cmpxchg && (l1e_get_flags(nl1e) & _PAGE_PRESENT) )
{
/*
* If this is an upper-half write to a PAE PTE then we assume that
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/mm/hap/hap.c
--- a/xen/arch/x86/mm/hap/hap.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/mm/hap/hap.c Fri Sep 12 14:47:40 2008 +0900
@@ -37,6 +37,7 @@
#include <asm/shared.h>
#include <asm/hap.h>
#include <asm/paging.h>
+#include <asm/p2m.h>
#include <asm/domain.h>
#include <xen/numa.h>
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/mm/shadow/common.c Fri Sep 12 14:47:40 2008 +0900
@@ -39,6 +39,7 @@
#include <xen/numa.h>
#include "private.h"
+DEFINE_PER_CPU(uint32_t,trace_shadow_path_flags);
/* Set up the shadow-specific parts of a domain struct at start of day.
* Called for every domain from arch_domain_create() */
@@ -630,6 +631,8 @@ void oos_fixup_add(struct vcpu *v, mfn_t
if ( mfn_x(oos_fixup[idx].smfn[next]) != INVALID_MFN )
{
+ TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_OOS_FIXUP_EVICT);
+
/* Reuse this slot and remove current writable mapping. */
sh_remove_write_access_from_sl1p(v, gmfn,
oos_fixup[idx].smfn[next],
@@ -645,6 +648,8 @@ void oos_fixup_add(struct vcpu *v, mfn_t
oos_fixup[idx].smfn[next] = smfn;
oos_fixup[idx].off[next] = off;
oos_fixup[idx].next = (next + 1) % SHADOW_OOS_FIXUPS;
+
+ TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_OOS_FIXUP_ADD);
return;
}
}
@@ -687,6 +692,16 @@ static int oos_remove_write_access(struc
}
+static inline void trace_resync(int event, mfn_t gmfn)
+{
+ if ( tb_init_done )
+ {
+ /* Convert gmfn to gfn */
+ unsigned long gfn = mfn_to_gfn(current->domain, gmfn);
+ __trace_var(event, 0/*!tsc*/, sizeof(gfn), (unsigned char*)&gfn);
+ }
+}
+
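
The trace helpers added in this file share one template: test tb_init_done
first so the tracing-disabled path stays a single predicted branch, and
translate the MFN to a GFN so the record means something to consumers outside
the hypervisor. A template for further events in the same style (the event
name is illustrative):

    static inline void trace_example_event(u32 event, mfn_t gmfn)
    {
        if ( tb_init_done )
        {
            /* Convert gmfn to gfn, as the trace consumers expect */
            unsigned long gfn = mfn_to_gfn(current->domain, gmfn);
            __trace_var(event, 0/*!tsc*/, sizeof(gfn),
                        (unsigned char *)&gfn);
        }
    }
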
/* Pull all the entries on an out-of-sync page back into sync. */
static void _sh_resync(struct vcpu *v, mfn_t gmfn,
struct oos_fixup *fixup, mfn_t snp)
@@ -700,8 +715,8 @@ static void _sh_resync(struct vcpu *v, m
& ~SHF_L1_ANY));
ASSERT(!sh_page_has_multiple_shadows(mfn_to_page(gmfn)));
- SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx, va=%lx\n",
- v->domain->domain_id, v->vcpu_id, mfn_x(gmfn), va);
+ SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx\n",
+ v->domain->domain_id, v->vcpu_id, mfn_x(gmfn));
/* Need to pull write access so the page *stays* in sync. */
if ( oos_remove_write_access(v, gmfn, fixup) )
@@ -719,6 +734,7 @@ static void _sh_resync(struct vcpu *v, m
/* Now we know all the entries are synced, and will stay that way */
pg->shadow_flags &= ~SHF_out_of_sync;
perfc_incr(shadow_resync);
+ trace_resync(TRC_SHADOW_RESYNC_FULL, gmfn);
}
@@ -930,6 +946,7 @@ void sh_resync_all(struct vcpu *v, int s
/* Update the shadows and leave the page OOS. */
if ( sh_skip_sync(v, oos[idx]) )
continue;
+ trace_resync(TRC_SHADOW_RESYNC_ONLY, oos[idx]);
_sh_resync_l1(other, oos[idx], oos_snapshot[idx]);
}
else
@@ -945,15 +962,16 @@ void sh_resync_all(struct vcpu *v, int s
}
}
-/* Allow a shadowed page to go out of sync */
+/* Allow a shadowed page to go out of sync. Unsyncs are traced in
+ * multi.c:sh_page_fault() */
int sh_unsync(struct vcpu *v, mfn_t gmfn)
{
struct page_info *pg;
ASSERT(shadow_locked_by_me(v->domain));
- SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx va %lx\n",
- v->domain->domain_id, v->vcpu_id, mfn_x(gmfn), va);
+ SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx\n",
+ v->domain->domain_id, v->vcpu_id, mfn_x(gmfn));
pg = mfn_to_page(gmfn);
@@ -970,6 +988,7 @@ int sh_unsync(struct vcpu *v, mfn_t gmfn
pg->shadow_flags |= SHF_out_of_sync|SHF_oos_may_write;
oos_hash_add(v, gmfn);
perfc_incr(shadow_unsync);
+ TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_UNSYNC);
return 1;
}
@@ -1005,6 +1024,7 @@ void shadow_promote(struct vcpu *v, mfn_
ASSERT(!test_bit(type, &page->shadow_flags));
set_bit(type, &page->shadow_flags);
+ TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_PROMOTE);
}
void shadow_demote(struct vcpu *v, mfn_t gmfn, u32 type)
@@ -1027,6 +1047,8 @@ void shadow_demote(struct vcpu *v, mfn_t
#endif
clear_bit(_PGC_page_table, &page->count_info);
}
+
+ TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_DEMOTE);
}
/**************************************************************************/
@@ -1094,6 +1116,7 @@ sh_validate_guest_entry(struct vcpu *v,
ASSERT((page->shadow_flags
& (SHF_L4_64|SHF_L3_64|SHF_L2H_64|SHF_L2_64|SHF_L1_64)) == 0);
#endif
+ this_cpu(trace_shadow_path_flags) |= (result<<(TRCE_SFLAG_SET_CHANGED));
return result;
}
@@ -1295,6 +1318,18 @@ static void shadow_unhook_mappings(struc
}
}
+static inline void trace_shadow_prealloc_unpin(struct domain *d, mfn_t smfn)
+{
+ if ( tb_init_done )
+ {
+ /* Convert smfn to gfn */
+ unsigned long gfn;
+ ASSERT(mfn_valid(smfn));
+ gfn = mfn_to_gfn(d, _mfn(mfn_to_shadow_page(smfn)->backpointer));
+ __trace_var(TRC_SHADOW_PREALLOC_UNPIN, 0/*!tsc*/,
+ sizeof(gfn), (unsigned char*)&gfn);
+ }
+}
/* Make sure there are at least count order-sized pages
* available in the shadow page pool. */
@@ -1327,6 +1362,7 @@ static void _shadow_prealloc(
smfn = shadow_page_to_mfn(sp);
/* Unpin this top-level shadow */
+ trace_shadow_prealloc_unpin(d, smfn);
sh_unpin(v, smfn);
/* See if that freed up enough space */
@@ -1343,6 +1379,7 @@ static void _shadow_prealloc(
{
if ( !pagetable_is_null(v2->arch.shadow_table[i]) )
{
+ TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_PREALLOC_UNHOOK);
shadow_unhook_mappings(v,
pagetable_get_mfn(v2->arch.shadow_table[i]));
@@ -2200,6 +2237,16 @@ void sh_destroy_shadow(struct vcpu *v, m
}
}
+static inline void trace_shadow_wrmap_bf(mfn_t gmfn)
+{
+ if ( tb_init_done )
+ {
+ /* Convert gmfn to gfn */
+ unsigned long gfn = mfn_to_gfn(current->domain, gmfn);
+        __trace_var(TRC_SHADOW_WRMAP_BF, 0/*!tsc*/,
+                    sizeof(gfn), (unsigned char *)&gfn);
+ }
+}
+
/**************************************************************************/
/* Remove all writeable mappings of a guest frame from the shadow tables
* Returns non-zero if we need to flush TLBs.
@@ -2265,6 +2312,8 @@ int sh_remove_write_access(struct vcpu *
|| (pg->u.inuse.type_info & PGT_count_mask) == 0 )
return 0;
+ TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_WRMAP);
+
perfc_incr(shadow_writeable);
/* If this isn't a "normal" writeable page, the domain is trying to
@@ -2285,11 +2334,14 @@ int sh_remove_write_access(struct vcpu *
* and that mapping is likely to be in the current pagetable,
* in the guest's linear map (on non-HIGHPTE linux and windows)*/
-#define GUESS(_a, _h) do { \
+#define GUESS(_a, _h) do { \
if ( v->arch.paging.mode->shadow.guess_wrmap(v, (_a), gmfn) ) \
- perfc_incr(shadow_writeable_h_ ## _h); \
- if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 ) \
- return 1; \
+ perfc_incr(shadow_writeable_h_ ## _h); \
+ if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 ) \
+ { \
+ TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_WRMAP_GUESS_FOUND); \
+ return 1; \
+ } \
} while (0)
if ( level == 0 && fault_addr )
@@ -2377,6 +2429,7 @@ int sh_remove_write_access(struct vcpu *
#endif /* SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC */
/* Brute-force search of all the shadows, by walking the hash */
+ trace_shadow_wrmap_bf(gmfn);
if ( level == 0 )
perfc_incr(shadow_writeable_bf_1);
else
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/mm/shadow/multi.c Fri Sep 12 14:47:40 2008 +0900
@@ -225,6 +225,7 @@ static uint32_t set_ad_bits(void *guest_
static uint32_t set_ad_bits(void *guest_p, void *walk_p, int set_dirty)
{
guest_intpte_t old, new;
+ int ret = 0;
old = *(guest_intpte_t *)walk_p;
new = old | _PAGE_ACCESSED | (set_dirty ? _PAGE_DIRTY : 0);
@@ -234,10 +235,16 @@ static uint32_t set_ad_bits(void *guest_
* into the guest table as well. If the guest table has changed
     * under our feet then leave it alone. */
*(guest_intpte_t *)walk_p = new;
- if ( cmpxchg(((guest_intpte_t *)guest_p), old, new) == old )
- return 1;
- }
- return 0;
+        if ( cmpxchg(((guest_intpte_t *)guest_p), old, new) == old )
+ ret = 1;
+
+ /* FIXME -- this code is longer than necessary */
+        if ( set_dirty )
+ TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SET_AD);
+ else
+ TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SET_A);
+ }
+ return ret;
}
/* This validation is called with lock held, and after write permission
@@ -1432,6 +1439,7 @@ static int shadow_set_l1e(struct vcpu *v
{
/* About to install a new reference */
if ( shadow_mode_refcounts(d) ) {
+ TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_GET_REF);
if ( shadow_get_page_from_l1e(new_sl1e, d) == 0 )
{
/* Doesn't look like a pagetable. */
@@ -1461,6 +1469,7 @@ static int shadow_set_l1e(struct vcpu *v
{
shadow_vram_put_l1e(old_sl1e, sl1e, sl1mfn, d);
shadow_put_page_from_l1e(old_sl1e, d);
+ TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_PUT_REF);
}
}
return flags;
@@ -2896,6 +2905,7 @@ static inline void check_for_early_unsha
{
perfc_incr(shadow_early_unshadow);
sh_remove_shadows(v, gmfn, 1, 0 /* Fast, can fail to unshadow */ );
+ TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_EARLY_UNSHADOW);
}
v->arch.paging.shadow.last_emulated_mfn_for_unshadow = mfn_x(gmfn);
#endif
@@ -3012,6 +3022,132 @@ static void sh_prefetch(struct vcpu *v,
#endif /* SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH */
+#if GUEST_PAGING_LEVELS == 4
+typedef u64 guest_va_t;
+typedef u64 guest_pa_t;
+#elif GUEST_PAGING_LEVELS == 3
+typedef u32 guest_va_t;
+typedef u64 guest_pa_t;
+#else
+typedef u32 guest_va_t;
+typedef u32 guest_pa_t;
+#endif
+
+static inline void trace_shadow_gen(u32 event, guest_va_t va)
+{
+ if ( tb_init_done )
+ {
+ event |= (GUEST_PAGING_LEVELS-2)<<8;
+ __trace_var(event, 0/*!tsc*/, sizeof(va), (unsigned char*)&va);
+ }
+}
+
+static inline void trace_shadow_fixup(guest_l1e_t gl1e,
+ guest_va_t va)
+{
+ if ( tb_init_done )
+ {
+ struct {
+ /* for PAE, guest_l1e may be 64 while guest_va may be 32;
+               so put it first for alignment's sake. */
+ guest_l1e_t gl1e;
+ guest_va_t va;
+ u32 flags;
+ } __attribute__((packed)) d;
+ u32 event;
+
+ event = TRC_SHADOW_FIXUP | ((GUEST_PAGING_LEVELS-2)<<8);
+
+ d.gl1e = gl1e;
+ d.va = va;
+ d.flags = this_cpu(trace_shadow_path_flags);
+
+ __trace_var(event, 0/*!tsc*/, sizeof(d), (unsigned char*)&d);
+ }
+}
+
+static inline void trace_not_shadow_fault(guest_l1e_t gl1e,
+ guest_va_t va)
+{
+ if ( tb_init_done )
+ {
+ struct {
+ /* for PAE, guest_l1e may be 64 while guest_va may be 32;
+               so put it first for alignment's sake. */
+ guest_l1e_t gl1e;
+ guest_va_t va;
+ u32 flags;
+ } __attribute__((packed)) d;
+ u32 event;
+
+ event = TRC_SHADOW_NOT_SHADOW | ((GUEST_PAGING_LEVELS-2)<<8);
+
+ d.gl1e = gl1e;
+ d.va = va;
+ d.flags = this_cpu(trace_shadow_path_flags);
+
+ __trace_var(event, 0/*!tsc*/, sizeof(d), (unsigned char*)&d);
+ }
+}
+
+static inline void trace_shadow_emulate_other(u32 event,
+ guest_va_t va,
+ gfn_t gfn)
+{
+ if ( tb_init_done )
+ {
+ struct {
+ /* for PAE, guest_l1e may be 64 while guest_va may be 32;
+               so put it first for alignment's sake. */
+#if GUEST_PAGING_LEVELS == 2
+ u32 gfn;
+#else
+ u64 gfn;
+#endif
+ guest_va_t va;
+ } __attribute__((packed)) d;
+
+ event |= ((GUEST_PAGING_LEVELS-2)<<8);
+
+        d.gfn = gfn_x(gfn);
+ d.va = va;
+
+ __trace_var(event, 0/*!tsc*/, sizeof(d), (unsigned char*)&d);
+ }
+}
+
+#if GUEST_PAGING_LEVELS == 3
+static DEFINE_PER_CPU(guest_va_t,trace_emulate_initial_va);
+static DEFINE_PER_CPU(int,trace_extra_emulation_count);
+#endif
+static DEFINE_PER_CPU(guest_pa_t,trace_emulate_write_val);
+
+static inline void trace_shadow_emulate(guest_l1e_t gl1e, unsigned long va)
+{
+ if ( tb_init_done )
+ {
+ struct {
+ /* for PAE, guest_l1e may be 64 while guest_va may be 32;
+               so put it first for alignment's sake. */
+ guest_l1e_t gl1e, write_val;
+ guest_va_t va;
+ unsigned flags:29, emulation_count:3;
+ } __attribute__((packed)) d;
+ u32 event;
+
+ event = TRC_SHADOW_EMULATE | ((GUEST_PAGING_LEVELS-2)<<8);
+
+ d.gl1e = gl1e;
+ d.write_val.l1 = this_cpu(trace_emulate_write_val);
+ d.va = va;
+#if GUEST_PAGING_LEVELS == 3
+ d.emulation_count = this_cpu(trace_extra_emulation_count);
+#endif
+ d.flags = this_cpu(trace_shadow_path_flags);
+
+ __trace_var(event, 0/*!tsc*/, sizeof(d), (unsigned char*)&d);
+ }
+}
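
All three fault records above share one layout rule, spelled out in their
comments: the guest_l1e_t member leads because under PAE it is 64-bit while
guest_va_t is 32-bit, and leading with the widest field plus
__attribute__((packed)) yields a dense record whose size the decoder can
predict from GUEST_PAGING_LEVELS. Restated as a standalone sketch:

    struct shadow_fault_rec {
        guest_l1e_t gl1e;   /* 8 bytes for PAE/64-bit guests, else 4 */
        guest_va_t  va;     /* 8 bytes only for 64-bit guests */
        u32         flags;  /* accumulated TRCE_SFLAG_* path bits */
    } __attribute__((packed));
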
/**************************************************************************/
/* Entry points into the shadow code */
@@ -3027,8 +3163,8 @@ static int sh_page_fault(struct vcpu *v,
{
struct domain *d = v->domain;
walk_t gw;
- gfn_t gfn;
- mfn_t gmfn, sl1mfn=_mfn(0);
+ gfn_t gfn = _gfn(0);
+ mfn_t gmfn, sl1mfn = _mfn(0);
shadow_l1e_t sl1e, *ptr_sl1e;
paddr_t gpa;
struct sh_emulate_ctxt emul_ctxt;
@@ -3043,7 +3179,7 @@ static int sh_page_fault(struct vcpu *v,
SHADOW_PRINTK("d:v=%u:%u va=%#lx err=%u, rip=%lx\n",
v->domain->domain_id, v->vcpu_id, va, regs->error_code,
- regs->rip);
+ regs->eip);
perfc_incr(shadow_fault);
@@ -3132,6 +3268,7 @@ static int sh_page_fault(struct vcpu *v,
reset_early_unshadow(v);
perfc_incr(shadow_fault_fast_gnp);
SHADOW_PRINTK("fast path not-present\n");
+ trace_shadow_gen(TRC_SHADOW_FAST_PROPAGATE, va);
return 0;
}
else
@@ -3145,6 +3282,7 @@ static int sh_page_fault(struct vcpu *v,
perfc_incr(shadow_fault_fast_mmio);
SHADOW_PRINTK("fast path mmio %#"PRIpaddr"\n", gpa);
reset_early_unshadow(v);
+ trace_shadow_gen(TRC_SHADOW_FAST_MMIO, va);
return (handle_mmio_with_translation(va, gpa >> PAGE_SHIFT)
? EXCRET_fault_fixed : 0);
}
@@ -3155,6 +3293,7 @@ static int sh_page_fault(struct vcpu *v,
* Retry and let the hardware give us the right fault next time. */
perfc_incr(shadow_fault_fast_fail);
SHADOW_PRINTK("fast path false alarm!\n");
+ trace_shadow_gen(TRC_SHADOW_FALSE_FAST_PATH, va);
return EXCRET_fault_fixed;
}
}
@@ -3190,7 +3329,7 @@ static int sh_page_fault(struct vcpu *v,
perfc_incr(shadow_fault_bail_real_fault);
SHADOW_PRINTK("not a shadow fault\n");
reset_early_unshadow(v);
- return 0;
+ goto propagate;
}
/* It's possible that the guest has put pagetables in memory that it has
@@ -3200,7 +3339,7 @@ static int sh_page_fault(struct vcpu *v,
if ( unlikely(d->is_shutting_down) )
{
SHADOW_PRINTK("guest is shutting down\n");
- return 0;
+ goto propagate;
}
/* What kind of access are we dealing with? */
@@ -3218,7 +3357,7 @@ static int sh_page_fault(struct vcpu *v,
SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"PRI_mfn"\n",
gfn_x(gfn), mfn_x(gmfn));
reset_early_unshadow(v);
- return 0;
+ goto propagate;
}
#if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
@@ -3229,6 +3368,8 @@ static int sh_page_fault(struct vcpu *v,
shadow_lock(d);
+ TRACE_CLEAR_PATH_FLAGS;
+
rc = gw_remove_write_accesses(v, va, &gw);
/* First bit set: Removed write access to a page. */
@@ -3281,6 +3422,7 @@ static int sh_page_fault(struct vcpu *v,
* Get out of the fault handler immediately. */
ASSERT(d->is_shutting_down);
shadow_unlock(d);
+ trace_shadow_gen(TRC_SHADOW_DOMF_DYING, va);
return 0;
}
@@ -3383,6 +3525,7 @@ static int sh_page_fault(struct vcpu *v,
d->arch.paging.log_dirty.fault_count++;
reset_early_unshadow(v);
+ trace_shadow_fixup(gw.l1e, va);
done:
sh_audit_gw(v, &gw);
SHADOW_PRINTK("fixed\n");
@@ -3405,6 +3548,8 @@ static int sh_page_fault(struct vcpu *v,
mfn_x(gmfn));
perfc_incr(shadow_fault_emulate_failed);
sh_remove_shadows(v, gmfn, 0 /* thorough */, 1 /* must succeed */);
+ trace_shadow_emulate_other(TRC_SHADOW_EMULATE_UNSHADOW_USER,
+ va, gfn);
goto done;
}
@@ -3421,6 +3566,8 @@ static int sh_page_fault(struct vcpu *v,
shadow_audit_tables(v);
shadow_unlock(d);
+ this_cpu(trace_emulate_write_val) = 0;
+
#if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
early_emulation:
#endif
@@ -3446,6 +3593,8 @@ static int sh_page_fault(struct vcpu *v,
"injection: cr2=%#lx, mfn=%#lx\n",
va, mfn_x(gmfn));
sh_remove_shadows(v, gmfn, 0 /* thorough */, 1 /* must succeed */);
+ trace_shadow_emulate_other(TRC_SHADOW_EMULATE_UNSHADOW_EVTINJ,
+ va, gfn);
return EXCRET_fault_fixed;
}
}
@@ -3478,6 +3627,10 @@ static int sh_page_fault(struct vcpu *v,
* to support more operations in the emulator. More likely,
* though, this is a hint that this page should not be shadowed. */
shadow_remove_all_shadows(v, gmfn);
+
+ trace_shadow_emulate_other(TRC_SHADOW_EMULATE_UNSHADOW_UNHANDLED,
+ va, gfn);
+ goto emulate_done;
}
#if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
@@ -3504,7 +3657,8 @@ static int sh_page_fault(struct vcpu *v,
#if GUEST_PAGING_LEVELS == 3 /* PAE guest */
if ( r == X86EMUL_OKAY ) {
- int i;
+        int i, emulation_count = 0;
+ this_cpu(trace_emulate_initial_va) = va;
/* Emulate up to four extra instructions in the hope of catching
* the "second half" of a 64-bit pagetable write. */
for ( i = 0 ; i < 4 ; i++ )
@@ -3513,10 +3667,12 @@ static int sh_page_fault(struct vcpu *v,
v->arch.paging.last_write_was_pt = 0;
r = x86_emulate(&emul_ctxt.ctxt, emul_ops);
if ( r == X86EMUL_OKAY )
- {
+ {
+ emulation_count++;
if ( v->arch.paging.last_write_was_pt )
{
perfc_incr(shadow_em_ex_pt);
+
TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_EMULATION_2ND_PT_WRITTEN);
break; /* Don't emulate past the other half of the write */
}
else
@@ -3525,12 +3681,16 @@ static int sh_page_fault(struct vcpu *v,
else
{
perfc_incr(shadow_em_ex_fail);
+ TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_EMULATION_LAST_FAILED);
break; /* Don't emulate again if we failed! */
}
}
+        this_cpu(trace_extra_emulation_count) = emulation_count;
}
#endif /* PAE guest */
+ trace_shadow_emulate(gw.l1e, va);
+ emulate_done:
SHADOW_PRINTK("emulated\n");
return EXCRET_fault_fixed;
@@ -3543,6 +3703,7 @@ static int sh_page_fault(struct vcpu *v,
shadow_audit_tables(v);
reset_early_unshadow(v);
shadow_unlock(d);
+ trace_shadow_gen(TRC_SHADOW_MMIO, va);
return (handle_mmio_with_translation(va, gpa >> PAGE_SHIFT)
? EXCRET_fault_fixed : 0);
@@ -3552,6 +3713,10 @@ static int sh_page_fault(struct vcpu *v,
shadow_audit_tables(v);
reset_early_unshadow(v);
shadow_unlock(d);
+
+propagate:
+ trace_not_shadow_fault(gw.l1e, va);
+
return 0;
}
@@ -3990,7 +4155,7 @@ sh_detach_old_tables(struct vcpu *v)
sh_unmap_domain_page_global(v->arch.paging.shadow.guest_vtable);
v->arch.paging.shadow.guest_vtable = NULL;
}
-#endif
+#endif // !NDEBUG
////
@@ -4446,6 +4611,7 @@ static int sh_guess_wrmap(struct vcpu *v
sl1e = shadow_l1e_remove_flags(sl1e, _PAGE_RW);
r = shadow_set_l1e(v, sl1p, sl1e, sl1mfn);
ASSERT( !(r & SHADOW_SET_ERROR) );
+ TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_WRMAP_GUESS_FOUND);
return 1;
}
#endif
@@ -4800,7 +4966,7 @@ static void emulate_unmap_dest(struct vc
static int
sh_x86_emulate_write(struct vcpu *v, unsigned long vaddr, void *src,
- u32 bytes, struct sh_emulate_ctxt *sh_ctxt)
+ u32 bytes, struct sh_emulate_ctxt *sh_ctxt)
{
void *addr;
@@ -4814,6 +4980,22 @@ sh_x86_emulate_write(struct vcpu *v, uns
shadow_lock(v->domain);
memcpy(addr, src, bytes);
+
+ if ( tb_init_done )
+ {
+#if GUEST_PAGING_LEVELS == 3
+ if ( vaddr == this_cpu(trace_emulate_initial_va) )
+ memcpy(&this_cpu(trace_emulate_write_val), src, bytes);
+ else if ( (vaddr & ~(0x7UL)) == this_cpu(trace_emulate_initial_va) )
+ {
+ TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_EMULATE_FULL_PT);
+ memcpy(&this_cpu(trace_emulate_write_val),
+                   (void *)(((unsigned long) addr) & ~(0x7UL)),
+                   GUEST_PTE_SIZE);
+ }
+#else
+ memcpy(&this_cpu(trace_emulate_write_val), src, bytes);
+#endif
+ }
emulate_unmap_dest(v, addr, bytes, sh_ctxt);
shadow_audit_tables(v);
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/mm/shadow/private.h Fri Sep 12 14:47:40 2008 +0900
@@ -90,6 +90,43 @@ extern int shadow_audit_enable;
#define SHADOW_DEBUG_EMULATE 1
#define SHADOW_DEBUG_P2M 1
#define SHADOW_DEBUG_LOGDIRTY 0
+
+/******************************************************************************
+ * Tracing
+ */
+DECLARE_PER_CPU(uint32_t,trace_shadow_path_flags);
+
+#define TRACE_SHADOW_PATH_FLAG(_x) \
+ do { \
+ this_cpu(trace_shadow_path_flags) |= (1<<(_x)); \
+ } while(0)
+
+#define TRACE_CLEAR_PATH_FLAGS \
+ this_cpu(trace_shadow_path_flags) = 0
+
+enum {
+ TRCE_SFLAG_SET_AD,
+ TRCE_SFLAG_SET_A,
+ TRCE_SFLAG_SHADOW_L1_GET_REF,
+ TRCE_SFLAG_SHADOW_L1_PUT_REF,
+ TRCE_SFLAG_L2_PROPAGATE,
+ TRCE_SFLAG_SET_CHANGED,
+ TRCE_SFLAG_SET_FLUSH,
+ TRCE_SFLAG_SET_ERROR,
+ TRCE_SFLAG_DEMOTE,
+ TRCE_SFLAG_PROMOTE,
+ TRCE_SFLAG_WRMAP,
+ TRCE_SFLAG_WRMAP_GUESS_FOUND,
+ TRCE_SFLAG_WRMAP_BRUTE_FORCE,
+ TRCE_SFLAG_EARLY_UNSHADOW,
+ TRCE_SFLAG_EMULATION_2ND_PT_WRITTEN,
+ TRCE_SFLAG_EMULATION_LAST_FAILED,
+ TRCE_SFLAG_EMULATE_FULL_PT,
+ TRCE_SFLAG_PREALLOC_UNHOOK,
+ TRCE_SFLAG_UNSYNC,
+ TRCE_SFLAG_OOS_FIXUP_ADD,
+ TRCE_SFLAG_OOS_FIXUP_EVICT,
+};
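
Each enumerator is a bit position: TRACE_SHADOW_PATH_FLAG() ORs 1<<(_x) into
a per-CPU word that sh_page_fault() clears on entry (TRACE_CLEAR_PATH_FLAGS)
and that the fixup/emulate/not-shadow records carry out in their flags field,
letting a decoder reconstruct which branches a single fault took.
Decoder-side sketch (hypothetical helper):

    static int record_took_wrmap(uint32_t flags)
    {
        return !!(flags & (1u << TRCE_SFLAG_WRMAP));
    }
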
/******************************************************************************
* The shadow lock.
@@ -143,6 +180,12 @@ extern int shadow_audit_enable;
} while (0)
+/* Size (in bytes) of a guest PTE */
+#if GUEST_PAGING_LEVELS >= 3
+# define GUEST_PTE_SIZE 8
+#else
+# define GUEST_PTE_SIZE 4
+#endif
/******************************************************************************
* Auditing routines
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/physdev.c Fri Sep 12 14:47:40 2008 +0900
@@ -58,9 +58,6 @@ static int get_free_pirq(struct domain *
return i;
}
-/*
- * Caller hold the irq_lock
- */
static int map_domain_pirq(struct domain *d, int pirq, int vector,
struct physdev_map_pirq *map)
{
@@ -136,13 +133,12 @@ done:
return ret;
}
-/*
- * The pirq should has been unbound before this call
- */
+/* The pirq should have been unbound before this call. */
static int unmap_domain_pirq(struct domain *d, int pirq)
{
- int ret = 0;
- int vector;
+ unsigned long flags;
+ irq_desc_t *desc;
+ int vector, ret = 0;
if ( d == NULL || pirq < 0 || pirq >= NR_PIRQS )
return -EINVAL;
@@ -159,33 +155,29 @@ static int unmap_domain_pirq(struct doma
gdprintk(XENLOG_G_ERR, "domain %X: pirq %x not mapped still\n",
d->domain_id, pirq);
ret = -EINVAL;
- }
- else
- {
- unsigned long flags;
- irq_desc_t *desc;
-
- desc = &irq_desc[vector];
- spin_lock_irqsave(&desc->lock, flags);
- if ( desc->msi_desc )
- pci_disable_msi(vector);
-
- if ( desc->handler == &pci_msi_type )
- {
- /* MSI is not shared, so should be released already */
- BUG_ON(desc->status & IRQ_GUEST);
- irq_desc[vector].handler = &no_irq_type;
- }
- spin_unlock_irqrestore(&desc->lock, flags);
-
- d->arch.pirq_vector[pirq] = d->arch.vector_pirq[vector] = 0;
- }
+ goto done;
+ }
+
+ desc = &irq_desc[vector];
+ spin_lock_irqsave(&desc->lock, flags);
+ if ( desc->msi_desc )
+ pci_disable_msi(vector);
+
+ if ( desc->handler == &pci_msi_type )
+ {
+ /* MSI is not shared, so should be released already */
+ BUG_ON(desc->status & IRQ_GUEST);
+ irq_desc[vector].handler = &no_irq_type;
+ }
+ spin_unlock_irqrestore(&desc->lock, flags);
+
+ d->arch.pirq_vector[pirq] = d->arch.vector_pirq[vector] = 0;
ret = irq_deny_access(d, pirq);
-
if ( ret )
gdprintk(XENLOG_G_ERR, "deny irq %x access failed\n", pirq);
+ done:
return ret;
}
@@ -194,10 +186,6 @@ static int physdev_map_pirq(struct physd
struct domain *d;
int vector, pirq, ret = 0;
unsigned long flags;
-
- /* if msi_enable is not enabled, map always succeeds */
- if ( !msi_enable )
- return 0;
if ( !IS_PRIV(current->domain) )
return -EPERM;
@@ -308,14 +296,8 @@ static int physdev_unmap_pirq(struct phy
unsigned long flags;
int ret;
- if ( !msi_enable )
- return 0;
-
if ( !IS_PRIV(current->domain) )
return -EPERM;
-
- if ( !unmap )
- return -EINVAL;
if ( unmap->domid == DOMID_SELF )
d = rcu_lock_domain(current->domain);
@@ -323,14 +305,12 @@ static int physdev_unmap_pirq(struct phy
d = rcu_lock_domain_by_id(unmap->domid);
if ( d == NULL )
- {
- rcu_unlock_domain(d);
return -ESRCH;
- }
spin_lock_irqsave(&d->arch.irq_lock, flags);
ret = unmap_domain_pirq(d, unmap->pirq);
spin_unlock_irqrestore(&d->arch.irq_lock, flags);
+
rcu_unlock_domain(d);
return ret;
@@ -452,20 +432,14 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
irq = irq_op.irq;
ret = -EINVAL;
- if ( ((irq < 0) && (irq != AUTO_ASSIGN)) || (irq >= NR_IRQS) )
+ if ( (irq < 0) || (irq >= NR_IRQS) )
break;
irq_op.vector = assign_irq_vector(irq);
- ret = 0;
-
- if ( msi_enable )
- {
- spin_lock_irqsave(&dom0->arch.irq_lock, flags);
- if ( irq != AUTO_ASSIGN )
- ret = map_domain_pirq(dom0, irq_op.irq, irq_op.vector, NULL);
- spin_unlock_irqrestore(&dom0->arch.irq_lock, flags);
- }
+ spin_lock_irqsave(&dom0->arch.irq_lock, flags);
+ ret = map_domain_pirq(dom0, irq_op.irq, irq_op.vector, NULL);
+ spin_unlock_irqrestore(&dom0->arch.irq_lock, flags);
if ( copy_to_guest(arg, &irq_op, 1) != 0 )
ret = -EFAULT;
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/platform_hypercall.c
--- a/xen/arch/x86/platform_hypercall.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/platform_hypercall.c Fri Sep 12 14:47:40 2008 +0900
@@ -192,6 +192,10 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
break;
case XENPF_firmware_info:
+ ret = xsm_firmware_info();
+ if ( ret )
+ break;
+
switch ( op->u.firmware_info.type )
{
case XEN_FW_DISK_INFO: {
@@ -280,10 +284,18 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
break;
case XENPF_enter_acpi_sleep:
+ ret = xsm_acpi_sleep();
+ if ( ret )
+ break;
+
ret = acpi_enter_sleep(&op->u.enter_acpi_sleep);
break;
case XENPF_change_freq:
+ ret = xsm_change_freq();
+ if ( ret )
+ break;
+
ret = -ENOSYS;
if ( cpufreq_controller != FREQCTL_dom0_kernel )
break;
@@ -305,6 +317,10 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
cpumask_t cpumap;
XEN_GUEST_HANDLE(uint8) cpumap_bitmap;
XEN_GUEST_HANDLE(uint64) idletimes;
+
+ ret = xsm_getidletime();
+ if ( ret )
+ break;
ret = -ENOSYS;
if ( cpufreq_controller != FREQCTL_dom0_kernel )
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/smpboot.c Fri Sep 12 14:47:40 2008 +0900
@@ -1225,15 +1225,6 @@ int __cpu_disable(void)
if (cpu == 0)
return -EBUSY;
- /*
- * Only S3 is using this path, and thus idle vcpus are running on all
- * APs when we are called. To support full cpu hotplug, other
- * notification mechanisms should be introduced (e.g., migrate vcpus
- * off this physical cpu before rendezvous point).
- */
- if (!is_idle_vcpu(current))
- return -EINVAL;
-
local_irq_disable();
clear_local_APIC();
/* Allow any queued timer interrupts to get serviced */
@@ -1249,6 +1240,9 @@ int __cpu_disable(void)
fixup_irqs(map);
/* It's now safe to remove this processor from the online map */
cpu_clear(cpu, cpu_online_map);
+
+ cpu_disable_scheduler();
+
return 0;
}
@@ -1275,28 +1269,6 @@ static int take_cpu_down(void *unused)
return __cpu_disable();
}
-/*
- * XXX: One important thing missed here is to migrate vcpus
- * from dead cpu to other online ones and then put whole
- * system into a stop state. It assures a safe environment
- * for a cpu hotplug/remove at normal running state.
- *
- * However for xen PM case, at this point:
- * -> All other domains should be notified with PM event,
- * and then in following states:
- * * Suspend state, or
- * * Paused state, which is a force step to all
- * domains if they do nothing to suspend
- * -> All vcpus of dom0 (except vcpu0) have already beem
- * hot removed
- * with the net effect that all other cpus only have idle vcpu
- * running. In this special case, we can avoid vcpu migration
- * then and system can be considered in a stop state.
- *
- * So current cpu hotplug is a special version for PM specific
- * usage, and need more effort later for full cpu hotplug.
- * (ktian1)
- */
int cpu_down(unsigned int cpu)
{
int err = 0;
@@ -1304,6 +1276,12 @@ int cpu_down(unsigned int cpu)
spin_lock(&cpu_add_remove_lock);
if (num_online_cpus() == 1) {
err = -EBUSY;
+ goto out;
+ }
+
+ /* Can not offline BSP */
+ if (cpu == 0) {
+ err = -EINVAL;
goto out;
}
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/time.c Fri Sep 12 14:47:40 2008 +0900
@@ -993,15 +993,16 @@ static void local_time_calibration(void)
* All CPUS snapshot their local TSC and extrapolation of system time.
*/
struct calibration_rendezvous {
+ cpumask_t cpu_calibration_map;
atomic_t nr_cpus;
s_time_t master_stime;
};
static void time_calibration_rendezvous(void *_r)
{
- unsigned int total_cpus = num_online_cpus();
struct cpu_calibration *c = &this_cpu(cpu_calibration);
struct calibration_rendezvous *r = _r;
+ unsigned int total_cpus = cpus_weight(r->cpu_calibration_map);
if ( smp_processor_id() == 0 )
{
@@ -1029,11 +1030,13 @@ static void time_calibration(void *unuse
static void time_calibration(void *unused)
{
struct calibration_rendezvous r = {
+ .cpu_calibration_map = cpu_online_map,
.nr_cpus = ATOMIC_INIT(0)
};
/* @wait=1 because we must wait for all cpus before freeing @r. */
- on_each_cpu(time_calibration_rendezvous, &r, 0, 1);
+ on_selected_cpus(r.cpu_calibration_map,
+ time_calibration_rendezvous, &r, 0, 1);
}
void init_percpu_time(void)
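
The reason the online map is snapshotted into struct calibration_rendezvous is
that all participants must derive the same arrival count; re-reading the live
map on each CPU could disagree under a racing hotplug and wedge the rendezvous.
A toy model, with an 8-bit mask standing in for cpumask_t (names hypothetical):

    #include <stdio.h>

    static unsigned weight(unsigned mask)            /* ~ cpus_weight() */
    {
        unsigned n = 0;
        for ( ; mask; mask >>= 1 )
            n += mask & 1;
        return n;
    }

    int main(void)
    {
        unsigned snapshot   = 0x0f; /* CPUs 0-3 online when calibration fires */
        unsigned online_now = 0x07; /* CPU 3 offlined while the others spin */
        printf("every CPU waits for %u arrivals; the live map would say %u\n",
               weight(snapshot), weight(online_now));
        return 0;
    }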
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/traps.c Fri Sep 12 14:47:40 2008 +0900
@@ -47,7 +47,7 @@
#include <xen/version.h>
#include <xen/kexec.h>
#include <xen/trace.h>
-#include <asm/paging.h>
+#include <xen/paging.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/atomic.h>
@@ -2116,6 +2116,36 @@ static int emulate_privileged_op(struct
if ( wrmsr_safe(regs->ecx, eax, edx) != 0 )
goto fail;
break;
+ case MSR_AMD64_NB_CFG:
+ if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD ||
+ boot_cpu_data.x86 < 0x10 || boot_cpu_data.x86 > 0x11 )
+ goto fail;
+ if ( !IS_PRIV(v->domain) )
+ break;
+ if ( (rdmsr_safe(MSR_AMD64_NB_CFG, l, h) != 0) ||
+ (eax != l) ||
+ ((edx ^ h) & ~(1 << (AMD64_NB_CFG_CF8_EXT_ENABLE_BIT - 32))) )
+ goto invalid;
+ if ( wrmsr_safe(MSR_AMD64_NB_CFG, eax, edx) != 0 )
+ goto fail;
+ break;
+ case MSR_FAM10H_MMIO_CONF_BASE:
+ if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD ||
+ boot_cpu_data.x86 < 0x10 || boot_cpu_data.x86 > 0x11 )
+ goto fail;
+ if ( !IS_PRIV(v->domain) )
+ break;
+ if ( (rdmsr_safe(MSR_FAM10H_MMIO_CONF_BASE, l, h) != 0) ||
+ (((((u64)h << 32) | l) ^ res) &
+ ~((1 << FAM10H_MMIO_CONF_ENABLE_BIT) |
+ (FAM10H_MMIO_CONF_BUSRANGE_MASK <<
+ FAM10H_MMIO_CONF_BUSRANGE_SHIFT) |
+ ((u64)FAM10H_MMIO_CONF_BASE_MASK <<
+ FAM10H_MMIO_CONF_BASE_SHIFT))) )
+ goto invalid;
+ if ( wrmsr_safe(MSR_FAM10H_MMIO_CONF_BASE, eax, edx) != 0 )
+ goto fail;
+ break;
case MSR_IA32_PERF_CTL:
if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
goto fail;
@@ -2124,11 +2154,18 @@ static int emulate_privileged_op(struct
if ( wrmsr_safe(regs->ecx, eax, edx) != 0 )
goto fail;
break;
+ case MSR_IA32_THERM_CONTROL:
+ if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
+ goto fail;
+ if ( wrmsr_safe(regs->ecx, eax, edx) != 0 )
+ goto fail;
+ break;
default:
if ( wrmsr_hypervisor_regs(regs->ecx, eax, edx) )
break;
if ( (rdmsr_safe(regs->ecx, l, h) != 0) ||
(eax != l) || (edx != h) )
+ invalid:
gdprintk(XENLOG_WARNING, "Domain attempted WRMSR %p from "
"%08x:%08x to %08x:%08x.\n",
_p(regs->ecx), h, l, edx, eax);
@@ -2198,6 +2235,12 @@ static int emulate_privileged_op(struct
regs->eax |= MSR_IA32_MISC_ENABLE_BTS_UNAVAIL |
MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL |
MSR_IA32_MISC_ENABLE_XTPR_DISABLE;
+ break;
+ case MSR_IA32_THERM_CONTROL:
+ if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
+ goto fail;
+ if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) )
+ goto fail;
break;
default:
if ( rdmsr_hypervisor_regs(regs->ecx, &l, &h) )
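
The MSR_AMD64_NB_CFG filter above accepts a write only if it differs from the
current value in the CF8 extension bit alone. The patch expresses the test on
the eax/edx halves; the condition is equivalent to this standalone 64-bit check
(function name hypothetical):

    #include <stdint.h>
    #include <stdio.h>

    #define AMD64_NB_CFG_CF8_EXT_ENABLE_BIT 46

    static int nb_cfg_write_ok(uint64_t cur, uint64_t new_val)
    {
        /* Any bit other than bit 46 changing makes the write invalid. */
        return ((cur ^ new_val) &
                ~(1ULL << AMD64_NB_CFG_CF8_EXT_ENABLE_BIT)) == 0;
    }

    int main(void)
    {
        uint64_t cur = 0x1000;
        printf("%d\n", nb_cfg_write_ok(cur, cur | (1ULL << 46))); /* 1: ok */
        printf("%d\n", nb_cfg_write_ok(cur, cur | 1));            /* 0: no */
        return 0;
    }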
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/common/domain.c
--- a/xen/common/domain.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/common/domain.c Fri Sep 12 14:47:40 2008 +0900
@@ -651,9 +651,11 @@ void vcpu_reset(struct vcpu *v)
set_bit(_VPF_down, &v->pause_flags);
+ clear_bit(v->vcpu_id, d->poll_mask);
+ v->poll_evtchn = 0;
+
v->fpu_initialised = 0;
v->fpu_dirtied = 0;
- v->is_polling = 0;
v->is_initialised = 0;
v->nmi_pending = 0;
v->mce_pending = 0;
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/common/domctl.c
--- a/xen/common/domctl.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/common/domctl.c Fri Sep 12 14:47:40 2008 +0900
@@ -655,9 +655,6 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
spin_lock(&d->page_alloc_lock);
if ( new_max >= d->tot_pages )
{
- ret = guest_physmap_max_mem_pages(d, new_max);
- if ( ret != 0 )
- break;
d->max_pages = new_max;
ret = 0;
}
@@ -729,16 +726,11 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
if ( d == NULL )
break;
- ret = xsm_irq_permission(d, pirq, op->u.irq_permission.allow_access);
- if ( ret )
- goto irq_permission_out;
-
if ( op->u.irq_permission.allow_access )
ret = irq_permit_access(d, pirq);
else
ret = irq_deny_access(d, pirq);
- irq_permission_out:
rcu_unlock_domain(d);
}
break;
@@ -757,17 +749,12 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
d = rcu_lock_domain_by_id(op->domain);
if ( d == NULL )
break;
-
-    ret = xsm_iomem_permission(d, mfn,
-                               op->u.iomem_permission.allow_access);
- if ( ret )
- goto iomem_permission_out;
if ( op->u.iomem_permission.allow_access )
ret = iomem_permit_access(d, mfn, mfn + nr_mfns - 1);
else
ret = iomem_deny_access(d, mfn, mfn + nr_mfns - 1);
- iomem_permission_out:
rcu_unlock_domain(d);
}
break;
@@ -813,6 +800,12 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
{
put_domain(e);
goto set_target_out;
+ }
+
+ ret = xsm_set_target(d, e);
+ if ( ret ) {
+ put_domain(e);
+ goto set_target_out;
}
/* Hold reference on @e until we destroy @d. */
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/common/event_channel.c
--- a/xen/common/event_channel.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/common/event_channel.c Fri Sep 12 14:47:40 2008 +0900
@@ -545,6 +545,7 @@ static int evtchn_set_pending(struct vcp
static int evtchn_set_pending(struct vcpu *v, int port)
{
struct domain *d = v->domain;
+ int vcpuid;
/*
* The following bit operations must happen in strict order.
@@ -564,15 +565,19 @@ static int evtchn_set_pending(struct vcp
}
/* Check if some VCPU might be polling for this event. */
- if ( unlikely(d->is_polling) )
- {
- d->is_polling = 0;
- smp_mb(); /* check vcpu poll-flags /after/ clearing domain poll-flag */
- for_each_vcpu ( d, v )
+ if ( likely(bitmap_empty(d->poll_mask, MAX_VIRT_CPUS)) )
+ return 0;
+
+ /* Wake any interested (or potentially interested) pollers. */
+ for ( vcpuid = find_first_bit(d->poll_mask, MAX_VIRT_CPUS);
+ vcpuid < MAX_VIRT_CPUS;
+ vcpuid = find_next_bit(d->poll_mask, MAX_VIRT_CPUS, vcpuid+1) )
+ {
+ v = d->vcpu[vcpuid];
+ if ( ((v->poll_evtchn <= 0) || (v->poll_evtchn == port)) &&
+ test_and_clear_bit(vcpuid, d->poll_mask) )
{
- if ( !v->is_polling )
- continue;
- v->is_polling = 0;
+ v->poll_evtchn = 0;
vcpu_unblock(v);
}
}
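
The wake filter in that loop relies on the tri-state poll_evtchn documented in
the sched.h hunk below. A standalone model of the port test (the real code must
additionally win the test_and_clear_bit() race on poll_mask before calling
vcpu_unblock()):

    #include <stdio.h>

    /* > 0: polling exactly that port; 0: not polling; < 0: several ports. */
    static int port_matches(int poll_evtchn, int pending_port)
    {
        return (poll_evtchn <= 0) || (poll_evtchn == pending_port);
    }

    int main(void)
    {
        printf("%d\n", port_matches(-1, 5)); /* 1: multi-port poller */
        printf("%d\n", port_matches(5, 5));  /* 1: single-port match */
        printf("%d\n", port_matches(4, 5));  /* 0: different port */
        return 0;
    }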
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/common/rangeset.c
--- a/xen/common/rangeset.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/common/rangeset.c Fri Sep 12 14:47:40 2008 +0900
@@ -10,6 +10,7 @@
#include <xen/sched.h>
#include <xen/errno.h>
#include <xen/rangeset.h>
+#include <xsm/xsm.h>
/* An inclusive range [s,e] and pointer to next range in ascending order. */
struct range {
@@ -95,6 +96,10 @@ int rangeset_add_range(
{
struct range *x, *y;
int rc = 0;
+
+ rc = xsm_add_range(r->domain, r->name, s, e);
+ if ( rc )
+ return rc;
ASSERT(s <= e);
@@ -164,6 +169,10 @@ int rangeset_remove_range(
struct range *x, *y, *t;
int rc = 0;
+ rc = xsm_remove_range(r->domain, r->name, s, e);
+ if ( rc )
+ return rc;
+
ASSERT(s <= e);
spin_lock(&r->lock);
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/common/sched_credit.c
--- a/xen/common/sched_credit.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/common/sched_credit.c Fri Sep 12 14:47:40 2008 +0900
@@ -1107,6 +1107,10 @@ csched_load_balance(int cpu, struct csch
BUG_ON( cpu != snext->vcpu->processor );
+ /* If this CPU is going offline we shouldn't steal work. */
+ if ( unlikely(!cpu_online(cpu)) )
+ goto out;
+
if ( snext->pri == CSCHED_PRI_IDLE )
CSCHED_STAT_CRANK(load_balance_idle);
else if ( snext->pri == CSCHED_PRI_TS_OVER )
@@ -1149,6 +1153,7 @@ csched_load_balance(int cpu, struct csch
return speer;
}
+ out:
/* Failed to find more important work elsewhere... */
__runq_remove(snext);
return snext;
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/common/schedule.c
--- a/xen/common/schedule.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/common/schedule.c Fri Sep 12 14:47:40 2008 +0900
@@ -63,11 +63,31 @@ static struct scheduler ops;
(( ops.fn != NULL ) ? ops.fn( __VA_ARGS__ ) \
: (typeof(ops.fn(__VA_ARGS__)))0 )
+static inline void trace_runstate_change(struct vcpu *v, int new_state)
+{
+ struct { uint32_t vcpu:16, domain:16; } d;
+ uint32_t event;
+
+ if ( likely(!tb_init_done) )
+ return;
+
+ d.vcpu = v->vcpu_id;
+ d.domain = v->domain->domain_id;
+
+ event = TRC_SCHED_RUNSTATE_CHANGE;
+ event |= ( v->runstate.state & 0x3 ) << 8;
+ event |= ( new_state & 0x3 ) << 4;
+
+ __trace_var(event, 1/*tsc*/, sizeof(d), (unsigned char *)&d);
+}
+
static inline void vcpu_runstate_change(
struct vcpu *v, int new_state, s_time_t new_entry_time)
{
ASSERT(v->runstate.state != new_state);
ASSERT(spin_is_locked(&per_cpu(schedule_data,v->processor).schedule_lock));
+
+ trace_runstate_change(v, new_state);
v->runstate.time[v->runstate.state] +=
new_entry_time - v->runstate.state_entry_time;
@@ -198,6 +218,27 @@ void vcpu_wake(struct vcpu *v)
TRACE_2D(TRC_SCHED_WAKE, v->domain->domain_id, v->vcpu_id);
}
+void vcpu_unblock(struct vcpu *v)
+{
+ if ( !test_and_clear_bit(_VPF_blocked, &v->pause_flags) )
+ return;
+
+ /* Polling period ends when a VCPU is unblocked. */
+ if ( unlikely(v->poll_evtchn != 0) )
+ {
+ v->poll_evtchn = 0;
+ /*
+ * We *must* re-clear _VPF_blocked to avoid racing other wakeups of
+ * this VCPU (and it then going back to sleep on poll_mask).
+ * Test-and-clear is idiomatic and ensures clear_bit not reordered.
+ */
+ if ( test_and_clear_bit(v->vcpu_id, v->domain->poll_mask) )
+ clear_bit(_VPF_blocked, &v->pause_flags);
+ }
+
+ vcpu_wake(v);
+}
+
static void vcpu_migrate(struct vcpu *v)
{
unsigned long flags;
@@ -247,6 +288,48 @@ void vcpu_force_reschedule(struct vcpu *
}
}
+/*
+ * This function is used by cpu_hotplug code from stop_machine context.
+ * Hence we can avoid needing to take the domlist lock.
+ */
+void cpu_disable_scheduler(void)
+{
+ struct domain *d;
+ struct vcpu *v;
+ unsigned int cpu = smp_processor_id();
+
+ for_each_domain ( d )
+ {
+ for_each_vcpu ( d, v )
+ {
+ if ( is_idle_vcpu(v) )
+ continue;
+
+ if ( (cpus_weight(v->cpu_affinity) == 1) &&
+ cpu_isset(cpu, v->cpu_affinity) )
+ {
+ printk("Breaking vcpu affinity for domain %d vcpu %d\n",
+ v->domain->domain_id, v->vcpu_id);
+ cpus_setall(v->cpu_affinity);
+ }
+
+ /*
+ * Migrate single-shot timers to CPU0. A new cpu will automatically
+ * be chosen when the timer is next re-set.
+ */
+ if ( v->singleshot_timer.cpu == cpu )
+ migrate_timer(&v->singleshot_timer, 0);
+
+ if ( v->processor == cpu )
+ {
+ set_bit(_VPF_migrating, &v->pause_flags);
+ vcpu_sleep_nosync(v);
+ vcpu_migrate(v);
+ }
+ }
+ }
+}
+
static int __vcpu_set_affinity(
struct vcpu *v, cpumask_t *affinity,
bool_t old_lock_status, bool_t new_lock_status)
@@ -337,7 +420,7 @@ static long do_poll(struct sched_poll *s
struct vcpu *v = current;
struct domain *d = v->domain;
evtchn_port_t port;
- long rc = 0;
+ long rc;
unsigned int i;
/* Fairly arbitrary limit. */
@@ -348,11 +431,24 @@ static long do_poll(struct sched_poll *s
return -EFAULT;
set_bit(_VPF_blocked, &v->pause_flags);
- v->is_polling = 1;
- d->is_polling = 1;
-
+ v->poll_evtchn = -1;
+ set_bit(v->vcpu_id, d->poll_mask);
+
+#ifndef CONFIG_X86 /* set_bit() implies mb() on x86 */
/* Check for events /after/ setting flags: avoids wakeup waiting race. */
- smp_wmb();
+ smp_mb();
+
+ /*
+ * Someone may have seen we are blocked but not that we are polling, or
+ * vice versa. We are certainly being woken, so clean up and bail. Beyond
+ * this point others can be guaranteed to clean up for us if they wake us.
+ */
+ rc = 0;
+ if ( (v->poll_evtchn == 0) ||
+ !test_bit(_VPF_blocked, &v->pause_flags) ||
+ !test_bit(v->vcpu_id, d->poll_mask) )
+ goto out;
+#endif
for ( i = 0; i < sched_poll->nr_ports; i++ )
{
@@ -369,6 +465,9 @@ static long do_poll(struct sched_poll *s
goto out;
}
+ if ( sched_poll->nr_ports == 1 )
+ v->poll_evtchn = port;
+
if ( sched_poll->timeout != 0 )
set_timer(&v->poll_timer, sched_poll->timeout);
@@ -378,7 +477,8 @@ static long do_poll(struct sched_poll *s
return 0;
out:
- v->is_polling = 0;
+ v->poll_evtchn = 0;
+ clear_bit(v->vcpu_id, d->poll_mask);
clear_bit(_VPF_blocked, &v->pause_flags);
return rc;
}
@@ -628,7 +728,9 @@ static void vcpu_periodic_timer_work(str
return;
periodic_next_event = v->periodic_last_event + v->periodic_period;
- if ( now > periodic_next_event )
+
+ /* The timer subsystem may call us up to TIME_SLOP ahead of deadline. */
+ if ( (now + TIME_SLOP) > periodic_next_event )
{
send_timer_event(v);
v->periodic_last_event = now;
@@ -758,11 +860,8 @@ static void poll_timer_fn(void *data)
{
struct vcpu *v = data;
- if ( !v->is_polling )
- return;
-
- v->is_polling = 0;
- vcpu_unblock(v);
+ if ( test_and_clear_bit(v->vcpu_id, v->domain->poll_mask) )
+ vcpu_unblock(v);
}
/* Initialise the data structures. */
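
The do_poll() comment above describes a classic wakeup-waiting handshake:
publish both flags, issue a full barrier, then re-check in case a waker raced
in between. A compilable sketch with C11 atomics standing in for
set_bit()/test_and_clear_bit() (all names hypothetical):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    static atomic_bool blocked, polling;

    static bool poller_prepare(void)
    {
        atomic_store(&blocked, true);               /* _VPF_blocked */
        atomic_store(&polling, true);               /* bit in poll_mask */
        atomic_thread_fence(memory_order_seq_cst);  /* the smp_mb() */
        /* A waker racing in between has already cleaned up for us: bail. */
        return atomic_load(&blocked) && atomic_load(&polling);
    }

    static void waker(void)
    {
        if (atomic_exchange(&polling, false))       /* test_and_clear_bit() */
            atomic_store(&blocked, false);          /* ~ vcpu_unblock() */
    }

    int main(void)
    {
        if (poller_prepare())
            waker();                                /* an event arrives */
        printf("blocked=%d polling=%d\n",           /* 0 0: cleanly woken */
               (int)atomic_load(&blocked), (int)atomic_load(&polling));
        return 0;
    }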
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/common/sysctl.c
--- a/xen/common/sysctl.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/common/sysctl.c Fri Sep 12 14:47:40 2008 +0900
@@ -149,6 +149,10 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysc
char c;
uint32_t i;
+ ret = xsm_debug_keys();
+ if ( ret )
+ break;
+
for ( i = 0; i < op->u.debug_keys.nr_keys; i++ )
{
if ( copy_from_guest_offset(&c, op->u.debug_keys.keys, i, 1) )
@@ -166,6 +170,10 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysc
nr_cpus = min_t(uint32_t, op->u.getcpuinfo.max_cpus, NR_CPUS);
+ ret = xsm_getcpuinfo();
+ if ( ret )
+ break;
+
for ( i = 0; i < nr_cpus; i++ )
{
/* Assume no holes in idle-vcpu map. */
@@ -188,6 +196,10 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysc
case XEN_SYSCTL_availheap:
{
+ ret = xsm_availheap();
+ if ( ret )
+ break;
+
op->u.availheap.avail_bytes = avail_domheap_pages_region(
op->u.availheap.node,
op->u.availheap.min_bitwidth,
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/common/trace.c
--- a/xen/common/trace.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/common/trace.c Fri Sep 12 14:47:40 2008 +0900
@@ -58,6 +58,7 @@ static int t_buf_highwater;
/* Number of records lost due to per-CPU trace buffer being full. */
static DEFINE_PER_CPU(unsigned long, lost_records);
+static DEFINE_PER_CPU(unsigned long, lost_records_first_tsc);
/* a flag recording whether initialization has been done */
/* or more properly, if the tbuf subsystem is enabled right now */
@@ -147,6 +148,31 @@ static int tb_set_size(int size)
return 0;
}
+int trace_will_trace_event(u32 event)
+{
+ if ( !tb_init_done )
+ return 0;
+
+ /*
+ * Copied from __trace_var()
+ */
+ if ( (tb_event_mask & event) == 0 )
+ return 0;
+
+ /* match class */
+ if ( ((tb_event_mask >> TRC_CLS_SHIFT) & (event >> TRC_CLS_SHIFT)) == 0 )
+ return 0;
+
+ /* then match subclass */
+ if ( (((tb_event_mask >> TRC_SUBCLS_SHIFT) & 0xf )
+ & ((event >> TRC_SUBCLS_SHIFT) & 0xf )) == 0 )
+ return 0;
+
+ if ( !cpu_isset(smp_processor_id(), tb_cpu_mask) )
+ return 0;
+
+ return 1;
+}
/**
* init_trace_bufs - performs initialization of the per-cpu trace buffers.
@@ -354,22 +380,27 @@ static inline int insert_wrap_record(str
NULL);
}
-#define LOST_REC_SIZE 8
+#define LOST_REC_SIZE (4 + 8 + 16) /* header + tsc + sizeof(struct ed) */
static inline int insert_lost_records(struct t_buf *buf)
{
struct {
u32 lost_records;
- } ed;
-
+ u32 did:16, vid:16;
+ u64 first_tsc;
+ } __attribute__((packed)) ed;
+
+ ed.vid = current->vcpu_id;
+ ed.did = current->domain->domain_id;
ed.lost_records = this_cpu(lost_records);
+ ed.first_tsc = this_cpu(lost_records_first_tsc);
this_cpu(lost_records) = 0;
return __insert_record(buf,
TRC_LOST_RECORDS,
sizeof(ed),
- 0 /* !cycles */,
+ 1 /* cycles */,
LOST_REC_SIZE,
(unsigned char *)&ed);
}
@@ -401,7 +432,8 @@ void __trace_var(u32 event, int cycles,
int extra_word;
int started_below_highwater;
- ASSERT(tb_init_done);
+    if ( !tb_init_done )
+ return;
/* Convert byte count into word count, rounding up */
extra_word = (extra / sizeof(u32));
@@ -479,7 +511,8 @@ void __trace_var(u32 event, int cycles,
/* Do we have enough space for everything? */
if ( total_size > bytes_to_tail )
{
- this_cpu(lost_records)++;
+ if ( ++this_cpu(lost_records) == 1 )
+ this_cpu(lost_records_first_tsc)=(u64)get_cycles();
local_irq_restore(flags);
return;
}
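
trace_will_trace_event() lets a caller skip assembling a record that the mask
would drop anyway. Its three-stage filter can be modelled standalone (shift
values are assumptions taken from xen/include/public/trace.h):

    #include <stdint.h>
    #include <stdio.h>

    #define TRC_CLS_SHIFT    16  /* assumed */
    #define TRC_SUBCLS_SHIFT 12  /* assumed */

    static int event_enabled(uint32_t tb_event_mask, uint32_t event)
    {
        if ( (tb_event_mask & event) == 0 )
            return 0;
        /* match class, then subclass, exactly as in the hunk above */
        if ( ((tb_event_mask >> TRC_CLS_SHIFT) & (event >> TRC_CLS_SHIFT)) == 0 )
            return 0;
        if ( (((tb_event_mask >> TRC_SUBCLS_SHIFT) & 0xf) &
              ((event >> TRC_SUBCLS_SHIFT) & 0xf)) == 0 )
            return 0;
        return 1;
    }

    int main(void)
    {
        uint32_t mask = 0x0002f000;                      /* TRC_SCHED, all */
        printf("%d\n", event_enabled(mask, 0x00021001)); /* 1: sched event */
        printf("%d\n", event_enabled(mask, 0x00081001)); /* 0: HVM event */
        return 0;
    }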
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/drivers/acpi/hwregs.c
--- a/xen/drivers/acpi/hwregs.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/drivers/acpi/hwregs.c Fri Sep 12 14:47:40 2008 +0900
@@ -239,11 +239,13 @@ acpi_status acpi_set_register(u32 regist
case ACPI_REGISTER_PM2_CONTROL:
+#if 0 /* Redundant read in original Linux code. */
status = acpi_hw_register_read(ACPI_REGISTER_PM2_CONTROL,
&register_value);
if (ACPI_FAILURE(status)) {
goto unlock_and_exit;
}
+#endif
ACPI_DEBUG_PRINT((ACPI_DB_IO,
"PM2 control: Read %X from %8.8X%8.8X\n",
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/drivers/passthrough/iommu.c
--- a/xen/drivers/passthrough/iommu.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/drivers/passthrough/iommu.c Fri Sep 12 14:47:40 2008 +0900
@@ -33,11 +33,13 @@ int amd_iov_detect(void);
* pv Enable IOMMU for PV domains
* no-pv Disable IOMMU for PV domains (default)
* force|required Don't boot unless IOMMU is enabled
+ * passthrough Bypass VT-d translation for Dom0
*/
custom_param("iommu", parse_iommu_param);
int iommu_enabled = 0;
int iommu_pv_enabled = 0;
int force_iommu = 0;
+int iommu_passthrough = 0;
static void __init parse_iommu_param(char *s)
{
@@ -58,6 +60,8 @@ static void __init parse_iommu_param(cha
iommu_pv_enabled = 0;
else if ( !strcmp(s, "force") || !strcmp(s, "required") )
force_iommu = 1;
+ else if ( !strcmp(s, "passthrough") )
+ iommu_passthrough = 1;
s = ss + 1;
} while ( ss );
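
With the new keyword, pass-through is requested from the Xen command line
alongside the existing options; for instance (hypothetical GRUB stanza; the
parser above walks a comma-separated list):

    kernel /boot/xen.gz iommu=force,passthrough

Dom0 DMA then bypasses VT-d translation on hardware whose extended capabilities
advertise pass-through, as the vtd/iommu.c hunks below implement.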
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/drivers/passthrough/vtd/iommu.c Fri Sep 12 14:47:40 2008 +0900
@@ -1090,12 +1090,13 @@ static int domain_context_mapping_one(
}
spin_lock_irqsave(&iommu->lock, flags);
-
-#ifdef CONTEXT_PASSTHRU
- if ( ecap_pass_thru(iommu->ecap) && (domain->domain_id == 0) )
+ if ( iommu_passthrough &&
+ ecap_pass_thru(iommu->ecap) && (domain->domain_id == 0) )
+ {
context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
+ agaw = level_to_agaw(iommu->nr_pt_levels);
+ }
else
-#endif
{
/* Ensure we have pagetables allocated down to leaf PTE. */
if ( hd->pgd_maddr == 0 )
@@ -1459,11 +1460,13 @@ int intel_iommu_map_page(
u64 pg_maddr;
int pte_present;
-#ifdef CONTEXT_PASSTHRU
+ drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
+ iommu = drhd->iommu;
+
/* do nothing if dom0 and iommu supports pass thru */
- if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
+ if ( iommu_passthrough &&
+ ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
return 0;
-#endif
pg_maddr = addr_to_dma_page_maddr(d, (paddr_t)gfn << PAGE_SHIFT_4K, 1);
if ( pg_maddr == 0 )
@@ -1500,11 +1503,10 @@ int intel_iommu_unmap_page(struct domain
drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
iommu = drhd->iommu;
-#ifdef CONTEXT_PASSTHRU
/* do nothing if dom0 and iommu supports pass thru */
- if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
+ if ( iommu_passthrough &&
+ ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
return 0;
-#endif
dma_pte_clear_one(d, (paddr_t)gfn << PAGE_SHIFT_4K);
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/include/asm-ia64/shadow.h
--- a/xen/include/asm-ia64/shadow.h Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/include/asm-ia64/shadow.h Fri Sep 12 14:47:40 2008 +0900
@@ -63,8 +63,6 @@ shadow_mark_page_dirty(struct domain *d,
return 0;
}
-#define guest_physmap_max_mem_pages(d, n) (0)
-
#endif // _XEN_SHADOW_H
/*
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/include/asm-x86/bitops.h
--- a/xen/include/asm-x86/bitops.h Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/include/asm-x86/bitops.h Fri Sep 12 14:47:40 2008 +0900
@@ -116,8 +116,8 @@ static inline void __clear_bit(int nr, v
__clear_bit(nr, addr); \
})
-#define smp_mb__before_clear_bit() barrier()
-#define smp_mb__after_clear_bit() barrier()
+#define smp_mb__before_clear_bit() ((void)0)
+#define smp_mb__after_clear_bit() ((void)0)
/**
* __change_bit - Toggle a bit in memory
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/include/asm-x86/guest_access.h
--- a/xen/include/asm-x86/guest_access.h Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/include/asm-x86/guest_access.h Fri Sep 12 14:47:40 2008 +0900
@@ -8,7 +8,7 @@
#define __ASM_X86_GUEST_ACCESS_H__
#include <asm/uaccess.h>
-#include <asm/shadow.h>
+#include <asm/paging.h>
#include <asm/hvm/support.h>
#include <asm/hvm/guest_access.h>
@@ -87,10 +87,10 @@
* Allows use of faster __copy_* functions.
*/
#define guest_handle_okay(hnd, nr) \
- (shadow_mode_external(current->domain) || \
+ (paging_mode_external(current->domain) || \
array_access_ok((hnd).p, (nr), sizeof(*(hnd).p)))
#define guest_handle_subrange_okay(hnd, first, last) \
- (shadow_mode_external(current->domain) || \
+ (paging_mode_external(current->domain) || \
array_access_ok((hnd).p + (first), \
(last)-(first)+1, \
sizeof(*(hnd).p)))
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/include/asm-x86/hvm/trace.h
--- a/xen/include/asm-x86/hvm/trace.h Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/include/asm-x86/hvm/trace.h Fri Sep 12 14:47:40 2008 +0900
@@ -56,16 +56,13 @@
#define TRC_PAR_LONG(par) (par)
#endif
-#define HVMTRACE_ND(evt, cycles, vcpu, count, d1, d2, d3, d4, d5, d6) \
+#define HVMTRACE_ND(evt, cycles, count, d1, d2, d3, d4, d5, d6) \
do { \
if ( unlikely(tb_init_done) && DO_TRC_HVM_ ## evt ) \
{ \
struct { \
- u32 did:16, vid:16; \
u32 d[6]; \
} _d; \
- _d.did=(vcpu)->domain->domain_id; \
- _d.vid=(vcpu)->vcpu_id; \
_d.d[0]=(d1); \
_d.d[1]=(d2); \
_d.d[2]=(d3); \
@@ -77,32 +74,32 @@
} \
} while(0)
-#define HVMTRACE_6D(evt, vcpu, d1, d2, d3, d4, d5, d6) \
- HVMTRACE_ND(evt, 0, vcpu, 6, d1, d2, d3, d4, d5, d6)
-#define HVMTRACE_5D(evt, vcpu, d1, d2, d3, d4, d5) \
- HVMTRACE_ND(evt, 0, vcpu, 5, d1, d2, d3, d4, d5, 0)
-#define HVMTRACE_4D(evt, vcpu, d1, d2, d3, d4) \
- HVMTRACE_ND(evt, 0, vcpu, 4, d1, d2, d3, d4, 0, 0)
-#define HVMTRACE_3D(evt, vcpu, d1, d2, d3) \
- HVMTRACE_ND(evt, 0, vcpu, 3, d1, d2, d3, 0, 0, 0)
-#define HVMTRACE_2D(evt, vcpu, d1, d2) \
- HVMTRACE_ND(evt, 0, vcpu, 2, d1, d2, 0, 0, 0, 0)
-#define HVMTRACE_1D(evt, vcpu, d1) \
- HVMTRACE_ND(evt, 0, vcpu, 1, d1, 0, 0, 0, 0, 0)
-#define HVMTRACE_0D(evt, vcpu) \
- HVMTRACE_ND(evt, 0, vcpu, 0, 0, 0, 0, 0, 0, 0)
+#define HVMTRACE_6D(evt, d1, d2, d3, d4, d5, d6) \
+ HVMTRACE_ND(evt, 0, 6, d1, d2, d3, d4, d5, d6)
+#define HVMTRACE_5D(evt, d1, d2, d3, d4, d5) \
+ HVMTRACE_ND(evt, 0, 5, d1, d2, d3, d4, d5, 0)
+#define HVMTRACE_4D(evt, d1, d2, d3, d4) \
+ HVMTRACE_ND(evt, 0, 4, d1, d2, d3, d4, 0, 0)
+#define HVMTRACE_3D(evt, d1, d2, d3) \
+ HVMTRACE_ND(evt, 0, 3, d1, d2, d3, 0, 0, 0)
+#define HVMTRACE_2D(evt, d1, d2) \
+ HVMTRACE_ND(evt, 0, 2, d1, d2, 0, 0, 0, 0)
+#define HVMTRACE_1D(evt, d1) \
+ HVMTRACE_ND(evt, 0, 1, d1, 0, 0, 0, 0, 0)
+#define HVMTRACE_0D(evt) \
+ HVMTRACE_ND(evt, 0, 0, 0, 0, 0, 0, 0, 0)
#ifdef __x86_64__
-#define HVMTRACE_LONG_1D(evt, vcpu, d1) \
- HVMTRACE_2D(evt ## 64, vcpu, (d1) & 0xFFFFFFFF, (d1) >> 32)
-#define HVMTRACE_LONG_2D(evt,vcpu,d1,d2, ...) \
- HVMTRACE_3D(evt ## 64, vcpu, d1, d2)
-#define HVMTRACE_LONG_3D(evt, vcpu, d1, d2, d3, ...) \
- HVMTRACE_4D(evt ## 64, vcpu, d1, d2, d3)
-#define HVMTRACE_LONG_4D(evt, vcpu, d1, d2, d3, d4, ...) \
- HVMTRACE_5D(evt ## 64, vcpu, d1, d2, d3, d4)
+#define HVMTRACE_LONG_1D(evt, d1) \
+ HVMTRACE_2D(evt ## 64, (d1) & 0xFFFFFFFF, (d1) >> 32)
+#define HVMTRACE_LONG_2D(evt, d1, d2, ...) \
+ HVMTRACE_3D(evt ## 64, d1, d2)
+#define HVMTRACE_LONG_3D(evt, d1, d2, d3, ...) \
+ HVMTRACE_4D(evt ## 64, d1, d2, d3)
+#define HVMTRACE_LONG_4D(evt, d1, d2, d3, d4, ...) \
+ HVMTRACE_5D(evt ## 64, d1, d2, d3, d4)
#else
#define HVMTRACE_LONG_1D HVMTRACE_1D
#define HVMTRACE_LONG_2D HVMTRACE_2D
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/include/asm-x86/io_apic.h
--- a/xen/include/asm-x86/io_apic.h Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/include/asm-x86/io_apic.h Fri Sep 12 14:47:40 2008 +0900
@@ -162,8 +162,6 @@ static inline void io_apic_modify(unsign
/* 1 if "noapic" boot option passed */
extern int skip_ioapic_setup;
-extern int msi_enable;
-
/*
* If we use the IO-APIC for IRQ routing, disable automatic
* assignment of PCI IRQ's.
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/include/asm-x86/mm.h Fri Sep 12 14:47:40 2008 +0900
@@ -57,6 +57,17 @@ struct page_info
* (except page table pages when the guest is in shadow mode).
*/
u32 tlbflush_timestamp;
+
+ /*
+ * When PGT_partial is true then this field is valid and indicates
+ * that PTEs in the range [0, @nr_validated_ptes) have been validated.
+ * If @partial_pte is true then PTE at @nr_validated_ptes+1 has been
+ * partially validated.
+ */
+ struct {
+ u16 nr_validated_ptes;
+ bool_t partial_pte;
+ };
/*
* Guest pages with a shadow. This does not conflict with
@@ -86,9 +97,12 @@ struct page_info
/* PAE only: is this an L2 page directory containing Xen-private mappings? */
#define _PGT_pae_xen_l2 26
#define PGT_pae_xen_l2 (1U<<_PGT_pae_xen_l2)
-
- /* 26-bit count of uses of this frame as its current type. */
-#define PGT_count_mask ((1U<<26)-1)
+/* Has this page been *partially* validated for use as its current type? */
+#define _PGT_partial 25
+#define PGT_partial (1U<<_PGT_partial)
+
+ /* 25-bit count of uses of this frame as its current type. */
+#define PGT_count_mask ((1U<<25)-1)
/* Cleared when the owning guest 'frees' this page. */
#define _PGC_allocated 31
@@ -154,7 +168,8 @@ extern unsigned long total_pages;
extern unsigned long total_pages;
void init_frametable(void);
-void free_page_type(struct page_info *page, unsigned long type);
+int free_page_type(struct page_info *page, unsigned long type,
+ int preemptible);
int _shadow_mode_refcounts(struct domain *d);
void cleanup_page_cacheattr(struct page_info *page);
@@ -165,6 +180,8 @@ int get_page(struct page_info *page, st
int get_page(struct page_info *page, struct domain *domain);
void put_page_type(struct page_info *page);
int get_page_type(struct page_info *page, unsigned long type);
+int put_page_type_preemptible(struct page_info *page);
+int get_page_type_preemptible(struct page_info *page, unsigned long type);
int get_page_from_l1e(l1_pgentry_t l1e, struct domain *d);
void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d);
@@ -174,6 +191,19 @@ static inline void put_page_and_type(str
put_page(page);
}
+static inline int put_page_and_type_preemptible(struct page_info *page,
+ int preemptible)
+{
+ int rc = 0;
+
+ if ( preemptible )
+ rc = put_page_type_preemptible(page);
+ else
+ put_page_type(page);
+ if ( likely(rc == 0) )
+ put_page(page);
+ return rc;
+}
static inline int get_page_and_type(struct page_info *page,
struct domain *domain,
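
A sketch of the intended call pattern for the preemptible variants, assuming,
as the enlarged mm.c hunks in this changeset suggest, that a nonzero return
means the teardown must be resumed via a hypercall continuation rather than
that it failed outright (hypothetical caller, not part of this patch):

    /* Drop a typed reference, allowing preemption part-way through: */
    rc = put_page_and_type_preemptible(page, 1 /* preemptible */);
    if ( rc )          /* e.g. -EAGAIN/-EINTR: re-issue the operation */
        return rc;     /* progress is kept in page's nr_validated_ptes */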
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/include/asm-x86/msr-index.h
--- a/xen/include/asm-x86/msr-index.h Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/include/asm-x86/msr-index.h Fri Sep 12 14:47:40 2008 +0900
@@ -194,10 +194,22 @@
#define _K8_VMCR_SVME_DISABLE 4
#define K8_VMCR_SVME_DISABLE (1 << _K8_VMCR_SVME_DISABLE)
+/* AMD64 MSRs */
+#define MSR_AMD64_NB_CFG 0xc001001f
+#define AMD64_NB_CFG_CF8_EXT_ENABLE_BIT 46
+
/* AMD Family10h machine check MSRs */
#define MSR_F10_MC4_MISC1 0xc0000408
#define MSR_F10_MC4_MISC2 0xc0000409
#define MSR_F10_MC4_MISC3 0xc000040A
+
+/* Other AMD Fam10h MSRs */
+#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058
+#define FAM10H_MMIO_CONF_ENABLE_BIT 0
+#define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf
+#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2
+#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffff
+#define FAM10H_MMIO_CONF_BASE_SHIFT 20
/* K6 MSRs */
#define MSR_K6_EFER 0xc0000080
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/include/asm-x86/shadow.h Fri Sep 12 14:47:40 2008 +0900
@@ -115,8 +115,6 @@ static inline void shadow_remove_all_sha
sh_remove_shadows(v, gmfn, 0 /* Be thorough */, 1 /* Must succeed */);
}
-#define guest_physmap_max_mem_pages(d, n) (0)
-
#endif /* _XEN_SHADOW_H */
/*
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/include/public/trace.h
--- a/xen/include/public/trace.h Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/include/public/trace.h Fri Sep 12 14:47:40 2008 +0900
@@ -37,6 +37,7 @@
#define TRC_HVM 0x0008f000 /* Xen HVM trace */
#define TRC_MEM 0x0010f000 /* Xen memory trace */
#define TRC_PV 0x0020f000 /* Xen PV traces */
+#define TRC_SHADOW 0x0040f000 /* Xen shadow tracing */
#define TRC_ALL 0x0ffff000
#define TRC_HD_TO_EVENT(x) ((x)&0x0fffffff)
#define TRC_HD_CYCLE_FLAG (1UL<<31)
@@ -50,26 +51,30 @@
#define TRC_HVM_ENTRYEXIT 0x00081000 /* VMENTRY and #VMEXIT */
#define TRC_HVM_HANDLER 0x00082000 /* various HVM handlers */
+#define TRC_SCHED_MIN 0x00021000 /* Just runstate changes */
+#define TRC_SCHED_VERBOSE 0x00028000 /* More inclusive scheduling */
+
/* Trace events per class */
#define TRC_LOST_RECORDS (TRC_GEN + 1)
#define TRC_TRACE_WRAP_BUFFER (TRC_GEN + 2)
#define TRC_TRACE_CPU_CHANGE (TRC_GEN + 3)
-#define TRC_SCHED_DOM_ADD (TRC_SCHED + 1)
-#define TRC_SCHED_DOM_REM (TRC_SCHED + 2)
-#define TRC_SCHED_SLEEP (TRC_SCHED + 3)
-#define TRC_SCHED_WAKE (TRC_SCHED + 4)
-#define TRC_SCHED_YIELD (TRC_SCHED + 5)
-#define TRC_SCHED_BLOCK (TRC_SCHED + 6)
-#define TRC_SCHED_SHUTDOWN (TRC_SCHED + 7)
-#define TRC_SCHED_CTL (TRC_SCHED + 8)
-#define TRC_SCHED_ADJDOM (TRC_SCHED + 9)
-#define TRC_SCHED_SWITCH (TRC_SCHED + 10)
-#define TRC_SCHED_S_TIMER_FN (TRC_SCHED + 11)
-#define TRC_SCHED_T_TIMER_FN (TRC_SCHED + 12)
-#define TRC_SCHED_DOM_TIMER_FN (TRC_SCHED + 13)
-#define TRC_SCHED_SWITCH_INFPREV (TRC_SCHED + 14)
-#define TRC_SCHED_SWITCH_INFNEXT (TRC_SCHED + 15)
+#define TRC_SCHED_RUNSTATE_CHANGE (TRC_SCHED_MIN + 1)
+#define TRC_SCHED_DOM_ADD (TRC_SCHED_VERBOSE + 1)
+#define TRC_SCHED_DOM_REM (TRC_SCHED_VERBOSE + 2)
+#define TRC_SCHED_SLEEP (TRC_SCHED_VERBOSE + 3)
+#define TRC_SCHED_WAKE (TRC_SCHED_VERBOSE + 4)
+#define TRC_SCHED_YIELD (TRC_SCHED_VERBOSE + 5)
+#define TRC_SCHED_BLOCK (TRC_SCHED_VERBOSE + 6)
+#define TRC_SCHED_SHUTDOWN (TRC_SCHED_VERBOSE + 7)
+#define TRC_SCHED_CTL (TRC_SCHED_VERBOSE + 8)
+#define TRC_SCHED_ADJDOM (TRC_SCHED_VERBOSE + 9)
+#define TRC_SCHED_SWITCH (TRC_SCHED_VERBOSE + 10)
+#define TRC_SCHED_S_TIMER_FN (TRC_SCHED_VERBOSE + 11)
+#define TRC_SCHED_T_TIMER_FN (TRC_SCHED_VERBOSE + 12)
+#define TRC_SCHED_DOM_TIMER_FN (TRC_SCHED_VERBOSE + 13)
+#define TRC_SCHED_SWITCH_INFPREV (TRC_SCHED_VERBOSE + 14)
+#define TRC_SCHED_SWITCH_INFNEXT (TRC_SCHED_VERBOSE + 15)
#define TRC_MEM_PAGE_GRANT_MAP (TRC_MEM + 1)
#define TRC_MEM_PAGE_GRANT_UNMAP (TRC_MEM + 2)
@@ -88,6 +93,22 @@
#define TRC_PV_PTWR_EMULATION_PAE (TRC_PV + 12)
/* Indicates that addresses in trace record are 64 bits */
#define TRC_64_FLAG (0x100)
+
+#define TRC_SHADOW_NOT_SHADOW (TRC_SHADOW + 1)
+#define TRC_SHADOW_FAST_PROPAGATE (TRC_SHADOW + 2)
+#define TRC_SHADOW_FAST_MMIO (TRC_SHADOW + 3)
+#define TRC_SHADOW_FALSE_FAST_PATH (TRC_SHADOW + 4)
+#define TRC_SHADOW_MMIO (TRC_SHADOW + 5)
+#define TRC_SHADOW_FIXUP (TRC_SHADOW + 6)
+#define TRC_SHADOW_DOMF_DYING (TRC_SHADOW + 7)
+#define TRC_SHADOW_EMULATE (TRC_SHADOW + 8)
+#define TRC_SHADOW_EMULATE_UNSHADOW_USER (TRC_SHADOW + 9)
+#define TRC_SHADOW_EMULATE_UNSHADOW_EVTINJ (TRC_SHADOW + 10)
+#define TRC_SHADOW_EMULATE_UNSHADOW_UNHANDLED (TRC_SHADOW + 11)
+#define TRC_SHADOW_WRMAP_BF (TRC_SHADOW + 12)
+#define TRC_SHADOW_PREALLOC_UNPIN (TRC_SHADOW + 13)
+#define TRC_SHADOW_RESYNC_FULL (TRC_SHADOW + 14)
+#define TRC_SHADOW_RESYNC_ONLY (TRC_SHADOW + 15)
/* trace events per subclass */
#define TRC_HVM_VMENTRY (TRC_HVM_ENTRYEXIT + 0x01)
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/include/xen/cpuidle.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/xen/cpuidle.h Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,82 @@
+/*
+ * cpuidle.h - xen idle state module derived from Linux
+ *
+ * (C) 2007 Venkatesh Pallipadi <venkatesh.pallipadi@xxxxxxxxx>
+ * Shaohua Li <shaohua.li@xxxxxxxxx>
+ * Adam Belay <abelay@xxxxxxxxxx>
+ * Copyright (C) 2008 Intel Corporation
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#ifndef _XEN_CPUIDLE_H
+#define _XEN_CPUIDLE_H
+
+#define ACPI_PROCESSOR_MAX_POWER 8
+#define CPUIDLE_NAME_LEN 16
+
+struct acpi_processor_cx
+{
+ u8 valid;
+ u8 type;
+ u32 address;
+ u8 space_id;
+ u32 latency;
+ u32 latency_ticks;
+ u32 power;
+ u32 usage;
+ u64 time;
+ u32 target_residency;
+};
+
+struct acpi_processor_flags
+{
+ u8 bm_control:1;
+ u8 bm_check:1;
+ u8 has_cst:1;
+ u8 power_setup_done:1;
+ u8 bm_rld_set:1;
+};
+
+struct acpi_processor_power
+{
+ unsigned int cpu;
+ struct acpi_processor_flags flags;
+ struct acpi_processor_cx *last_state;
+ struct acpi_processor_cx *safe_state;
+ u32 last_residency;
+ void *gdata; /* governor specific data */
+ u32 count;
+ struct acpi_processor_cx states[ACPI_PROCESSOR_MAX_POWER];
+};
+
+struct cpuidle_governor
+{
+ char name[CPUIDLE_NAME_LEN];
+ unsigned int rating;
+
+ int (*enable) (struct acpi_processor_power *dev);
+ void (*disable) (struct acpi_processor_power *dev);
+
+ int (*select) (struct acpi_processor_power *dev);
+ void (*reflect) (struct acpi_processor_power *dev);
+};
+
+extern struct cpuidle_governor *cpuidle_current_governor;
+
+#endif /* _XEN_CPUIDLE_H */
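
A minimal governor sketch against this interface: a hypothetical "always pick
the deepest C-state" policy, assuming select() returns an index into states[]
as the menu governor added by this changeset appears to; registration glue is
omitted:

    #include <xen/cpuidle.h>

    static int deepest_select(struct acpi_processor_power *power)
    {
        return power->count - 1;        /* index of the deepest state */
    }

    static struct cpuidle_governor deepest_governor = {
        .name   = "deepest",
        .rating = 1,
        .select = deepest_select,
    };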
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/include/xen/iommu.h
--- a/xen/include/xen/iommu.h Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/include/xen/iommu.h Fri Sep 12 14:47:40 2008 +0900
@@ -31,6 +31,7 @@ extern int iommu_enabled;
extern int iommu_enabled;
extern int iommu_pv_enabled;
extern int force_iommu;
+extern int iommu_passthrough;
#define domain_hvm_iommu(d) (&d->arch.hvm_domain.hvm_iommu)
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/include/xen/sched.h Fri Sep 12 14:47:40 2008 +0900
@@ -106,8 +106,6 @@ struct vcpu
bool_t fpu_initialised;
/* Has the FPU been used since it was last saved? */
bool_t fpu_dirtied;
- /* Is this VCPU polling any event channels (SCHEDOP_poll)? */
- bool_t is_polling;
/* Initialization completed for this VCPU? */
bool_t is_initialised;
/* Currently running on a CPU? */
@@ -133,6 +131,13 @@ struct vcpu
bool_t paused_for_shutdown;
/* VCPU affinity is temporarily locked from controller changes? */
bool_t affinity_locked;
+
+ /*
+ * > 0: a single port is being polled;
+ * = 0: nothing is being polled (vcpu should be clear in d->poll_mask);
+ * < 0: multiple ports may be being polled.
+ */
+ int poll_evtchn;
unsigned long pause_flags;
atomic_t pause_count;
@@ -209,14 +214,15 @@ struct domain
struct domain *target;
/* Is this guest being debugged by dom0? */
bool_t debugger_attached;
- /* Are any VCPUs polling event channels (SCHEDOP_poll)? */
- bool_t is_polling;
/* Is this guest dying (i.e., a zombie)? */
enum { DOMDYING_alive, DOMDYING_dying, DOMDYING_dead } is_dying;
/* Domain is paused by controller software? */
bool_t is_paused_by_controller;
/* Domain's VCPUs are pinned 1:1 to physical CPUs? */
bool_t is_pinned;
+
+ /* Are any VCPUs polling event channels (SCHEDOP_poll)? */
+ DECLARE_BITMAP(poll_mask, MAX_VIRT_CPUS);
/* Guest has shut down (inc. reason code)? */
spinlock_t shutdown_lock;
@@ -507,6 +513,7 @@ static inline int vcpu_runnable(struct v
atomic_read(&v->domain->pause_count));
}
+void vcpu_unblock(struct vcpu *v);
void vcpu_pause(struct vcpu *v);
void vcpu_pause_nosync(struct vcpu *v);
void domain_pause(struct domain *d);
@@ -517,17 +524,12 @@ void cpu_init(void);
void cpu_init(void);
void vcpu_force_reschedule(struct vcpu *v);
+void cpu_disable_scheduler(void);
int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
int vcpu_lock_affinity(struct vcpu *v, cpumask_t *affinity);
void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity);
void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
-
-static inline void vcpu_unblock(struct vcpu *v)
-{
- if ( test_and_clear_bit(_VPF_blocked, &v->pause_flags) )
- vcpu_wake(v);
-}
#define IS_PRIV(_d) ((_d)->is_privileged)
#define IS_PRIV_FOR(_d, _t) (IS_PRIV(_d) || ((_d)->target && (_d)->target == (_t)))
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/include/xen/trace.h
--- a/xen/include/xen/trace.h Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/include/xen/trace.h Fri Sep 12 14:47:40 2008 +0900
@@ -33,6 +33,8 @@ void init_trace_bufs(void);
/* used to retrieve the physical address of the trace buffers */
int tb_control(struct xen_sysctl_tbuf_op *tbc);
+
+int trace_will_trace_event(u32 event);
void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data);
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/include/xsm/xsm.h
--- a/xen/include/xsm/xsm.h Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/include/xsm/xsm.h Fri Sep 12 14:47:40 2008 +0900
@@ -64,16 +64,17 @@ struct xsm_operations {
int (*getvcpucontext) (struct domain *d);
int (*getvcpuinfo) (struct domain *d);
int (*domain_settime) (struct domain *d);
+ int (*set_target) (struct domain *d, struct domain *e);
int (*tbufcontrol) (void);
int (*readconsole) (uint32_t clear);
int (*sched_id) (void);
int (*setdomainmaxmem) (struct domain *d);
int (*setdomainhandle) (struct domain *d);
int (*setdebugging) (struct domain *d);
- int (*irq_permission) (struct domain *d, uint8_t pirq, uint8_t access);
-    int (*iomem_permission) (struct domain *d, unsigned long mfn,
-                             uint8_t access);
int (*perfcontrol) (void);
+ int (*debug_keys) (void);
+ int (*getcpuinfo) (void);
+ int (*availheap) (void);
int (*evtchn_unbound) (struct domain *d, struct evtchn *chn, domid_t id2);
int (*evtchn_interdomain) (struct domain *d1, struct evtchn *chn1,
@@ -106,13 +107,13 @@ struct xsm_operations {
int (*kexec) (void);
int (*schedop_shutdown) (struct domain *d1, struct domain *d2);
+    int (*add_range) (struct domain *d, char *name, unsigned long s, unsigned long e);
+    int (*remove_range) (struct domain *d, char *name, unsigned long s, unsigned long e);
long (*__do_xsm_op) (XEN_GUEST_HANDLE(xsm_op_t) op);
#ifdef CONFIG_X86
int (*shadow_control) (struct domain *d, uint32_t op);
-    int (*ioport_permission) (struct domain *d, uint32_t ioport,
-                              uint8_t access);
int (*getpageframeinfo) (struct page_info *page);
int (*getmemlist) (struct domain *d);
int (*hypercall_init) (struct domain *d);
@@ -130,13 +131,26 @@ struct xsm_operations {
int (*microcode) (void);
int (*physinfo) (void);
int (*platform_quirk) (uint32_t);
+ int (*firmware_info) (void);
+ int (*acpi_sleep) (void);
+ int (*change_freq) (void);
+ int (*getidletime) (void);
int (*machine_memory_map) (void);
int (*domain_memory_map) (struct domain *d);
- int (*mmu_normal_update) (struct domain *d, intpte_t fpte);
+ int (*mmu_normal_update) (struct domain *d, struct domain *f,
+ intpte_t fpte);
int (*mmu_machphys_update) (struct domain *d, unsigned long mfn);
- int (*update_va_mapping) (struct domain *d, l1_pgentry_t pte);
+ int (*update_va_mapping) (struct domain *d, struct domain *f,
+ l1_pgentry_t pte);
int (*add_to_physmap) (struct domain *d1, struct domain *d2);
int (*remove_from_physmap) (struct domain *d1, struct domain *d2);
+ int (*sendtrigger) (struct domain *d);
+ int (*test_assign_device) (uint32_t machine_bdf);
+ int (*assign_device) (struct domain *d, uint32_t machine_bdf);
+ int (*deassign_device) (struct domain *d, uint32_t machine_bdf);
+ int (*bind_pt_irq) (struct domain *d, struct xen_domctl_bind_pt_irq *bind);
+ int (*pin_mem_cacheattr) (struct domain *d);
+ int (*ext_vcpucontext) (struct domain *d, uint32_t cmd);
#endif
};
@@ -215,6 +229,11 @@ static inline int xsm_domain_settime (st
return xsm_call(domain_settime(d));
}
+static inline int xsm_set_target (struct domain *d, struct domain *e)
+{
+ return xsm_call(set_target(d, e));
+}
+
static inline int xsm_tbufcontrol (void)
{
return xsm_call(tbufcontrol());
@@ -245,21 +264,24 @@ static inline int xsm_setdebugging (stru
return xsm_call(setdebugging(d));
}
-static inline int xsm_irq_permission (struct domain *d, uint8_t pirq,
- uint8_t access)
-{
- return xsm_call(irq_permission(d, pirq, access));
-}
-
-static inline int xsm_iomem_permission (struct domain *d, unsigned long mfn,
- uint8_t access)
-{
- return xsm_call(iomem_permission(d, mfn, access));
-}
-
static inline int xsm_perfcontrol (void)
{
return xsm_call(perfcontrol());
+}
+
+static inline int xsm_debug_keys (void)
+{
+ return xsm_call(debug_keys());
+}
+
+static inline int xsm_availheap (void)
+{
+ return xsm_call(availheap());
+}
+
+static inline int xsm_getcpuinfo (void)
+{
+ return xsm_call(getcpuinfo());
}
static inline int xsm_evtchn_unbound (struct domain *d1, struct evtchn *chn,
@@ -385,6 +407,18 @@ static inline int xsm_schedop_shutdown (
static inline int xsm_schedop_shutdown (struct domain *d1, struct domain *d2)
{
return xsm_call(schedop_shutdown(d1, d2));
+}
+
+static inline int xsm_add_range (struct domain *d, char *name, unsigned long s,
+                                 unsigned long e)
+{
+ return xsm_call(add_range(d, name, s, e));
+}
+
+static inline int xsm_remove_range (struct domain *d, char *name, unsigned long s,
+                                    unsigned long e)
+{
+ return xsm_call(remove_range(d, name, s, e));
}
static inline long __do_xsm_op (XEN_GUEST_HANDLE(xsm_op_t) op)
@@ -413,12 +447,6 @@ static inline int xsm_shadow_control (st
return xsm_call(shadow_control(d, op));
}
-static inline int xsm_ioport_permission (struct domain *d, uint32_t ioport,
- uint8_t access)
-{
- return xsm_call(ioport_permission(d, ioport, access));
-}
-
static inline int xsm_getpageframeinfo (struct page_info *page)
{
return xsm_call(getpageframeinfo(page));
@@ -504,6 +532,26 @@ static inline int xsm_platform_quirk (ui
return xsm_call(platform_quirk(quirk));
}
+static inline int xsm_firmware_info (void)
+{
+ return xsm_call(firmware_info());
+}
+
+static inline int xsm_acpi_sleep (void)
+{
+ return xsm_call(acpi_sleep());
+}
+
+static inline int xsm_change_freq (void)
+{
+ return xsm_call(change_freq());
+}
+
+static inline int xsm_getidletime (void)
+{
+ return xsm_call(getidletime());
+}
+
static inline int xsm_machine_memory_map(void)
{
return xsm_call(machine_memory_map());
@@ -514,9 +562,10 @@ static inline int xsm_domain_memory_map(
return xsm_call(domain_memory_map(d));
}
-static inline int xsm_mmu_normal_update (struct domain *d, intpte_t fpte)
-{
- return xsm_call(mmu_normal_update(d, fpte));
+static inline int xsm_mmu_normal_update (struct domain *d, struct domain *f,
+ intpte_t fpte)
+{
+ return xsm_call(mmu_normal_update(d, f, fpte));
}
static inline int xsm_mmu_machphys_update (struct domain *d, unsigned long mfn)
@@ -524,9 +573,10 @@ static inline int xsm_mmu_machphys_updat
return xsm_call(mmu_machphys_update(d, mfn));
}
-static inline int xsm_update_va_mapping(struct domain *d, l1_pgentry_t pte)
-{
- return xsm_call(update_va_mapping(d, pte));
+static inline int xsm_update_va_mapping(struct domain *d, struct domain *f,
+ l1_pgentry_t pte)
+{
+ return xsm_call(update_va_mapping(d, f, pte));
}
static inline int xsm_add_to_physmap(struct domain *d1, struct domain *d2)
@@ -538,6 +588,42 @@ static inline int xsm_remove_from_physma
{
return xsm_call(remove_from_physmap(d1, d2));
}
+
+static inline int xsm_sendtrigger(struct domain *d)
+{
+ return xsm_call(sendtrigger(d));
+}
+
+static inline int xsm_test_assign_device(uint32_t machine_bdf)
+{
+ return xsm_call(test_assign_device(machine_bdf));
+}
+
+static inline int xsm_assign_device(struct domain *d, uint32_t machine_bdf)
+{
+ return xsm_call(assign_device(d, machine_bdf));
+}
+
+static inline int xsm_deassign_device(struct domain *d, uint32_t machine_bdf)
+{
+ return xsm_call(deassign_device(d, machine_bdf));
+}
+
+static inline int xsm_bind_pt_irq(struct domain *d,
+                                  struct xen_domctl_bind_pt_irq *bind)
+{
+ return xsm_call(bind_pt_irq(d, bind));
+}
+
+static inline int xsm_pin_mem_cacheattr(struct domain *d)
+{
+ return xsm_call(pin_mem_cacheattr(d));
+}
+
+static inline int xsm_ext_vcpucontext(struct domain *d, uint32_t cmd)
+{
+ return xsm_call(ext_vcpucontext(d, cmd));
+}
#endif /* CONFIG_X86 */
#endif /* __XSM_H */
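
For orientation: every wrapper above funnels through xsm_call(), whose
definition lies outside this hunk. Presumably it dispatches through the active
xsm_operations table when XSM is compiled in and collapses to a constant
otherwise; a sketch, not the verbatim macro:

    #ifdef XSM_ENABLE
    #define xsm_call(fn) (xsm_ops->fn)
    #else
    #define xsm_call(fn) (0)
    #endif

so each hook added here (set_target, debug_keys, add_range, ...) costs nothing
in builds without XSM.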
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/xsm/dummy.c
--- a/xen/xsm/dummy.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/xsm/dummy.c Fri Sep 12 14:47:40 2008 +0900
@@ -84,6 +84,11 @@ static int dummy_domain_settime (struct
return 0;
}
+static int dummy_set_target (struct domain *d, struct domain *e)
+{
+ return 0;
+}
+
static int dummy_tbufcontrol (void)
{
return 0;
@@ -114,18 +119,22 @@ static int dummy_setdebugging (struct do
return 0;
}
-static int dummy_irq_permission (struct domain *d, uint8_t pirq, uint8_t access)
-{
- return 0;
-}
-
-static int dummy_iomem_permission (struct domain *d, unsigned long mfn,
- uint8_t access)
-{
- return 0;
-}
-
static int dummy_perfcontrol (void)
+{
+ return 0;
+}
+
+static int dummy_debug_keys (void)
+{
+ return 0;
+}
+
+static int dummy_getcpuinfo (void)
+{
+ return 0;
+}
+
+static int dummy_availheap (void)
{
return 0;
}
@@ -259,18 +268,23 @@ static long dummy___do_xsm_op(XEN_GUEST_
return -ENOSYS;
}
+static int dummy_add_range (struct domain *d, char *name, unsigned long s, unsigned long e)
+{
+ return 0;
+}
+
+static int dummy_remove_range (struct domain *d, char *name, unsigned long s,
+                               unsigned long e)
+{
+ return 0;
+}
+
#ifdef CONFIG_X86
static int dummy_shadow_control (struct domain *d, uint32_t op)
{
return 0;
}
-static int dummy_ioport_permission (struct domain *d, uint32_t ioport,
- uint8_t access)
-{
- return 0;
-}
-
static int dummy_getpageframeinfo (struct page_info *page)
{
return 0;
@@ -356,6 +370,26 @@ static int dummy_platform_quirk (uint32_
return 0;
}
+static int dummy_firmware_info (void)
+{
+ return 0;
+}
+
+static int dummy_acpi_sleep (void)
+{
+ return 0;
+}
+
+static int dummy_change_freq (void)
+{
+ return 0;
+}
+
+static int dummy_getidletime (void)
+{
+ return 0;
+}
+
static int dummy_machine_memory_map (void)
{
return 0;
@@ -366,7 +400,8 @@ static int dummy_domain_memory_map (stru
return 0;
}
-static int dummy_mmu_normal_update (struct domain *d, intpte_t fpte)
+static int dummy_mmu_normal_update (struct domain *d, struct domain *f,
+ intpte_t fpte)
{
return 0;
}
@@ -376,12 +411,48 @@ static int dummy_mmu_machphys_update (st
return 0;
}