# HG changeset patch
# User vh249@xxxxxxxxxxxxxxxxxxxxxxxx
# Node ID de310533c48375f3008a0484c3a770b807aee5c3
# Parent fa660d79f69573459949c847743f6341f466c7d0
upgrade the sparse Linux kernel tree from 2.6.11 to 2.6.12
Signed-off-by: Vincent Hanquez <vincent@xxxxxxxxxxxxx>
diff -r fa660d79f695 -r de310533c483 buildconfigs/mk.linux-2.6-xen0
--- a/buildconfigs/mk.linux-2.6-xen0 Tue Aug 9 15:17:45 2005
+++ b/buildconfigs/mk.linux-2.6-xen0 Tue Aug 9 23:57:17 2005
@@ -2,7 +2,7 @@
OS = linux
LINUX_SERIES = 2.6
-LINUX_VER = 2.6.11
+LINUX_VER = 2.6.12
EXTRAVERSION = xen0
diff -r fa660d79f695 -r de310533c483 buildconfigs/mk.linux-2.6-xenU
--- a/buildconfigs/mk.linux-2.6-xenU Tue Aug 9 15:17:45 2005
+++ b/buildconfigs/mk.linux-2.6-xenU Tue Aug 9 23:57:17 2005
@@ -2,7 +2,7 @@
OS = linux
LINUX_SERIES = 2.6
-LINUX_VER = 2.6.11
+LINUX_VER = 2.6.12
EXTRAVERSION = xenU
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/Kconfig
--- a/linux-2.6-xen-sparse/arch/xen/Kconfig Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/Kconfig Tue Aug 9 23:57:17 2005
@@ -150,3 +150,5 @@
source "crypto/Kconfig"
source "lib/Kconfig"
+
+source "arch/xen/Kconfig.debug"
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig
--- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig Tue Aug 9 23:57:17 2005
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.11-xen0
-# Tue May 3 13:22:55 2005
+# Linux kernel version: 2.6.12-xen0
+# Sat Jul 9 09:19:47 2005
#
CONFIG_XEN=y
CONFIG_ARCH_XEN=y
@@ -31,6 +31,7 @@
CONFIG_BROKEN=y
CONFIG_BROKEN_ON_SMP=y
CONFIG_LOCK_KERNEL=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
#
# General setup
@@ -42,7 +43,6 @@
# CONFIG_BSD_PROCESS_ACCT is not set
CONFIG_SYSCTL=y
# CONFIG_AUDIT is not set
-CONFIG_LOG_BUF_SHIFT=14
CONFIG_HOTPLUG=y
CONFIG_KOBJECT_UEVENT=y
# CONFIG_IKCONFIG is not set
@@ -50,15 +50,18 @@
CONFIG_KALLSYMS=y
# CONFIG_KALLSYMS_ALL is not set
# CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_BASE_FULL=y
CONFIG_FUTEX=y
CONFIG_EPOLL=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_SHMEM=y
CONFIG_CC_ALIGN_FUNCTIONS=0
CONFIG_CC_ALIGN_LABELS=0
CONFIG_CC_ALIGN_LOOPS=0
CONFIG_CC_ALIGN_JUMPS=0
# CONFIG_TINY_SHMEM is not set
+CONFIG_BASE_SMALL=0
#
# Loadable module support
@@ -97,6 +100,7 @@
# CONFIG_MWINCHIPC6 is not set
# CONFIG_MWINCHIP2 is not set
# CONFIG_MWINCHIP3D is not set
+# CONFIG_MGEODEGX1 is not set
# CONFIG_MCYRIXIII is not set
# CONFIG_MVIAC3_2 is not set
# CONFIG_X86_GENERIC is not set
@@ -117,6 +121,7 @@
# CONFIG_SMP is not set
CONFIG_PREEMPT=y
CONFIG_PREEMPT_BKL=y
+# CONFIG_X86_REBOOTFIXUPS is not set
CONFIG_MICROCODE=y
CONFIG_X86_CPUID=y
@@ -137,6 +142,8 @@
CONFIG_PCI_DIRECT=y
CONFIG_PCI_LEGACY_PROC=y
# CONFIG_PCI_NAMES is not set
+# CONFIG_PCI_DEBUG is not set
+CONFIG_ISA_DMA_API=y
CONFIG_ISA=y
# CONFIG_EISA is not set
# CONFIG_MCA is not set
@@ -148,11 +155,6 @@
# CONFIG_PCCARD is not set
#
-# PC-card bridges
-#
-CONFIG_PCMCIA_PROBE=y
-
-#
# PCI Hotplug Support
#
# CONFIG_HOTPLUG_PCI is not set
@@ -160,12 +162,14 @@
#
# Kernel hacking
#
+# CONFIG_PRINTK_TIME is not set
CONFIG_DEBUG_KERNEL=y
CONFIG_EARLY_PRINTK=y
# CONFIG_DEBUG_STACKOVERFLOW is not set
# CONFIG_DEBUG_STACK_USAGE is not set
# CONFIG_DEBUG_SLAB is not set
CONFIG_MAGIC_SYSRQ=y
+CONFIG_LOG_BUF_SHIFT=14
# CONFIG_DEBUG_SPINLOCK is not set
# CONFIG_DEBUG_PAGEALLOC is not set
# CONFIG_DEBUG_INFO is not set
@@ -176,6 +180,7 @@
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_X86_BIOS_REBOOT=y
CONFIG_PC=y
+CONFIG_SECCOMP=y
#
# Executable file formats
@@ -332,7 +337,7 @@
#
# SCSI Transport Attributes
#
-# CONFIG_SCSI_SPI_ATTRS is not set
+CONFIG_SCSI_SPI_ATTRS=y
# CONFIG_SCSI_FC_ATTRS is not set
# CONFIG_SCSI_ISCSI_ATTRS is not set
@@ -409,6 +414,7 @@
# CONFIG_SCSI_QLA2300 is not set
# CONFIG_SCSI_QLA2322 is not set
# CONFIG_SCSI_QLA6312 is not set
+# CONFIG_SCSI_LPFC is not set
# CONFIG_SCSI_SEAGATE is not set
# CONFIG_SCSI_SYM53C416 is not set
# CONFIG_SCSI_DC395x is not set
@@ -442,6 +448,7 @@
CONFIG_DM_SNAPSHOT=y
CONFIG_DM_MIRROR=y
# CONFIG_DM_ZERO is not set
+# CONFIG_DM_MULTIPATH is not set
#
# Fusion MPT device support
@@ -470,7 +477,6 @@
#
CONFIG_PACKET=y
# CONFIG_PACKET_MMAP is not set
-# CONFIG_NETLINK_DEV is not set
CONFIG_UNIX=y
# CONFIG_NET_KEY is not set
CONFIG_INET=y
@@ -650,7 +656,6 @@
# CONFIG_DGRS is not set
# CONFIG_EEPRO100 is not set
CONFIG_E100=y
-# CONFIG_E100_NAPI is not set
# CONFIG_FEALNX is not set
# CONFIG_NATSEMI is not set
CONFIG_NE2K_PCI=y
@@ -683,6 +688,7 @@
# CONFIG_SK98LIN is not set
# CONFIG_VIA_VELOCITY is not set
CONFIG_TIGON3=y
+# CONFIG_BNX2 is not set
#
# Ethernet (10000 Mbit)
@@ -738,19 +744,6 @@
# CONFIG_INPUT_TSDEV is not set
# CONFIG_INPUT_EVDEV is not set
# CONFIG_INPUT_EVBUG is not set
-
-#
-# Input I/O drivers
-#
-# CONFIG_GAMEPORT is not set
-CONFIG_SOUND_GAMEPORT=y
-CONFIG_SERIO=y
-CONFIG_SERIO_I8042=y
-CONFIG_SERIO_SERPORT=y
-# CONFIG_SERIO_CT82C710 is not set
-# CONFIG_SERIO_PCIPS2 is not set
-CONFIG_SERIO_LIBPS2=y
-# CONFIG_SERIO_RAW is not set
#
# Input Device Drivers
@@ -773,6 +766,18 @@
# CONFIG_INPUT_MISC is not set
#
+# Hardware I/O ports
+#
+CONFIG_SERIO=y
+CONFIG_SERIO_I8042=y
+CONFIG_SERIO_SERPORT=y
+# CONFIG_SERIO_CT82C710 is not set
+# CONFIG_SERIO_PCIPS2 is not set
+CONFIG_SERIO_LIBPS2=y
+# CONFIG_SERIO_RAW is not set
+# CONFIG_GAMEPORT is not set
+
+#
# Character devices
#
CONFIG_VT=y
@@ -788,6 +793,7 @@
#
# Non-8250 serial port support
#
+# CONFIG_SERIAL_JSM is not set
CONFIG_UNIX98_PTYS=y
CONFIG_LEGACY_PTYS=y
CONFIG_LEGACY_PTY_COUNT=256
@@ -820,7 +826,6 @@
CONFIG_AGP_AMD=m
CONFIG_AGP_AMD64=m
CONFIG_AGP_INTEL=m
-CONFIG_AGP_INTEL_MCH=m
CONFIG_AGP_NVIDIA=m
CONFIG_AGP_SIS=m
CONFIG_AGP_SWORKS=m
@@ -841,6 +846,11 @@
# CONFIG_HANGCHECK_TIMER is not set
#
+# TPM devices
+#
+# CONFIG_TCG_TPM is not set
+
+#
# I2C support
#
# CONFIG_I2C is not set
@@ -886,6 +896,8 @@
#
# USB support
#
+CONFIG_USB_ARCH_HAS_HCD=y
+CONFIG_USB_ARCH_HAS_OHCI=y
CONFIG_USB=y
# CONFIG_USB_DEBUG is not set
@@ -896,14 +908,14 @@
# CONFIG_USB_BANDWIDTH is not set
# CONFIG_USB_DYNAMIC_MINORS is not set
# CONFIG_USB_OTG is not set
-CONFIG_USB_ARCH_HAS_HCD=y
-CONFIG_USB_ARCH_HAS_OHCI=y
#
# USB Host Controller Drivers
#
# CONFIG_USB_EHCI_HCD is not set
CONFIG_USB_OHCI_HCD=y
+# CONFIG_USB_OHCI_BIG_ENDIAN is not set
+CONFIG_USB_OHCI_LITTLE_ENDIAN=y
CONFIG_USB_UHCI_HCD=y
# CONFIG_USB_SL811_HCD is not set
@@ -940,7 +952,6 @@
#
# CONFIG_USB_MDC800 is not set
# CONFIG_USB_MICROTEK is not set
-# CONFIG_USB_HPUSBSCSI is not set
#
# USB Multimedia devices
@@ -959,6 +970,7 @@
# CONFIG_USB_PEGASUS is not set
# CONFIG_USB_RTL8150 is not set
# CONFIG_USB_USBNET is not set
+CONFIG_USB_MON=y
#
# USB port drivers
@@ -1174,6 +1186,7 @@
# CONFIG_CRYPTO_SHA256 is not set
# CONFIG_CRYPTO_SHA512 is not set
# CONFIG_CRYPTO_WP512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
CONFIG_CRYPTO_DES=m
# CONFIG_CRYPTO_BLOWFISH is not set
# CONFIG_CRYPTO_TWOFISH is not set
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig
--- a/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig Tue Aug 9 23:57:17 2005
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.11-xenU
-# Wed Apr 13 23:18:37 2005
+# Linux kernel version: 2.6.12-xenU
+# Sun Jul 10 17:32:04 2005
#
CONFIG_XEN=y
CONFIG_ARCH_XEN=y
@@ -28,6 +28,7 @@
CONFIG_CLEAN_COMPILE=y
CONFIG_BROKEN_ON_SMP=y
CONFIG_LOCK_KERNEL=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
#
# General setup
@@ -39,23 +40,26 @@
# CONFIG_BSD_PROCESS_ACCT is not set
CONFIG_SYSCTL=y
# CONFIG_AUDIT is not set
-CONFIG_LOG_BUF_SHIFT=14
CONFIG_HOTPLUG=y
CONFIG_KOBJECT_UEVENT=y
# CONFIG_IKCONFIG is not set
+# CONFIG_CPUSETS is not set
# CONFIG_EMBEDDED is not set
CONFIG_KALLSYMS=y
# CONFIG_KALLSYMS_ALL is not set
# CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_BASE_FULL=y
CONFIG_FUTEX=y
CONFIG_EPOLL=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_SHMEM=y
CONFIG_CC_ALIGN_FUNCTIONS=0
CONFIG_CC_ALIGN_LABELS=0
CONFIG_CC_ALIGN_LOOPS=0
CONFIG_CC_ALIGN_JUMPS=0
# CONFIG_TINY_SHMEM is not set
+CONFIG_BASE_SMALL=0
#
# Loadable module support
@@ -94,6 +98,7 @@
# CONFIG_MWINCHIPC6 is not set
# CONFIG_MWINCHIP2 is not set
# CONFIG_MWINCHIP3D is not set
+# CONFIG_MGEODEGX1 is not set
# CONFIG_MCYRIXIII is not set
# CONFIG_MVIAC3_2 is not set
# CONFIG_X86_GENERIC is not set
@@ -114,6 +119,7 @@
# CONFIG_SMP is not set
CONFIG_PREEMPT=y
CONFIG_PREEMPT_BKL=y
+# CONFIG_X86_REBOOTFIXUPS is not set
CONFIG_X86_CPUID=y
#
@@ -144,6 +150,8 @@
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_X86_BIOS_REBOOT=y
CONFIG_PC=y
+CONFIG_SECCOMP=y
+CONFIG_EARLY_PRINTK=y
#
# Executable file formats
@@ -239,7 +247,6 @@
#
CONFIG_PACKET=y
# CONFIG_PACKET_MMAP is not set
-# CONFIG_NETLINK_DEV is not set
CONFIG_UNIX=y
# CONFIG_NET_KEY is not set
CONFIG_INET=y
@@ -506,6 +513,7 @@
# CONFIG_CRYPTO_SHA256 is not set
# CONFIG_CRYPTO_SHA512 is not set
# CONFIG_CRYPTO_WP512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
# CONFIG_CRYPTO_DES is not set
# CONFIG_CRYPTO_BLOWFISH is not set
# CONFIG_CRYPTO_TWOFISH is not set
@@ -534,3 +542,27 @@
# CONFIG_CRC32 is not set
CONFIG_LIBCRC32C=m
CONFIG_ZLIB_INFLATE=y
+
+#
+# Kernel hacking
+#
+# CONFIG_PRINTK_TIME is not set
+CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_LOG_BUF_SHIFT=14
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_PREEMPT is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_HIGHMEM is not set
+CONFIG_DEBUG_BUGVERBOSE=y
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_FRAME_POINTER is not set
+# CONFIG_DEBUG_STACKOVERFLOW is not set
+# CONFIG_KPROBES is not set
+# CONFIG_DEBUG_STACK_USAGE is not set
+# CONFIG_DEBUG_PAGEALLOC is not set
+# CONFIG_4KSTACKS is not set
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/Kconfig
--- a/linux-2.6-xen-sparse/arch/xen/i386/Kconfig Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/Kconfig Tue Aug 9 23:57:17 2005
@@ -65,6 +65,7 @@
- "Winchip-C6" for original IDT Winchip.
- "Winchip-2" for IDT Winchip 2.
- "Winchip-2A" for IDT Winchips with 3dNow! capabilities.
+ - "GeodeGX1" for Geode GX1 (Cyrix MediaGX).
- "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3.
- "VIA C3-2 for VIA C3-2 "Nehemiah" (model 9 and above).
@@ -191,6 +192,11 @@
and alignment reqirements. Also enable out of order memory
stores for this CPU, which can increase performance of some
operations.
+
+config MGEODEGX1
+ bool "GeodeGX1"
+ help
+ Select this for a Geode GX1 (Cyrix MediaGX) chip.
config MCYRIXIII
bool "CyrixIII/VIA-C3"
@@ -240,7 +246,7 @@
int
default "7" if MPENTIUM4 || X86_GENERIC
default "4" if X86_ELAN || M486 || M386
- default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2
+ default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODEGX1
default "6" if MK7 || MK8 || MPENTIUMM
config RWSEM_GENERIC_SPINLOCK
@@ -259,7 +265,7 @@
config X86_PPRO_FENCE
bool
- depends on M686 || M586MMX || M586TSC || M586 || M486 || M386
+ depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODEGX1
default y
config X86_F00F_BUG
@@ -289,7 +295,7 @@
config X86_ALIGNMENT_16
bool
- depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2
+ depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1
default y
config X86_GOOD_APIC
@@ -415,7 +421,7 @@
#config X86_TSC
# bool
-# depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2) && !X86_NUMAQ
+# depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1) && !X86_NUMAQ
# default y
#config X86_MCE
@@ -455,6 +461,24 @@
# Enabling this feature will cause a message to be printed when the P4
# enters thermal throttling.
+config X86_REBOOTFIXUPS
+ bool "Enable X86 board specific fixups for reboot"
+ depends on X86
+ default n
+ ---help---
+ This enables chipset and/or board specific fixups to be done
+ in order to get reboot to work correctly. This is only needed on
+ some combinations of hardware and BIOS. The symptom, for which
+ this config is intended, is when reboot ends with a stalled/hung
+ system.
+
+ Currently, the only fixup is for the Geode GX1/CS5530A/TROM2.1
+ combination.
+
+ Say Y if you want to enable the fixup. Currently, it's safe to
+ enable this option even if you don't need it.
+ Say N otherwise.
+
config MICROCODE
tristate "/dev/cpu/microcode - Intel IA32 CPU microcode support"
depends on XEN_PRIVILEGED_GUEST
@@ -578,6 +602,16 @@
config HAVE_ARCH_BOOTMEM_NODE
bool
depends on NUMA
+ default y
+
+config HAVE_MEMORY_PRESENT
+ bool
+ depends on DISCONTIGMEM
+ default y
+
+config NEED_NODE_MEMMAP_SIZE
+ bool
+ depends on DISCONTIGMEM
default y
#config HIGHPTE
@@ -673,13 +707,18 @@
config X86_LOCAL_APIC
bool
- depends on (X86_VISWS || SMP) && !X86_VOYAGER
+ depends on XEN_PRIVILEGED_GUEST && (X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER))
default y
config X86_IO_APIC
bool
- depends on SMP && !(X86_VISWS || X86_VOYAGER)
- default y
+ depends on XEN_PRIVILEGED_GUEST && (X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER)))
+ default y
+
+config X86_VISWS_APIC
+ bool
+ depends on X86_VISWS
+ default y
config PCI
bool "PCI support" if !X86_VISWS
@@ -748,6 +787,10 @@
source "drivers/pci/Kconfig"
+config ISA_DMA_API
+ bool
+ default y
+
config ISA
bool "ISA support"
depends on !(X86_VOYAGER || X86_VISWS)
@@ -777,17 +820,13 @@
source "drivers/eisa/Kconfig"
config MCA
- bool "MCA support"
- depends on !(X86_VISWS || X86_VOYAGER)
+ bool "MCA support" if !(X86_VISWS || X86_VOYAGER)
+ default y if X86_VOYAGER
help
MicroChannel Architecture is found in some IBM PS/2 machines and
laptops. It is a bus system similar to PCI or ISA. See
<file:Documentation/mca.txt> (and especially the web page given
there) before attempting to build an MCA bus kernel.
-
-config MCA
- depends on X86_VOYAGER
- default y if X86_VOYAGER
source "drivers/mca/Kconfig"
@@ -971,4 +1010,21 @@
depends on X86 && !EMBEDDED
default y
+config SECCOMP
+ bool "Enable seccomp to safely compute untrusted bytecode"
+ depends on PROC_FS
+ default y
+ help
+ This kernel feature is useful for number crunching applications
+ that may need to compute untrusted bytecode during their
+ execution. By using pipes or other transports made available to
+ the process as file descriptors supporting the read/write
+ syscalls, it's possible to isolate those applications in
+ their own address space using seccomp. Once seccomp is
+ enabled via /proc/<pid>/seccomp, it cannot be disabled
+ and the task is only allowed to execute a few safe syscalls
+ defined by each seccomp mode.
+
+ If unsure, say Y. Only embedded should say N here.
+
endmenu
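
The SECCOMP help text above describes the 2.6.12-era interface: a task enters the sandbox by writing to /proc/<pid>/seccomp, after which only read, write, exit and sigreturn remain available. A minimal user-space sketch of that sequence (illustrative only, not part of the patch):

#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Open the control file before entering the sandbox; open()
	 * is not among the syscalls allowed afterwards. */
	FILE *f = fopen("/proc/self/seccomp", "w");
	if (!f)
		return 1;
	fputs("1", f);
	fflush(f);	/* mode 1 is active once this write reaches the kernel */

	write(1, "sandboxed\n", 10);	/* still permitted */
	_exit(0);	/* terminate -- anything outside the whitelist is fatal anyway */
}
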
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/i386/Makefile Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/Makefile Tue Aug 9 23:57:17 2005
@@ -14,6 +14,8 @@
# 19990713 Artur Skawina <skawina@xxxxxxxxxxxxx>
# Added '-march' and '-mpreferred-stack-boundary' support
#
+# 20050320 Kianusch Sayah Karadji <kianusch@xxxxxxxxxxx>
+# Added support for GEODE CPU
XENARCH := $(subst ",,$(CONFIG_XENARCH))
@@ -55,6 +57,9 @@
# AMD Elan support
cflags-$(CONFIG_X86_ELAN) += -march=i486
+
+# Geode GX1 support
+cflags-$(CONFIG_MGEODEGX1) += $(call cc-option,-march=pentium-mmx,-march=i486)
# -mregparm=3 works ok on gcc-3.0 and later
#
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile Tue Aug 9 23:57:17 2005
@@ -32,6 +32,7 @@
c-obj-$(CONFIG_X86_MPPARSE) += mpparse.o
c-obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
c-obj-$(CONFIG_X86_IO_APIC) += io_apic.o
+c-obj-$(CONFIG_X86_REBOOTFIXUPS)+= reboot_fixups.o
c-obj-$(CONFIG_X86_NUMAQ) += numaq.o
c-obj-$(CONFIG_X86_SUMMIT_NUMA) += summit.o
c-obj-$(CONFIG_MODULES) += module.o
@@ -51,11 +52,11 @@
# Note: kbuild does not track this dependency due to usage of .incbin
$(obj)/vsyscall.o: $(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so
targets += $(foreach F,int80 sysenter,vsyscall-$F.o vsyscall-$F.so)
-targets += vsyscall.lds
+targets += vsyscall-note.o vsyscall.lds
# The DSO images are built using a special linker script.
quiet_cmd_syscall = SYSCALL $@
- cmd_syscall = $(CC) -nostdlib -m32 $(SYSCFLAGS_$(@F)) \
+ cmd_syscall = $(CC) -m elf_i386 -nostdlib $(SYSCFLAGS_$(@F)) \
-Wl,-T,$(filter-out FORCE,$^) -o $@
export CPPFLAGS_vsyscall.lds += -P -C -U$(ARCH)
@@ -65,7 +66,8 @@
SYSCFLAGS_vsyscall-int80.so = $(vsyscall-flags)
$(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so: \
-$(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE
+$(obj)/vsyscall-%.so: $(src)/vsyscall.lds \
+ $(obj)/vsyscall-%.o FORCE
$(call if_changed,syscall)
# We also create a special relocatable object that should mirror the symbol
@@ -76,17 +78,20 @@
$(obj)/built-in.o: ld_flags += -R $(obj)/vsyscall-syms.o
SYSCFLAGS_vsyscall-syms.o = -r
-$(obj)/vsyscall-syms.o: $(src)/vsyscall.lds $(obj)/vsyscall-sysenter.o FORCE
+$(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \
+ $(obj)/vsyscall-sysenter.o FORCE
$(call if_changed,syscall)
c-link := init_task.o
-s-link := vsyscall-int80.o vsyscall-sysenter.o vsyscall-sigreturn.o vsyscall.lds.o
+s-link := vsyscall-int80.o vsyscall-sysenter.o vsyscall-sigreturn.o vsyscall.lds.o syscall_table.o
$(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)) $(patsubst %.o,$(obj)/%.S,$(s-obj-y) $(s-link)):
@ln -fsn $(srctree)/arch/i386/kernel/$(notdir $@) $@
$(obj)/vsyscall-int80.S: $(obj)/vsyscall-sigreturn.S
+$(obj)/entry.o: $(src)/entry.S $(src)/syscall_table.S
+
obj-y += $(c-obj-y) $(s-obj-y)
clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link))
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/common.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/common.c Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/common.c Tue Aug 9 23:57:17 2005
@@ -22,6 +22,9 @@
DEFINE_PER_CPU(struct desc_struct, cpu_gdt_table[GDT_ENTRIES]);
EXPORT_PER_CPU_SYMBOL(cpu_gdt_table);
+DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
+EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
+
static int cachesize_override __initdata = -1;
static int disable_x86_fxsr __initdata = 0;
static int disable_x86_serial_nr __initdata = 1;
@@ -202,7 +205,7 @@
/* Probe for the CPUID instruction */
-int __init have_cpuid_p(void)
+static int __init have_cpuid_p(void)
{
return flag_is_changeable_p(X86_EFLAGS_ID);
}
@@ -210,7 +213,7 @@
/* Do minimum CPU detection early.
Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
The others are not touched to avoid unwanted side effects. */
-void __init early_cpu_detect(void)
+static void __init early_cpu_detect(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
@@ -243,6 +246,10 @@
}
early_intel_workaround(c);
+
+#ifdef CONFIG_X86_HT
+ phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff;
+#endif
}
void __init generic_identify(struct cpuinfo_x86 * c)
@@ -431,25 +438,15 @@
mcheck_init(c);
#endif
}
-/*
- * Perform early boot up checks for a valid TSC. See arch/i386/kernel/time.c
- */
-
-void __init dodgy_tsc(void)
-{
- if (( boot_cpu_data.x86_vendor == X86_VENDOR_CYRIX ) ||
- ( boot_cpu_data.x86_vendor == X86_VENDOR_NSC ))
- cpu_devs[X86_VENDOR_CYRIX]->c_init(&boot_cpu_data);
-}
#ifdef CONFIG_X86_HT
void __init detect_ht(struct cpuinfo_x86 *c)
{
u32 eax, ebx, ecx, edx;
- int index_lsb, index_msb, tmp;
+ int index_msb, tmp;
int cpu = smp_processor_id();
- if (!cpu_has(c, X86_FEATURE_HT))
+ if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
return;
cpuid(1, &eax, &ebx, &ecx, &edx);
@@ -458,7 +455,6 @@
if (smp_num_siblings == 1) {
printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
} else if (smp_num_siblings > 1 ) {
- index_lsb = 0;
index_msb = 31;
if (smp_num_siblings > NR_CPUS) {
@@ -467,21 +463,34 @@
return;
}
tmp = smp_num_siblings;
- while ((tmp & 1) == 0) {
- tmp >>=1 ;
- index_lsb++;
- }
- tmp = smp_num_siblings;
while ((tmp & 0x80000000 ) == 0) {
tmp <<=1 ;
index_msb--;
}
- if (index_lsb != index_msb )
+ if (smp_num_siblings & (smp_num_siblings - 1))
index_msb++;
phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
phys_proc_id[cpu]);
+
+ smp_num_siblings = smp_num_siblings / c->x86_num_cores;
+
+ tmp = smp_num_siblings;
+ index_msb = 31;
+ while ((tmp & 0x80000000) == 0) {
+ tmp <<=1 ;
+ index_msb--;
+ }
+
+ if (smp_num_siblings & (smp_num_siblings - 1))
+ index_msb++;
+
+ cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
+
+ if (c->x86_num_cores > 1)
+ printk(KERN_INFO "CPU: Processor Core ID: %d\n",
+ cpu_core_id[cpu]);
}
}
#endif
@@ -528,7 +537,6 @@
extern int rise_init_cpu(void);
extern int nexgen_init_cpu(void);
extern int umc_init_cpu(void);
-void early_cpu_detect(void);
void __init early_cpu_init(void)
{
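
The reworked detect_ht() above replaces the old LSB/MSB double scan with a single round-up-to-power-of-two step when deriving how many APIC ID bits the siblings occupy. A standalone restatement of that step (illustrative helper, not from the patch; assumes n >= 1):

/* Bit width needed to encode n siblings: effectively ceil(log2(n)). */
static int index_msb_of(unsigned int n)
{
	unsigned int tmp = n;
	int index_msb = 31;

	while ((tmp & 0x80000000u) == 0) {
		tmp <<= 1;	/* slide the highest set bit up to bit 31 */
		index_msb--;
	}
	if (n & (n - 1))	/* not a power of two: round up */
		index_msb++;
	return index_msb;
}

For example, 2 siblings need 1 bit and 3 or 4 siblings need 2, matching the shift widths handed to phys_pkg_id() in detect_ht().
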
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c Tue Aug 9 23:57:17 2005
@@ -31,7 +31,7 @@
unsigned int num_var_ranges;
unsigned int *usage_table;
-void __init set_num_var_ranges(void)
+static void __init set_num_var_ranges(void)
{
dom0_op_t op;
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S Tue Aug 9 23:57:17 2005
@@ -595,8 +595,6 @@
xorl %edx,%edx # error code 0
movl %esp,%eax # pt_regs pointer
call do_debug
- testl %eax,%eax
- jnz restore_all
jmp ret_from_exception
#if 0 /* XEN */
@@ -651,8 +649,6 @@
xorl %edx,%edx # zero error code
movl %esp,%eax # pt_regs pointer
call do_int3
- testl %eax,%eax
- jnz restore_all
jmp ret_from_exception
ENTRY(overflow)
@@ -736,296 +732,6 @@
pushl $do_fixup_4gb_segment
jmp error_code
-.data
-ENTRY(sys_call_table)
- .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */
- .long sys_exit
- .long sys_fork
- .long sys_read
- .long sys_write
- .long sys_open /* 5 */
- .long sys_close
- .long sys_waitpid
- .long sys_creat
- .long sys_link
- .long sys_unlink /* 10 */
- .long sys_execve
- .long sys_chdir
- .long sys_time
- .long sys_mknod
- .long sys_chmod /* 15 */
- .long sys_lchown16
- .long sys_ni_syscall /* old break syscall holder */
- .long sys_stat
- .long sys_lseek
- .long sys_getpid /* 20 */
- .long sys_mount
- .long sys_oldumount
- .long sys_setuid16
- .long sys_getuid16
- .long sys_stime /* 25 */
- .long sys_ptrace
- .long sys_alarm
- .long sys_fstat
- .long sys_pause
- .long sys_utime /* 30 */
- .long sys_ni_syscall /* old stty syscall holder */
- .long sys_ni_syscall /* old gtty syscall holder */
- .long sys_access
- .long sys_nice
- .long sys_ni_syscall /* 35 - old ftime syscall holder */
- .long sys_sync
- .long sys_kill
- .long sys_rename
- .long sys_mkdir
- .long sys_rmdir /* 40 */
- .long sys_dup
- .long sys_pipe
- .long sys_times
- .long sys_ni_syscall /* old prof syscall holder */
- .long sys_brk /* 45 */
- .long sys_setgid16
- .long sys_getgid16
- .long sys_signal
- .long sys_geteuid16
- .long sys_getegid16 /* 50 */
- .long sys_acct
- .long sys_umount /* recycled never used phys() */
- .long sys_ni_syscall /* old lock syscall holder */
- .long sys_ioctl
- .long sys_fcntl /* 55 */
- .long sys_ni_syscall /* old mpx syscall holder */
- .long sys_setpgid
- .long sys_ni_syscall /* old ulimit syscall holder */
- .long sys_olduname
- .long sys_umask /* 60 */
- .long sys_chroot
- .long sys_ustat
- .long sys_dup2
- .long sys_getppid
- .long sys_getpgrp /* 65 */
- .long sys_setsid
- .long sys_sigaction
- .long sys_sgetmask
- .long sys_ssetmask
- .long sys_setreuid16 /* 70 */
- .long sys_setregid16
- .long sys_sigsuspend
- .long sys_sigpending
- .long sys_sethostname
- .long sys_setrlimit /* 75 */
- .long sys_old_getrlimit
- .long sys_getrusage
- .long sys_gettimeofday
- .long sys_settimeofday
- .long sys_getgroups16 /* 80 */
- .long sys_setgroups16
- .long old_select
- .long sys_symlink
- .long sys_lstat
- .long sys_readlink /* 85 */
- .long sys_uselib
- .long sys_swapon
- .long sys_reboot
- .long old_readdir
- .long old_mmap /* 90 */
- .long sys_munmap
- .long sys_truncate
- .long sys_ftruncate
- .long sys_fchmod
- .long sys_fchown16 /* 95 */
- .long sys_getpriority
- .long sys_setpriority
- .long sys_ni_syscall /* old profil syscall holder */
- .long sys_statfs
- .long sys_fstatfs /* 100 */
- .long sys_ioperm
- .long sys_socketcall
- .long sys_syslog
- .long sys_setitimer
- .long sys_getitimer /* 105 */
- .long sys_newstat
- .long sys_newlstat
- .long sys_newfstat
- .long sys_uname
- .long sys_iopl /* 110 */
- .long sys_vhangup
- .long sys_ni_syscall /* old "idle" system call */
- .long sys_vm86old
- .long sys_wait4
- .long sys_swapoff /* 115 */
- .long sys_sysinfo
- .long sys_ipc
- .long sys_fsync
- .long sys_sigreturn
- .long sys_clone /* 120 */
- .long sys_setdomainname
- .long sys_newuname
- .long sys_modify_ldt
- .long sys_adjtimex
- .long sys_mprotect /* 125 */
- .long sys_sigprocmask
- .long sys_ni_syscall /* old "create_module" */
- .long sys_init_module
- .long sys_delete_module
- .long sys_ni_syscall /* 130: old "get_kernel_syms" */
- .long sys_quotactl
- .long sys_getpgid
- .long sys_fchdir
- .long sys_bdflush
- .long sys_sysfs /* 135 */
- .long sys_personality
- .long sys_ni_syscall /* reserved for afs_syscall */
- .long sys_setfsuid16
- .long sys_setfsgid16
- .long sys_llseek /* 140 */
- .long sys_getdents
- .long sys_select
- .long sys_flock
- .long sys_msync
- .long sys_readv /* 145 */
- .long sys_writev
- .long sys_getsid
- .long sys_fdatasync
- .long sys_sysctl
- .long sys_mlock /* 150 */
- .long sys_munlock
- .long sys_mlockall
- .long sys_munlockall
- .long sys_sched_setparam
- .long sys_sched_getparam /* 155 */
- .long sys_sched_setscheduler
- .long sys_sched_getscheduler
- .long sys_sched_yield
- .long sys_sched_get_priority_max
- .long sys_sched_get_priority_min /* 160 */
- .long sys_sched_rr_get_interval
- .long sys_nanosleep
- .long sys_mremap
- .long sys_setresuid16
- .long sys_getresuid16 /* 165 */
- .long sys_vm86
- .long sys_ni_syscall /* Old sys_query_module */
- .long sys_poll
- .long sys_nfsservctl
- .long sys_setresgid16 /* 170 */
- .long sys_getresgid16
- .long sys_prctl
- .long sys_rt_sigreturn
- .long sys_rt_sigaction
- .long sys_rt_sigprocmask /* 175 */
- .long sys_rt_sigpending
- .long sys_rt_sigtimedwait
- .long sys_rt_sigqueueinfo
- .long sys_rt_sigsuspend
- .long sys_pread64 /* 180 */
- .long sys_pwrite64
- .long sys_chown16
- .long sys_getcwd
- .long sys_capget
- .long sys_capset /* 185 */
- .long sys_sigaltstack
- .long sys_sendfile
- .long sys_ni_syscall /* reserved for streams1 */
- .long sys_ni_syscall /* reserved for streams2 */
- .long sys_vfork /* 190 */
- .long sys_getrlimit
- .long sys_mmap2
- .long sys_truncate64
- .long sys_ftruncate64
- .long sys_stat64 /* 195 */
- .long sys_lstat64
- .long sys_fstat64
- .long sys_lchown
- .long sys_getuid
- .long sys_getgid /* 200 */
- .long sys_geteuid
- .long sys_getegid
- .long sys_setreuid
- .long sys_setregid
- .long sys_getgroups /* 205 */
- .long sys_setgroups
- .long sys_fchown
- .long sys_setresuid
- .long sys_getresuid
- .long sys_setresgid /* 210 */
- .long sys_getresgid
- .long sys_chown
- .long sys_setuid
- .long sys_setgid
- .long sys_setfsuid /* 215 */
- .long sys_setfsgid
- .long sys_pivot_root
- .long sys_mincore
- .long sys_madvise
- .long sys_getdents64 /* 220 */
- .long sys_fcntl64
- .long sys_ni_syscall /* reserved for TUX */
- .long sys_ni_syscall
- .long sys_gettid
- .long sys_readahead /* 225 */
- .long sys_setxattr
- .long sys_lsetxattr
- .long sys_fsetxattr
- .long sys_getxattr
- .long sys_lgetxattr /* 230 */
- .long sys_fgetxattr
- .long sys_listxattr
- .long sys_llistxattr
- .long sys_flistxattr
- .long sys_removexattr /* 235 */
- .long sys_lremovexattr
- .long sys_fremovexattr
- .long sys_tkill
- .long sys_sendfile64
- .long sys_futex /* 240 */
- .long sys_sched_setaffinity
- .long sys_sched_getaffinity
- .long sys_set_thread_area
- .long sys_get_thread_area
- .long sys_io_setup /* 245 */
- .long sys_io_destroy
- .long sys_io_getevents
- .long sys_io_submit
- .long sys_io_cancel
- .long sys_fadvise64 /* 250 */
- .long sys_ni_syscall
- .long sys_exit_group
- .long sys_lookup_dcookie
- .long sys_epoll_create
- .long sys_epoll_ctl /* 255 */
- .long sys_epoll_wait
- .long sys_remap_file_pages
- .long sys_set_tid_address
- .long sys_timer_create
- .long sys_timer_settime /* 260 */
- .long sys_timer_gettime
- .long sys_timer_getoverrun
- .long sys_timer_delete
- .long sys_clock_settime
- .long sys_clock_gettime /* 265 */
- .long sys_clock_getres
- .long sys_clock_nanosleep
- .long sys_statfs64
- .long sys_fstatfs64
- .long sys_tgkill /* 270 */
- .long sys_utimes
- .long sys_fadvise64_64
- .long sys_ni_syscall /* sys_vserver */
- .long sys_mbind
- .long sys_get_mempolicy
- .long sys_set_mempolicy
- .long sys_mq_open
- .long sys_mq_unlink
- .long sys_mq_timedsend
- .long sys_mq_timedreceive /* 280 */
- .long sys_mq_notify
- .long sys_mq_getsetattr
- .long sys_ni_syscall /* reserved for kexec */
- .long sys_waitid
- .long sys_ni_syscall /* 285 */ /* available */
- .long sys_add_key
- .long sys_request_key
- .long sys_keyctl
+#include "syscall_table.S"
syscall_table_size=(.-sys_call_table)
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c Tue Aug 9 23:57:17 2005
@@ -99,6 +99,11 @@
EXPORT_SYMBOL(__get_user_2);
EXPORT_SYMBOL(__get_user_4);
+EXPORT_SYMBOL(__put_user_1);
+EXPORT_SYMBOL(__put_user_2);
+EXPORT_SYMBOL(__put_user_4);
+EXPORT_SYMBOL(__put_user_8);
+
EXPORT_SYMBOL(strpbrk);
EXPORT_SYMBOL(strstr);
@@ -114,7 +119,6 @@
EXPORT_SYMBOL(dma_free_coherent);
#ifdef CONFIG_PCI
-EXPORT_SYMBOL(pcibios_penalize_isa_irq);
EXPORT_SYMBOL(pci_mem_start);
#endif
@@ -146,7 +150,6 @@
/* TLB flushing */
EXPORT_SYMBOL(flush_tlb_page);
-EXPORT_SYMBOL_GPL(flush_tlb_all);
#endif
#ifdef CONFIG_X86_IO_APIC
@@ -168,10 +171,6 @@
EXPORT_SYMBOL_GPL(set_nmi_callback);
EXPORT_SYMBOL_GPL(unset_nmi_callback);
-#undef memcmp
-extern int memcmp(const void *,const void *,__kernel_size_t);
-EXPORT_SYMBOL(memcmp);
-
EXPORT_SYMBOL(register_die_notifier);
#ifdef CONFIG_HAVE_DEC_LOCK
EXPORT_SYMBOL(_atomic_dec_and_lock);
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c Tue Aug 9 23:57:17 2005
@@ -86,7 +86,7 @@
dma_addr_t *dma_handle)
#else
void *dma_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, int gfp)
+ dma_addr_t *dma_handle, unsigned int __nocast gfp)
#endif
{
void *ret;
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c Tue Aug 9 23:57:17 2005
@@ -36,6 +36,7 @@
#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/ptrace.h>
+#include <linux/random.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -57,7 +58,7 @@
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
-int hlt_counter;
+static int hlt_counter;
unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);
@@ -74,7 +75,7 @@
* Powermanagement idle function, if any..
*/
void (*pm_idle)(void);
-static cpumask_t cpu_idle_map;
+static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
void disable_hlt(void)
{
@@ -120,11 +121,11 @@
while (1) {
while (!need_resched()) {
- if (cpu_isset(cpu, cpu_idle_map))
- cpu_clear(cpu, cpu_idle_map);
+ if (__get_cpu_var(cpu_idle_state))
+ __get_cpu_var(cpu_idle_state) = 0;
rmb();
- irq_stat[cpu].idle_timestamp = jiffies;
+ __get_cpu_var(irq_stat).idle_timestamp = jiffies;
xen_idle();
}
schedule();
@@ -133,16 +134,28 @@
void cpu_idle_wait(void)
{
- int cpu;
+ unsigned int cpu, this_cpu = get_cpu();
cpumask_t map;
- for_each_online_cpu(cpu)
- cpu_set(cpu, cpu_idle_map);
+ set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
+ put_cpu();
+
+ cpus_clear(map);
+ for_each_online_cpu(cpu) {
+ per_cpu(cpu_idle_state, cpu) = 1;
+ cpu_set(cpu, map);
+ }
+
+ __get_cpu_var(cpu_idle_state) = 0;
wmb();
do {
ssleep(1);
- cpus_and(map, cpu_idle_map, cpu_online_map);
+ for_each_online_cpu(cpu) {
+ if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
+ cpu_clear(cpu, map);
+ }
+ cpus_and(map, map, cpu_online_map);
} while (!cpus_empty(map));
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);
@@ -286,6 +299,17 @@
unsigned long eflags;
childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
+ /*
+ * The below -8 is to reserve 8 bytes on top of the ring0 stack.
+ * This is necessary to guarantee that the entire "struct pt_regs"
+ * is accessible even if the CPU hasn't stored the SS/ESP registers
+ * on the stack (interrupt gate does not save these registers
+ * when switching to the same priv ring).
+ * Therefore beware: accessing the xss/esp fields of the
+ * "struct pt_regs" is possible, but they may contain the
+ * completely wrong values.
+ */
+ childregs = (struct pt_regs *) ((unsigned long) childregs - 8);
*childregs = *regs;
childregs->eax = 0;
childregs->esp = esp;
@@ -439,12 +463,6 @@
*/
tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
}
-/*
- * This special macro can be used to load a debugging register
- */
-#define loaddebug(thread,register) \
- HYPERVISOR_set_debugreg((register), \
- (thread->debugreg[register]))
/*
* switch_to(x,yn) should switch tasks from x to y.
@@ -777,3 +795,9 @@
return 0;
}
+unsigned long arch_align_stack(unsigned long sp)
+{
+ if (randomize_va_space)
+ sp -= get_random_int() % 8192;
+ return sp & ~0xf;
+}
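
The new arch_align_stack() hooks the randomize_va_space toggle into process setup: when enabled it slides the starting user stack down by up to 8 KB of entropy, then restores 16-byte alignment. A condensed restatement (hypothetical helper; the real code draws rnd from get_random_int()):

static unsigned long align_stack_sketch(unsigned long sp, unsigned int rnd)
{
	sp -= rnd % 8192;	/* drop by 0..8191 bytes of entropy */
	return sp & ~0xfUL;	/* round down to a 16-byte boundary */
}

Masking after the subtraction keeps the ABI alignment guarantee no matter which random offset was applied.
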
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c Tue Aug 9 23:57:17 2005
@@ -40,6 +40,7 @@
#include <linux/efi.h>
#include <linux/init.h>
#include <linux/edd.h>
+#include <linux/nodemask.h>
#include <linux/kernel.h>
#include <linux/notifier.h>
#include <video/edid.h>
@@ -80,7 +81,6 @@
struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 0, 1, 0, -1 };
unsigned long mmu_cr4_features;
-EXPORT_SYMBOL_GPL(mmu_cr4_features);
#ifdef CONFIG_ACPI_INTERPRETER
int acpi_disabled = 0;
@@ -122,8 +122,6 @@
struct edid_info edid_info;
struct ist_info ist_info;
struct e820map e820;
-
-unsigned char aux_device_present;
extern void early_cpu_init(void);
extern void dmi_scan_machine(void);
@@ -454,10 +452,10 @@
struct e820entry *pbios; /* pointer to original bios entry */
unsigned long long addr; /* address for this change point */
};
-struct change_member change_point_list[2*E820MAX] __initdata;
-struct change_member *change_point[2*E820MAX] __initdata;
-struct e820entry *overlap_list[E820MAX] __initdata;
-struct e820entry new_bios[E820MAX] __initdata;
+static struct change_member change_point_list[2*E820MAX] __initdata;
+static struct change_member *change_point[2*E820MAX] __initdata;
+static struct e820entry *overlap_list[E820MAX] __initdata;
+static struct e820entry new_bios[E820MAX] __initdata;
static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
{
@@ -995,8 +993,6 @@
return max_low_pfn;
}
-#ifndef CONFIG_DISCONTIGMEM
-
/*
* Free all available memory for boot time allocation. Used
* as a callback function by efi_memory_walk()
@@ -1070,15 +1066,16 @@
reserve_bootmem(addr, PAGE_SIZE);
}
+#ifndef CONFIG_DISCONTIGMEM
+void __init setup_bootmem_allocator(void);
static unsigned long __init setup_memory(void)
{
- unsigned long bootmap_size, start_pfn, max_low_pfn;
/*
* partially used pages are not usable - thus
* we are rounding upwards:
*/
- start_pfn = PFN_UP(__pa(xen_start_info.pt_base)) + xen_start_info.nr_pt_frames;
+ min_low_pfn = PFN_UP(__pa(xen_start_info.pt_base)) + xen_start_info.nr_pt_frames;
find_max_pfn();
@@ -1094,10 +1091,43 @@
#endif
printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
pages_to_mb(max_low_pfn));
+
+ setup_bootmem_allocator();
+
+ return max_low_pfn;
+}
+
+void __init zone_sizes_init(void)
+{
+ unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
+ unsigned int max_dma, low;
+
+ max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+ low = max_low_pfn;
+
+ if (low < max_dma)
+ zones_size[ZONE_DMA] = low;
+ else {
+ zones_size[ZONE_DMA] = max_dma;
+ zones_size[ZONE_NORMAL] = low - max_dma;
+#ifdef CONFIG_HIGHMEM
+ zones_size[ZONE_HIGHMEM] = highend_pfn - low;
+#endif
+ }
+ free_area_init(zones_size);
+}
+#else
+extern unsigned long setup_memory(void);
+extern void zone_sizes_init(void);
+#endif /* !CONFIG_DISCONTIGMEM */
+
+void __init setup_bootmem_allocator(void)
+{
+ unsigned long bootmap_size;
/*
* Initialize the boot-time allocator (with low memory only):
*/
- bootmap_size = init_bootmem(start_pfn, max_low_pfn);
+ bootmap_size = init_bootmem(min_low_pfn, max_low_pfn);
register_bootmem_low_pages(max_low_pfn);
@@ -1107,7 +1137,7 @@
* the (very unlikely) case of us accidentally initializing the
* bootmem allocator with an invalid RAM area.
*/
- reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) +
+ reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(min_low_pfn) +
bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
/* reserve EBDA region, it's a 4K region */
@@ -1160,12 +1190,25 @@
#endif
phys_to_machine_mapping = (unsigned int *)xen_start_info.mfn_list;
-
- return max_low_pfn;
-}
-#else
-extern unsigned long setup_memory(void);
-#endif /* !CONFIG_DISCONTIGMEM */
+}
+
+/*
+ * The node 0 pgdat is initialized before all of these because
+ * it's needed for bootmem. node>0 pgdats have their virtual
+ * space allocated before the pagetables are in place to access
+ * them, so they can't be cleared then.
+ *
+ * This should all compile down to nothing when NUMA is off.
+ */
+void __init remapped_pgdat_init(void)
+{
+ int nid;
+
+ for_each_online_node(nid) {
+ if (nid != 0)
+ memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
+ }
+}
/*
* Request address space for all standard RAM and ROM resources
@@ -1440,7 +1483,6 @@
machine_submodel_id = SYS_DESC_TABLE.table[1];
BIOS_revision = SYS_DESC_TABLE.table[2];
}
- aux_device_present = AUX_DEVICE_INFO;
bootloader_type = LOADER_TYPE;
#ifdef CONFIG_XEN_PHYSDEV_ACCESS
@@ -1500,6 +1542,8 @@
smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
#endif
paging_init();
+ remapped_pgdat_init();
+ zone_sizes_init();
/* Make sure we have a correctly sized P->M table. */
if (max_pfn != xen_start_info.nr_pages) {
@@ -1564,11 +1608,13 @@
if (efi_enabled)
efi_map_memmap();
+#ifdef CONFIG_ACPI_BOOT
/*
* Parse the ACPI tables for possible boot-time SMP configuration.
*/
acpi_boot_table_init();
acpi_boot_init();
+#endif
#ifdef CONFIG_X86_LOCAL_APIC
if (smp_found_config)
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/kernel/signal.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/signal.c Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/signal.c Tue Aug 9 23:57:17 2005
@@ -93,7 +93,7 @@
if (act) {
old_sigset_t mask;
- if (verify_area(VERIFY_READ, act, sizeof(*act)) ||
+ if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
__get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
__get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
return -EFAULT;
@@ -105,7 +105,7 @@
ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
if (!ret && oact) {
- if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) ||
+ if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
__put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
__put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
return -EFAULT;
@@ -187,7 +187,7 @@
struct _fpstate __user * buf;
err |= __get_user(buf, &sc->fpstate);
if (buf) {
- if (verify_area(VERIFY_READ, buf, sizeof(*buf)))
+ if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
goto badframe;
err |= restore_i387(buf);
} else {
@@ -213,7 +213,7 @@
sigset_t set;
int eax;
- if (verify_area(VERIFY_READ, frame, sizeof(*frame)))
+ if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
goto badframe;
if (__get_user(set.sig[0], &frame->sc.oldmask)
|| (_NSIG_WORDS > 1
@@ -243,7 +243,7 @@
sigset_t set;
int eax;
- if (verify_area(VERIFY_READ, frame, sizeof(*frame)))
+ if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
goto badframe;
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
goto badframe;
@@ -557,6 +557,16 @@
}
}
+ /*
+ * If TF is set due to a debugger (PT_DTRACE), clear the TF flag so
+ * that register information in the sigcontext is correct.
+ */
+ if (unlikely(regs->eflags & TF_MASK)
+ && likely(current->ptrace & PT_DTRACE)) {
+ current->ptrace &= ~PT_DTRACE;
+ regs->eflags &= ~TF_MASK;
+ }
+
/* Set up the stack frame */
if (ka->sa.sa_flags & SA_SIGINFO)
setup_rt_frame(sig, ka, info, oldset, regs);
@@ -608,8 +618,7 @@
* inside the kernel.
*/
if (unlikely(current->thread.debugreg[7])) {
- HYPERVISOR_set_debugreg(7,
- current->thread.debugreg[7]);
+ loaddebug(&current->thread, 7);
}
/* Whee! Actually deliver the signal. */
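
Throughout signal.c the deprecated verify_area() checks become access_ok() tests. The two helpers have opposite senses, which is why every converted condition gains a leading '!'. A schematic of the equivalence under the 2.6-era semantics (sketch, not part of the patch):

#include <asm/uaccess.h>

/* Old style: verify_area() returned 0 on success, -EFAULT on failure. */
static int old_guard(const void __user *ptr, unsigned long len)
{
	return verify_area(VERIFY_READ, ptr, len) ? -EFAULT : 0;
}

/* New style: access_ok() returns non-zero on success, hence the '!'. */
static int new_guard(const void __user *ptr, unsigned long len)
{
	return !access_ok(VERIFY_READ, ptr, len) ? -EFAULT : 0;
}
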
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c Tue Aug 9 23:57:17 2005
@@ -176,6 +176,35 @@
({ rmb(); (shadow_time_version == HYPERVISOR_shared_info->time_version2); })
/*
+ * This is a special lock that is owned by the CPU and holds the index
+ * register we are working with. It is required for NMI access to the
+ * CMOS/RTC registers. See include/asm-i386/mc146818rtc.h for details.
+ */
+volatile unsigned long cmos_lock = 0;
+EXPORT_SYMBOL(cmos_lock);
+
+/* Routines for accessing the CMOS RAM/RTC. */
+unsigned char rtc_cmos_read(unsigned char addr)
+{
+ unsigned char val;
+ lock_cmos_prefix(addr);
+ outb_p(addr, RTC_PORT(0));
+ val = inb_p(RTC_PORT(1));
+ lock_cmos_suffix(addr);
+ return val;
+}
+EXPORT_SYMBOL(rtc_cmos_read);
+
+void rtc_cmos_write(unsigned char val, unsigned char addr)
+{
+ lock_cmos_prefix(addr);
+ outb_p(addr, RTC_PORT(0));
+ outb_p(val, RTC_PORT(1));
+ lock_cmos_suffix(addr);
+}
+EXPORT_SYMBOL(rtc_cmos_write);
+
+/*
* This version of gettimeofday has microsecond resolution
* and better than microsecond precision on fast x86 machines with TSC.
*/
@@ -335,15 +364,22 @@
{
int retval;
+ WARN_ON(irqs_disabled());
+
/* gets recalled with irq locally disabled */
- spin_lock(&rtc_lock);
+ spin_lock_irq(&rtc_lock);
if (efi_enabled)
retval = efi_set_rtc_mmss(nowtime);
else
retval = mach_set_rtc_mmss(nowtime);
- spin_unlock(&rtc_lock);
+ spin_unlock_irq(&rtc_lock);
return retval;
+}
+#else
+static int set_rtc_mmss(unsigned long nowtime)
+{
+ return 0;
}
#endif
@@ -476,29 +512,6 @@
last_update_to_xen = xtime.tv_sec;
}
-
- /*
- * If we have an externally synchronized Linux clock, then update
- * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
- * called as close as possible to 500 ms before the new second starts.
- */
- if ((time_status & STA_UNSYNC) == 0 &&
- xtime.tv_sec > last_rtc_update + 660 &&
- (xtime.tv_nsec / 1000)
- >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
- (xtime.tv_nsec / 1000)
- <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2) {
- /* horrible...FIXME */
- if (efi_enabled) {
- if (efi_set_rtc_mmss(xtime.tv_sec) == 0)
- last_rtc_update = xtime.tv_sec;
- else
- last_rtc_update = xtime.tv_sec - 600;
- } else if (set_rtc_mmss(xtime.tv_sec) == 0)
- last_rtc_update = xtime.tv_sec;
- else
- last_rtc_update = xtime.tv_sec - 600; /* do it again in 60 s */
- }
#endif
}
@@ -538,10 +551,59 @@
return retval;
}
+static void sync_cmos_clock(unsigned long dummy);
+
+static struct timer_list sync_cmos_timer =
+ TIMER_INITIALIZER(sync_cmos_clock, 0, 0);
+
+static void sync_cmos_clock(unsigned long dummy)
+{
+ struct timeval now, next;
+ int fail = 1;
+
+ /*
+ * If we have an externally synchronized Linux clock, then update
+ * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
+ * called as close as possible to 500 ms before the new second starts.
+ * This code is run on a timer. If the clock is set, that timer
+ * may not expire at the correct time. Thus, we adjust...
+ */
+ if ((time_status & STA_UNSYNC) != 0)
+ /*
+ * Not synced, exit, do not restart a timer (if one is
+ * running, let it run out).
+ */
+ return;
+
+ do_gettimeofday(&now);
+ if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
+ now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2)
+ fail = set_rtc_mmss(now.tv_sec);
+
+ next.tv_usec = USEC_AFTER - now.tv_usec;
+ if (next.tv_usec <= 0)
+ next.tv_usec += USEC_PER_SEC;
+
+ if (!fail)
+ next.tv_sec = 659;
+ else
+ next.tv_sec = 0;
+
+ if (next.tv_usec >= USEC_PER_SEC) {
+ next.tv_sec++;
+ next.tv_usec -= USEC_PER_SEC;
+ }
+ mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next));
+}
+
+void notify_arch_cmos_timer(void)
+{
+ mod_timer(&sync_cmos_timer, jiffies + 1);
+}
static long clock_cmos_diff, sleep_start;
-static int timer_suspend(struct sys_device *dev, u32 state)
+static int timer_suspend(struct sys_device *dev, pm_message_t state)
{
/*
* Estimate time zone so that set_time can update the clock
@@ -599,14 +661,14 @@
#ifdef CONFIG_HPET_TIMER
extern void (*late_time_init)(void);
/* Duplicate of time_init() below, with hpet_enable part added */
-void __init hpet_time_init(void)
+static void __init hpet_time_init(void)
{
xtime.tv_sec = get_cmos_time();
xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
set_normalized_timespec(&wall_to_monotonic,
-xtime.tv_sec, -xtime.tv_nsec);
- if (hpet_enable() >= 0) {
+ if ((hpet_enable() >= 0) && hpet_use_timer) {
printk("Using HPET for base-timer\n");
}
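
The sync_cmos_clock() timer added above replaces the old inline ~11-minute CMOS update: set_rtc_mmss() wants to run about 500 ms before a second boundary, so the handler aims its next expiry at that mark and, after a successful write, backs off 659 seconds. A distilled sketch of that scheduling arithmetic (illustrative; the real code works on a struct timeval and converts with timeval_to_jiffies()):

/* now_usec: microseconds past the current second (0..999999).
 * Returns how long to wait before the next write attempt. */
static long usecs_until_next_attempt(long now_usec, int last_write_ok)
{
	long wait = 500000 - now_usec;	/* reach the 500 ms mark (USEC_AFTER) */

	if (wait <= 0)
		wait += 1000000;	/* already past it: aim at the next second */
	if (last_write_ok)
		wait += 659L * 1000000;	/* ~11 minutes between successful updates */
	return wait;
}
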
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c Tue Aug 9 23:57:17 2005
@@ -342,8 +342,7 @@
if (panic_on_oops) {
printk(KERN_EMERG "Fatal exception: panic in 5 seconds\n");
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(5 * HZ);
+ ssleep(5);
panic("Fatal exception");
}
do_exit(SIGSEGV);
@@ -450,6 +449,7 @@
DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
+DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0)
#ifdef CONFIG_X86_MCE
DO_ERROR(18, SIGBUS, "machine check", machine_check)
#endif
@@ -635,16 +635,15 @@
}
#ifdef CONFIG_KPROBES
-fastcall int do_int3(struct pt_regs *regs, long error_code)
+fastcall void do_int3(struct pt_regs *regs, long error_code)
{
if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
== NOTIFY_STOP)
- return 1;
+ return;
/* This is an interrupt gate, because kprobes wants interrupts
disabled. Normal trap handlers don't. */
restore_interrupts(regs);
do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL);
- return 0;
}
#endif
@@ -701,8 +700,6 @@
/*
* Single-stepping through TF: make sure we ignore any events in
* kernel space (but re-enable TF when returning to user mode).
- * And if the event was due to a debugger (PT_DTRACE), clear the
- * TF flag so that register information is correct.
*/
if (condition & DR_STEP) {
/*
@@ -712,11 +709,6 @@
*/
if ((regs->xcs & 2) == 0)
goto clear_TF_reenable;
-
- if (likely(tsk->ptrace & PT_DTRACE)) {
- tsk->ptrace &= ~PT_DTRACE;
- regs->eflags &= ~TF_MASK;
- }
}
/* Ok, finally something we can handle */
@@ -806,7 +798,7 @@
math_error((void __user *)regs->eip);
}
-void simd_math_error(void __user *eip)
+static void simd_math_error(void __user *eip)
{
struct task_struct * task;
siginfo_t info;
@@ -876,6 +868,51 @@
current->thread.error_code = error_code;
force_sig(SIGSEGV, current);
}
+}
+
+fastcall void setup_x86_bogus_stack(unsigned char * stk)
+{
+ unsigned long *switch16_ptr, *switch32_ptr;
+ struct pt_regs *regs;
+ unsigned long stack_top, stack_bot;
+ unsigned short iret_frame16_off;
+ int cpu = smp_processor_id();
+ /* reserve the space on 32bit stack for the magic switch16 pointer */
+ memmove(stk, stk + 8, sizeof(struct pt_regs));
+ switch16_ptr = (unsigned long *)(stk + sizeof(struct pt_regs));
+ regs = (struct pt_regs *)stk;
+ /* now the switch32 on 16bit stack */
+ stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
+ stack_top = stack_bot + CPU_16BIT_STACK_SIZE;
+ switch32_ptr = (unsigned long *)(stack_top - 8);
+ iret_frame16_off = CPU_16BIT_STACK_SIZE - 8 - 20;
+ /* copy iret frame on 16bit stack */
+ memcpy((void *)(stack_bot + iret_frame16_off), &regs->eip, 20);
+ /* fill in the switch pointers */
+ switch16_ptr[0] = (regs->esp & 0xffff0000) | iret_frame16_off;
+ switch16_ptr[1] = __ESPFIX_SS;
+ switch32_ptr[0] = (unsigned long)stk + sizeof(struct pt_regs) +
+ 8 - CPU_16BIT_STACK_SIZE;
+ switch32_ptr[1] = __KERNEL_DS;
+}
+
+fastcall unsigned char * fixup_x86_bogus_stack(unsigned short sp)
+{
+ unsigned long *switch32_ptr;
+ unsigned char *stack16, *stack32;
+ unsigned long stack_top, stack_bot;
+ int len;
+ int cpu = smp_processor_id();
+ stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
+ stack_top = stack_bot + CPU_16BIT_STACK_SIZE;
+ switch32_ptr = (unsigned long *)(stack_top - 8);
+ /* copy the data from 16bit stack to 32bit stack */
+ len = CPU_16BIT_STACK_SIZE - 8 - sp;
+ stack16 = (unsigned char *)(stack_bot + sp);
+ stack32 = (unsigned char *)
+ (switch32_ptr[0] + CPU_16BIT_STACK_SIZE - 8 - len);
+ memcpy(stack32, stack16, len);
+ return stack32;
}
/*
@@ -978,3 +1015,10 @@
*/
cpu_init();
}
+
+static int __init kstack_setup(char *s)
+{
+ kstack_depth_to_print = simple_strtoul(s, NULL, 0);
+ return 0;
+}
+__setup("kstack=", kstack_setup);
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/mm/highmem.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/highmem.c Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/highmem.c Tue Aug 9 23:57:17 2005
@@ -77,7 +77,7 @@
* force other mappings to Oops if they'll try to access
* this pte without first remap it
*/
- pte_clear(kmap_pte-idx);
+ pte_clear(&init_mm, vaddr, kmap_pte-idx);
__flush_tlb_one(vaddr);
#endif
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/mm/init.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c Tue Aug 9 23:57:17 2005
@@ -249,13 +249,10 @@
pte_t *kmap_pte;
pgprot_t kmap_prot;
-EXPORT_SYMBOL(kmap_prot);
-EXPORT_SYMBOL(kmap_pte);
-
#define kmap_get_fixmap_pte(vaddr) \
pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), vaddr), (vaddr)), (vaddr))
-void __init kmap_init(void)
+static void __init kmap_init(void)
{
unsigned long kmap_vstart;
@@ -266,7 +263,7 @@
kmap_prot = PAGE_KERNEL;
}
-void __init permanent_kmaps_init(pgd_t *pgd_base)
+static void __init permanent_kmaps_init(pgd_t *pgd_base)
{
pgd_t *pgd;
pud_t *pud;
@@ -298,7 +295,7 @@
}
#ifndef CONFIG_DISCONTIGMEM
-void __init set_highmem_pages_init(int bad_ppro)
+static void __init set_highmem_pages_init(int bad_ppro)
{
int pfn;
for (pfn = highstart_pfn; pfn < highend_pfn; pfn++)
@@ -426,38 +423,6 @@
flush_tlb_all();
}
-#ifndef CONFIG_DISCONTIGMEM
-void __init zone_sizes_init(void)
-{
- unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
- unsigned int /*max_dma,*/ high, low;
-
- /*
- * XEN: Our notion of "DMA memory" is fake when running over Xen.
- * We simply put all RAM in the DMA zone so that those drivers which
- * needlessly specify GFP_DMA do not get starved of RAM unnecessarily.
- * Those drivers that *do* require lowmem are screwed anyway when
- * running over Xen!
- */
- /*max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;*/
- low = max_low_pfn;
- high = highend_pfn;
-
- /*if (low < max_dma)*/
- zones_size[ZONE_DMA] = low;
- /*else*/ {
- /*zones_size[ZONE_DMA] = max_dma;*/
- /*zones_size[ZONE_NORMAL] = low - max_dma;*/
-#ifdef CONFIG_HIGHMEM
- zones_size[ZONE_HIGHMEM] = high - low;
-#endif
- }
- free_area_init(zones_size);
-}
-#else
-extern void zone_sizes_init(void);
-#endif /* !CONFIG_DISCONTIGMEM */
-
static int disable_nx __initdata = 0;
u64 __supported_pte_mask = ~_PAGE_NX;
@@ -560,7 +525,6 @@
__flush_tlb_all();
kmap_init();
- zone_sizes_init();
/* Switch to the real shared_info page, and clear the dummy page. */
flush_page_update_queue();
@@ -586,7 +550,7 @@
* but fortunately the switch to using exceptions got rid of all that.
*/
-void __init test_wp_bit(void)
+static void __init test_wp_bit(void)
{
printk("Checking if this processor honours the WP bit even in
supervisor mode... ");
@@ -605,20 +569,17 @@
}
}
+static void __init set_max_mapnr_init(void)
+{
+#ifdef CONFIG_HIGHMEM
+ num_physpages = highend_pfn;
+#else
+ num_physpages = max_low_pfn;
+#endif
#ifndef CONFIG_DISCONTIGMEM
-static void __init set_max_mapnr_init(void)
-{
-#ifdef CONFIG_HIGHMEM
- max_mapnr = num_physpages = highend_pfn;
-#else
- max_mapnr = num_physpages = max_low_pfn;
-#endif
-}
-#define __free_all_bootmem() free_all_bootmem()
-#else
-#define __free_all_bootmem() free_all_bootmem_node(NODE_DATA(0))
-extern void set_max_mapnr_init(void);
-#endif /* !CONFIG_DISCONTIGMEM */
+ max_mapnr = num_physpages;
+#endif
+}
static struct kcore_list kcore_mem, kcore_vmalloc;
@@ -650,16 +611,16 @@
set_max_mapnr_init();
#ifdef CONFIG_HIGHMEM
- high_memory = (void *) __va(highstart_pfn * PAGE_SIZE);
+ high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
#else
- high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
+ high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
#endif
printk("vmalloc area: %lx-%lx, maxmem %lx\n",
VMALLOC_START,VMALLOC_END,MAXMEM);
BUG_ON(VMALLOC_START > VMALLOC_END);
/* this will put all low memory onto the freelists */
- totalram_pages += __free_all_bootmem();
+ totalram_pages += free_all_bootmem();
/* XEN: init and count low-mem pages outside initial allocation. */
for (pfn = xen_start_info.nr_pages; pfn < max_low_pfn; pfn++) {
ClearPageReserved(&mem_map[pfn]);
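
One subtle change above: high_memory is now derived as __va(pfn * PAGE_SIZE - 1) + 1 instead of __va(pfn * PAGE_SIZE). Translating the last valid byte and then stepping one past it keeps the argument to __va() inside the mapped range, and at the 4 GB boundary avoids pfn * PAGE_SIZE wrapping to zero in a 32-bit unsigned long. A schematic of the idiom, modelling __va() as the i386 direct-map offset (an assumption for illustration only):

#define VA_OFFSET	0xC0000000UL	/* stand-in for the i386 PAGE_OFFSET */
#define DEMO_PAGE_SIZE	4096UL

static void *one_past_end(unsigned long pfn)
{
	/* last byte of the range, translated, then stepped past */
	return (char *)(VA_OFFSET + (pfn * DEMO_PAGE_SIZE - 1)) + 1;
}
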
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c Tue Aug 9 23:57:17 2005
@@ -368,7 +368,7 @@
if (PTRS_PER_PMD > 1)
for (i = 0; i < USER_PTRS_PER_PGD; ++i)
kmem_cache_free(pmd_cache, (void
*)__va(pgd_val(pgd[i])-1));
- /* in the non-PAE case, clear_page_range() clears user pgd entries */
+ /* in the non-PAE case, free_pgtables() clears user pgd entries */
kmem_cache_free(pgd_cache, pgd);
}
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/pci/irq.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/pci/irq.c Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/pci/irq.c Tue Aug 9 23:57:17 2005
@@ -114,11 +114,11 @@
if (pin != 0) {
if (dev->irq != 0)
printk(KERN_INFO "PCI: Obtained IRQ %d for device %s\n",
- dev->irq, dev->slot_name);
+ dev->irq, pci_name(dev));
else
printk(KERN_WARNING "PCI: No IRQ known for interrupt "
"pin %c of device %s.\n", 'A' + pin - 1,
- dev->slot_name);
+ pci_name(dev));
}
return 0;
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/drivers/Makefile
--- a/linux-2.6-xen-sparse/drivers/Makefile Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/drivers/Makefile Tue Aug 9 23:57:17 2005
@@ -48,8 +48,8 @@
obj-$(CONFIG_TC) += tc/
obj-$(CONFIG_USB) += usb/
obj-$(CONFIG_USB_GADGET) += usb/gadget/
+obj-$(CONFIG_GAMEPORT) += input/gameport/
obj-$(CONFIG_INPUT) += input/
-obj-$(CONFIG_GAMEPORT) += input/gameport/
obj-$(CONFIG_I2O) += message/
obj-$(CONFIG_I2C) += i2c/
obj-$(CONFIG_W1) += w1/
@@ -62,5 +62,6 @@
obj-$(CONFIG_CPU_FREQ) += cpufreq/
obj-$(CONFIG_MMC) += mmc/
obj-$(CONFIG_INFINIBAND) += infiniband/
+obj-$(CONFIG_BLK_DEV_SGIIOC4) += sn/
obj-y += firmware/
obj-$(CONFIG_CRYPTO) += crypto/
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/drivers/char/mem.c
--- a/linux-2.6-xen-sparse/drivers/char/mem.c Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/drivers/char/mem.c Tue Aug 9 23:57:17 2005
@@ -23,6 +23,7 @@
#include <linux/devfs_fs_kernel.h>
#include <linux/ptrace.h>
#include <linux/device.h>
+#include <linux/backing-dev.h>
#include <asm/uaccess.h>
#include <asm/io.h>
@@ -76,14 +77,6 @@
* On ia64, we ignore O_SYNC because we cannot tolerate memory attribute aliases.
*/
return !(efi_mem_attributes(addr) & EFI_MEMORY_WB);
-#elif defined(CONFIG_PPC64)
- /* On PPC64, we always do non-cacheable access to the IO hole and
- * cacheable elsewhere. Cache paradox can checkstop the CPU and
- * the high_memory heuristic below is wrong on machines with memory
- * above the IO hole... Ah, and of course, XFree86 doesn't pass
- * O_SYNC when mapping us to tap IO space. Surprised ?
- */
- return !page_is_ram(addr >> PAGE_SHIFT);
#else
/*
* Accessing memory above the top the kernel knows about or through a file pointer
@@ -111,38 +104,6 @@
}
#endif
-static ssize_t do_write_mem(void *p, unsigned long realp,
- const char __user * buf, size_t count, loff_t *ppos)
-{
- ssize_t written;
- unsigned long copied;
-
- written = 0;
-#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
- /* we don't have page 0 mapped on sparc and m68k.. */
- if (realp < PAGE_SIZE) {
- unsigned long sz = PAGE_SIZE-realp;
- if (sz > count) sz = count;
- /* Hmm. Do something? */
- buf+=sz;
- p+=sz;
- count-=sz;
- written+=sz;
- }
-#endif
- copied = copy_from_user(p, buf, count);
- if (copied) {
- ssize_t ret = written + (count - copied);
-
- if (ret)
- return ret;
- return -EFAULT;
- }
- written += count;
- *ppos += written;
- return written;
-}
-
#ifndef ARCH_HAS_DEV_MEM
/*
* This funcion reads the *physical* memory. The f_pos points directly to the
@@ -152,15 +113,16 @@
size_t count, loff_t *ppos)
{
unsigned long p = *ppos;
- ssize_t read;
+ ssize_t read, sz;
+ char *ptr;
if (!valid_phys_addr_range(p, &count))
return -EFAULT;
read = 0;
-#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
+#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
/* we don't have page 0 mapped on sparc and m68k.. */
if (p < PAGE_SIZE) {
- unsigned long sz = PAGE_SIZE-p;
+ sz = PAGE_SIZE - p;
if (sz > count)
sz = count;
if (sz > 0) {
@@ -173,9 +135,33 @@
}
}
#endif
- if (copy_to_user(buf, __va(p), count))
- return -EFAULT;
- read += count;
+
+ while (count > 0) {
+ /*
+ * Handle first page in case it's not aligned
+ */
+ if (-p & (PAGE_SIZE - 1))
+ sz = -p & (PAGE_SIZE - 1);
+ else
+ sz = PAGE_SIZE;
+
+ sz = min_t(unsigned long, sz, count);
+
+ /*
+ * On ia64 if a page has been mapped somewhere as
+ * uncached, then it must also be accessed uncached
+ * by the kernel or data corruption may occur
+ */
+ ptr = xlate_dev_mem_ptr(p);
+
+ if (copy_to_user(buf, ptr, sz))
+ return -EFAULT;
+ buf += sz;
+ p += sz;
+ count -= sz;
+ read += sz;
+ }
+
*ppos += read;
return read;
}
@@ -184,16 +170,76 @@
size_t count, loff_t *ppos)
{
unsigned long p = *ppos;
+ ssize_t written, sz;
+ unsigned long copied;
+ void *ptr;
if (!valid_phys_addr_range(p, &count))
return -EFAULT;
- return do_write_mem(__va(p), p, buf, count, ppos);
+
+ written = 0;
+
+#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
+ /* we don't have page 0 mapped on sparc and m68k.. */
+ if (p < PAGE_SIZE) {
+ unsigned long sz = PAGE_SIZE - p;
+ if (sz > count)
+ sz = count;
+ /* Hmm. Do something? */
+ buf += sz;
+ p += sz;
+ count -= sz;
+ written += sz;
+ }
+#endif
+
+ while (count > 0) {
+ /*
+ * Handle first page in case it's not aligned
+ */
+ if (-p & (PAGE_SIZE - 1))
+ sz = -p & (PAGE_SIZE - 1);
+ else
+ sz = PAGE_SIZE;
+
+ sz = min_t(unsigned long, sz, count);
+
+ /*
+ * On ia64 if a page has been mapped somewhere as
+ * uncached, then it must also be accessed uncached
+ * by the kernel or data corruption may occur
+ */
+ ptr = xlate_dev_mem_ptr(p);
+
+ copied = copy_from_user(ptr, buf, sz);
+ if (copied) {
+ ssize_t ret;
+
+ ret = written + (sz - copied);
+ if (ret)
+ return ret;
+ return -EFAULT;
+ }
+ buf += sz;
+ p += sz;
+ count -= sz;
+ written += sz;
+ }
+
+ *ppos += written;
+ return written;
}
#endif
static int mmap_kmem(struct file * file, struct vm_area_struct * vma)
{
-#ifdef pgprot_noncached
+#if defined(__HAVE_PHYS_MEM_ACCESS_PROT)
+ unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+
+ vma->vm_page_prot = phys_mem_access_prot(file, offset,
+ vma->vm_end - vma->vm_start,
+ vma->vm_page_prot);
+#elif defined(pgprot_noncached)
unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
int uncached;
@@ -212,6 +258,25 @@
return 0;
}
+#if 0
+static int mmap_kmem(struct file * file, struct vm_area_struct * vma)
+{
+ unsigned long long val;
+ /*
+ * RED-PEN: on some architectures there is more mapped memory
+ * than available in mem_map which pfn_valid checks
+ * for. Perhaps should add a new macro here.
+ *
+ * RED-PEN: vmalloc is not supported right now.
+ */
+ if (!pfn_valid(vma->vm_pgoff))
+ return -EIO;
+ val = (u64)vma->vm_pgoff << PAGE_SHIFT;
+ vma->vm_pgoff = __pa(val) >> PAGE_SHIFT;
+ return mmap_mem(file, vma);
+}
+#endif
+
extern long vread(char *buf, char *addr, unsigned long count);
extern long vwrite(char *buf, char *addr, unsigned long count);
@@ -222,33 +287,55 @@
size_t count, loff_t *ppos)
{
unsigned long p = *ppos;
- ssize_t read = 0;
- ssize_t virtr = 0;
+ ssize_t low_count, read, sz;
char * kbuf; /* k-addr because vread() takes vmlist_lock rwlock */
-
+
+ read = 0;
if (p < (unsigned long) high_memory) {
- read = count;
+ low_count = count;
if (count > (unsigned long) high_memory - p)
- read = (unsigned long) high_memory - p;
-
-#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
+ low_count = (unsigned long) high_memory - p;
+
+#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
/* we don't have page 0 mapped on sparc and m68k.. */
- if (p < PAGE_SIZE && read > 0) {
+ if (p < PAGE_SIZE && low_count > 0) {
size_t tmp = PAGE_SIZE - p;
- if (tmp > read) tmp = read;
+ if (tmp > low_count) tmp = low_count;
if (clear_user(buf, tmp))
return -EFAULT;
buf += tmp;
p += tmp;
- read -= tmp;
+ read += tmp;
+ low_count -= tmp;
count -= tmp;
}
#endif
- if (copy_to_user(buf, (char *)p, read))
- return -EFAULT;
- p += read;
- buf += read;
- count -= read;
+ while (low_count > 0) {
+ /*
+ * Handle first page in case it's not aligned
+ */
+ if (-p & (PAGE_SIZE - 1))
+ sz = -p & (PAGE_SIZE - 1);
+ else
+ sz = PAGE_SIZE;
+
+ sz = min_t(unsigned long, sz, low_count);
+
+ /*
+ * On ia64 if a page has been mapped somewhere as
+ * uncached, then it must also be accessed uncached
+ * by the kernel or data corruption may occur
+ */
+ kbuf = xlate_dev_kmem_ptr((char *)p);
+
+ if (copy_to_user(buf, kbuf, sz))
+ return -EFAULT;
+ buf += sz;
+ p += sz;
+ read += sz;
+ low_count -= sz;
+ count -= sz;
+ }
}
if (count > 0) {
@@ -269,14 +356,78 @@
}
count -= len;
buf += len;
- virtr += len;
+ read += len;
p += len;
}
free_page((unsigned long)kbuf);
}
*ppos = p;
- return virtr + read;
-}
+ return read;
+}
+
+
+static inline ssize_t
+do_write_kmem(void *p, unsigned long realp, const char __user * buf,
+ size_t count, loff_t *ppos)
+{
+ ssize_t written, sz;
+ unsigned long copied;
+
+ written = 0;
+#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
+ /* we don't have page 0 mapped on sparc and m68k.. */
+ if (realp < PAGE_SIZE) {
+ unsigned long sz = PAGE_SIZE - realp;
+ if (sz > count)
+ sz = count;
+ /* Hmm. Do something? */
+ buf += sz;
+ p += sz;
+ realp += sz;
+ count -= sz;
+ written += sz;
+ }
+#endif
+
+ while (count > 0) {
+ char *ptr;
+ /*
+ * Handle first page in case it's not aligned
+ */
+ if (-realp & (PAGE_SIZE - 1))
+ sz = -realp & (PAGE_SIZE - 1);
+ else
+ sz = PAGE_SIZE;
+
+ sz = min_t(unsigned long, sz, count);
+
+ /*
+ * On ia64 if a page has been mapped somewhere as
+ * uncached, then it must also be accessed uncached
+ * by the kernel or data corruption may occur
+ */
+ ptr = xlate_dev_kmem_ptr(p);
+
+ copied = copy_from_user(ptr, buf, sz);
+ if (copied) {
+ ssize_t ret;
+
+ ret = written + (sz - copied);
+ if (ret)
+ return ret;
+ return -EFAULT;
+ }
+ buf += sz;
+ p += sz;
+ realp += sz;
+ count -= sz;
+ written += sz;
+ }
+
+ *ppos += written;
+ return written;
+}
+
/*
* This function writes to the *virtual* memory as seen by the kernel.
@@ -296,7 +447,7 @@
if (count > (unsigned long) high_memory - p)
wrote = (unsigned long) high_memory - p;
- written = do_write_mem((void*)p, p, buf, wrote, ppos);
+ written = do_write_kmem((void*)p, p, buf, wrote, ppos);
if (written != wrote)
return written;
wrote = written;
@@ -344,7 +495,7 @@
unsigned long i = *ppos;
char __user *tmp = buf;
- if (verify_area(VERIFY_WRITE,buf,count))
+ if (!access_ok(VERIFY_WRITE, buf, count))
return -EFAULT;
while (count-- > 0 && i < 65536) {
if (__put_user(inb(i),tmp) < 0)
@@ -362,7 +513,7 @@
unsigned long i = *ppos;
const char __user * tmp = buf;
- if (verify_area(VERIFY_READ,buf,count))
+ if (!access_ok(VERIFY_READ,buf,count))
return -EFAULT;
while (count-- > 0 && i < 65536) {
char c;
@@ -568,7 +719,6 @@
return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
}
-#define mmap_mem mmap_kmem
#define zero_lseek null_lseek
#define full_lseek null_lseek
#define write_zero write_null
@@ -581,7 +731,7 @@
.llseek = memory_lseek,
.read = read_mem,
.write = write_mem,
- .mmap = mmap_mem,
+ .mmap = mmap_kmem,
.open = open_mem,
};
#else
@@ -616,6 +766,10 @@
.read = read_zero,
.write = write_zero,
.mmap = mmap_zero,
+};
+
+static struct backing_dev_info zero_bdi = {
+ .capabilities = BDI_CAP_MAP_COPY,
};
static struct file_operations full_fops = {
@@ -664,6 +818,7 @@
break;
#endif
case 5:
+ filp->f_mapping->backing_dev_info = &zero_bdi;
filp->f_op = &zero_fops;
break;
case 7:
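
The reworked read_mem()/write_mem() paths above copy at most one page per
iteration so that every chunk can go through xlate_dev_mem_ptr() (an identity
mapping on i386, per the io.h hunk below; an uncached alias on ia64). For an
offset p, the expression -p & (PAGE_SIZE - 1) is the distance to the next page
boundary, or 0 when p is already aligned. A stand-alone sketch of the same
chunking arithmetic, with made-up values (not part of the patch):

#include <stdio.h>

#define PAGE_SIZE 4096UL

int main(void)
{
	unsigned long p = 4000;		/* unaligned start offset */
	unsigned long count = 9000;	/* bytes requested */
	unsigned long sz;

	while (count > 0) {
		/* distance to the next page boundary, else a full page */
		if (-p & (PAGE_SIZE - 1))
			sz = -p & (PAGE_SIZE - 1);
		else
			sz = PAGE_SIZE;
		if (sz > count)
			sz = count;
		printf("copy %4lu bytes at offset %lu\n", sz, p);
		p += sz;
		count -= sz;
	}
	return 0;	/* prints 96-, 4096-, 4096- and 712-byte chunks */
}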
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/drivers/char/tty_io.c
--- a/linux-2.6-xen-sparse/drivers/char/tty_io.c Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/drivers/char/tty_io.c Tue Aug 9 23:57:17 2005
@@ -187,7 +187,7 @@
EXPORT_SYMBOL(tty_name);
-inline int tty_paranoia_check(struct tty_struct *tty, struct inode *inode,
+int tty_paranoia_check(struct tty_struct *tty, struct inode *inode,
const char *routine)
{
#ifdef TTY_PARANOIA_CHECK
@@ -1791,7 +1791,6 @@
}
#ifdef CONFIG_VT
if (console_use_vt && (device == MKDEV(TTY_MAJOR,0))) {
- extern int fg_console;
extern struct tty_driver *console_driver;
driver = console_driver;
index = fg_console;
@@ -2018,11 +2017,10 @@
return 0;
#ifdef CONFIG_VT
if (tty->driver->type == TTY_DRIVER_TYPE_CONSOLE) {
- unsigned int currcons = tty->index;
int rc;
acquire_console_sem();
- rc = vc_resize(currcons, tmp_ws.ws_col, tmp_ws.ws_row);
+ rc = vc_resize(tty->driver_data, tmp_ws.ws_col, tmp_ws.ws_row);
release_console_sem();
if (rc)
return -ENXIO;
@@ -2634,6 +2632,7 @@
tty->magic = TTY_MAGIC;
tty_ldisc_assign(tty, tty_ldisc_get(N_TTY));
tty->pgrp = -1;
+ tty->overrun_time = jiffies;
tty->flip.char_buf_ptr = tty->flip.char_buf;
tty->flip.flag_buf_ptr = tty->flip.flag_buf;
INIT_WORK(&tty->flip.work, flush_to_ldisc, tty);
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-generic/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-generic/pgtable.h Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-generic/pgtable.h Tue Aug 9 23:57:17 2005
@@ -16,7 +16,7 @@
#ifndef __HAVE_ARCH_SET_PTE_ATOMIC
#define ptep_establish(__vma, __address, __ptep, __entry) \
do { \
- set_pte(__ptep, __entry); \
+ set_pte_at((__vma)->vm_mm, (__address), __ptep, __entry); \
flush_tlb_page(__vma, __address); \
} while (0)
#else /* __HAVE_ARCH_SET_PTE_ATOMIC */
@@ -37,7 +37,7 @@
*/
#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
do { \
- set_pte(__ptep, __entry); \
+ set_pte_at((__vma)->vm_mm, (__address), __ptep, __entry); \
flush_tlb_page(__vma, __address); \
} while (0)
#endif
@@ -53,20 +53,24 @@
#endif
#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-static inline int ptep_test_and_clear_young(pte_t *ptep)
-{
- pte_t pte = *ptep;
- if (!pte_young(pte))
- return 0;
- set_pte(ptep, pte_mkold(pte));
- return 1;
-}
+#define ptep_test_and_clear_young(__vma, __address, __ptep) \
+({ \
+ pte_t __pte = *(__ptep); \
+ int r = 1; \
+ if (!pte_young(__pte)) \
+ r = 0; \
+ else \
+ set_pte_at((__vma)->vm_mm, (__address), \
+ (__ptep), pte_mkold(__pte)); \
+ r; \
+})
#endif
#ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
#define ptep_clear_flush_young(__vma, __address, __ptep) \
({ \
- int __young = ptep_test_and_clear_young(__ptep); \
+ int __young; \
+ __young = ptep_test_and_clear_young(__vma, __address, __ptep); \
if (__young) \
flush_tlb_page(__vma, __address); \
__young; \
@@ -74,20 +78,24 @@
#endif
#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
-static inline int ptep_test_and_clear_dirty(pte_t *ptep)
-{
- pte_t pte = *ptep;
- if (!pte_dirty(pte))
- return 0;
- set_pte(ptep, pte_mkclean(pte));
- return 1;
-}
+#define ptep_test_and_clear_dirty(__vma, __address, __ptep) \
+({ \
+ pte_t __pte = *__ptep; \
+ int r = 1; \
+ if (!pte_dirty(__pte)) \
+ r = 0; \
+ else \
+ set_pte_at((__vma)->vm_mm, (__address), (__ptep), \
+ pte_mkclean(__pte)); \
+ r; \
+})
#endif
#ifndef __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
#define ptep_clear_flush_dirty(__vma, __address, __ptep) \
({ \
- int __dirty = ptep_test_and_clear_dirty(__ptep); \
+ int __dirty; \
+ __dirty = ptep_test_and_clear_dirty(__vma, __address, __ptep); \
if (__dirty) \
flush_tlb_page(__vma, __address); \
__dirty; \
@@ -95,36 +103,29 @@
#endif
#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
-static inline pte_t ptep_get_and_clear(pte_t *ptep)
-{
- pte_t pte = *ptep;
- pte_clear(ptep);
- return pte;
-}
+#define ptep_get_and_clear(__mm, __address, __ptep) \
+({ \
+ pte_t __pte = *(__ptep); \
+ pte_clear((__mm), (__address), (__ptep)); \
+ __pte; \
+})
#endif
#ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH
#define ptep_clear_flush(__vma, __address, __ptep) \
({ \
- pte_t __pte = ptep_get_and_clear(__ptep); \
+ pte_t __pte; \
+ __pte = ptep_get_and_clear((__vma)->vm_mm, __address, __ptep); \
flush_tlb_page(__vma, __address); \
__pte; \
})
#endif
#ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
-static inline void ptep_set_wrprotect(pte_t *ptep)
+static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
{
pte_t old_pte = *ptep;
- set_pte(ptep, pte_wrprotect(old_pte));
-}
-#endif
-
-#ifndef __HAVE_ARCH_PTEP_MKDIRTY
-static inline void ptep_mkdirty(pte_t *ptep)
-{
- pte_t old_pte = *ptep;
- set_pte(ptep, pte_mkdirty(old_pte));
+ set_pte_at(mm, address, ptep, pte_wrprotect(old_pte));
}
#endif
@@ -144,4 +145,77 @@
#define pgd_offset_gate(mm, addr) pgd_offset(mm, addr)
#endif
+#ifndef __HAVE_ARCH_LAZY_MMU_PROT_UPDATE
+#define lazy_mmu_prot_update(pte) do { } while (0)
+#endif
+
+/*
+ * When walking page tables, get the address of the next boundary,
+ * or the end address of the range if that comes earlier. Although no
+ * vma end wraps to 0, rounded up __boundary may wrap to 0 throughout.
+ */
+
+#define pgd_addr_end(addr, end) \
+({ unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \
+ (__boundary - 1 < (end) - 1)? __boundary: (end); \
+})
+
+#ifndef pud_addr_end
+#define pud_addr_end(addr, end) \
+({ unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK; \
+ (__boundary - 1 < (end) - 1)? __boundary: (end); \
+})
+#endif
+
+#ifndef pmd_addr_end
+#define pmd_addr_end(addr, end) \
+({ unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK; \
+ (__boundary - 1 < (end) - 1)? __boundary: (end); \
+})
+#endif
+
+#ifndef __ASSEMBLY__
+/*
+ * When walking page tables, we usually want to skip any p?d_none entries;
+ * and any p?d_bad entries - reporting the error before resetting to none.
+ * Do the tests inline, but report and clear the bad entry in mm/memory.c.
+ */
+void pgd_clear_bad(pgd_t *);
+void pud_clear_bad(pud_t *);
+void pmd_clear_bad(pmd_t *);
+
+static inline int pgd_none_or_clear_bad(pgd_t *pgd)
+{
+ if (pgd_none(*pgd))
+ return 1;
+ if (unlikely(pgd_bad(*pgd))) {
+ pgd_clear_bad(pgd);
+ return 1;
+ }
+ return 0;
+}
+
+static inline int pud_none_or_clear_bad(pud_t *pud)
+{
+ if (pud_none(*pud))
+ return 1;
+ if (unlikely(pud_bad(*pud))) {
+ pud_clear_bad(pud);
+ return 1;
+ }
+ return 0;
+}
+
+static inline int pmd_none_or_clear_bad(pmd_t *pmd)
+{
+ if (pmd_none(*pmd))
+ return 1;
+ if (unlikely(pmd_bad(*pmd))) {
+ pmd_clear_bad(pmd);
+ return 1;
+ }
+ return 0;
+}
+#endif /* !__ASSEMBLY__ */
+
#endif /* _ASM_GENERIC_PGTABLE_H */
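
The new p?d_addr_end() macros clamp a walk to the next table boundary while
tolerating ranges that run to the very top of the address space, where the
rounded-up boundary wraps to 0; the "- 1" on both sides of the comparison is
what keeps that wrap harmless. A minimal stand-alone sketch, assuming a 2MB
PMD_SIZE:

#include <stdio.h>

#define PMD_SIZE (1UL << 21)
#define PMD_MASK (~(PMD_SIZE - 1))

static unsigned long pmd_addr_end(unsigned long addr, unsigned long end)
{
	unsigned long __boundary = (addr + PMD_SIZE) & PMD_MASK;
	/* "- 1" keeps the test right even when __boundary wraps to 0 */
	return (__boundary - 1 < end - 1) ? __boundary : end;
}

int main(void)
{
	/* ordinary case: clamp to the next 2MB boundary */
	printf("%lx\n", pmd_addr_end(0x1ff000, 0x400000)); /* 0x200000 */
	/* end == 0 means "top of address space": still clamps correctly,
	 * where a plain (__boundary < end) would always pick end */
	printf("%lx\n", pmd_addr_end(0x1ff000, 0));        /* 0x200000 */
	return 0;
}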
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-xen/asm-i386/desc.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/desc.h Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/desc.h Tue Aug 9 23:57:17 2005
@@ -3,6 +3,8 @@
#include <asm/ldt.h>
#include <asm/segment.h>
+
+#define CPU_16BIT_STACK_SIZE 1024
#ifndef __ASSEMBLY__
@@ -12,6 +14,8 @@
#include <asm/mmu.h>
extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES];
+
+DECLARE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
struct Xgt_desc_struct {
unsigned short size;
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h Tue Aug 9 23:57:17 2005
@@ -11,7 +11,7 @@
#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
void *dma_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, int flag);
+ dma_addr_t *dma_handle, unsigned int __nocast flag);
void dma_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_handle);
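
The __nocast on the gfp flag argument here (and in the gfp.h hunks below) is a
sparse annotation, not a type: it asks the checker to warn when an implicit
conversion changes the argument's type, and it compiles away entirely under
gcc. A sketch of the convention, mirroring what include/linux/compiler.h is
believed to define for this kernel generation:

#ifdef __CHECKER__
# define __nocast __attribute__((nocast))	/* sparse-only attribute */
#else
# define __nocast				/* plain gcc: no-op */
#endif

/* hypothetical prototype using the annotation */
void *alloc_example(unsigned long size, unsigned int __nocast flags);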
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-xen/asm-i386/highmem.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/highmem.h Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/highmem.h Tue Aug 9 23:57:17 2005
@@ -32,8 +32,6 @@
extern pte_t *kmap_pte;
extern pgprot_t kmap_prot;
extern pte_t *pkmap_page_table;
-
-extern void kmap_init(void);
/*
* Right now we initialize only a single pte table. It can be extended
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-xen/asm-i386/io.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/io.h Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/io.h Tue Aug 9 23:57:17 2005
@@ -49,6 +49,17 @@
#include <linux/vmalloc.h>
#include <asm/fixmap.h>
+
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p) __va(p)
+
+/*
+ * Convert a virtual cached pointer to an uncached pointer
+ */
+#define xlate_dev_kmem_ptr(p) p
/**
* virt_to_phys - map virtual addresses to physical
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-xen/asm-i386/mmu_context.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mmu_context.h Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mmu_context.h Tue Aug 9 23:57:17 2005
@@ -64,7 +64,7 @@
}
#define deactivate_mm(tsk, mm) \
- asm("movl %0,%%fs ; movl %0,%%gs": :"r" (0))
+ asm("mov %0,%%fs ; mov %0,%%gs": :"r" (0))
#define activate_mm(prev, next) do { \
switch_mm((prev),(next),NULL); \
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgalloc.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgalloc.h Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgalloc.h Tue Aug 9 23:57:17 2005
@@ -2,7 +2,6 @@
#define _I386_PGALLOC_H
#include <linux/config.h>
-#include <asm/processor.h>
#include <asm/fixmap.h>
#include <linux/threads.h>
#include <linux/mm.h> /* for struct page */
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h Tue Aug 9 23:57:17 2005
@@ -16,6 +16,7 @@
#define set_pte_batched(pteptr, pteval) \
queue_l1_entry_update(pteptr, (pteval).pte_low)
#define set_pte(pteptr, pteval) (*(pteptr) = pteval)
+#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
#define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval)
#define set_pmd(pmdptr, pmdval) xen_l2_entry_update((pmdptr), (pmdval))
@@ -28,14 +29,8 @@
* each domain will have separate page tables, with their own versions of
* accessed & dirty state.
*/
-static inline pte_t ptep_get_and_clear(pte_t *xp)
-{
- pte_t pte = *xp;
- if (pte.pte_low)
- set_pte(xp, __pte_ma(0));
- return pte;
-}
+#define ptep_get_and_clear(mm,addr,xp) __pte_ma(xchg(&(xp)->pte_low, 0))
#define pte_same(a, b) ((a).pte_low == (b).pte_low)
/*
* We detect special mappings in one of two ways:
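
ptep_get_and_clear() now folds the read and the clear into one xchg(), so an
accessed/dirty update landing between a separate read and write cannot be
lost. A user-space analogue of the same idea with a GCC atomic builtin, on a
made-up 2-level PTE value (illustrative only):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t pte_low = 0x1063;	/* present|rw|accessed|dirty, pfn 1 */

	/* returns the old value and stores 0 in a single atomic step */
	uint32_t old = __atomic_exchange_n(&pte_low, 0, __ATOMIC_SEQ_CST);

	printf("old pte %#x, now %#x\n", old, pte_low);
	return 0;
}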
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h Tue Aug 9 23:57:17 2005
@@ -64,7 +64,7 @@
#define PGDIR_MASK (~(PGDIR_SIZE-1))
#define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE)
-#define FIRST_USER_PGD_NR 0
+#define FIRST_USER_ADDRESS 0
#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
@@ -200,15 +200,15 @@
/*
* Define this if things work differently on an i386 and an i486:
* it will (on an i486) warn about kernel memory accesses that are
- * done without a 'verify_area(VERIFY_WRITE,..)'
- */
-#undef TEST_VERIFY_AREA
+ * done without a 'access_ok(VERIFY_WRITE,..)'
+ */
+#undef TEST_ACCESS_OK
/* The boot page tables (all created as a single array) */
extern unsigned long pg0[];
#define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
-#define pte_clear(xp) do { set_pte(xp, __pte(0)); } while (0)
+#define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
#define pmd_none(x) (!pmd_val(x))
/* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
@@ -252,7 +252,7 @@
# include <asm/pgtable-2level.h>
#endif
-static inline int ptep_test_and_clear_dirty(pte_t *ptep)
+static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
{
pte_t pte = *ptep;
int ret = pte_dirty(pte);
@@ -261,7 +261,7 @@
return ret;
}
-static inline int ptep_test_and_clear_young(pte_t *ptep)
+static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
{
pte_t pte = *ptep;
int ret = pte_young(pte);
@@ -270,18 +270,11 @@
return ret;
}
-static inline void ptep_set_wrprotect(pte_t *ptep)
+static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
pte_t pte = *ptep;
if (pte_write(pte))
set_pte(ptep, pte_wrprotect(pte));
-}
-
-static inline void ptep_mkdirty(pte_t *ptep)
-{
- pte_t pte = *ptep;
- if (!pte_dirty(pte))
- xen_l1_entry_update(ptep, pte_mkdirty(pte).pte_low);
}
/*
@@ -495,11 +488,14 @@
#define io_remap_pfn_range(vma,from,pfn,size,prot) \
direct_remap_area_pages(vma->vm_mm,from,pfn<<PAGE_SHIFT,size,prot,DOMID_IO)
+#define MK_IOSPACE_PFN(space, pfn) (pfn)
+#define GET_IOSPACE(pfn) 0
+#define GET_PFN(pfn) (pfn)
+
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
-#define __HAVE_ARCH_PTEP_MKDIRTY
#define __HAVE_ARCH_PTE_SAME
#include <asm-generic/pgtable.h>
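
The __HAVE_ARCH_* defines immediately above the #include are the usual
override convention: the architecture supplies its own helper and sets the
guard macro, and asm-generic/pgtable.h then compiles out its fallback (note
__HAVE_ARCH_PTEP_MKDIRTY is dropped here along with the helper itself). A
minimal sketch of the pattern with a hypothetical helper:

#include <stdio.h>

/* "arch" header: private version plus the guard */
#define my_helper(x) ((x) * 2)
#define __HAVE_ARCH_MY_HELPER

/* "generic" header: fallback only when no arch override exists */
#ifndef __HAVE_ARCH_MY_HELPER
#define my_helper(x) (x)
#endif

int main(void)
{
	printf("%d\n", my_helper(21));	/* 42: the arch version won */
	return 0;
}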
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-xen/asm-i386/processor.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/processor.h Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/processor.h Tue Aug 9 23:57:17 2005
@@ -99,12 +99,12 @@
#endif
extern int phys_proc_id[NR_CPUS];
+extern int cpu_core_id[NR_CPUS];
extern char ignore_fpu_irq;
extern void identify_cpu(struct cpuinfo_x86 *);
extern void print_cpu_info(struct cpuinfo_x86 *);
extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
-extern void dodgy_tsc(void);
#ifdef CONFIG_X86_HT
extern void detect_ht(struct cpuinfo_x86 *c);
@@ -138,7 +138,7 @@
* clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
* resulting in stale register contents being returned.
*/
-static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
+static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
{
__asm__("cpuid"
: "=a" (*eax),
@@ -146,6 +146,18 @@
"=c" (*ecx),
"=d" (*edx)
: "0" (op), "c"(0));
+}
+
+/* Some CPUID calls want 'count' to be placed in ecx */
+static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
+ int *edx)
+{
+ __asm__("cpuid"
+ : "=a" (*eax),
+ "=b" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (op), "c" (count));
}
/*
@@ -501,6 +513,13 @@
regs->esp = new_esp; \
} while (0)
+/*
+ * This special macro can be used to load a debugging register
+ */
+#define loaddebug(thread,register) \
+ HYPERVISOR_set_debugreg((register), \
+ ((thread)->debugreg[register]))
+
/* Forward declaration, a strange C thing */
struct task_struct;
struct mm_struct;
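
cpuid_count() exists because some CPUID leaves, notably leaf 4 (deterministic
cache parameters on Intel CPUs), take a sub-leaf index in ecx, which the plain
cpuid() wrapper always zeroes. A user-space replica of the new helper that
enumerates cache levels (x86 only, illustrative; output depends on the CPU):

#include <stdio.h>

static inline void cpuid_count(unsigned int op, unsigned int count,
			       unsigned int *eax, unsigned int *ebx,
			       unsigned int *ecx, unsigned int *edx)
{
	__asm__("cpuid"
		: "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
		: "0" (op), "c" (count));
}

int main(void)
{
	unsigned int eax, ebx, ecx, edx, i;

	for (i = 0; i < 8; i++) {
		cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
		if (!(eax & 0x1f))	/* cache type 0: no more caches */
			break;
		printf("cache %u: type %u, level %u\n",
		       i, eax & 0x1f, (eax >> 5) & 0x7);
	}
	return 0;
}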
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-xen/asm-i386/segment.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/segment.h Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/segment.h Tue Aug 9 23:57:17 2005
@@ -38,7 +38,7 @@
* 24 - APM BIOS support
* 25 - APM BIOS support
*
- * 26 - unused
+ * 26 - ESPFIX small SS
* 27 - unused
* 28 - unused
* 29 - unused
@@ -71,6 +71,9 @@
#define GDT_ENTRY_PNPBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 6)
#define GDT_ENTRY_APMBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 11)
+#define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE + 14)
+#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8)
+
#define GDT_ENTRY_DOUBLEFAULT_TSS 31
/*
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-xen/asm-i386/setup.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/setup.h Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/setup.h Tue Aug 9 23:57:17 2005
@@ -16,7 +16,7 @@
#define MAXMEM_PFN PFN_DOWN(MAXMEM)
#define MAX_NONPAE_PFN (1 << 20)
-#define PARAM_SIZE 2048
+#define PARAM_SIZE 4096
#define COMMAND_LINE_SIZE 256
#define OLD_CL_MAGIC_ADDR 0x90020
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-xen/asm-i386/system.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/system.h Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/system.h Tue Aug 9 23:57:17 2005
@@ -84,7 +84,7 @@
#define loadsegment(seg,value) \
asm volatile("\n" \
"1:\t" \
- "movl %0,%%" #seg "\n" \
+ "mov %0,%%" #seg "\n" \
"2:\n" \
".section .fixup,\"ax\"\n" \
"3:\t" \
@@ -96,13 +96,13 @@
".align 4\n\t" \
".long 1b,3b\n" \
".previous" \
- : :"m" (*(unsigned int *)&(value)))
+ : :"m" (value))
/*
* Save a segment register away
*/
#define savesegment(seg, value) \
- asm volatile("movl %%" #seg ",%0":"=m" (*(int *)&(value)))
+ asm volatile("mov %%" #seg ",%0":"=m" (value))
/*
* Clear and set 'TS' bit respectively
@@ -519,4 +519,6 @@
extern int es7000_plat;
void cpu_idle_wait(void);
+extern unsigned long arch_align_stack(unsigned long sp);
+
#endif
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/linux/gfp.h
--- a/linux-2.6-xen-sparse/include/linux/gfp.h Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/linux/gfp.h Tue Aug 9 23:57:17 2005
@@ -26,26 +26,28 @@
*
* __GFP_NORETRY: The VM implementation must not retry indefinitely.
*/
-#define __GFP_WAIT 0x10 /* Can wait and reschedule? */
-#define __GFP_HIGH 0x20 /* Should access emergency pools? */
-#define __GFP_IO 0x40 /* Can start physical IO? */
-#define __GFP_FS 0x80 /* Can call down to low-level FS? */
-#define __GFP_COLD 0x100 /* Cache-cold page required */
-#define __GFP_NOWARN 0x200 /* Suppress page allocation failure warning */
-#define __GFP_REPEAT 0x400 /* Retry the allocation. Might fail */
-#define __GFP_NOFAIL 0x800 /* Retry for ever. Cannot fail */
-#define __GFP_NORETRY 0x1000 /* Do not retry. Might fail */
-#define __GFP_NO_GROW 0x2000 /* Slab internal usage */
-#define __GFP_COMP 0x4000 /* Add compound page metadata */
-#define __GFP_ZERO 0x8000 /* Return zeroed page on success */
+#define __GFP_WAIT 0x10u /* Can wait and reschedule? */
+#define __GFP_HIGH 0x20u /* Should access emergency pools? */
+#define __GFP_IO 0x40u /* Can start physical IO? */
+#define __GFP_FS 0x80u /* Can call down to low-level FS? */
+#define __GFP_COLD 0x100u /* Cache-cold page required */
+#define __GFP_NOWARN 0x200u /* Suppress page allocation failure warning */
+#define __GFP_REPEAT 0x400u /* Retry the allocation. Might fail */
+#define __GFP_NOFAIL 0x800u /* Retry for ever. Cannot fail */
+#define __GFP_NORETRY 0x1000u /* Do not retry. Might fail */
+#define __GFP_NO_GROW 0x2000u /* Slab internal usage */
+#define __GFP_COMP 0x4000u /* Add compound page metadata */
+#define __GFP_ZERO 0x8000u /* Return zeroed page on success */
+#define __GFP_NOMEMALLOC 0x10000u /* Don't use emergency reserves */
-#define __GFP_BITS_SHIFT 16 /* Room for 16 __GFP_FOO bits */
+#define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */
#define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1)
/* if you forget to add the bitmask here kernel will crash, period */
#define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
- __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP)
+ __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
+ __GFP_NOMEMALLOC)
#define GFP_ATOMIC (__GFP_HIGH)
#define GFP_NOIO (__GFP_WAIT)
@@ -86,7 +88,7 @@
extern struct page *
FASTCALL(__alloc_pages(unsigned int, unsigned int, struct zonelist *));
-static inline struct page *alloc_pages_node(int nid, unsigned int gfp_mask,
+static inline struct page *alloc_pages_node(int nid, unsigned int __nocast gfp_mask,
unsigned int order)
{
if (unlikely(order >= MAX_ORDER))
@@ -97,17 +99,17 @@
}
#ifdef CONFIG_NUMA
-extern struct page *alloc_pages_current(unsigned gfp_mask, unsigned order);
+extern struct page *alloc_pages_current(unsigned int __nocast gfp_mask, unsigned order);
static inline struct page *
-alloc_pages(unsigned int gfp_mask, unsigned int order)
+alloc_pages(unsigned int __nocast gfp_mask, unsigned int order)
{
if (unlikely(order >= MAX_ORDER))
return NULL;
return alloc_pages_current(gfp_mask, order);
}
-extern struct page *alloc_page_vma(unsigned gfp_mask,
+extern struct page *alloc_page_vma(unsigned __nocast gfp_mask,
struct vm_area_struct *vma, unsigned long addr);
#else
#define alloc_pages(gfp_mask, order) \
@@ -116,8 +118,8 @@
#endif
#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
-extern unsigned long FASTCALL(__get_free_pages(unsigned int gfp_mask, unsigned int order));
-extern unsigned long FASTCALL(get_zeroed_page(unsigned int gfp_mask));
+extern unsigned long FASTCALL(__get_free_pages(unsigned int __nocast gfp_mask, unsigned int order));
+extern unsigned long FASTCALL(get_zeroed_page(unsigned int __nocast gfp_mask));
#define __get_free_page(gfp_mask) \
__get_free_pages((gfp_mask),0)
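
The gfp.h hunks give every __GFP flag an explicit unsigned suffix, widen
__GFP_BITS_SHIFT from 16 to 20, and add the new __GFP_NOMEMALLOC to
GFP_LEVEL_MASK; as the comment in the header warns, a flag left out of that
mask would be silently stripped. A stand-alone sanity check using the values
copied from the hunk above:

#include <assert.h>
#include <stdio.h>

#define __GFP_NOMEMALLOC 0x10000u
#define __GFP_BITS_SHIFT 20
#define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1)

int main(void)
{
	/* the new flag fits inside the widened bit range ... */
	assert(__GFP_NOMEMALLOC < (1u << __GFP_BITS_SHIFT));
	/* ... and survives masking with __GFP_BITS_MASK */
	assert((__GFP_NOMEMALLOC & __GFP_BITS_MASK) == __GFP_NOMEMALLOC);
	printf("ok\n");
	return 0;
}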
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/mm/highmem.c
--- a/linux-2.6-xen-sparse/mm/highmem.c Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/mm/highmem.c Tue Aug 9 23:57:17 2005
@@ -30,9 +30,9 @@
static mempool_t *page_pool, *isa_page_pool;
-static void *page_pool_alloc(int gfp_mask, void *data)
-{
- int gfp = gfp_mask | (int) (long) data;
+static void *page_pool_alloc(unsigned int __nocast gfp_mask, void *data)
+{
+ unsigned int gfp = gfp_mask | (unsigned int) (long) data;
return alloc_page(gfp);
}
@@ -90,7 +90,8 @@
* So no dangers, even with speculative execution.
*/
page = pte_page(pkmap_page_table[i]);
- pte_clear(&pkmap_page_table[i]);
+ pte_clear(&init_mm, (unsigned long)page_address(page),
+ &pkmap_page_table[i]);
set_page_address(page, NULL);
}
@@ -138,7 +139,8 @@
}
}
vaddr = PKMAP_ADDR(last_pkmap_nr);
- set_pte(&(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot));
+ set_pte_at(&init_mm, vaddr,
+ &(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot));
pkmap_count[last_pkmap_nr] = 1;
set_page_address(page, (void *)vaddr);
@@ -332,6 +334,7 @@
continue;
mempool_free(bvec->bv_page, pool);
+ dec_page_state(nr_bounce);
}
bio_endio(bio_orig, bio_orig->bi_size, err);
@@ -412,6 +415,7 @@
to->bv_page = mempool_alloc(pool, q->bounce_gfp);
to->bv_len = from->bv_len;
to->bv_offset = from->bv_offset;
+ inc_page_state(nr_bounce);
if (rw == WRITE) {
char *vto, *vfrom;
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/mm/memory.c
--- a/linux-2.6-xen-sparse/mm/memory.c Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/mm/memory.c Tue Aug 9 23:57:17 2005
@@ -46,7 +46,6 @@
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
-#include <linux/acct.h>
#include <linux/module.h>
#include <linux/init.h>
@@ -84,116 +83,205 @@
EXPORT_SYMBOL(vmalloc_earlyreserve);
/*
+ * If a p?d_bad entry is found while walking page tables, report
+ * the error, before resetting entry to p?d_none. Usually (but
+ * very seldom) called out from the p?d_none_or_clear_bad macros.
+ */
+
+void pgd_clear_bad(pgd_t *pgd)
+{
+ pgd_ERROR(*pgd);
+ pgd_clear(pgd);
+}
+
+void pud_clear_bad(pud_t *pud)
+{
+ pud_ERROR(*pud);
+ pud_clear(pud);
+}
+
+void pmd_clear_bad(pmd_t *pmd)
+{
+ pmd_ERROR(*pmd);
+ pmd_clear(pmd);
+}
+
+/*
* Note: this doesn't free the actual pages themselves. That
* has been handled earlier when unmapping all the memory regions.
*/
-static inline void clear_pmd_range(struct mmu_gather *tlb, pmd_t *pmd, unsigned long start, unsigned long end)
-{
- struct page *page;
-
- if (pmd_none(*pmd))
+static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd)
+{
+ struct page *page = pmd_page(*pmd);
+ pmd_clear(pmd);
+ pte_free_tlb(tlb, page);
+ dec_page_state(nr_page_table_pages);
+ tlb->mm->nr_ptes--;
+}
+
+static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
+ unsigned long addr, unsigned long end,
+ unsigned long floor, unsigned long ceiling)
+{
+ pmd_t *pmd;
+ unsigned long next;
+ unsigned long start;
+
+ start = addr;
+ pmd = pmd_offset(pud, addr);
+ do {
+ next = pmd_addr_end(addr, end);
+ if (pmd_none_or_clear_bad(pmd))
+ continue;
+ free_pte_range(tlb, pmd);
+ } while (pmd++, addr = next, addr != end);
+
+ start &= PUD_MASK;
+ if (start < floor)
return;
- if (unlikely(pmd_bad(*pmd))) {
- pmd_ERROR(*pmd);
- pmd_clear(pmd);
+ if (ceiling) {
+ ceiling &= PUD_MASK;
+ if (!ceiling)
+ return;
+ }
+ if (end - 1 > ceiling - 1)
return;
- }
- if (!((start | end) & ~PMD_MASK)) {
- /* Only clear full, aligned ranges */
- page = pmd_page(*pmd);
- pmd_clear(pmd);
- dec_page_state(nr_page_table_pages);
- tlb->mm->nr_ptes--;
- pte_free_tlb(tlb, page);
- }
-}
-
-static inline void clear_pud_range(struct mmu_gather *tlb, pud_t *pud, unsigned long start, unsigned long end)
-{
- unsigned long addr = start, next;
- pmd_t *pmd, *__pmd;
-
- if (pud_none(*pud))
+
+ pmd = pmd_offset(pud, start);
+ pud_clear(pud);
+ pmd_free_tlb(tlb, pmd);
+}
+
+static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+ unsigned long addr, unsigned long end,
+ unsigned long floor, unsigned long ceiling)
+{
+ pud_t *pud;
+ unsigned long next;
+ unsigned long start;
+
+ start = addr;
+ pud = pud_offset(pgd, addr);
+ do {
+ next = pud_addr_end(addr, end);
+ if (pud_none_or_clear_bad(pud))
+ continue;
+ free_pmd_range(tlb, pud, addr, next, floor, ceiling);
+ } while (pud++, addr = next, addr != end);
+
+ start &= PGDIR_MASK;
+ if (start < floor)
return;
- if (unlikely(pud_bad(*pud))) {
- pud_ERROR(*pud);
- pud_clear(pud);
+ if (ceiling) {
+ ceiling &= PGDIR_MASK;
+ if (!ceiling)
+ return;
+ }
+ if (end - 1 > ceiling - 1)
return;
- }
-
- pmd = __pmd = pmd_offset(pud, start);
+
+ pud = pud_offset(pgd, start);
+ pgd_clear(pgd);
+ pud_free_tlb(tlb, pud);
+}
+
+/*
+ * This function frees user-level page tables of a process.
+ *
+ * Must be called with pagetable lock held.
+ */
+void free_pgd_range(struct mmu_gather **tlb,
+ unsigned long addr, unsigned long end,
+ unsigned long floor, unsigned long ceiling)
+{
+ pgd_t *pgd;
+ unsigned long next;
+ unsigned long start;
+
+ /*
+ * The next few lines have given us lots of grief...
+ *
+ * Why are we testing PMD* at this top level? Because often
+ * there will be no work to do at all, and we'd prefer not to
+ * go all the way down to the bottom just to discover that.
+ *
+ * Why all these "- 1"s? Because 0 represents both the bottom
+ * of the address space and the top of it (using -1 for the
+ * top wouldn't help much: the masks would do the wrong thing).
+ * The rule is that addr 0 and floor 0 refer to the bottom of
+ * the address space, but end 0 and ceiling 0 refer to the top
+ * Comparisons need to use "end - 1" and "ceiling - 1" (though
+ * that end 0 case should be mythical).
+ *
+ * Wherever addr is brought up or ceiling brought down, we must
+ * be careful to reject "the opposite 0" before it confuses the
+ * subsequent tests. But what about where end is brought down
+ * by PMD_SIZE below? no, end can't go down to 0 there.
+ *
+ * Whereas we round start (addr) and ceiling down, by different
+ * masks at different levels, in order to test whether a table
+ * now has no other vmas using it, so can be freed, we don't
+ * bother to round floor or end up - the tests don't need that.
+ */
+
+ addr &= PMD_MASK;
+ if (addr < floor) {
+ addr += PMD_SIZE;
+ if (!addr)
+ return;
+ }
+ if (ceiling) {
+ ceiling &= PMD_MASK;
+ if (!ceiling)
+ return;
+ }
+ if (end - 1 > ceiling - 1)
+ end -= PMD_SIZE;
+ if (addr > end - 1)
+ return;
+
+ start = addr;
+ pgd = pgd_offset((*tlb)->mm, addr);
do {
- next = (addr + PMD_SIZE) & PMD_MASK;
- if (next > end || next <= addr)
- next = end;
-
- clear_pmd_range(tlb, pmd, addr, next);
- pmd++;
- addr = next;
- } while (addr && (addr < end));
-
- if (!((start | end) & ~PUD_MASK)) {
- /* Only clear full, aligned ranges */
- pud_clear(pud);
- pmd_free_tlb(tlb, __pmd);
- }
-}
-
-
-static inline void clear_pgd_range(struct mmu_gather *tlb, pgd_t *pgd, unsigned long start, unsigned long end)
-{
- unsigned long addr = start, next;
- pud_t *pud, *__pud;
-
- if (pgd_none(*pgd))
- return;
- if (unlikely(pgd_bad(*pgd))) {
- pgd_ERROR(*pgd);
- pgd_clear(pgd);
- return;
- }
-
- pud = __pud = pud_offset(pgd, start);
- do {
- next = (addr + PUD_SIZE) & PUD_MASK;
- if (next > end || next <= addr)
- next = end;
-
- clear_pud_range(tlb, pud, addr, next);
- pud++;
- addr = next;
- } while (addr && (addr < end));
-
- if (!((start | end) & ~PGDIR_MASK)) {
- /* Only clear full, aligned ranges */
- pgd_clear(pgd);
- pud_free_tlb(tlb, __pud);
- }
-}
-
-/*
- * This function clears user-level page tables of a process.
- *
- * Must be called with pagetable lock held.
- */
-void clear_page_range(struct mmu_gather *tlb, unsigned long start, unsigned long end)
-{
- unsigned long addr = start, next;
- pgd_t * pgd = pgd_offset(tlb->mm, start);
- unsigned long i;
-
- for (i = pgd_index(start); i <= pgd_index(end-1); i++) {
- next = (addr + PGDIR_SIZE) & PGDIR_MASK;
- if (next > end || next <= addr)
- next = end;
-
- clear_pgd_range(tlb, pgd, addr, next);
- pgd++;
- addr = next;
- }
-}
-
-pte_t fastcall * pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
+ next = pgd_addr_end(addr, end);
+ if (pgd_none_or_clear_bad(pgd))
+ continue;
+ free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
+ } while (pgd++, addr = next, addr != end);
+
+ if (!tlb_is_full_mm(*tlb))
+ flush_tlb_pgtables((*tlb)->mm, start, end);
+}
+
+void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
+ unsigned long floor, unsigned long ceiling)
+{
+ while (vma) {
+ struct vm_area_struct *next = vma->vm_next;
+ unsigned long addr = vma->vm_start;
+
+ if (is_hugepage_only_range(vma->vm_mm, addr, HPAGE_SIZE)) {
+ hugetlb_free_pgd_range(tlb, addr, vma->vm_end,
+ floor, next? next->vm_start: ceiling);
+ } else {
+ /*
+ * Optimization: gather nearby vmas into one call down
+ */
+ while (next && next->vm_start <= vma->vm_end + PMD_SIZE
+ && !is_hugepage_only_range(vma->vm_mm, next->vm_start,
+ HPAGE_SIZE)) {
+ vma = next;
+ next = vma->vm_next;
+ }
+ free_pgd_range(tlb, addr, vma->vm_end,
+ floor, next? next->vm_start: ceiling);
+ }
+ vma = next;
+ }
+}
+
+pte_t fastcall *pte_alloc_map(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long address)
{
if (!pmd_present(*pmd)) {
struct page *new;
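
The long comment in free_pgd_range() is easiest to check with numbers: end 0
and ceiling 0 denote the top of the address space, so every comparison is done
on value - 1, which maps 0 to ULONG_MAX. A stand-alone sketch showing where
the naive comparison goes wrong (made-up addresses):

#include <stdio.h>

int main(void)
{
	unsigned long end = 0;			/* wrapped: top of address space */
	unsigned long ceiling = 0x80000000UL;	/* a real ceiling below the top */

	/* naive test says end does not reach the ceiling: wrong */
	printf("naive:     %d\n", end > ceiling);		/* 0 */
	/* the patch's form: 0 - 1 == ULONG_MAX, so end ranks above */
	printf("wrap-safe: %d\n", end - 1 > ceiling - 1);	/* 1 */
	return 0;
}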
@@ -254,20 +342,7 @@
*/
static inline void
-copy_swap_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, pte_t pte)
-{
- if (pte_file(pte))
- return;
- swap_duplicate(pte_to_swp_entry(pte));
- if (list_empty(&dst_mm->mmlist)) {
- spin_lock(&mmlist_lock);
- list_add(&dst_mm->mmlist, &src_mm->mmlist);
- spin_unlock(&mmlist_lock);
- }
-}
-
-static inline void
-copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pte_t *dst_pte, pte_t *src_pte, unsigned long vm_flags,
unsigned long addr)
{
@@ -275,12 +350,21 @@
struct page *page;
unsigned long pfn;
- /* pte contains position in swap, so copy. */
- if (!pte_present(pte)) {
- copy_swap_pte(dst_mm, src_mm, pte);
- set_pte(dst_pte, pte);
+ /* pte contains position in swap or file, so copy. */
+ if (unlikely(!pte_present(pte))) {
+ if (!pte_file(pte)) {
+ swap_duplicate(pte_to_swp_entry(pte));
+ /* make sure dst_mm is on swapoff's mmlist. */
+ if (unlikely(list_empty(&dst_mm->mmlist))) {
+ spin_lock(&mmlist_lock);
+ list_add(&dst_mm->mmlist, &src_mm->mmlist);
+ spin_unlock(&mmlist_lock);
+ }
+ }
+ set_pte_at(dst_mm, addr, dst_pte, pte);
return;
}
+
pfn = pte_pfn(pte);
/* the pte points outside of valid memory, the
* mapping is assumed to be good, meaningful
@@ -292,7 +376,7 @@
page = pfn_to_page(pfn);
if (!page || PageReserved(page)) {
- set_pte(dst_pte, pte);
+ set_pte_at(dst_mm, addr, dst_pte, pte);
return;
}
@@ -301,7 +385,7 @@
* in the parent and the child
*/
if ((vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE) {
- ptep_set_wrprotect(src_pte);
+ ptep_set_wrprotect(src_mm, addr, src_pte);
pte = *src_pte;
}
@@ -313,172 +397,137 @@
pte = pte_mkclean(pte);
pte = pte_mkold(pte);
get_page(page);
- dst_mm->rss++;
+ inc_mm_counter(dst_mm, rss);
if (PageAnon(page))
- dst_mm->anon_rss++;
- set_pte(dst_pte, pte);
+ inc_mm_counter(dst_mm, anon_rss);
+ set_pte_at(dst_mm, addr, dst_pte, pte);
page_dup_rmap(page);
}
-static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
unsigned long addr, unsigned long end)
{
pte_t *src_pte, *dst_pte;
- pte_t *s, *d;
unsigned long vm_flags = vma->vm_flags;
-
- d = dst_pte = pte_alloc_map(dst_mm, dst_pmd, addr);
+ int progress;
+
+again:
+ dst_pte = pte_alloc_map(dst_mm, dst_pmd, addr);
if (!dst_pte)
return -ENOMEM;
-
+ src_pte = pte_offset_map_nested(src_pmd, addr);
+
+ progress = 0;
spin_lock(&src_mm->page_table_lock);
- s = src_pte = pte_offset_map_nested(src_pmd, addr);
- for (; addr < end; addr += PAGE_SIZE, s++, d++) {
- if (pte_none(*s))
+ do {
+ /*
+ * We are holding two locks at this point - either of them
+ * could generate latencies in another task on another CPU.
+ */
+ if (progress >= 32 && (need_resched() ||
+ need_lockbreak(&src_mm->page_table_lock) ||
+ need_lockbreak(&dst_mm->page_table_lock)))
+ break;
+ if (pte_none(*src_pte)) {
+ progress++;
continue;
- copy_one_pte(dst_mm, src_mm, d, s, vm_flags, addr);
- }
- pte_unmap_nested(src_pte);
- pte_unmap(dst_pte);
+ }
+ copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vm_flags, addr);
+ progress += 8;
+ } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
spin_unlock(&src_mm->page_table_lock);
+
+ pte_unmap_nested(src_pte - 1);
+ pte_unmap(dst_pte - 1);
cond_resched_lock(&dst_mm->page_table_lock);
+ if (addr != end)
+ goto again;
return 0;
}
-static int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pud_t *dst_pud, pud_t *src_pud, struct vm_area_struct *vma,
unsigned long addr, unsigned long end)
{
pmd_t *src_pmd, *dst_pmd;
- int err = 0;
unsigned long next;
- src_pmd = pmd_offset(src_pud, addr);
dst_pmd = pmd_alloc(dst_mm, dst_pud, addr);
if (!dst_pmd)
return -ENOMEM;
-
- for (; addr < end; addr = next, src_pmd++, dst_pmd++) {
- next = (addr + PMD_SIZE) & PMD_MASK;
- if (next > end || next <= addr)
- next = end;
- if (pmd_none(*src_pmd))
+ src_pmd = pmd_offset(src_pud, addr);
+ do {
+ next = pmd_addr_end(addr, end);
+ if (pmd_none_or_clear_bad(src_pmd))
continue;
- if (pmd_bad(*src_pmd)) {
- pmd_ERROR(*src_pmd);
- pmd_clear(src_pmd);
- continue;
- }
- err = copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd,
- vma, addr, next);
- if (err)
- break;
- }
- return err;
-}
-
-static int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+ if (copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd,
+ vma, addr, next))
+ return -ENOMEM;
+ } while (dst_pmd++, src_pmd++, addr = next, addr != end);
+ return 0;
+}
+
+static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma,
unsigned long addr, unsigned long end)
{
pud_t *src_pud, *dst_pud;
- int err = 0;
unsigned long next;
- src_pud = pud_offset(src_pgd, addr);
dst_pud = pud_alloc(dst_mm, dst_pgd, addr);
if (!dst_pud)
return -ENOMEM;
-
- for (; addr < end; addr = next, src_pud++, dst_pud++) {
- next = (addr + PUD_SIZE) & PUD_MASK;
- if (next > end || next <= addr)
- next = end;
- if (pud_none(*src_pud))
+ src_pud = pud_offset(src_pgd, addr);
+ do {
+ next = pud_addr_end(addr, end);
+ if (pud_none_or_clear_bad(src_pud))
continue;
- if (pud_bad(*src_pud)) {
- pud_ERROR(*src_pud);
- pud_clear(src_pud);
+ if (copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud,
+ vma, addr, next))
+ return -ENOMEM;
+ } while (dst_pud++, src_pud++, addr = next, addr != end);
+ return 0;
+}
+
+int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+ struct vm_area_struct *vma)
+{
+ pgd_t *src_pgd, *dst_pgd;
+ unsigned long next;
+ unsigned long addr = vma->vm_start;
+ unsigned long end = vma->vm_end;
+
+ if (is_vm_hugetlb_page(vma))
+ return copy_hugetlb_page_range(dst_mm, src_mm, vma);
+
+ dst_pgd = pgd_offset(dst_mm, addr);
+ src_pgd = pgd_offset(src_mm, addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ if (pgd_none_or_clear_bad(src_pgd))
continue;
- }
- err = copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud,
- vma, addr, next);
- if (err)
- break;
- }
- return err;
-}
-
-int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
- struct vm_area_struct *vma)
-{
- pgd_t *src_pgd, *dst_pgd;
- unsigned long addr, start, end, next;
- int err = 0;
-
- if (is_vm_hugetlb_page(vma))
- return copy_hugetlb_page_range(dst, src, vma);
-
- start = vma->vm_start;
- src_pgd = pgd_offset(src, start);
- dst_pgd = pgd_offset(dst, start);
-
- end = vma->vm_end;
- addr = start;
- while (addr && (addr < end-1)) {
- next = (addr + PGDIR_SIZE) & PGDIR_MASK;
- if (next > end || next <= addr)
- next = end;
- if (pgd_none(*src_pgd))
- goto next_pgd;
- if (pgd_bad(*src_pgd)) {
- pgd_ERROR(*src_pgd);
- pgd_clear(src_pgd);
- goto next_pgd;
- }
- err = copy_pud_range(dst, src, dst_pgd, src_pgd,
- vma, addr, next);
- if (err)
- break;
-
-next_pgd:
- src_pgd++;
- dst_pgd++;
- addr = next;
- }
-
- return err;
-}
-
-static void zap_pte_range(struct mmu_gather *tlb,
- pmd_t *pmd, unsigned long address,
- unsigned long size, struct zap_details *details)
-{
- unsigned long offset;
- pte_t *ptep;
-
- if (pmd_none(*pmd))
- return;
- if (unlikely(pmd_bad(*pmd))) {
- pmd_ERROR(*pmd);
- pmd_clear(pmd);
- return;
- }
- ptep = pte_offset_map(pmd, address);
- offset = address & ~PMD_MASK;
- if (offset + size > PMD_SIZE)
- size = PMD_SIZE - offset;
- size &= PAGE_MASK;
- if (details && !details->check_mapping && !details->nonlinear_vma)
- details = NULL;
- for (offset=0; offset < size; ptep++, offset += PAGE_SIZE) {
- pte_t pte = *ptep;
- if (pte_none(pte))
+ if (copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd,
+ vma, addr, next))
+ return -ENOMEM;
+ } while (dst_pgd++, src_pgd++, addr = next, addr != end);
+ return 0;
+}
+
+static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
+ unsigned long addr, unsigned long end,
+ struct zap_details *details)
+{
+ pte_t *pte;
+
+ pte = pte_offset_map(pmd, addr);
+ do {
+ pte_t ptent = *pte;
+ if (pte_none(ptent))
continue;
- if (pte_present(pte)) {
+ if (pte_present(ptent)) {
struct page *page = NULL;
- unsigned long pfn = pte_pfn(pte);
+ unsigned long pfn = pte_pfn(ptent);
if (pfn_valid(pfn)) {
page = pfn_to_page(pfn);
if (PageReserved(page))
@@ -502,19 +551,20 @@
page->index > details->last_index))
continue;
}
- pte = ptep_get_and_clear(ptep);
- tlb_remove_tlb_entry(tlb, ptep, address+offset);
+ ptent = ptep_get_and_clear(tlb->mm, addr, pte);
+ tlb_remove_tlb_entry(tlb, pte, addr);
if (unlikely(!page))
continue;
if (unlikely(details) && details->nonlinear_vma
&& linear_page_index(details->nonlinear_vma,
- address+offset) != page->index)
- set_pte(ptep, pgoff_to_pte(page->index));
- if (pte_dirty(pte))
+ addr) != page->index)
+ set_pte_at(tlb->mm, addr, pte,
+ pgoff_to_pte(page->index));
+ if (pte_dirty(ptent))
set_page_dirty(page);
if (PageAnon(page))
- tlb->mm->anon_rss--;
- else if (pte_young(pte))
+ dec_mm_counter(tlb->mm, anon_rss);
+ else if (pte_young(ptent))
mark_page_accessed(page);
tlb->freed++;
page_remove_rmap(page);
@@ -527,78 +577,64 @@
*/
if (unlikely(details))
continue;
- if (!pte_file(pte))
- free_swap_and_cache(pte_to_swp_entry(pte));
- pte_clear(ptep);
- }
- pte_unmap(ptep-1);
-}
-
-static void zap_pmd_range(struct mmu_gather *tlb,
- pud_t *pud, unsigned long address,
- unsigned long size, struct zap_details *details)
-{
- pmd_t * pmd;
- unsigned long end;
-
- if (pud_none(*pud))
- return;
- if (unlikely(pud_bad(*pud))) {
- pud_ERROR(*pud);
- pud_clear(pud);
- return;
- }
- pmd = pmd_offset(pud, address);
- end = address + size;
- if (end > ((address + PUD_SIZE) & PUD_MASK))
- end = ((address + PUD_SIZE) & PUD_MASK);
+ if (!pte_file(ptent))
+ free_swap_and_cache(pte_to_swp_entry(ptent));
+ pte_clear(tlb->mm, addr, pte);
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+ pte_unmap(pte - 1);
+}
+
+static inline void zap_pmd_range(struct mmu_gather *tlb, pud_t *pud,
+ unsigned long addr, unsigned long end,
+ struct zap_details *details)
+{
+ pmd_t *pmd;
+ unsigned long next;
+
+ pmd = pmd_offset(pud, addr);
do {
- zap_pte_range(tlb, pmd, address, end - address, details);
- address = (address + PMD_SIZE) & PMD_MASK;
- pmd++;
- } while (address && (address < end));
-}
-
-static void zap_pud_range(struct mmu_gather *tlb,
- pgd_t * pgd, unsigned long address,
- unsigned long end, struct zap_details *details)
-{
- pud_t * pud;
-
- if (pgd_none(*pgd))
- return;
- if (unlikely(pgd_bad(*pgd))) {
- pgd_ERROR(*pgd);
- pgd_clear(pgd);
- return;
- }
- pud = pud_offset(pgd, address);
+ next = pmd_addr_end(addr, end);
+ if (pmd_none_or_clear_bad(pmd))
+ continue;
+ zap_pte_range(tlb, pmd, addr, next, details);
+ } while (pmd++, addr = next, addr != end);
+}
+
+static inline void zap_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+ unsigned long addr, unsigned long end,
+ struct zap_details *details)
+{
+ pud_t *pud;
+ unsigned long next;
+
+ pud = pud_offset(pgd, addr);
do {
- zap_pmd_range(tlb, pud, address, end - address, details);
- address = (address + PUD_SIZE) & PUD_MASK;
- pud++;
- } while (address && (address < end));
-}
-
-static void unmap_page_range(struct mmu_gather *tlb,
- struct vm_area_struct *vma, unsigned long address,
- unsigned long end, struct zap_details *details)
-{
+ next = pud_addr_end(addr, end);
+ if (pud_none_or_clear_bad(pud))
+ continue;
+ zap_pmd_range(tlb, pud, addr, next, details);
+ } while (pud++, addr = next, addr != end);
+}
+
+static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
+ unsigned long addr, unsigned long end,
+ struct zap_details *details)
+{
+ pgd_t *pgd;
unsigned long next;
- pgd_t *pgd;
- int i;
-
- BUG_ON(address >= end);
- pgd = pgd_offset(vma->vm_mm, address);
+
+ if (details && !details->check_mapping && !details->nonlinear_vma)
+ details = NULL;
+
+ BUG_ON(addr >= end);
tlb_start_vma(tlb, vma);
- for (i = pgd_index(address); i <= pgd_index(end-1); i++) {
- next = (address + PGDIR_SIZE) & PGDIR_MASK;
- if (next <= address || next > end)
- next = end;
- zap_pud_range(tlb, pgd, address, next, details);
- address = next;
- pgd++;
- }
+ pgd = pgd_offset(vma->vm_mm, addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ if (pgd_none_or_clear_bad(pgd))
+ continue;
+ zap_pud_range(tlb, pgd, addr, next, details);
+ } while (pgd++, addr = next, addr != end);
tlb_end_vma(tlb, vma);
}
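
After this rework, copy_page_range(), the zap_*() levels, and
unmap_page_range() all share one walk skeleton: clamp to the next boundary
with p?d_addr_end(), skip empty or bad entries with p?d_none_or_clear_bad(),
and advance with a comma-expression loop tail. A stand-alone sketch that
prints the slices a one-level walk visits (boundary helper as in the
asm-generic/pgtable.h hunk, addresses made up):

#include <stdio.h>

#define PMD_SIZE (1UL << 21)
#define PMD_MASK (~(PMD_SIZE - 1))

static unsigned long pmd_addr_end(unsigned long addr, unsigned long end)
{
	unsigned long boundary = (addr + PMD_SIZE) & PMD_MASK;
	return (boundary - 1 < end - 1) ? boundary : end;
}

int main(void)
{
	unsigned long addr = 0x1ff000, end = 0x601000, next;

	do {
		next = pmd_addr_end(addr, end);
		printf("pmd slice: %lx-%lx\n", addr, next);
	} while (addr = next, addr != end);
	return 0;
}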
@@ -619,7 +655,7 @@
* @nr_accounted: Place number of unmapped pages in vm-accountable vma's here
* @details: details of nonlinear truncation or shared cache invalidation
*
- * Returns the number of vma's which were covered by the unmapping.
+ * Returns the end address of the unmapping (restart addr if interrupted).
*
* Unmap all pages in the vma list. Called under page_table_lock.
*
@@ -636,7 +672,7 @@
* ensure that any thus-far unmapped pages are flushed before unmap_vmas()
* drops the lock and schedules.
*/
-int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
+unsigned long unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long start_addr,
unsigned long end_addr, unsigned long *nr_accounted,
struct zap_details *details)
@@ -644,12 +680,11 @@
unsigned long zap_bytes = ZAP_BLOCK_SIZE;
unsigned long tlb_start = 0; /* For tlb_finish_mmu */
int tlb_start_valid = 0;
- int ret = 0;
+ unsigned long start = start_addr;
spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
int fullmm = tlb_is_full_mm(*tlbp);
for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) {
- unsigned long start;
unsigned long end;
start = max(vma->vm_start, start_addr);
@@ -662,7 +697,6 @@
if (vma->vm_flags & VM_ACCOUNT)
*nr_accounted += (end - start) >> PAGE_SHIFT;
- ret++;
while (start != end) {
unsigned long block;
@@ -693,7 +727,6 @@
if (i_mmap_lock) {
/* must reset count of rss freed */
*tlbp = tlb_gather_mmu(mm, fullmm);
- details->break_addr = start;
goto out;
}
spin_unlock(&mm->page_table_lock);
@@ -707,7 +740,7 @@
}
}
out:
- return ret;
+ return start; /* which is now the end (or restart) address */
}
/**
@@ -717,7 +750,7 @@
* @size: number of bytes to zap
* @details: details of nonlinear truncation or shared cache invalidation
*/
-void zap_page_range(struct vm_area_struct *vma, unsigned long address,
+unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
unsigned long size, struct zap_details *details)
{
struct mm_struct *mm = vma->vm_mm;
@@ -727,16 +760,16 @@
if (is_vm_hugetlb_page(vma)) {
zap_hugepage_range(vma, address, size);
- return;
+ return end;
}
lru_add_drain();
spin_lock(&mm->page_table_lock);
tlb = tlb_gather_mmu(mm, 0);
- unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted, details);
+ end = unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted, details);
tlb_finish_mmu(tlb, address, end);
- acct_update_integrals();
spin_unlock(&mm->page_table_lock);
+ return end;
}
/*
@@ -987,111 +1020,78 @@
EXPORT_SYMBOL(get_user_pages);
-static void zeromap_pte_range(pte_t * pte, unsigned long address,
- unsigned long size, pgprot_t prot)
-{
- unsigned long end;
-
- address &= ~PMD_MASK;
- end = address + size;
- if (end > PMD_SIZE)
- end = PMD_SIZE;
+static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long addr, unsigned long end, pgprot_t prot)
+{
+ pte_t *pte;
+
+ pte = pte_alloc_map(mm, pmd, addr);
+ if (!pte)
+ return -ENOMEM;
do {
- pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(address), prot));
+ pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(addr), prot));
BUG_ON(!pte_none(*pte));
- set_pte(pte, zero_pte);
- address += PAGE_SIZE;
- pte++;
- } while (address && (address < end));
-}
-
-static inline int zeromap_pmd_range(struct mm_struct *mm, pmd_t * pmd,
- unsigned long address, unsigned long size, pgprot_t prot)
-{
- unsigned long base, end;
-
- base = address & PUD_MASK;
- address &= ~PUD_MASK;
- end = address + size;
- if (end > PUD_SIZE)
- end = PUD_SIZE;
+ set_pte_at(mm, addr, pte, zero_pte);
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+ pte_unmap(pte - 1);
+ return 0;
+}
+
+static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud,
+ unsigned long addr, unsigned long end, pgprot_t prot)
+{
+ pmd_t *pmd;
+ unsigned long next;
+
+ pmd = pmd_alloc(mm, pud, addr);
+ if (!pmd)
+ return -ENOMEM;
do {
- pte_t * pte = pte_alloc_map(mm, pmd, base + address);
- if (!pte)
+ next = pmd_addr_end(addr, end);
+ if (zeromap_pte_range(mm, pmd, addr, next, prot))
return -ENOMEM;
- zeromap_pte_range(pte, base + address, end - address, prot);
- pte_unmap(pte);
- address = (address + PMD_SIZE) & PMD_MASK;
- pmd++;
- } while (address && (address < end));
+ } while (pmd++, addr = next, addr != end);
return 0;
}
-static inline int zeromap_pud_range(struct mm_struct *mm, pud_t * pud,
- unsigned long address,
- unsigned long size, pgprot_t prot)
-{
- unsigned long base, end;
- int error = 0;
-
- base = address & PGDIR_MASK;
- address &= ~PGDIR_MASK;
- end = address + size;
- if (end > PGDIR_SIZE)
- end = PGDIR_SIZE;
+static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd,
+ unsigned long addr, unsigned long end, pgprot_t prot)
+{
+ pud_t *pud;
+ unsigned long next;
+
+ pud = pud_alloc(mm, pgd, addr);
+ if (!pud)
+ return -ENOMEM;
do {
- pmd_t * pmd = pmd_alloc(mm, pud, base + address);
- error = -ENOMEM;
- if (!pmd)
+ next = pud_addr_end(addr, end);
+ if (zeromap_pmd_range(mm, pud, addr, next, prot))
+ return -ENOMEM;
+ } while (pud++, addr = next, addr != end);
+ return 0;
+}
+
+int zeromap_page_range(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long size, pgprot_t prot)
+{
+ pgd_t *pgd;
+ unsigned long next;
+ unsigned long end = addr + size;
+ struct mm_struct *mm = vma->vm_mm;
+ int err;
+
+ BUG_ON(addr >= end);
+ pgd = pgd_offset(mm, addr);
+ flush_cache_range(vma, addr, end);
+ spin_lock(&mm->page_table_lock);
+ do {
+ next = pgd_addr_end(addr, end);
+ err = zeromap_pud_range(mm, pgd, addr, next, prot);
+ if (err)
break;
- error = zeromap_pmd_range(mm, pmd, base + address,
- end - address, prot);
- if (error)
- break;
- address = (address + PUD_SIZE) & PUD_MASK;
- pud++;
- } while (address && (address < end));
- return 0;
-}
-
-int zeromap_page_range(struct vm_area_struct *vma, unsigned long address,
- unsigned long size, pgprot_t prot)
-{
- int i;
- int error = 0;
- pgd_t * pgd;
- unsigned long beg = address;
- unsigned long end = address + size;
- unsigned long next;
- struct mm_struct *mm = vma->vm_mm;
-
- pgd = pgd_offset(mm, address);
- flush_cache_range(vma, beg, end);
- BUG_ON(address >= end);
- BUG_ON(end > vma->vm_end);
-
- spin_lock(&mm->page_table_lock);
- for (i = pgd_index(address); i <= pgd_index(end-1); i++) {
- pud_t *pud = pud_alloc(mm, pgd, address);
- error = -ENOMEM;
- if (!pud)
- break;
- next = (address + PGDIR_SIZE) & PGDIR_MASK;
- if (next <= beg || next > end)
- next = end;
- error = zeromap_pud_range(mm, pud, address,
- next - address, prot);
- if (error)
- break;
- address = next;
- pgd++;
- }
- /*
- * Why flush? zeromap_pte_range has a BUG_ON for !pte_none()
- */
- flush_tlb_range(vma, beg, end);
+ } while (pgd++, addr = next, addr != end);
spin_unlock(&mm->page_table_lock);
- return error;
+ return err;
}
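
The rewritten zeromap_* helpers (and the remap_* helpers below) all follow the same four-level walk idiom that 2.6.12 standardized: each level clamps its sub-range with *_addr_end() and advances via a comma-expression loop, instead of the old mask-and-bump arithmetic. A condensed sketch of the shape, where walk_pud_level() is a placeholder for the next level down, not a real kernel function:

    static int walk_range(struct mm_struct *mm, unsigned long addr,
                          unsigned long end)
    {
        pgd_t *pgd = pgd_offset(mm, addr);
        unsigned long next;

        do {
            /* end of this pgd entry's span, clamped to 'end' */
            next = pgd_addr_end(addr, end);
            if (walk_pud_level(mm, pgd, addr, next))    /* placeholder */
                return -ENOMEM;
        } while (pgd++, addr = next, addr != end);
        return 0;
    }
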
/*
@@ -1099,95 +1099,74 @@
* mappings are removed. Any references to nonexistent pages result
* in null mappings (currently treated as "copy-on-access")
*/
-static inline void
-remap_pte_range(pte_t * pte, unsigned long address, unsigned long size,
- unsigned long pfn, pgprot_t prot)
-{
- unsigned long end;
-
- address &= ~PMD_MASK;
- end = address + size;
- if (end > PMD_SIZE)
- end = PMD_SIZE;
+static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long addr, unsigned long end,
+ unsigned long pfn, pgprot_t prot)
+{
+ pte_t *pte;
+
+ pte = pte_alloc_map(mm, pmd, addr);
+ if (!pte)
+ return -ENOMEM;
do {
BUG_ON(!pte_none(*pte));
if (!pfn_valid(pfn) || PageReserved(pfn_to_page(pfn)))
- set_pte(pte, pfn_pte(pfn, prot));
- address += PAGE_SIZE;
+ set_pte_at(mm, addr, pte, pfn_pte(pfn, prot));
pfn++;
- pte++;
- } while (address && (address < end));
-}
-
-static inline int
-remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address,
- unsigned long size, unsigned long pfn, pgprot_t prot)
-{
- unsigned long base, end;
-
- base = address & PUD_MASK;
- address &= ~PUD_MASK;
- end = address + size;
- if (end > PUD_SIZE)
- end = PUD_SIZE;
- pfn -= (address >> PAGE_SHIFT);
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+ pte_unmap(pte - 1);
+ return 0;
+}
+
+static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
+ unsigned long addr, unsigned long end,
+ unsigned long pfn, pgprot_t prot)
+{
+ pmd_t *pmd;
+ unsigned long next;
+
+ pfn -= addr >> PAGE_SHIFT;
+ pmd = pmd_alloc(mm, pud, addr);
+ if (!pmd)
+ return -ENOMEM;
do {
- pte_t * pte = pte_alloc_map(mm, pmd, base + address);
- if (!pte)
+ next = pmd_addr_end(addr, end);
+ if (remap_pte_range(mm, pmd, addr, next,
+ pfn + (addr >> PAGE_SHIFT), prot))
return -ENOMEM;
- remap_pte_range(pte, base + address, end - address,
- (address >> PAGE_SHIFT) + pfn, prot);
- pte_unmap(pte);
- address = (address + PMD_SIZE) & PMD_MASK;
- pmd++;
- } while (address && (address < end));
+ } while (pmd++, addr = next, addr != end);
return 0;
}
-static inline int remap_pud_range(struct mm_struct *mm, pud_t * pud,
- unsigned long address, unsigned long size,
- unsigned long pfn, pgprot_t prot)
-{
- unsigned long base, end;
- int error;
-
- base = address & PGDIR_MASK;
- address &= ~PGDIR_MASK;
- end = address + size;
- if (end > PGDIR_SIZE)
- end = PGDIR_SIZE;
- pfn -= address >> PAGE_SHIFT;
+static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
+ unsigned long addr, unsigned long end,
+ unsigned long pfn, pgprot_t prot)
+{
+ pud_t *pud;
+ unsigned long next;
+
+ pfn -= addr >> PAGE_SHIFT;
+ pud = pud_alloc(mm, pgd, addr);
+ if (!pud)
+ return -ENOMEM;
do {
- pmd_t *pmd = pmd_alloc(mm, pud, base+address);
- error = -ENOMEM;
- if (!pmd)
- break;
- error = remap_pmd_range(mm, pmd, base + address, end - address,
- (address >> PAGE_SHIFT) + pfn, prot);
- if (error)
- break;
- address = (address + PUD_SIZE) & PUD_MASK;
- pud++;
- } while (address && (address < end));
- return error;
+ next = pud_addr_end(addr, end);
+ if (remap_pmd_range(mm, pud, addr, next,
+ pfn + (addr >> PAGE_SHIFT), prot))
+ return -ENOMEM;
+ } while (pud++, addr = next, addr != end);
+ return 0;
}
/* Note: this is only safe if the mm semaphore is held when called. */
-int remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
+int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
unsigned long pfn, unsigned long size, pgprot_t prot)
{
- int error = 0;
pgd_t *pgd;
- unsigned long beg = from;
- unsigned long end = from + size;
unsigned long next;
+ unsigned long end = addr + size;
struct mm_struct *mm = vma->vm_mm;
- int i;
-
- pfn -= from >> PAGE_SHIFT;
- pgd = pgd_offset(mm, from);
- flush_cache_range(vma, beg, end);
- BUG_ON(from >= end);
+ int err;
/*
* Physically remapped pages are special. Tell the
@@ -1199,31 +1178,21 @@
*/
vma->vm_flags |= VM_IO | VM_RESERVED;
+ BUG_ON(addr >= end);
+ pfn -= addr >> PAGE_SHIFT;
+ pgd = pgd_offset(mm, addr);
+ flush_cache_range(vma, addr, end);
spin_lock(&mm->page_table_lock);
- for (i = pgd_index(beg); i <= pgd_index(end-1); i++) {
- pud_t *pud = pud_alloc(mm, pgd, from);
- error = -ENOMEM;
- if (!pud)
+ do {
+ next = pgd_addr_end(addr, end);
+ err = remap_pud_range(mm, pgd, addr, next,
+ pfn + (addr >> PAGE_SHIFT), prot);
+ if (err)
break;
- next = (from + PGDIR_SIZE) & PGDIR_MASK;
- if (next > end || next <= from)
- next = end;
- error = remap_pud_range(mm, pud, from, end - from,
- pfn + (from >> PAGE_SHIFT), prot);
- if (error)
- break;
- from = next;
- pgd++;
- }
- /*
- * Why flush? remap_pte_range has a BUG_ON for !pte_none()
- */
- flush_tlb_range(vma, beg, end);
+ } while (pgd++, addr = next, addr != end);
spin_unlock(&mm->page_table_lock);
-
- return error;
-}
-
+ return err;
+}
EXPORT_SYMBOL(remap_pfn_range);
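
One subtlety in the new walkers deserves a note: each level biases pfn by subtracting addr >> PAGE_SHIFT before its loop, so that pfn + (addr >> PAGE_SHIFT) always names the frame backing the address currently being mapped, however the range is split at pgd/pud/pmd boundaries. Worked numbers (illustrative, assuming PAGE_SHIFT == 12):

    unsigned long addr = 0x10000000;    /* virtual page index 0x10000 */
    unsigned long pfn  = 0x80000;       /* frame backing addr */

    pfn -= addr >> 12;                  /* bias: 0x80000 - 0x10000 = 0x70000 */
    addr = 0x10200000;                  /* the walk advanced 0x200 pages */
    /* frame for the new addr: 0x70000 + 0x10200 = 0x80200,
     * i.e. the original pfn plus 0x200 pages -- still in step */
    unsigned long cur_pfn = pfn + (addr >> 12);
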
/*
@@ -1247,11 +1216,11 @@
{
pte_t entry;
- flush_cache_page(vma, address);
entry = maybe_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot)),
vma);
ptep_establish(vma, address, page_table, entry);
update_mmu_cache(vma, address, entry);
+ lazy_mmu_prot_update(entry);
}
/*
@@ -1299,11 +1268,12 @@
int reuse = can_share_swap_page(old_page);
unlock_page(old_page);
if (reuse) {
- flush_cache_page(vma, address);
+ flush_cache_page(vma, address, pfn);
entry = maybe_mkwrite(pte_mkyoung(pte_mkdirty(pte)),
vma);
ptep_set_access_flags(vma, address, page_table, entry, 1);
update_mmu_cache(vma, address, entry);
+ lazy_mmu_prot_update(entry);
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock);
return VM_FAULT_MINOR;
@@ -1337,13 +1307,12 @@
page_table = pte_offset_map(pmd, address);
if (likely(pte_same(*page_table, pte))) {
if (PageAnon(old_page))
- mm->anon_rss--;
- if (PageReserved(old_page)) {
- ++mm->rss;
- acct_update_integrals();
- update_mem_hiwater();
- } else
+ dec_mm_counter(mm, anon_rss);
+ if (PageReserved(old_page))
+ inc_mm_counter(mm, rss);
+ else
page_remove_rmap(old_page);
+ flush_cache_page(vma, address, pfn);
break_cow(vma, new_page, address, page_table);
lru_cache_add_active(new_page);
page_add_anon_rmap(new_page, vma, address);
@@ -1387,7 +1356,7 @@
* i_mmap_lock.
*
* In order to make forward progress despite repeatedly restarting some
- * large vma, note the break_addr set by unmap_vmas when it breaks out:
+ * large vma, note the restart_addr from unmap_vmas when it breaks out:
* and restart from that address when we reach that vma again. It might
* have been split or merged, shrunk or extended, but never shifted: so
* restart_addr remains valid so long as it remains in the vma's range.
@@ -1425,8 +1394,8 @@
}
}
- details->break_addr = end_addr;
- zap_page_range(vma, start_addr, end_addr - start_addr, details);
+ restart_addr = zap_page_range(vma, start_addr,
+ end_addr - start_addr, details);
/*
* We cannot rely on the break test in unmap_vmas:
@@ -1437,14 +1406,14 @@
need_break = need_resched() ||
need_lockbreak(details->i_mmap_lock);
- if (details->break_addr >= end_addr) {
+ if (restart_addr >= end_addr) {
/* We have now completed this vma: mark it so */
vma->vm_truncate_count = details->truncate_count;
if (!need_break)
return 0;
} else {
/* Note restart_addr in vma's truncate_count field */
- vma->vm_truncate_count = details->break_addr;
+ vma->vm_truncate_count = restart_addr;
if (!need_break)
goto again;
}
@@ -1732,12 +1701,13 @@
spin_lock(&mm->page_table_lock);
page_table = pte_offset_map(pmd, address);
if (unlikely(!pte_same(*page_table, orig_pte))) {
- pte_unmap(page_table);
- spin_unlock(&mm->page_table_lock);
- unlock_page(page);
- page_cache_release(page);
ret = VM_FAULT_MINOR;
- goto out;
+ goto out_nomap;
+ }
+
+ if (unlikely(!PageUptodate(page))) {
+ ret = VM_FAULT_SIGBUS;
+ goto out_nomap;
}
/* The page isn't present yet, go ahead with the fault. */
@@ -1746,10 +1716,7 @@
if (vm_swap_full())
remove_exclusive_swap_page(page);
- mm->rss++;
- acct_update_integrals();
- update_mem_hiwater();
-
+ inc_mm_counter(mm, rss);
pte = mk_pte(page, vma->vm_page_prot);
if (write_access && can_share_swap_page(page)) {
pte = maybe_mkwrite(pte_mkdirty(pte), vma);
@@ -1758,7 +1725,7 @@
unlock_page(page);
flush_icache_page(vma, page);
- set_pte(page_table, pte);
+ set_pte_at(mm, address, page_table, pte);
page_add_anon_rmap(page, vma, address);
if (write_access) {
@@ -1770,10 +1737,17 @@
/* No need to invalidate - it was non-present before */
update_mmu_cache(vma, address, pte);
+ lazy_mmu_prot_update(pte);
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock);
out:
return ret;
+out_nomap:
+ pte_unmap(page_table);
+ spin_unlock(&mm->page_table_lock);
+ unlock_page(page);
+ page_cache_release(page);
+ goto out;
}
/*
@@ -1813,9 +1787,7 @@
spin_unlock(&mm->page_table_lock);
goto out;
}
- mm->rss++;
- acct_update_integrals();
- update_mem_hiwater();
+ inc_mm_counter(mm, rss);
entry = maybe_mkwrite(pte_mkdirty(mk_pte(page,
vma->vm_page_prot)),
vma);
@@ -1824,11 +1796,12 @@
page_add_anon_rmap(page, vma, addr);
}
- ptep_establish_new(vma, addr, page_table, entry);
+ set_pte_at(mm, addr, page_table, entry);
pte_unmap(page_table);
/* No need to invalidate - it was non-present before */
update_mmu_cache(vma, addr, entry);
+ lazy_mmu_prot_update(entry);
spin_unlock(&mm->page_table_lock);
out:
return VM_FAULT_MINOR;
@@ -1931,15 +1904,13 @@
/* Only go through if we didn't race with anybody else... */
if (pte_none(*page_table)) {
if (!PageReserved(new_page))
- ++mm->rss;
- acct_update_integrals();
- update_mem_hiwater();
+ inc_mm_counter(mm, rss);
flush_icache_page(vma, new_page);
entry = mk_pte(new_page, vma->vm_page_prot);
if (write_access)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
- ptep_establish_new(vma, address, page_table, entry);
+ set_pte_at(mm, address, page_table, entry);
if (anon) {
lru_cache_add_active(new_page);
page_add_anon_rmap(new_page, vma, address);
@@ -1956,6 +1927,7 @@
/* no need to invalidate: a not-present page shouldn't be cached */
update_mmu_cache(vma, address, entry);
+ lazy_mmu_prot_update(entry);
spin_unlock(&mm->page_table_lock);
out:
return ret;
@@ -1983,7 +1955,7 @@
*/
if (!vma->vm_ops || !vma->vm_ops->populate ||
(write_access && !(vma->vm_flags & VM_SHARED))) {
- pte_clear(pte);
+ pte_clear(mm, address, pte);
return do_no_page(mm, vma, address, write_access, pte, pmd);
}
@@ -2050,6 +2022,7 @@
entry = pte_mkyoung(entry);
ptep_set_access_flags(vma, address, pte, entry, write_access);
update_mmu_cache(vma, address, entry);
+ lazy_mmu_prot_update(entry);
pte_unmap(pte);
spin_unlock(&mm->page_table_lock);
return VM_FAULT_MINOR;
@@ -2099,15 +2072,12 @@
return VM_FAULT_OOM;
}
-#ifndef __ARCH_HAS_4LEVEL_HACK
+#ifndef __PAGETABLE_PUD_FOLDED
/*
* Allocate page upper directory.
*
* We've already handled the fast-path in-line, and we own the
* page table lock.
- *
- * On a two-level or three-level page table, this ends up actually being
- * entirely optimized away.
*/
pud_t fastcall *__pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
{
@@ -2131,15 +2101,14 @@
out:
return pud_offset(pgd, address);
}
-
+#endif /* __PAGETABLE_PUD_FOLDED */
+
+#ifndef __PAGETABLE_PMD_FOLDED
/*
* Allocate page middle directory.
*
* We've already handled the fast-path in-line, and we own the
* page table lock.
- *
- * On a two-level page table, this ends up actually being entirely
- * optimized away.
*/
pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
{
@@ -2155,38 +2124,24 @@
* Because we dropped the lock, we should re-check the
* entry, as somebody else could have populated it..
*/
+#ifndef __ARCH_HAS_4LEVEL_HACK
if (pud_present(*pud)) {
pmd_free(new);
goto out;
}
pud_populate(mm, pud, new);
- out:
- return pmd_offset(pud, address);
-}
#else
-pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
-{
- pmd_t *new;
-
- spin_unlock(&mm->page_table_lock);
- new = pmd_alloc_one(mm, address);
- spin_lock(&mm->page_table_lock);
- if (!new)
- return NULL;
-
- /*
- * Because we dropped the lock, we should re-check the
- * entry, as somebody else could have populated it..
- */
if (pgd_present(*pud)) {
pmd_free(new);
goto out;
}
pgd_populate(mm, pud, new);
-out:
+#endif /* __ARCH_HAS_4LEVEL_HACK */
+
+ out:
return pmd_offset(pud, address);
}
-#endif
+#endif /* __PAGETABLE_PMD_FOLDED */
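
For context on the "fast-path in-line" remarks above: __pud_alloc() and __pmd_alloc() are only the slow paths. The inline wrappers look roughly like this (paraphrased from the 2.6.12 headers, not part of this patch), which is why a folded level compiles away entirely:

    static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd,
                                   unsigned long address)
    {
        if (pgd_none(*pgd))                 /* empty: take the slow path */
            return __pud_alloc(mm, pgd, address);
        return pud_offset(pgd, address);    /* fast path: already populated */
    }
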
int make_pages_present(unsigned long addr, unsigned long end)
{
@@ -2253,13 +2208,13 @@
* update_mem_hiwater
* - update per process rss and vm high water data
*/
-void update_mem_hiwater(void)
-{
- struct task_struct *tsk = current;
-
+void update_mem_hiwater(struct task_struct *tsk)
+{
if (tsk->mm) {
- if (tsk->mm->hiwater_rss < tsk->mm->rss)
- tsk->mm->hiwater_rss = tsk->mm->rss;
+ unsigned long rss = get_mm_counter(tsk->mm, rss);
+
+ if (tsk->mm->hiwater_rss < rss)
+ tsk->mm->hiwater_rss = rss;
if (tsk->mm->hiwater_vm < tsk->mm->total_vm)
tsk->mm->hiwater_vm = tsk->mm->total_vm;
}
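
All of the mm->rss++ / mm->anon_rss-- sites above moved to the new *_mm_counter() accessor family. At this point in the tree the accessors are thin macros over fields renamed with a leading underscore, roughly as follows (paraphrased from include/linux/sched.h, not part of this patch):

    #define set_mm_counter(mm, member, value) ((mm)->_##member = (value))
    #define get_mm_counter(mm, member)        ((mm)->_##member)
    #define inc_mm_counter(mm, member)        ((mm)->_##member++)
    #define dec_mm_counter(mm, member)        ((mm)->_##member--)

    /* e.g. inc_mm_counter(mm, rss) expands to mm->_rss++ */

The indirection lets the counter representation change later without touching every call site.
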
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/mm/page_alloc.c
--- a/linux-2.6-xen-sparse/mm/page_alloc.c Tue Aug 9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/mm/page_alloc.c Tue Aug 9 23:57:17 2005
@@ -31,19 +31,26 @@
#include <linux/topology.h>
#include <linux/sysctl.h>
#include <linux/cpu.h>
+#include <linux/cpuset.h>
#include <linux/nodemask.h>
#include <linux/vmalloc.h>
#include <asm/tlbflush.h>
#include "internal.h"
-/* MCD - HACK: Find somewhere to initialize this EARLY, or make this initializer cleaner */
+/*
+ * MCD - HACK: Find somewhere to initialize this EARLY, or make this
+ * initializer cleaner
+ */
nodemask_t node_online_map = { { [0] = 1UL } };
+EXPORT_SYMBOL(node_online_map);
nodemask_t node_possible_map = NODE_MASK_ALL;
+EXPORT_SYMBOL(node_possible_map);
struct pglist_data *pgdat_list;
unsigned long totalram_pages;
unsigned long totalhigh_pages;
long nr_swap_pages;
+
/*
* results with 256, 32 in the lowmem_reserve sysctl:
* 1G machine -> (16M dma, 800M-16M normal, 1G-800M high)
@@ -188,6 +195,37 @@
{
__ClearPagePrivate(page);
page->private = 0;
+}
+
+/*
+ * Locate the struct page for both the matching buddy in our
+ * pair (buddy1) and the combined O(n+1) page they form (page).
+ *
+ * 1) Any buddy B1 will have an order O twin B2 which satisfies
+ * the following equation:
+ * B2 = B1 ^ (1 << O)
+ * For example, if the starting buddy (buddy2) is #8 its order
+ * 1 buddy is #10:
+ * B2 = 8 ^ (1 << 1) = 8 ^ 2 = 10
+ *
+ * 2) Any buddy B will have an order O+1 parent P which
+ * satisfies the following equation:
+ * P = B & ~(1 << O)
+ *
+ * Assumption: *_mem_map is contiguous at least up to MAX_ORDER
+ */
+static inline struct page *
+__page_find_buddy(struct page *page, unsigned long page_idx, unsigned int order)
+{
+ unsigned long buddy_idx = page_idx ^ (1 << order);
+
+ return page + (buddy_idx - page_idx);
+}
+
+static inline unsigned long
+__find_combined_index(unsigned long page_idx, unsigned int order)
+{
+ return (page_idx & ~(1 << order));
}
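
Plugging the comment's numbers into the two identities (a worked example):

    unsigned int  order    = 1;
    unsigned long page_idx = 8;

    /* identity 1: B2 = B1 ^ (1 << O) */
    unsigned long buddy_idx    = page_idx ^ (1UL << order);    /* 8 ^ 2 = 10 */

    /* identity 2: P = B & ~(1 << O), same result from either buddy */
    unsigned long combined_idx = page_idx & ~(1UL << order);   /* 8 & ~2 = 8 */
    /* and from the buddy's side: 10 & ~2 = 8 as well */
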
/*
@@ -233,50 +271,49 @@
* -- wli
*/
-static inline void __free_pages_bulk (struct page *page, struct page *base,
+static inline void __free_pages_bulk (struct page *page,
struct zone *zone, unsigned int order)
{
unsigned long page_idx;
- struct page *coalesced;
int order_size = 1 << order;
if (unlikely(order))
destroy_compound_page(page, order);
- page_idx = page - base;
+ page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
BUG_ON(page_idx & (order_size - 1));
BUG_ON(bad_range(zone, page));
zone->free_pages += order_size;
while (order < MAX_ORDER-1) {
+ unsigned long combined_idx;
struct free_area *area;
struct page *buddy;
- int buddy_idx;
-
- buddy_idx = (page_idx ^ (1 << order));
- buddy = base + buddy_idx;
+
+ combined_idx = __find_combined_index(page_idx, order);
+ buddy = __page_find_buddy(page, page_idx, order);
+
if (bad_range(zone, buddy))
break;
if (!page_is_buddy(buddy, order))
- break;
- /* Move the buddy up one level. */
+ break; /* Move the buddy up one level. */
list_del(&buddy->lru);
area = zone->free_area + order;
area->nr_free--;
rmv_page_order(buddy);
- page_idx &= buddy_idx;
+ page = page + (combined_idx - page_idx);
+ page_idx = combined_idx;
order++;
}
- coalesced = base + page_idx;
- set_page_order(coalesced, order);
- list_add(&coalesced->lru, &zone->free_area[order].free_list);
+ set_page_order(page, order);
+ list_add(&page->lru, &zone->free_area[order].free_list);
zone->free_area[order].nr_free++;
}
static inline void free_pages_check(const char *function, struct page *page)
{
- if ( page_mapped(page) ||
+ if ( page_mapcount(page) ||
page->mapping != NULL ||
page_count(page) != 0 ||
(page->flags & (
@@ -309,10 +346,9 @@
struct list_head *list, unsigned int order)
{
unsigned long flags;
- struct page *base, *page = NULL;
+ struct page *page = NULL;
int ret = 0;
- base = zone->zone_mem_map;
spin_lock_irqsave(&zone->lock, flags);
zone->all_unreclaimable = 0;
zone->pages_scanned = 0;
@@ -320,7 +356,7 @@
page = list_entry(list->prev, struct page, lru);
/* have to delete it as __free_pages_bulk list manipulates */
list_del(&page->lru);
- __free_pages_bulk(page, base, zone, order);
+ __free_pages_bulk(page, zone, order);
ret++;
}
spin_unlock_irqrestore(&zone->lock, flags);
@@ -405,7 +441,7 @@
*/
static void prep_new_page(struct page *page, int order)
{
- if (page->mapping || page_mapped(page) ||
+ if (page->mapping || page_mapcount(page) ||
(page->flags & (
1 << PG_private |
1 << PG_locked |
@@ -601,7 +637,7 @@
free_hot_cold_page(page, 1);
}
-static inline void prep_zero_page(struct page *page, int order, int gfp_flags)
+static inline void prep_zero_page(struct page *page, int order, unsigned int __nocast gfp_flags)
{
int i;
@@ -616,7 +652,7 @@
* or two.
*/
static struct page *
-buffered_rmqueue(struct zone *zone, int order, int gfp_flags)
+buffered_rmqueue(struct zone *zone, int order, unsigned int __nocast gfp_flags)
{
unsigned long flags;
struct page *page = NULL;
@@ -694,7 +730,7 @@
* This is the 'heart' of the zoned buddy allocator.
*/
struct page * fastcall
-__alloc_pages(unsigned int gfp_mask, unsigned int order,
+__alloc_pages(unsigned int __nocast gfp_mask, unsigned int order,
struct zonelist *zonelist)
{
const int wait = gfp_mask & __GFP_WAIT;
@@ -734,6 +770,9 @@
classzone_idx, 0, 0))
continue;
+ if (!cpuset_zone_allowed(z))
+ continue;
+
page = buffered_rmqueue(z, order, gfp_mask);
if (page)
goto got_pg;
@@ -745,6 +784,9 @@
/*
* Go through the zonelist again. Let __GFP_HIGH and allocations
* coming from realtime tasks to go deeper into reserves
+ *
+ * This is the last chance, in general, before the goto nopage.
+ * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
*/
for (i = 0; (z = zones[i]) != NULL; i++) {
if (!zone_watermark_ok(z, order, z->pages_min,
@@ -752,18 +794,27 @@
gfp_mask & __GFP_HIGH))
continue;
+ if (wait && !cpuset_zone_allowed(z))
+ continue;
+
page = buffered_rmqueue(z, order, gfp_mask);
if (page)
goto got_pg;
}
/* This allocation should allow future memory freeing. */
- if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE))) && !in_interrupt()) {
- /* go through the zonelist yet again, ignoring mins */
- for (i = 0; (z = zones[i]) != NULL; i++) {
- page = buffered_rmqueue(z, order, gfp_mask);
- if (page)
- goto got_pg;
+
+ if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE)))
+ && !in_interrupt()) {
+ if (!(gfp_mask & __GFP_NOMEMALLOC)) {
+ /* go through the zonelist yet again, ignoring mins */
+ for (i = 0; (z = zones[i]) != NULL; i++) {
+ if (!cpuset_zone_allowed(z))
+ continue;
+ page = buffered_rmqueue(z, order, gfp_mask);
+ if (page)
+ goto got_pg;
+ }
}
goto nopage;
}
@@ -798,6 +849,9 @@
if (!zone_watermark_ok(z, order, z->pages_min,
classzone_idx, can_try_harder,
gfp_mask & __GFP_HIGH))
+ continue;
+
+ if (!cpuset_zone_allowed(z))
continue;
page = buffered_rmqueue(z, order, gfp_mask);
@@ -816,6 +870,9 @@
classzone_idx, 0, 0))
continue;
+ if (!cpuset_zone_allowed(z))
+ continue;
+
page = buffered_rmqueue(z, order, gfp_mask);
if (page)
goto got_pg;
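
The asymmetry in the new cpuset checks is deliberate: most passes skip any zone the task's cpuset forbids, but in this second pass the test is guarded by wait, so a GFP_ATOMIC caller (which can neither sleep nor retry gracefully) escapes cpuset confinement rather than failing. Schematically (wait is computed earlier in __alloc_pages; a sketch, not a verbatim excerpt):

    const int wait = gfp_mask & __GFP_WAIT;    /* 0 for GFP_ATOMIC */

    /* ordinary passes: hard confinement */
    if (!cpuset_zone_allowed(z))
        continue;

    /* last-chance pass: confine only callers that could have slept */
    if (wait && !cpuset_zone_allowed(z))
        continue;
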
@@ -862,7 +919,7 @@
/*
* Common helper functions.
*/
-fastcall unsigned long __get_free_pages(unsigned int gfp_mask, unsigned int order)
+fastcall unsigned long __get_free_pages(unsigned int __nocast gfp_mask, unsigned int order)
{
struct page * page;
page = alloc_pages(gfp_mask, order);
@@ -873,7 +930,7 @@
EXPORT_SYMBOL(__get_free_pages);
-fastcall unsigned long get_zeroed_page(unsigned int gfp_mask)
+fastcall unsigned long get_zeroed_page(unsigned int __nocast gfp_mask)
{
struct page * page;
@@ -1302,8 +1359,7 @@
#define MAX_NODE_LOAD (num_online_nodes())
static int __initdata node_load[MAX_NUMNODES];
/**
- * find_next_best_node - find the next node that should appear in a given
- * node's fallback list
+ * find_next_best_node - find the next node that should appear in a given node's fallback list
* @node: node whose fallback list we're appending
* @used_node_mask: nodemask_t of already used nodes
*
@@ -1372,7 +1428,6 @@
/* initialize zonelists */
for (i = 0; i < GFP_ZONETYPES; i++) {
zonelist = pgdat->node_zonelists + i;
- memset(zonelist, 0, sizeof(*zonelist));
zonelist->zones[0] = NULL;
}
@@ -1419,7 +1474,6 @@
struct zonelist *zonelist;
zonelist = pgdat->node_zonelists + i;
- memset(zonelist, 0, sizeof(*zonelist));
j = 0;
k = ZONE_NORMAL;
@@ -1461,6 +1515,7 @@
for_each_online_node(i)
build_zonelists(NODE_DATA(i));
printk("Built %i zonelists\n", num_online_nodes());
+ cpuset_init_current_mems_allowed();
}
/*
@@ -1622,6 +1677,18 @@
batch /= 4; /* We effectively *= 4 below */
if (batch < 1)
batch = 1;
+
+ /*
+ * Clamp the batch to a 2^n - 1 value. Having a power
+ * of 2 value was found to be more likely to have
+ * suboptimal cache aliasing properties in some cases.
+ *
+ * For example if 2 tasks are alternately allocating
+ * batches of pages, one task can end up with a lot
+ * of pages of one half of the possible page colors
+ * and the other with pages of the other colors.
+ */
+ batch = (1 << fls(batch + batch/2)) - 1;
for (cpu = 0; cpu < NR_CPUS; cpu++) {
struct per_cpu_pages *pcp;
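
A quick check of what the clamp does, with illustrative values:

    /* fls(x): 1-based index of the highest set bit in x */
    int batch = 32;                             /* a power of two */
    batch = (1 << fls(batch + batch/2)) - 1;    /* fls(48) = 6 -> 64 - 1 = 63 */
    /* 63 = 2^6 - 1, so successive batches no longer start on the
     * same page-color boundaries */
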
@@ -1681,14 +1748,25 @@
}
}
-void __init node_alloc_mem_map(struct pglist_data *pgdat)
+static void __init alloc_node_mem_map(struct pglist_data *pgdat)
{
unsigned long size;
- size = (pgdat->node_spanned_pages + 1) * sizeof(struct page);
- pgdat->node_mem_map = alloc_bootmem_node(pgdat, size);
+ /* Skip empty nodes */
+ if (!pgdat->node_spanned_pages)
+ return;
+
+ /* ia64 gets its own node_mem_map, before this, without bootmem */
+ if (!pgdat->node_mem_map) {
+ size = (pgdat->node_spanned_pages + 1) * sizeof(struct page);
+ pgdat->node_mem_map = alloc_bootmem_node(pgdat, size);
+ }
#ifndef CONFIG_DISCONTIGMEM
- mem_map = contig_page_data.node_mem_map;
+ /*
+ * With no DISCONTIG, the global mem_map is just set as node 0's
+ */
+ if (pgdat == NODE_DATA(0))
+ mem_map = NODE_DATA(0)->node_mem_map;
#endif
}
@@ -1700,8 +1778,7 @@
pgdat->node_start_pfn = node_start_pfn;
calculate_zone_totalpages(pgdat, zones_size, zholes_size);
- if (!pfn_to_page(node_start_pfn))
- node_alloc_mem_map(pgdat);
+ alloc_node_mem_map(pgdat);
free_area_init_core(pgdat, zones_size, zholes_size);
}
@@ -1823,6 +1900,7 @@
"allocstall",
"pgrotated",
+ "nr_bounce",
};
static void *vmstat_start(struct seq_file *m, loff_t *pos)
@@ -1926,15 +2004,20 @@
for_each_pgdat(pgdat) {
for (j = 0; j < MAX_NR_ZONES; j++) {
- struct zone * zone = pgdat->node_zones + j;
+ struct zone *zone = pgdat->node_zones + j;
unsigned long present_pages = zone->present_pages;
zone->lowmem_reserve[j] = 0;
for (idx = j-1; idx >= 0; idx--) {
- struct zone * lower_zone = pgdat->node_zones + idx;
-
- lower_zone->lowmem_reserve[j] = present_pages / sysctl_lowmem_reserve_ratio[idx];
+ struct zone *lower_zone;
+
+ if (sysctl_lowmem_reserve_ratio[idx] < 1)
+ sysctl_lowmem_reserve_ratio[idx] = 1;
+
+ lower_zone = pgdat->node_zones + idx;
+ lower_zone->lowmem_reserve[j] = present_pages /
+ sysctl_lowmem_reserve_ratio[idx];
present_pages += lower_zone->present_pages;
}
}
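
To make the reserve arithmetic concrete (numbers illustrative, using the ratio-of-256 example from the comment near the top of this file): protecting ZONE_DMA from ZONE_NORMAL allocations with a ~784 MB ZONE_NORMAL of 4 KB pages gives

    lower_zone->lowmem_reserve[j] = 200704 / 256;   /* = 784 pages, ~3 MB */

and the new < 1 clamp on sysctl_lowmem_reserve_ratio[idx] also keeps a zero ratio from turning this into a divide-by-zero.
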
@@ -2041,7 +2124,7 @@
* changes.
*/
int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
- struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+ struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
{
proc_dointvec(table, write, file, buffer, length, ppos);
setup_per_zone_pages_min();
@@ -2058,7 +2141,7 @@
* if in function of the boot time zone sizes.
*/
int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
- struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+ struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
{
proc_dointvec_minmax(table, write, file, buffer, length, ppos);
setup_per_zone_lowmem_reserve();