Pull trivial into release branch
authorLen Brown <len.brown@intel.com>
Thu, 11 Jan 2007 06:55:25 +0000 (01:55 -0500)
committerLen Brown <len.brown@intel.com>
Thu, 11 Jan 2007 06:55:25 +0000 (01:55 -0500)
181 files changed:
Documentation/cachetlb.txt
Documentation/feature-removal-schedule.txt
Documentation/powerpc/mpc52xx-device-tree-bindings.txt
Documentation/usb/acm.txt
Documentation/x86_64/boot-options.txt
MAINTAINERS
Makefile
arch/arm/kernel/entry-armv.S
arch/arm/kernel/time.c
arch/arm/kernel/traps.c
arch/arm/mm/flush.c
arch/i386/Kconfig
arch/i386/kernel/cpu/common.c
arch/i386/kernel/smpboot.c
arch/i386/kernel/trampoline.S
arch/mips/kernel/mips_ksyms.c
arch/mips/kernel/time.c
arch/mips/lib/Makefile
arch/mips/lib/csum_partial.S
arch/mips/lib/csum_partial_copy.c [deleted file]
arch/mips/mips-boards/generic/time.c
arch/mips/mips-boards/malta/malta_mtd.c [new file with mode: 0644]
arch/mips/mips-boards/sead/sead_int.c
arch/mips/mm/pg-r4k.c
arch/mips/pci/ops-pnx8550.c
arch/mips/philips/pnx8550/common/time.c
arch/powerpc/Kconfig
arch/powerpc/boot/dts/lite5200.dts
arch/powerpc/boot/dts/lite5200b.dts
arch/powerpc/configs/ppc64_defconfig
arch/powerpc/kernel/asm-offsets.c
arch/powerpc/kernel/entry_64.S
arch/powerpc/mm/hugetlbpage.c
arch/powerpc/platforms/52xx/lite5200.c
arch/powerpc/platforms/52xx/mpc52xx_common.c
arch/powerpc/platforms/iseries/lpevents.c
arch/powerpc/platforms/iseries/mf.c
arch/powerpc/platforms/iseries/proc.c
arch/powerpc/platforms/iseries/setup.c
arch/powerpc/platforms/iseries/viopath.c
arch/powerpc/platforms/maple/setup.c
arch/powerpc/platforms/pasemi/setup.c
arch/powerpc/platforms/pseries/hvCall.S
arch/powerpc/platforms/pseries/hvCall_inst.c
arch/powerpc/platforms/pseries/xics.c
arch/powerpc/sysdev/Makefile
arch/s390/kernel/head31.S
arch/s390/kernel/head64.S
arch/s390/kernel/setup.c
arch/s390/kernel/smp.c
arch/s390/lib/uaccess_pt.c
arch/s390/lib/uaccess_std.c
arch/x86_64/kernel/early-quirks.c
arch/x86_64/kernel/io_apic.c
drivers/acpi/toshiba_acpi.c
drivers/ata/Kconfig
drivers/ata/pata_hpt37x.c
drivers/block/pktcdvd.c
drivers/bluetooth/hci_usb.c
drivers/char/ip2/i2ellis.h
drivers/connector/cn_proc.c
drivers/i2c/busses/Kconfig
drivers/i2c/busses/i2c-mv64xxx.c
drivers/i2c/busses/i2c-pnx.c
drivers/i2c/chips/m41t00.c
drivers/i2c/i2c-core.c
drivers/ide/pci/atiixp.c
drivers/ide/pci/via82cxxx.c
drivers/infiniband/core/cma.c
drivers/infiniband/core/ucma.c
drivers/infiniband/hw/ehca/ehca_hca.c
drivers/infiniband/hw/ehca/ehca_irq.c
drivers/infiniband/hw/ehca/ehca_iverbs.h
drivers/infiniband/hw/ehca/ehca_main.c
drivers/infiniband/hw/ehca/ehca_mrmw.c
drivers/infiniband/hw/ehca/ehca_qp.c
drivers/infiniband/hw/mthca/mthca_cq.c
drivers/infiniband/hw/mthca/mthca_memfree.c
drivers/infiniband/hw/mthca/mthca_qp.c
drivers/infiniband/ulp/iser/iscsi_iser.c
drivers/infiniband/ulp/iser/iser_initiator.c
drivers/kvm/kvm.h
drivers/kvm/kvm_main.c
drivers/kvm/mmu.c
drivers/kvm/paging_tmpl.h
drivers/kvm/svm.c
drivers/kvm/vmx.c
drivers/kvm/x86_emulate.c
drivers/leds/leds-s3c24xx.c
drivers/macintosh/via-pmu.c
drivers/mmc/mmci.c
drivers/net/bnx2.c
drivers/net/chelsio/my3126.c
drivers/net/e1000/e1000_main.c
drivers/net/forcedeth.c
drivers/net/ixgb/ixgb.h
drivers/net/ixgb/ixgb_ethtool.c
drivers/net/ixgb/ixgb_hw.c
drivers/net/ixgb/ixgb_main.c
drivers/net/pcmcia/pcnet_cs.c
drivers/net/qla3xxx.c
drivers/net/tg3.c
drivers/net/tg3.h
drivers/net/wireless/ipw2100.c
drivers/pci/Kconfig
drivers/pci/quirks.c
drivers/pci/search.c
drivers/rtc/rtc-at91rm9200.c
drivers/rtc/rtc-rs5c372.c
drivers/s390/char/vmcp.c
drivers/s390/cio/cio.c
drivers/s390/net/Kconfig
drivers/s390/net/qeth.h
drivers/s390/net/qeth_main.c
drivers/serial/mpc52xx_uart.c
drivers/usb/class/usblp.c
drivers/usb/core/endpoint.c
drivers/usb/gadget/omap_udc.c
drivers/usb/gadget/omap_udc.h
drivers/usb/host/uhci-hcd.c
drivers/usb/misc/sisusbvga/sisusb_con.c
drivers/usb/net/asix.c
drivers/usb/serial/Kconfig
drivers/usb/serial/option.c
drivers/usb/storage/unusual_devs.h
fs/adfs/dir_f.c
fs/bad_inode.c
fs/binfmt_elf.c
fs/ufs/balloc.c
fs/ufs/inode.c
include/asm-arm/arch-iop32x/iop32x.h
include/asm-arm/cacheflush.h
include/asm-arm/hardware/iop3xx.h
include/asm-i386/boot.h
include/asm-mips/checksum.h
include/asm-mips/irq.h
include/asm-parisc/cacheflush.h
include/asm-powerpc/bug.h
include/asm-powerpc/hvcall.h
include/asm-powerpc/mpc52xx.h
include/asm-s390/futex.h
include/linux/highmem.h
include/linux/kvm.h
include/linux/magic.h
include/linux/swap.h
include/net/ieee80211.h
include/sound/version.h
init/main.c
kernel/module.c
kernel/params.c
kernel/power/swap.c
kernel/power/user.c
kernel/profile.c
mm/memory.c
mm/oom_kill.c
mm/page_alloc.c
mm/slab.c
mm/swapfile.c
mm/vmscan.c
net/bluetooth/cmtp/capi.c
net/bluetooth/hci_sysfs.c
net/bluetooth/rfcomm/sock.c
net/bluetooth/rfcomm/tty.c
net/ipv4/af_inet.c
net/ipv4/tcp_ipv4.c
net/ipv6/af_inet6.c
net/netfilter/nf_conntrack_netbios_ns.c
net/netlabel/netlabel_cipso_v4.c
net/x25/x25_facilities.c
scripts/kconfig/qconf.cc
scripts/kconfig/qconf.h
security/selinux/ss/context.h
security/selinux/ss/mls.c
security/selinux/ss/mls.h
security/selinux/ss/services.c
sound/pci/cmipci.c
sound/pci/echoaudio/midi.c
sound/pci/hda/hda_generic.c
sound/pci/hda/hda_intel.c
sound/usb/usbaudio.c
sound/usb/usbmixer.c

index 73e794f0ff0924e2432e305cb5520345f776160b..debf6813934af05e878863c4a8c53bbf6ae64e62 100644 (file)
@@ -373,14 +373,15 @@ maps this page at its virtual address.
        likely that you will need to flush the instruction cache
        for copy_to_user_page().
 
-  void flush_anon_page(struct page *page, unsigned long vmaddr)
+  void flush_anon_page(struct vm_area_struct *vma, struct page *page,
+                       unsigned long vmaddr)
        When the kernel needs to access the contents of an anonymous
        page, it calls this function (currently only
        get_user_pages()).  Note: flush_dcache_page() deliberately
        doesn't work for an anonymous page.  The default
        implementation is a nop (and should remain so for all coherent
        architectures).  For incoherent architectures, it should flush
-       the cache of the page at vmaddr in the current user process.
+       the cache of the page at vmaddr.
 
   void flush_kernel_dcache_page(struct page *page)
        When the kernel needs to modify a user page is has obtained
index 2ee16b49e10e13324a6dc66defb8c73f92d8e91c..fc532395d11687103d21748d59a61570e74c6c15 100644 (file)
@@ -226,6 +226,23 @@ Who:       Jean Delvare <khali@linux-fr.org>
 
 ---------------------------
 
+What:  i2c_adapter.dev
+       i2c_adapter.list
+When:  July 2007
+Why:   Superfluous, given i2c_adapter.class_dev:
+         * The "dev" was a stand-in for the physical device node that legacy
+           drivers would not have; but now it's almost always present.  Any
+           remaining legacy drivers must upgrade (they now trigger warnings).
+         * The "list" duplicates class device children.
+       The delay in removing this is so upgraded lm_sensors and libsensors
+       can get deployed.  (Removal causes minor changes in the sysfs layout,
+       notably the location of the adapter type name and parenting the i2c
+       client hardware directly from their controller.)
+Who:   Jean Delvare <khali@linux-fr.org>,
+       David Brownell <dbrownell@users.sourceforge.net>
+
+---------------------------
+
 What:  IPv4 only connection tracking/NAT/helpers
 When:  2.6.22
 Why:   The new layer 3 independant connection tracking replaces the old
index d077d764f82b0ce73148063e694f842e7ab00ff9..7fb3b8a44eb649f1060af13c474c812e181b0f22 100644 (file)
@@ -157,8 +157,8 @@ rtc@<addr>  rtc             *-rtc           Real time clock
 mscan@<addr>   mscan           *-mscan         CAN bus controller
 pci@<addr>     pci             *-pci           PCI bridge
 serial@<addr>  serial          *-psc-uart      PSC in serial mode
-i2s@<addr>     i2s             *-psc-i2s       PSC in i2s mode
-ac97@<addr>    ac97            *-psc-ac97      PSC in ac97 mode
+i2s@<addr>     sound           *-psc-i2s       PSC in i2s mode
+ac97@<addr>    sound           *-psc-ac97      PSC in ac97 mode
 spi@<addr>     spi             *-psc-spi       PSC in spi mode
 irda@<addr>    irda            *-psc-irda      PSC in IrDA mode
 spi@<addr>     spi             *-spi           MPC52xx spi device
index 737d6104c3f39fb294774fc5c16de0cb99612a58..17f5c2e1a5708bb493edd779730e5f40d0dc988d 100644 (file)
@@ -46,6 +46,10 @@ Abstract Control Model (USB CDC ACM) specification.
 
        3Com USR ISDN Pro TA
 
+  Some cell phones also connect via USB. I know the following phones work:
+
+       SonyEricsson K800i
+
   Unfortunately many modems and most ISDN TAs use proprietary interfaces and
 thus won't work with this drivers. Check for ACM compliance before buying.
 
index dbdcaf68e3ea382304784bf66fce502216f1d1f1..5c86ed6f0448a8e2378a4ba62d6cedecf236215c 100644 (file)
@@ -52,6 +52,10 @@ APICs
                 apicmaintimer. Useful when your PIT timer is totally
                 broken.
 
+   disable_8254_timer / enable_8254_timer
+                Disable or enable interrupt 0 timer routing over the 8254 in addition to over
+                the IO-APIC. The kernel tries to set a sensible default.
+
 Early Console
 
    syntax: earlyprintk=vga
index d1f454c6dbe48cd1cf296a196ca8ef6feb5a5ad4..8e1d7da07ce3d8be0ef3767de778b02ec5f6c395 100644 (file)
@@ -441,20 +441,32 @@ S:      Maintained
 ARM/INTEL IOP32X ARM ARCHITECTURE
 P:     Lennert Buytenhek
 M:     kernel@wantstofly.org
+P:     Dan Williams
+M:     dan.j.williams@intel.com
 L:     linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
-S:     Maintained
+S:     Supported
+
+ARM/INTEL IOP33X ARM ARCHITECTURE
+P:     Dan Williams
+M:     dan.j.williams@intel.com
+L:     linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+S:     Supported
 
 ARM/INTEL IOP13XX ARM ARCHITECTURE
 P:     Lennert Buytenhek
 M:     kernel@wantstofly.org
+P:     Dan Williams
+M:     dan.j.williams@intel.com
 L:     linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
-S:     Maintained
+S:     Supported
 
 ARM/INTEL IQ81342EX MACHINE SUPPORT
 P:     Lennert Buytenhek
 M:     kernel@wantstofly.org
+P:     Dan Williams
+M:     dan.j.williams@intel.com
 L:     linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
-S:     Maintained
+S:     Supported
 
 ARM/INTEL IXP2000 ARM ARCHITECTURE
 P:     Lennert Buytenhek
@@ -477,8 +489,10 @@ S: Maintained
 ARM/INTEL XSC3 (MANZANO) ARM CORE
 P:     Lennert Buytenhek
 M:     kernel@wantstofly.org
+P:     Dan Williams
+M:     dan.j.williams@intel.com
 L:     linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
-S:     Maintained
+S:     Supported
 
 ARM/IP FABRICS DOUBLE ESPRESSO MACHINE SUPPORT
 P:     Lennert Buytenhek
@@ -2608,6 +2622,12 @@ P:       Adam Belay
 M:     ambx1@neo.rr.com
 S:     Maintained
 
+PNXxxxx I2C DRIVER
+P:     Vitaly Wool
+M:     vitalywool@gmail.com
+L:     i2c@lm-sensors.org
+S:     Maintained
+
 PPP PROTOCOL DRIVERS AND COMPRESSORS
 P:     Paul Mackerras
 M:     paulus@samba.org
index 0e9eee7682888df40ad5de15c1c1f56fcd91a9e5..fb5b3ef9ab11bfa086047b3d7ef5d30718e805e7 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 20
-EXTRAVERSION =-rc3
+EXTRAVERSION =-rc4
 NAME = Homicidal Dwarf Hamster
 
 # *DOCUMENTATION*
index 2db42b18f53f060a1040ba65cf01114793535ad9..8517c3c3eb3393c8ba539add942342757c117f2b 100644 (file)
@@ -436,7 +436,7 @@ __und_usr:
        usr_entry
 
        tst     r3, #PSR_T_BIT                  @ Thumb mode?
-       bne     fpundefinstr                    @ ignore FP
+       bne     __und_usr_unknown               @ ignore FP
        sub     r4, r2, #4
 
        @
@@ -448,7 +448,7 @@ __und_usr:
        @
 1:     ldrt    r0, [r4]
        adr     r9, ret_from_exception
-       adr     lr, fpundefinstr
+       adr     lr, __und_usr_unknown
        @
        @ fallthrough to call_fpe
        @
@@ -476,7 +476,9 @@ __und_usr:
  * Emulators may wish to make use of the following registers:
  *  r0  = instruction opcode.
  *  r2  = PC+4
+ *  r9  = normal "successful" return address
  *  r10 = this threads thread_info structure.
+ *  lr  = unrecognised instruction return address
  */
 call_fpe:
        tst     r0, #0x08000000                 @ only CDP/CPRT/LDC/STC have bit 27
@@ -545,10 +547,12 @@ do_fpe:
 
        .data
 ENTRY(fp_enter)
-       .word   fpundefinstr
+       .word   no_fp
        .text
 
-fpundefinstr:
+no_fp: mov     pc, lr
+
+__und_usr_unknown:
        mov     r0, sp
        adr     lr, ret_from_exception
        b       do_undefinstr
index 6ff5e3ff6cb57a781a672b631596f924c75f3ee4..3c8cdcfe8d4a9f72c9ffb3762a72b260be85d73c 100644 (file)
@@ -29,6 +29,8 @@
 #include <linux/timer.h>
 #include <linux/irq.h>
 
+#include <linux/mc146818rtc.h>
+
 #include <asm/leds.h>
 #include <asm/thread_info.h>
 #include <asm/mach/time.h>
@@ -85,6 +87,17 @@ unsigned long long __attribute__((weak)) sched_clock(void)
        return (unsigned long long)jiffies * (1000000000 / HZ);
 }
 
+/*
+ * An implementation of printk_clock() independent from
+ * sched_clock().  This avoids non-bootable kernels when
+ * printk_clock is enabled.
+ */
+unsigned long long printk_clock(void)
+{
+       return (unsigned long long)(jiffies - INITIAL_JIFFIES) *
+                       (1000000000 / HZ);
+}
+
 static unsigned long next_rtc_update;
 
 /*
index 042a12982e980c43bb0520e289477df525a50fc2..908915675edcb3f66520c9a3765441d388ef29c0 100644 (file)
@@ -27,6 +27,7 @@
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 #include <asm/traps.h>
+#include <asm/io.h>
 
 #include "ptrace.h"
 #include "signal.h"
index 628348c9f6c5f17ded5fa62ec700bdb8666e85a7..9df507d36e0b9b0f0291395da5cecd007b627f0f 100644 (file)
@@ -202,3 +202,42 @@ void flush_dcache_page(struct page *page)
        }
 }
 EXPORT_SYMBOL(flush_dcache_page);
+
+/*
+ * Flush an anonymous page so that users of get_user_pages()
+ * can safely access the data.  The expected sequence is:
+ *
+ *  get_user_pages()
+ *    -> flush_anon_page
+ *  memcpy() to/from page
+ *  if written to page, flush_dcache_page()
+ */
+void __flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr)
+{
+       unsigned long pfn;
+
+       /* VIPT non-aliasing caches need do nothing */
+       if (cache_is_vipt_nonaliasing())
+               return;
+
+       /*
+        * Write back and invalidate userspace mapping.
+        */
+       pfn = page_to_pfn(page);
+       if (cache_is_vivt()) {
+               flush_cache_page(vma, vmaddr, pfn);
+       } else {
+               /*
+                * For aliasing VIPT, we can flush an alias of the
+                * userspace address only.
+                */
+               flush_pfn_alias(pfn, vmaddr);
+       }
+
+       /*
+        * Invalidate kernel mapping.  No data should be contained
+        * in this mapping of the page.  FIXME: this is overkill
+        * since we actually ask for a write-back and invalidate.
+        */
+       __cpuc_flush_dcache_page(page_address(page));
+}
index 0d67a0a1151e8ef263ae67d1489ea5ad98f8a1d4..0dfee812811a9c75427551ef8177b45f2ca7a998 100644 (file)
@@ -777,6 +777,47 @@ config CRASH_DUMP
           PHYSICAL_START.
          For more details see Documentation/kdump/kdump.txt
 
+config PHYSICAL_START
+       hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP)
+       default "0x100000"
+       help
+         This gives the physical address where the kernel is loaded.
+
+         If the kernel is not relocatable (CONFIG_RELOCATABLE=n) then
+         bzImage will decompress itself to above physical address and
+         run from there. Otherwise, bzImage will run from the address where
+         it has been loaded by the boot loader and will ignore above physical
+         address.
+
+         In normal kdump cases one does not have to set/change this option
+         as now bzImage can be compiled as a completely relocatable image
+         (CONFIG_RELOCATABLE=y) and be used to load and run from a different
+         address. This option is mainly useful for the folks who don't want
+         to use a bzImage for capturing the crash dump and want to use a
+         vmlinux instead. vmlinux is not relocatable hence a kernel needs
+         to be specifically compiled to run from a specific memory area
+         (normally a reserved region) and this option comes handy.
+
+         So if you are using bzImage for capturing the crash dump, leave
+         the value here unchanged at 0x100000 and set CONFIG_RELOCATABLE=y.
+         Otherwise if you plan to use vmlinux for capturing the crash dump
+         change this value to start of the reserved region (Typically 16MB
+         0x1000000). In other words, it can be set based on the "X" value as
+         specified in the "crashkernel=YM@XM" command line boot parameter
+         passed to the panic-ed kernel. Typically this parameter is set as
+         crashkernel=64M@16M. Please take a look at
+         Documentation/kdump/kdump.txt for more details about crash dumps.
+
+         Usage of bzImage for capturing the crash dump is recommended as
+         one does not have to build two kernels. Same kernel can be used
+         as production kernel and capture kernel. Above option should have
+         gone away after relocatable bzImage support is introduced. But it
+         is present because there are users out there who continue to use
+         vmlinux for dump capture. This option should go away down the
+         line.
+
+         Don't change this unless you know what you are doing.
+
 config RELOCATABLE
        bool "Build a relocatable kernel(EXPERIMENTAL)"
        depends on EXPERIMENTAL
index 1b34c56f8123ac7f4ccb7a7e677bee6ef8f7cb6f..8689d62abd4adc66729c5a58185612ddfd6271d6 100644 (file)
@@ -54,7 +54,7 @@ static struct cpu_dev __cpuinitdata default_cpu = {
        .c_init = default_init,
        .c_vendor = "Unknown",
 };
-static struct cpu_dev * this_cpu = &default_cpu;
+static struct cpu_dev * this_cpu __cpuinitdata = &default_cpu;
 
 static int __init cachesize_setup(char *str)
 {
index aef39be813614f0e2591836a4f1fa7aefb5ee9d3..300d9b38d02ec449ebbe9817743b5bf8a639743b 100644 (file)
@@ -227,7 +227,7 @@ static struct {
        atomic_t count_start;
        atomic_t count_stop;
        unsigned long long values[NR_CPUS];
-} tsc __initdata = {
+} tsc __cpuinitdata = {
        .start_flag = ATOMIC_INIT(0),
        .count_start = ATOMIC_INIT(0),
        .count_stop = ATOMIC_INIT(0),
@@ -332,7 +332,7 @@ static void __init synchronize_tsc_bp(void)
                printk("passed.\n");
 }
 
-static void __init synchronize_tsc_ap(void)
+static void __cpuinit synchronize_tsc_ap(void)
 {
        int i;
 
index fcce0e61b0e78835f07c85c089d76ddc913a4b24..2f1814c5cfd78d8d57dbbf97d2464e6e2b32fa6d 100644 (file)
 
 .data
 
+/* We can free up trampoline after bootup if cpu hotplug is not supported. */
+#ifndef CONFIG_HOTPLUG_CPU
+.section ".init.data","aw",@progbits
+#endif
+
 .code16
 
 ENTRY(trampoline_data)
index f44a01357adae3fcc1f560d747a3d5d74ba232ec..2ef857c3ee53ec23462906a56c5e19c6b591030a 100644 (file)
@@ -46,5 +46,7 @@ EXPORT_SYMBOL(__strnlen_user_nocheck_asm);
 EXPORT_SYMBOL(__strnlen_user_asm);
 
 EXPORT_SYMBOL(csum_partial);
+EXPORT_SYMBOL(csum_partial_copy_nocheck);
+EXPORT_SYMBOL(__csum_partial_copy_user);
 
 EXPORT_SYMBOL(invalid_pte_table);
index 11aab6d6bfe5e462b63f849b1dee631439a9a546..8aa544f73a5ea27fd85eb007ac9ec59938e0ff35 100644 (file)
@@ -94,10 +94,8 @@ static void c0_timer_ack(void)
 {
        unsigned int count;
 
-#ifndef CONFIG_SOC_PNX8550     /* pnx8550 resets to zero */
        /* Ack this timer interrupt and set the next one.  */
        expirelo += cycles_per_jiffy;
-#endif
        write_c0_compare(expirelo);
 
        /* Check to see if we have missed any timer interrupts.  */
index 888b61ea12feb4d58354ff62a28e87d2d5914c56..989c900b8b14a5f4b3b1422552044fcd9ac24f52 100644 (file)
@@ -2,7 +2,7 @@
 # Makefile for MIPS-specific library files..
 #
 
-lib-y  += csum_partial.o csum_partial_copy.o memcpy.o promlib.o \
+lib-y  += csum_partial.o memcpy.o promlib.o \
           strlen_user.o strncpy_user.o strnlen_user.o uncached.o
 
 obj-y  += iomap.o
index 9db357294be1efdbea0021db9fe653e5e872054d..c0a77fe038befda70c6f4c67c06557bd335a89b4 100644 (file)
@@ -8,7 +8,9 @@
  * Copyright (C) 1998, 1999 Ralf Baechle
  * Copyright (C) 1999 Silicon Graphics, Inc.
  */
+#include <linux/errno.h>
 #include <asm/asm.h>
+#include <asm/asm-offsets.h>
 #include <asm/regdef.h>
 
 #ifdef CONFIG_64BIT
@@ -271,3 +273,443 @@ small_csumcpy:
        jr      ra
        .set    noreorder
        END(csum_partial)
+
+
+/*
+ * checksum and copy routines based on memcpy.S
+ *
+ *     csum_partial_copy_nocheck(src, dst, len, sum)
+ *     __csum_partial_copy_user(src, dst, len, sum, errp)
+ *
+ * See "Spec" in memcpy.S for details.  Unlike __copy_user, all
+ * functions in this file use the standard calling convention.
+ */
+
+#define src a0
+#define dst a1
+#define len a2
+#define psum a3
+#define sum v0
+#define odd t8
+#define errptr t9
+
+/*
+ * The exception handler for loads requires that:
+ *  1- AT contain the address of the byte just past the end of the source
+ *     of the copy,
+ *  2- src_entry <= src < AT, and
+ *  3- (dst - src) == (dst_entry - src_entry),
+ * The _entry suffix denotes values when __copy_user was called.
+ *
+ * (1) is set up by __csum_partial_copy_from_user and maintained by
+ *     not writing AT in __csum_partial_copy
+ * (2) is met by incrementing src by the number of bytes copied
+ * (3) is met by not doing loads between a pair of increments of dst and src
+ *
+ * The exception handlers for stores store -EFAULT to errptr and return.
+ * These handlers do not need to overwrite any data.
+ */
+
+#define EXC(inst_reg,addr,handler)             \
+9:     inst_reg, addr;                         \
+       .section __ex_table,"a";                \
+       PTR     9b, handler;                    \
+       .previous
+
+#ifdef USE_DOUBLE
+
+#define LOAD   ld
+#define LOADL  ldl
+#define LOADR  ldr
+#define STOREL sdl
+#define STORER sdr
+#define STORE  sd
+#define ADD    daddu
+#define SUB    dsubu
+#define SRL    dsrl
+#define SLL    dsll
+#define SLLV   dsllv
+#define SRLV   dsrlv
+#define NBYTES 8
+#define LOG_NBYTES 3
+
+#else
+
+#define LOAD   lw
+#define LOADL  lwl
+#define LOADR  lwr
+#define STOREL swl
+#define STORER swr
+#define STORE  sw
+#define ADD    addu
+#define SUB    subu
+#define SRL    srl
+#define SLL    sll
+#define SLLV   sllv
+#define SRLV   srlv
+#define NBYTES 4
+#define LOG_NBYTES 2
+
+#endif /* USE_DOUBLE */
+
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+#define LDFIRST LOADR
+#define LDREST  LOADL
+#define STFIRST STORER
+#define STREST  STOREL
+#define SHIFT_DISCARD SLLV
+#define SHIFT_DISCARD_REVERT SRLV
+#else
+#define LDFIRST LOADL
+#define LDREST  LOADR
+#define STFIRST STOREL
+#define STREST  STORER
+#define SHIFT_DISCARD SRLV
+#define SHIFT_DISCARD_REVERT SLLV
+#endif
+
+#define FIRST(unit) ((unit)*NBYTES)
+#define REST(unit)  (FIRST(unit)+NBYTES-1)
+
+#define ADDRMASK (NBYTES-1)
+
+       .set    noat
+
+LEAF(__csum_partial_copy_user)
+       PTR_ADDU        AT, src, len    /* See (1) above. */
+#ifdef CONFIG_64BIT
+       move    errptr, a4
+#else
+       lw      errptr, 16(sp)
+#endif
+FEXPORT(csum_partial_copy_nocheck)
+       move    sum, zero
+       move    odd, zero
+       /*
+        * Note: dst & src may be unaligned, len may be 0
+        * Temps
+        */
+       /*
+        * The "issue break"s below are very approximate.
+        * Issue delays for dcache fills will perturb the schedule, as will
+        * load queue full replay traps, etc.
+        *
+        * If len < NBYTES use byte operations.
+        */
+       sltu    t2, len, NBYTES
+       and     t1, dst, ADDRMASK
+       bnez    t2, copy_bytes_checklen
+        and    t0, src, ADDRMASK
+       andi    odd, dst, 0x1                   /* odd buffer? */
+       bnez    t1, dst_unaligned
+        nop
+       bnez    t0, src_unaligned_dst_aligned
+       /*
+        * use delay slot for fall-through
+        * src and dst are aligned; need to compute rem
+        */
+both_aligned:
+        SRL    t0, len, LOG_NBYTES+3    # +3 for 8 units/iter
+       beqz    t0, cleanup_both_aligned # len < 8*NBYTES
+        nop
+       SUB     len, 8*NBYTES           # subtract here for bgez loop
+       .align  4
+1:
+EXC(   LOAD    t0, UNIT(0)(src),       l_exc)
+EXC(   LOAD    t1, UNIT(1)(src),       l_exc_copy)
+EXC(   LOAD    t2, UNIT(2)(src),       l_exc_copy)
+EXC(   LOAD    t3, UNIT(3)(src),       l_exc_copy)
+EXC(   LOAD    t4, UNIT(4)(src),       l_exc_copy)
+EXC(   LOAD    t5, UNIT(5)(src),       l_exc_copy)
+EXC(   LOAD    t6, UNIT(6)(src),       l_exc_copy)
+EXC(   LOAD    t7, UNIT(7)(src),       l_exc_copy)
+       SUB     len, len, 8*NBYTES
+       ADD     src, src, 8*NBYTES
+EXC(   STORE   t0, UNIT(0)(dst),       s_exc)
+       ADDC(sum, t0)
+EXC(   STORE   t1, UNIT(1)(dst),       s_exc)
+       ADDC(sum, t1)
+EXC(   STORE   t2, UNIT(2)(dst),       s_exc)
+       ADDC(sum, t2)
+EXC(   STORE   t3, UNIT(3)(dst),       s_exc)
+       ADDC(sum, t3)
+EXC(   STORE   t4, UNIT(4)(dst),       s_exc)
+       ADDC(sum, t4)
+EXC(   STORE   t5, UNIT(5)(dst),       s_exc)
+       ADDC(sum, t5)
+EXC(   STORE   t6, UNIT(6)(dst),       s_exc)
+       ADDC(sum, t6)
+EXC(   STORE   t7, UNIT(7)(dst),       s_exc)
+       ADDC(sum, t7)
+       bgez    len, 1b
+        ADD    dst, dst, 8*NBYTES
+       ADD     len, 8*NBYTES           # revert len (see above)
+
+       /*
+        * len == the number of bytes left to copy < 8*NBYTES
+        */
+cleanup_both_aligned:
+#define rem t7
+       beqz    len, done
+        sltu   t0, len, 4*NBYTES
+       bnez    t0, less_than_4units
+        and    rem, len, (NBYTES-1)    # rem = len % NBYTES
+       /*
+        * len >= 4*NBYTES
+        */
+EXC(   LOAD    t0, UNIT(0)(src),       l_exc)
+EXC(   LOAD    t1, UNIT(1)(src),       l_exc_copy)
+EXC(   LOAD    t2, UNIT(2)(src),       l_exc_copy)
+EXC(   LOAD    t3, UNIT(3)(src),       l_exc_copy)
+       SUB     len, len, 4*NBYTES
+       ADD     src, src, 4*NBYTES
+EXC(   STORE   t0, UNIT(0)(dst),       s_exc)
+       ADDC(sum, t0)
+EXC(   STORE   t1, UNIT(1)(dst),       s_exc)
+       ADDC(sum, t1)
+EXC(   STORE   t2, UNIT(2)(dst),       s_exc)
+       ADDC(sum, t2)
+EXC(   STORE   t3, UNIT(3)(dst),       s_exc)
+       ADDC(sum, t3)
+       beqz    len, done
+        ADD    dst, dst, 4*NBYTES
+less_than_4units:
+       /*
+        * rem = len % NBYTES
+        */
+       beq     rem, len, copy_bytes
+        nop
+1:
+EXC(   LOAD    t0, 0(src),             l_exc)
+       ADD     src, src, NBYTES
+       SUB     len, len, NBYTES
+EXC(   STORE   t0, 0(dst),             s_exc)
+       ADDC(sum, t0)
+       bne     rem, len, 1b
+        ADD    dst, dst, NBYTES
+
+       /*
+        * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
+        * A loop would do only a byte at a time with possible branch
+        * mispredicts.  Can't do an explicit LOAD dst,mask,or,STORE
+        * because can't assume read-access to dst.  Instead, use
+        * STREST dst, which doesn't require read access to dst.
+        *
+        * This code should perform better than a simple loop on modern,
+        * wide-issue mips processors because the code has fewer branches and
+        * more instruction-level parallelism.
+        */
+#define bits t2
+       beqz    len, done
+        ADD    t1, dst, len    # t1 is just past last byte of dst
+       li      bits, 8*NBYTES
+       SLL     rem, len, 3     # rem = number of bits to keep
+EXC(   LOAD    t0, 0(src),             l_exc)
+       SUB     bits, bits, rem # bits = number of bits to discard
+       SHIFT_DISCARD t0, t0, bits
+EXC(   STREST  t0, -1(t1),             s_exc)
+       SHIFT_DISCARD_REVERT t0, t0, bits
+       .set reorder
+       ADDC(sum, t0)
+       b       done
+       .set noreorder
+dst_unaligned:
+       /*
+        * dst is unaligned
+        * t0 = src & ADDRMASK
+        * t1 = dst & ADDRMASK; T1 > 0
+        * len >= NBYTES
+        *
+        * Copy enough bytes to align dst
+        * Set match = (src and dst have same alignment)
+        */
+#define match rem
+EXC(   LDFIRST t3, FIRST(0)(src),      l_exc)
+       ADD     t2, zero, NBYTES
+EXC(   LDREST  t3, REST(0)(src),       l_exc_copy)
+       SUB     t2, t2, t1      # t2 = number of bytes copied
+       xor     match, t0, t1
+EXC(   STFIRST t3, FIRST(0)(dst),      s_exc)
+       SLL     t4, t1, 3               # t4 = number of bits to discard
+       SHIFT_DISCARD t3, t3, t4
+       /* no SHIFT_DISCARD_REVERT to handle odd buffer properly */
+       ADDC(sum, t3)
+       beq     len, t2, done
+        SUB    len, len, t2
+       ADD     dst, dst, t2
+       beqz    match, both_aligned
+        ADD    src, src, t2
+
+src_unaligned_dst_aligned:
+       SRL     t0, len, LOG_NBYTES+2    # +2 for 4 units/iter
+       beqz    t0, cleanup_src_unaligned
+        and    rem, len, (4*NBYTES-1)   # rem = len % 4*NBYTES
+1:
+/*
+ * Avoid consecutive LD*'s to the same register since some mips
+ * implementations can't issue them in the same cycle.
+ * It's OK to load FIRST(N+1) before REST(N) because the two addresses
+ * are to the same unit (unless src is aligned, but it's not).
+ */
+EXC(   LDFIRST t0, FIRST(0)(src),      l_exc)
+EXC(   LDFIRST t1, FIRST(1)(src),      l_exc_copy)
+       SUB     len, len, 4*NBYTES
+EXC(   LDREST  t0, REST(0)(src),       l_exc_copy)
+EXC(   LDREST  t1, REST(1)(src),       l_exc_copy)
+EXC(   LDFIRST t2, FIRST(2)(src),      l_exc_copy)
+EXC(   LDFIRST t3, FIRST(3)(src),      l_exc_copy)
+EXC(   LDREST  t2, REST(2)(src),       l_exc_copy)
+EXC(   LDREST  t3, REST(3)(src),       l_exc_copy)
+       ADD     src, src, 4*NBYTES
+#ifdef CONFIG_CPU_SB1
+       nop                             # improves slotting
+#endif
+EXC(   STORE   t0, UNIT(0)(dst),       s_exc)
+       ADDC(sum, t0)
+EXC(   STORE   t1, UNIT(1)(dst),       s_exc)
+       ADDC(sum, t1)
+EXC(   STORE   t2, UNIT(2)(dst),       s_exc)
+       ADDC(sum, t2)
+EXC(   STORE   t3, UNIT(3)(dst),       s_exc)
+       ADDC(sum, t3)
+       bne     len, rem, 1b
+        ADD    dst, dst, 4*NBYTES
+
+cleanup_src_unaligned:
+       beqz    len, done
+        and    rem, len, NBYTES-1  # rem = len % NBYTES
+       beq     rem, len, copy_bytes
+        nop
+1:
+EXC(   LDFIRST t0, FIRST(0)(src),      l_exc)
+EXC(   LDREST  t0, REST(0)(src),       l_exc_copy)
+       ADD     src, src, NBYTES
+       SUB     len, len, NBYTES
+EXC(   STORE   t0, 0(dst),             s_exc)
+       ADDC(sum, t0)
+       bne     len, rem, 1b
+        ADD    dst, dst, NBYTES
+
+copy_bytes_checklen:
+       beqz    len, done
+        nop
+copy_bytes:
+       /* 0 < len < NBYTES  */
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+#define SHIFT_START 0
+#define SHIFT_INC 8
+#else
+#define SHIFT_START 8*(NBYTES-1)
+#define SHIFT_INC -8
+#endif
+       move    t2, zero        # partial word
+       li      t3, SHIFT_START # shift
+/* use l_exc_copy here to return correct sum on fault */
+#define COPY_BYTE(N)                   \
+EXC(   lbu     t0, N(src), l_exc_copy);        \
+       SUB     len, len, 1;            \
+EXC(   sb      t0, N(dst), s_exc);     \
+       SLLV    t0, t0, t3;             \
+       addu    t3, SHIFT_INC;          \
+       beqz    len, copy_bytes_done;   \
+        or     t2, t0
+
+       COPY_BYTE(0)
+       COPY_BYTE(1)
+#ifdef USE_DOUBLE
+       COPY_BYTE(2)
+       COPY_BYTE(3)
+       COPY_BYTE(4)
+       COPY_BYTE(5)
+#endif
+EXC(   lbu     t0, NBYTES-2(src), l_exc_copy)
+       SUB     len, len, 1
+EXC(   sb      t0, NBYTES-2(dst), s_exc)
+       SLLV    t0, t0, t3
+       or      t2, t0
+copy_bytes_done:
+       ADDC(sum, t2)
+done:
+       /* fold checksum */
+#ifdef USE_DOUBLE
+       dsll32  v1, sum, 0
+       daddu   sum, v1
+       sltu    v1, sum, v1
+       dsra32  sum, sum, 0
+       addu    sum, v1
+#endif
+       sll     v1, sum, 16
+       addu    sum, v1
+       sltu    v1, sum, v1
+       srl     sum, sum, 16
+       addu    sum, v1
+
+       /* odd buffer alignment? */
+       beqz    odd, 1f
+        nop
+       sll     v1, sum, 8
+       srl     sum, sum, 8
+       or      sum, v1
+       andi    sum, 0xffff
+1:
+       .set reorder
+       ADDC(sum, psum)
+       jr      ra
+       .set noreorder
+
+l_exc_copy:
+       /*
+        * Copy bytes from src until faulting load address (or until a
+        * lb faults)
+        *
+        * When reached by a faulting LDFIRST/LDREST, THREAD_BUADDR($28)
+        * may be more than a byte beyond the last address.
+        * Hence, the lb below may get an exception.
+        *
+        * Assumes src < THREAD_BUADDR($28)
+        */
+       LOAD    t0, TI_TASK($28)
+        li     t2, SHIFT_START
+       LOAD    t0, THREAD_BUADDR(t0)
+1:
+EXC(   lbu     t1, 0(src),     l_exc)
+       ADD     src, src, 1
+       sb      t1, 0(dst)      # can't fault -- we're copy_from_user
+       SLLV    t1, t1, t2
+       addu    t2, SHIFT_INC
+       ADDC(sum, t1)
+       bne     src, t0, 1b
+        ADD    dst, dst, 1
+l_exc:
+       LOAD    t0, TI_TASK($28)
+        nop
+       LOAD    t0, THREAD_BUADDR(t0)   # t0 is just past last good address
+        nop
+       SUB     len, AT, t0             # len number of uncopied bytes
+       /*
+        * Here's where we rely on src and dst being incremented in tandem,
+        *   See (3) above.
+        * dst += (fault addr - src) to put dst at first byte to clear
+        */
+       ADD     dst, t0                 # compute start address in a1
+       SUB     dst, src
+       /*
+        * Clear len bytes starting at dst.  Can't call __bzero because it
+        * might modify len.  An inefficient loop for these rare times...
+        */
+       beqz    len, done
+        SUB    src, len, 1
+1:     sb      zero, 0(dst)
+       ADD     dst, dst, 1
+       bnez    src, 1b
+        SUB    src, src, 1
+       li      v1, -EFAULT
+       b       done
+        sw     v1, (errptr)
+
+s_exc:
+       li      v0, -1 /* invalid checksum */
+       li      v1, -EFAULT
+       jr      ra
+        sw     v1, (errptr)
+       END(__csum_partial_copy_user)
diff --git a/arch/mips/lib/csum_partial_copy.c b/arch/mips/lib/csum_partial_copy.c
deleted file mode 100644 (file)
index 0677104..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 1994, 1995 Waldorf Electronics GmbH
- * Copyright (C) 1998, 1999 Ralf Baechle
- */
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <asm/byteorder.h>
-#include <asm/string.h>
-#include <asm/uaccess.h>
-#include <net/checksum.h>
-
-/*
- * copy while checksumming, otherwise like csum_partial
- */
-__wsum csum_partial_copy_nocheck(const void *src,
-       void *dst, int len, __wsum sum)
-{
-       /*
-        * It's 2:30 am and I don't feel like doing it real ...
-        * This is lots slower than the real thing (tm)
-        */
-       sum = csum_partial(src, len, sum);
-       memcpy(dst, src, len);
-
-       return sum;
-}
-
-EXPORT_SYMBOL(csum_partial_copy_nocheck);
-
-/*
- * Copy from userspace and compute checksum.  If we catch an exception
- * then zero the rest of the buffer.
- */
-__wsum csum_partial_copy_from_user (const void __user *src,
-       void *dst, int len, __wsum sum, int *err_ptr)
-{
-       int missing;
-
-       might_sleep();
-       missing = copy_from_user(dst, src, len);
-       if (missing) {
-               memset(dst + len - missing, 0, missing);
-               *err_ptr = -EFAULT;
-       }
-
-       return csum_partial(dst, len, sum);
-}
index e4604c73f02e9dd75099e5c9a51af8d9fcd6d759..a3c3a1d462b272f39900d9c9910ac1f1d8bde455 100644 (file)
@@ -47,6 +47,9 @@
 #ifdef CONFIG_MIPS_MALTA
 #include <asm/mips-boards/maltaint.h>
 #endif
+#ifdef CONFIG_MIPS_SEAD
+#include <asm/mips-boards/seadint.h>
+#endif
 
 unsigned long cpu_khz;
 
@@ -263,11 +266,13 @@ void __init mips_time_init(void)
 
 void __init plat_timer_setup(struct irqaction *irq)
 {
+#ifdef MSC01E_INT_BASE
        if (cpu_has_veic) {
                set_vi_handler (MSC01E_INT_CPUCTR, mips_timer_dispatch);
                mips_cpu_timer_irq = MSC01E_INT_BASE + MSC01E_INT_CPUCTR;
-       }
-       else {
+       } else
+#endif
+       {
                if (cpu_has_vint)
                        set_vi_handler (MIPSCPU_INT_CPUCTR, mips_timer_dispatch);
                mips_cpu_timer_irq = MIPSCPU_INT_BASE + MIPSCPU_INT_CPUCTR;
diff --git a/arch/mips/mips-boards/malta/malta_mtd.c b/arch/mips/mips-boards/malta/malta_mtd.c
new file mode 100644 (file)
index 0000000..8ad9bdf
--- /dev/null
@@ -0,0 +1,63 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2006 MIPS Technologies, Inc.
+ *     written by Ralf Baechle <ralf@linux-mips.org>
+ */
+
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mtd/physmap.h>
+#include <mtd/mtd-abi.h>
+
+static struct mtd_partition malta_mtd_partitions[] = {        /* three back-to-back partitions covering offsets 0x0-0x3fffff */
+       {
+               .name =         "YAMON",
+               .offset =       0x0,
+               .size =         0x100000,       /* 1 MiB */
+               .mask_flags =   MTD_WRITEABLE   /* NOTE(review): presumably masks the writeable flag off, making this partition read-only -- confirm against linux/mtd/partitions.h */
+       }, {
+               .name =         "User FS",
+               .offset =       0x100000,       /* starts where "YAMON" ends */
+               .size =         0x2e0000        /* 2944 KiB; no mask_flags set for this entry */
+       }, {
+               .name =         "Board Config",
+               .offset =       0x3e0000,       /* starts where "User FS" ends */
+               .size =         0x020000,       /* 128 KiB, ends at 0x400000 */
+               .mask_flags =   MTD_WRITEABLE   /* same masking as the "YAMON" entry */
+       }
+};
+
+static struct physmap_flash_data malta_flash_data = { /* handed to the device through malta_flash.dev.platform_data */
+       .width          = 4,    /* NOTE(review): presumably the flash bus width in bytes -- confirm against linux/mtd/physmap.h */
+       .nr_parts       = ARRAY_SIZE(malta_mtd_partitions),    /* tracks the table size automatically */
+       .parts          = malta_mtd_partitions
+};
+
+static struct resource malta_flash_resource = {       /* the single memory window of the flash device */
+       .start          = 0x1e000000,
+       .end            = 0x1e3fffff,   /* counting both ends this is 0x400000 bytes, the same as the partition table total */
+       .flags          = IORESOURCE_MEM
+};
+
+static struct platform_device malta_flash = { /* registered by malta_mtd_init() */
+       .name           = "physmap-flash",      /* NOTE(review): presumably the name the physmap platform driver binds to -- confirm */
+       .id             = 0,
+       .dev            = {
+               .platform_data  = &malta_flash_data,    /* width and partition table */
+       },
+       .num_resources  = 1,    /* malta_flash_resource is a single struct, not an array */
+       .resource       = &malta_flash_resource,
+};
+
+/* Initcall: register the Malta flash platform device described by malta_flash. */
+static int __init malta_mtd_init(void)
+{
+       /* Do not mask errors: return whatever platform_device_register() reports. */
+       return platform_device_register(&malta_flash);
+}
+
+module_init(malta_mtd_init)
index f445fcddfdfd925bc9ca4525db4c970998b35e29..874ccb0066b8aa7b6266ee8e377d16671d987408 100644 (file)
@@ -21,7 +21,7 @@
  * Sead board.
  */
 #include <linux/init.h>
-#include <linux/irq.h>
+#include <linux/interrupt.h>
 
 #include <asm/irq_cpu.h>
 #include <asm/mipsregs.h>
@@ -108,7 +108,7 @@ asmlinkage void plat_irq_dispatch(void)
        if (irq >= 0)
                do_IRQ(MIPSCPU_INT_BASE + irq);
        else
-               spurious_interrupt(regs);
+               spurious_interrupt();
 }
 
 void __init arch_init_irq(void)
index d41fc5885e875f67ec9de7cf3fc2057b6c7c2419..dc795be62807d1d1bf22af966ad961d0918794bf 100644 (file)
@@ -243,11 +243,10 @@ static void __init __build_store_reg(int reg)
 
 static inline void build_store_reg(int reg)
 {
-       if (cpu_has_prefetch)
-               if (reg)
-                       build_dst_pref(pref_offset_copy);
-               else
-                       build_dst_pref(pref_offset_clear);
+       int pref_off = cpu_has_prefetch ?
+               (reg ? pref_offset_copy : pref_offset_clear) : 0;
+       if (pref_off)
+               build_dst_pref(pref_off);
        else if (cpu_has_cache_cdex_s)
                build_cdex_s();
        else if (cpu_has_cache_cdex_p)
index 454b65cc335431bdf8543e96c9b00adaf8dc1682..f556b7a8dccdd29318aaaedf855f2704a7ca90bf 100644 (file)
@@ -202,7 +202,7 @@ write_config_byte(struct pci_bus *bus, unsigned int devfn, int where, u8 val)
                break;
        }
 
-       err = config_access(PCI_CMD_CONFIG_READ, bus, devfn, where, ~(1 << (where & 3)), &data);
+       err = config_access(PCI_CMD_CONFIG_WRITE, bus, devfn, where, ~(1 << (where & 3)), &data);
 
        return err;
 }
index 65c440e8480b088e46660c50fd46025444cb9430..f80acae07ceecebb0927ca759f918ea484dc1d5a 100644 (file)
 #include <int.h>
 #include <cm.h>
 
-extern unsigned int mips_hpt_frequency;
+static unsigned long cpj;      /* mips_hpt_frequency / HZ rounded to nearest; computed in pnx8550_time_init() */
+
+static cycle_t hpt_read(void)  /* installed as clocksource_mips.read by pnx8550_time_init() */
+{
+       return read_c0_count2();        /* the count register that the "Setup Timer 2" code zeroes */
+}
+
+static void timer_ack(void)    /* installed as mips_timer_ack; also called once from pnx8550_time_init() */
+{
+       write_c0_compare(cpj);  /* writes the per-jiffy tick count into the compare register */
+}
 
 /*
  * pnx8550_time_init() - it does the following things:
@@ -68,27 +78,47 @@ void pnx8550_time_init(void)
         * HZ timer interrupts per second.
         */
        mips_hpt_frequency = 27UL * ((1000000UL * n)/(m * pow2p));
+       cpj = (mips_hpt_frequency + HZ / 2) / HZ;
+       timer_ack();
+
+       /* Setup Timer 2 */
+       write_c0_count2(0);
+       write_c0_compare2(0xffffffff);
+
+       clocksource_mips.read = hpt_read;
+       mips_timer_ack = timer_ack;
+}
+
+static irqreturn_t monotonic_interrupt(int irq, void *dev_id)  /* handler behind monotonic_irqaction; irq and dev_id are unused */
+{
+       /* Clear the Timer 2 interrupt by rewriting its compare register */
+       write_c0_compare2(-1);  /* NOTE(review): presumably the same value as the 0xffffffff written at setup -- confirm register width */
+       return IRQ_HANDLED;
+}
 
+static struct irqaction monotonic_irqaction = {        /* attached to PNX8550_INT_TIMER2 in plat_timer_setup() */
+       .handler = monotonic_interrupt,
+       .flags = IRQF_DISABLED, /* NOTE(review): presumably runs the handler with interrupts disabled -- confirm */
+       .name = "Monotonic timer",
+};
+
 void __init plat_timer_setup(struct irqaction *irq)
 {
        int configPR;
 
        setup_irq(PNX8550_INT_TIMER1, irq);
+       setup_irq(PNX8550_INT_TIMER2, &monotonic_irqaction);
 
-       /* Start timer1 */
+       /* Timer 1 start */
        configPR = read_c0_config7();
        configPR &= ~0x00000008;
        write_c0_config7(configPR);
 
-       /* Timer 2 stop */
+       /* Timer 2 start */
        configPR = read_c0_config7();
-       configPR |= 0x00000010;
+       configPR &= ~0x00000010;
        write_c0_config7(configPR);
 
-       write_c0_count2(0);
-       write_c0_compare2(0xffffffff);
-
        /* Timer 3 stop */
        configPR = read_c0_config7();
        configPR |= 0x00000020;
index 8699dadcd0966028ed7207b333486b2295a4fc12..0855d55c194d7490fab02d6aad844d238d5a0a9e 100644 (file)
@@ -436,7 +436,7 @@ config PPC_EFIKA
        select RTAS_PROC
        select PPC_MPC52xx
        select PPC_NATIVE
-       default y
+       default n
 
 config PPC_LITE5200
        bool "Freescale Lite5200 Eval Board"
@@ -471,7 +471,7 @@ config PPC_PREP
        select PPC_INDIRECT_PCI
        select PPC_UDBG_16550
        select PPC_NATIVE
-       default y
+       default n
 
 config PPC_MAPLE
        depends on PPC_MULTIPLATFORM && PPC64
index 8bc0d259796d3ba918d09e4e98924c921aa04b8d..a8efb59f5dd7a8feb3a90584fa5033aa9c256689 100644 (file)
 
                // PSC3 in CODEC mode example
                i2s@2400 {              // PSC3
-                       device_type = "i2s";
+                       device_type = "sound";
                        compatible = "mpc5200-psc-i2s\0mpc52xx-psc-i2s";
                        reg = <2400 100>;
                        interrupts = <2 3 0>;
 
                // PSC6 in AC97 mode example
                ac97@2c00 {             // PSC6
-                       device_type = "ac97";
+                       device_type = "sound";
                        compatible = "mpc5200-psc-ac97\0mpc52xx-psc-ac97";
                        reg = <2c00 100>;
                        interrupts = <2 4 0>;
index 81cb76418a7816e3eab636b096d980b3930a597d..1aabee432d86f13200fdfe37aa5d77ceae897470 100644 (file)
 
                // PSC3 in CODEC mode example
                i2s@2400 {              // PSC3
-                       device_type = "i2s";
+                       device_type = "sound";
                        compatible = "mpc5200b-psc-i2s\0mpc52xx-psc-i2s";
                        reg = <2400 100>;
                        interrupts = <2 3 0>;
 
                // PSC6 in AC97 mode example
                ac97@2c00 {             // PSC6
-                       device_type = "ac97";
+                       device_type = "sound";
                        compatible = "mpc5200b-psc-ac97\0mpc52xx-psc-ac97";
                        reg = <2c00 100>;
                        interrupts = <2 4 0>;
index 1c009651f9250f18eb19bcc8a759c46e4307a8ce..340376a470017c173d06c0a6e7521079c6d430fc 100644 (file)
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.18-rc6
-# Sun Sep 10 10:45:11 2006
+# Linux kernel version: 2.6.20-rc3
+# Tue Jan  2 15:32:44 2007
 #
 CONFIG_PPC64=y
 CONFIG_64BIT=y
@@ -10,6 +10,8 @@ CONFIG_MMU=y
 CONFIG_GENERIC_HARDIRQS=y
 CONFIG_IRQ_PER_CPU=y
 CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+CONFIG_ARCH_HAS_ILOG2_U32=y
+CONFIG_ARCH_HAS_ILOG2_U64=y
 CONFIG_GENERIC_HWEIGHT=y
 CONFIG_GENERIC_CALIBRATE_DELAY=y
 CONFIG_GENERIC_FIND_NEXT_BIT=y
@@ -22,6 +24,8 @@ CONFIG_ARCH_MAY_HAVE_PC_FDC=y
 CONFIG_PPC_OF=y
 CONFIG_PPC_UDBG_16550=y
 CONFIG_GENERIC_TBSYNC=y
+CONFIG_AUDIT_ARCH=y
+CONFIG_GENERIC_BUG=y
 # CONFIG_DEFAULT_UIMAGE is not set
 
 #
@@ -31,6 +35,10 @@ CONFIG_GENERIC_TBSYNC=y
 CONFIG_POWER3=y
 CONFIG_POWER4=y
 CONFIG_PPC_FPU=y
+# CONFIG_PPC_DCR_NATIVE is not set
+CONFIG_PPC_DCR_MMIO=y
+CONFIG_PPC_DCR=y
+CONFIG_PPC_OF_PLATFORM_PCI=y
 CONFIG_ALTIVEC=y
 CONFIG_PPC_STD_MMU=y
 CONFIG_VIRT_CPU_ACCOUNTING=y
@@ -52,19 +60,24 @@ CONFIG_LOCALVERSION=""
 CONFIG_LOCALVERSION_AUTO=y
 CONFIG_SWAP=y
 CONFIG_SYSVIPC=y
+# CONFIG_IPC_NS is not set
 CONFIG_POSIX_MQUEUE=y
 # CONFIG_BSD_PROCESS_ACCT is not set
 CONFIG_TASKSTATS=y
 CONFIG_TASK_DELAY_ACCT=y
-CONFIG_SYSCTL=y
+# CONFIG_UTS_NS is not set
 # CONFIG_AUDIT is not set
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_CPUSETS=y
+CONFIG_SYSFS_DEPRECATED=y
 CONFIG_RELAY=y
 CONFIG_INITRAMFS_SOURCE=""
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+# CONFIG_TASK_XACCT is not set
+CONFIG_SYSCTL=y
 # CONFIG_EMBEDDED is not set
+CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
@@ -73,12 +86,12 @@ CONFIG_PRINTK=y
 CONFIG_BUG=y
 CONFIG_ELF_CORE=y
 CONFIG_BASE_FULL=y
-CONFIG_RT_MUTEXES=y
 CONFIG_FUTEX=y
 CONFIG_EPOLL=y
 CONFIG_SHMEM=y
 CONFIG_SLAB=y
 CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_RT_MUTEXES=y
 # CONFIG_TINY_SHMEM is not set
 CONFIG_BASE_SMALL=0
 # CONFIG_SLOB is not set
@@ -97,6 +110,7 @@ CONFIG_STOP_MACHINE=y
 #
 # Block layer
 #
+CONFIG_BLOCK=y
 CONFIG_BLK_DEV_IO_TRACE=y
 
 #
@@ -116,16 +130,20 @@ CONFIG_DEFAULT_IOSCHED="anticipatory"
 # Platform support
 #
 CONFIG_PPC_MULTIPLATFORM=y
-# CONFIG_PPC_ISERIES is not set
 # CONFIG_EMBEDDED6xx is not set
 # CONFIG_APUS is not set
 CONFIG_PPC_PSERIES=y
+CONFIG_PPC_ISERIES=y
+# CONFIG_PPC_MPC52xx is not set
 CONFIG_PPC_PMAC=y
 CONFIG_PPC_PMAC64=y
 CONFIG_PPC_MAPLE=y
+# CONFIG_PPC_PASEMI is not set
 CONFIG_PPC_CELL=y
 CONFIG_PPC_CELL_NATIVE=y
 CONFIG_PPC_IBM_CELL_BLADE=y
+# CONFIG_PPC_PS3 is not set
+CONFIG_PPC_NATIVE=y
 CONFIG_UDBG_RTAS_CONSOLE=y
 CONFIG_XICS=y
 CONFIG_U3_DART=y
@@ -139,6 +157,8 @@ CONFIG_IBMVIO=y
 # CONFIG_IBMEBUS is not set
 # CONFIG_PPC_MPC106 is not set
 CONFIG_PPC_970_NAP=y
+CONFIG_PPC_INDIRECT_IO=y
+CONFIG_GENERIC_IOMAP=y
 CONFIG_CPU_FREQ=y
 CONFIG_CPU_FREQ_TABLE=y
 # CONFIG_CPU_FREQ_DEBUG is not set
@@ -160,14 +180,16 @@ CONFIG_MPIC=y
 #
 CONFIG_SPU_FS=m
 CONFIG_SPU_BASE=y
-CONFIG_SPUFS_MMAP=y
 CONFIG_CBE_RAS=y
+CONFIG_CBE_THERM=m
+CONFIG_CBE_CPUFREQ=m
 
 #
 # Kernel options
 #
 # CONFIG_HZ_100 is not set
 CONFIG_HZ_250=y
+# CONFIG_HZ_300 is not set
 # CONFIG_HZ_1000 is not set
 CONFIG_HZ=250
 CONFIG_PREEMPT_NONE=y
@@ -192,6 +214,7 @@ CONFIG_ARCH_SELECT_MEMORY_MODEL=y
 CONFIG_ARCH_FLATMEM_ENABLE=y
 CONFIG_ARCH_SPARSEMEM_ENABLE=y
 CONFIG_ARCH_SPARSEMEM_DEFAULT=y
+CONFIG_ARCH_POPULATES_NODE_MAP=y
 CONFIG_SELECT_MEMORY_MODEL=y
 # CONFIG_FLATMEM_MANUAL is not set
 # CONFIG_DISCONTIGMEM_MANUAL is not set
@@ -201,6 +224,7 @@ CONFIG_HAVE_MEMORY_PRESENT=y
 # CONFIG_SPARSEMEM_STATIC is not set
 CONFIG_SPARSEMEM_EXTREME=y
 CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTPLUG_SPARSE=y
 CONFIG_SPLIT_PTLOCK_CPUS=4
 CONFIG_RESOURCES_64BIT=y
 CONFIG_ARCH_MEMORY_PROBE=y
@@ -222,6 +246,7 @@ CONFIG_PPC_I8259=y
 CONFIG_PCI=y
 CONFIG_PCI_DOMAINS=y
 # CONFIG_PCIEPORTBUS is not set
+# CONFIG_PCI_MULTITHREAD_PROBE is not set
 # CONFIG_PCI_DEBUG is not set
 
 #
@@ -254,6 +279,7 @@ CONFIG_PACKET=y
 CONFIG_UNIX=y
 CONFIG_XFRM=y
 CONFIG_XFRM_USER=m
+# CONFIG_XFRM_SUB_POLICY is not set
 CONFIG_NET_KEY=m
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
@@ -272,10 +298,13 @@ CONFIG_INET_XFRM_TUNNEL=m
 CONFIG_INET_TUNNEL=y
 CONFIG_INET_XFRM_MODE_TRANSPORT=y
 CONFIG_INET_XFRM_MODE_TUNNEL=y
+CONFIG_INET_XFRM_MODE_BEET=y
 CONFIG_INET_DIAG=y
 CONFIG_INET_TCP_DIAG=y
 # CONFIG_TCP_CONG_ADVANCED is not set
-CONFIG_TCP_CONG_BIC=y
+CONFIG_TCP_CONG_CUBIC=y
+CONFIG_DEFAULT_TCP_CONG="cubic"
+# CONFIG_TCP_MD5SIG is not set
 
 #
 # IP: Virtual Server Configuration
@@ -294,25 +323,31 @@ CONFIG_NETFILTER=y
 CONFIG_NETFILTER_NETLINK=y
 CONFIG_NETFILTER_NETLINK_QUEUE=m
 CONFIG_NETFILTER_NETLINK_LOG=m
+CONFIG_NF_CONNTRACK_ENABLED=m
+CONFIG_NF_CONNTRACK_SUPPORT=y
+# CONFIG_IP_NF_CONNTRACK_SUPPORT is not set
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CT_ACCT=y
+CONFIG_NF_CONNTRACK_MARK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CT_PROTO_GRE=m
+CONFIG_NF_CT_PROTO_SCTP=m
+CONFIG_NF_CONNTRACK_AMANDA=m
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_H323=m
+CONFIG_NF_CONNTRACK_IRC=m
+CONFIG_NF_CONNTRACK_NETBIOS_NS=m
+CONFIG_NF_CONNTRACK_PPTP=m
+CONFIG_NF_CONNTRACK_SIP=m
+CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_CT_NETLINK=m
 # CONFIG_NETFILTER_XTABLES is not set
 
 #
 # IP: Netfilter Configuration
 #
-CONFIG_IP_NF_CONNTRACK=m
-CONFIG_IP_NF_CT_ACCT=y
-CONFIG_IP_NF_CONNTRACK_MARK=y
-CONFIG_IP_NF_CONNTRACK_EVENTS=y
-CONFIG_IP_NF_CONNTRACK_NETLINK=m
-CONFIG_IP_NF_CT_PROTO_SCTP=m
-CONFIG_IP_NF_FTP=m
-CONFIG_IP_NF_IRC=m
-# CONFIG_IP_NF_NETBIOS_NS is not set
-CONFIG_IP_NF_TFTP=m
-CONFIG_IP_NF_AMANDA=m
-# CONFIG_IP_NF_PPTP is not set
-# CONFIG_IP_NF_H323 is not set
-CONFIG_IP_NF_SIP=m
+CONFIG_NF_CONNTRACK_IPV4=m
+CONFIG_NF_CONNTRACK_PROC_COMPAT=y
 CONFIG_IP_NF_QUEUE=m
 
 #
@@ -339,7 +374,6 @@ CONFIG_LLC=y
 # CONFIG_ATALK is not set
 # CONFIG_X25 is not set
 # CONFIG_LAPB is not set
-# CONFIG_NET_DIVERT is not set
 # CONFIG_ECONET is not set
 # CONFIG_WAN_ROUTER is not set
 
@@ -411,6 +445,12 @@ CONFIG_BLK_DEV_INITRD=y
 # CONFIG_CDROM_PKTCDVD is not set
 # CONFIG_ATA_OVER_ETH is not set
 
+#
+# Misc devices
+#
+# CONFIG_SGI_IOC4 is not set
+# CONFIG_TIFM_CORE is not set
+
 #
 # ATA/ATAPI/MFM/RLL support
 #
@@ -438,7 +478,6 @@ CONFIG_IDEPCI_SHARE_IRQ=y
 # CONFIG_BLK_DEV_OFFBOARD is not set
 CONFIG_BLK_DEV_GENERIC=y
 # CONFIG_BLK_DEV_OPTI621 is not set
-CONFIG_BLK_DEV_SL82C105=y
 CONFIG_BLK_DEV_IDEDMA_PCI=y
 # CONFIG_BLK_DEV_IDEDMA_FORCED is not set
 CONFIG_IDEDMA_PCI_AUTO=y
@@ -453,6 +492,7 @@ CONFIG_BLK_DEV_AMD74XX=y
 # CONFIG_BLK_DEV_CS5530 is not set
 # CONFIG_BLK_DEV_HPT34X is not set
 # CONFIG_BLK_DEV_HPT366 is not set
+# CONFIG_BLK_DEV_JMICRON is not set
 # CONFIG_BLK_DEV_SC1200 is not set
 # CONFIG_BLK_DEV_PIIX is not set
 # CONFIG_BLK_DEV_IT821X is not set
@@ -461,6 +501,7 @@ CONFIG_BLK_DEV_AMD74XX=y
 # CONFIG_BLK_DEV_PDC202XX_NEW is not set
 # CONFIG_BLK_DEV_SVWKS is not set
 # CONFIG_BLK_DEV_SIIMAGE is not set
+CONFIG_BLK_DEV_SL82C105=y
 # CONFIG_BLK_DEV_SLC90E66 is not set
 # CONFIG_BLK_DEV_TRM290 is not set
 # CONFIG_BLK_DEV_VIA82CXXX is not set
@@ -478,6 +519,8 @@ CONFIG_IDEDMA_AUTO=y
 #
 # CONFIG_RAID_ATTRS is not set
 CONFIG_SCSI=y
+# CONFIG_SCSI_TGT is not set
+CONFIG_SCSI_NETLINK=y
 CONFIG_SCSI_PROC_FS=y
 
 #
@@ -497,14 +540,16 @@ CONFIG_CHR_DEV_SG=y
 CONFIG_SCSI_MULTI_LUN=y
 CONFIG_SCSI_CONSTANTS=y
 # CONFIG_SCSI_LOGGING is not set
+# CONFIG_SCSI_SCAN_ASYNC is not set
 
 #
-# SCSI Transport Attributes
+# SCSI Transports
 #
 CONFIG_SCSI_SPI_ATTRS=y
 CONFIG_SCSI_FC_ATTRS=y
 CONFIG_SCSI_ISCSI_ATTRS=m
 # CONFIG_SCSI_SAS_ATTRS is not set
+# CONFIG_SCSI_SAS_LIBSAS is not set
 
 #
 # SCSI low-level drivers
@@ -517,26 +562,12 @@ CONFIG_SCSI_ISCSI_ATTRS=m
 # CONFIG_SCSI_AIC7XXX is not set
 # CONFIG_SCSI_AIC7XXX_OLD is not set
 # CONFIG_SCSI_AIC79XX is not set
+# CONFIG_SCSI_AIC94XX is not set
+# CONFIG_SCSI_ARCMSR is not set
 # CONFIG_MEGARAID_NEWGEN is not set
 # CONFIG_MEGARAID_LEGACY is not set
 # CONFIG_MEGARAID_SAS is not set
-CONFIG_ATA=y
-# CONFIG_SATA_AHCI is not set
-CONFIG_SATA_SVW=y
-# CONFIG_SCSI_ATA_PIIX is not set
-# CONFIG_SATA_MV is not set
-# CONFIG_SATA_NV is not set
-# CONFIG_SCSI_PDC_ADMA is not set
 # CONFIG_SCSI_HPTIOP is not set
-# CONFIG_SATA_QSTOR is not set
-# CONFIG_SATA_PROMISE is not set
-# CONFIG_SATA_SX4 is not set
-# CONFIG_SATA_SIL is not set
-# CONFIG_SATA_SIL24 is not set
-# CONFIG_SATA_SIS is not set
-# CONFIG_SATA_ULI is not set
-# CONFIG_SATA_VIA is not set
-# CONFIG_SATA_VITESSE is not set
 # CONFIG_SCSI_BUSLOGIC is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_EATA is not set
@@ -546,6 +577,7 @@ CONFIG_SATA_SVW=y
 CONFIG_SCSI_IBMVSCSI=y
 # CONFIG_SCSI_INITIO is not set
 # CONFIG_SCSI_INIA100 is not set
+# CONFIG_SCSI_STEX is not set
 CONFIG_SCSI_SYM53C8XX_2=y
 CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
 CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16
@@ -556,10 +588,66 @@ CONFIG_SCSI_IPR_TRACE=y
 CONFIG_SCSI_IPR_DUMP=y
 # CONFIG_SCSI_QLOGIC_1280 is not set
 # CONFIG_SCSI_QLA_FC is not set
+# CONFIG_SCSI_QLA_ISCSI is not set
 CONFIG_SCSI_LPFC=m
 # CONFIG_SCSI_DC395x is not set
 # CONFIG_SCSI_DC390T is not set
 CONFIG_SCSI_DEBUG=m
+# CONFIG_SCSI_SRP is not set
+
+#
+# Serial ATA (prod) and Parallel ATA (experimental) drivers
+#
+CONFIG_ATA=y
+# CONFIG_SATA_AHCI is not set
+CONFIG_SATA_SVW=y
+# CONFIG_ATA_PIIX is not set
+# CONFIG_SATA_MV is not set
+# CONFIG_SATA_NV is not set
+# CONFIG_PDC_ADMA is not set
+# CONFIG_SATA_QSTOR is not set
+# CONFIG_SATA_PROMISE is not set
+# CONFIG_SATA_SX4 is not set
+# CONFIG_SATA_SIL is not set
+# CONFIG_SATA_SIL24 is not set
+# CONFIG_SATA_SIS is not set
+# CONFIG_SATA_ULI is not set
+# CONFIG_SATA_VIA is not set
+# CONFIG_SATA_VITESSE is not set
+# CONFIG_PATA_ALI is not set
+# CONFIG_PATA_AMD is not set
+# CONFIG_PATA_ARTOP is not set
+# CONFIG_PATA_ATIIXP is not set
+# CONFIG_PATA_CMD64X is not set
+# CONFIG_PATA_CS5520 is not set
+# CONFIG_PATA_CS5530 is not set
+# CONFIG_PATA_CYPRESS is not set
+# CONFIG_PATA_EFAR is not set
+# CONFIG_ATA_GENERIC is not set
+# CONFIG_PATA_HPT366 is not set
+# CONFIG_PATA_HPT37X is not set
+# CONFIG_PATA_HPT3X2N is not set
+# CONFIG_PATA_HPT3X3 is not set
+# CONFIG_PATA_IT821X is not set
+# CONFIG_PATA_JMICRON is not set
+# CONFIG_PATA_TRIFLEX is not set
+# CONFIG_PATA_MARVELL is not set
+# CONFIG_PATA_MPIIX is not set
+# CONFIG_PATA_OLDPIIX is not set
+# CONFIG_PATA_NETCELL is not set
+# CONFIG_PATA_NS87410 is not set
+# CONFIG_PATA_OPTI is not set
+# CONFIG_PATA_OPTIDMA is not set
+# CONFIG_PATA_PDC_OLD is not set
+# CONFIG_PATA_RADISYS is not set
+# CONFIG_PATA_RZ1000 is not set
+# CONFIG_PATA_SC1200 is not set
+# CONFIG_PATA_SERVERWORKS is not set
+# CONFIG_PATA_PDC2027X is not set
+# CONFIG_PATA_SIL680 is not set
+# CONFIG_PATA_SIS is not set
+# CONFIG_PATA_VIA is not set
+# CONFIG_PATA_WINBOND is not set
 
 #
 # Multi-device support (RAID and LVM)
@@ -575,6 +663,7 @@ CONFIG_MD_RAID5_RESHAPE=y
 CONFIG_MD_MULTIPATH=m
 CONFIG_MD_FAULTY=m
 CONFIG_BLK_DEV_DM=y
+# CONFIG_DM_DEBUG is not set
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_MIRROR=m
@@ -630,11 +719,13 @@ CONFIG_IEEE1394_RAWIO=y
 CONFIG_ADB_PMU=y
 # CONFIG_ADB_PMU_LED is not set
 CONFIG_PMAC_SMU=y
+# CONFIG_MAC_EMUMOUSEBTN is not set
 CONFIG_THERM_PM72=y
 CONFIG_WINDFARM=y
 CONFIG_WINDFARM_PM81=y
 CONFIG_WINDFARM_PM91=y
 CONFIG_WINDFARM_PM112=y
+# CONFIG_PMAC_RACKMETER is not set
 
 #
 # Network device support
@@ -675,6 +766,7 @@ CONFIG_VORTEX=y
 CONFIG_IBMVETH=m
 CONFIG_NET_PCI=y
 CONFIG_PCNET32=y
+# CONFIG_PCNET32_NAPI is not set
 # CONFIG_AMD8111_ETH is not set
 # CONFIG_ADAPTEC_STARFIRE is not set
 # CONFIG_B44 is not set
@@ -713,7 +805,7 @@ CONFIG_E1000=y
 CONFIG_TIGON3=y
 # CONFIG_BNX2 is not set
 CONFIG_SPIDER_NET=m
-# CONFIG_MV643XX_ETH is not set
+# CONFIG_QLA3XXX is not set
 
 #
 # Ethernet (10000 Mbit)
@@ -723,6 +815,7 @@ CONFIG_IXGB=m
 # CONFIG_IXGB_NAPI is not set
 # CONFIG_S2IO is not set
 # CONFIG_MYRI10GE is not set
+# CONFIG_NETXEN_NIC is not set
 
 #
 # Token Ring devices
@@ -741,6 +834,7 @@ CONFIG_IBMOL=y
 # Wan interfaces
 #
 # CONFIG_WAN is not set
+CONFIG_ISERIES_VETH=m
 # CONFIG_FDDI is not set
 # CONFIG_HIPPI is not set
 CONFIG_PPP=m
@@ -753,6 +847,7 @@ CONFIG_PPP_BSDCOMP=m
 # CONFIG_PPP_MPPE is not set
 CONFIG_PPPOE=m
 # CONFIG_SLIP is not set
+CONFIG_SLHC=m
 # CONFIG_NET_FC is not set
 # CONFIG_SHAPER is not set
 CONFIG_NETCONSOLE=y
@@ -775,6 +870,7 @@ CONFIG_NET_POLL_CONTROLLER=y
 # Input device support
 #
 CONFIG_INPUT=y
+# CONFIG_INPUT_FF_MEMLESS is not set
 
 #
 # Userland interfaces
@@ -797,6 +893,7 @@ CONFIG_KEYBOARD_ATKBD=y
 # CONFIG_KEYBOARD_LKKBD is not set
 # CONFIG_KEYBOARD_XTKBD is not set
 # CONFIG_KEYBOARD_NEWTON is not set
+# CONFIG_KEYBOARD_STOWAWAY is not set
 CONFIG_INPUT_MOUSE=y
 CONFIG_MOUSE_PS2=y
 # CONFIG_MOUSE_SERIAL is not set
@@ -850,6 +947,7 @@ CONFIG_LEGACY_PTYS=y
 CONFIG_LEGACY_PTY_COUNT=256
 CONFIG_HVC_DRIVER=y
 CONFIG_HVC_CONSOLE=y
+CONFIG_HVC_ISERIES=y
 CONFIG_HVC_RTAS=y
 CONFIG_HVCS=m
 
@@ -868,10 +966,6 @@ CONFIG_GEN_RTC=y
 # CONFIG_DTLK is not set
 # CONFIG_R3964 is not set
 # CONFIG_APPLICOM is not set
-
-#
-# Ftape, the floppy tape device driver
-#
 # CONFIG_AGP is not set
 # CONFIG_DRM is not set
 CONFIG_RAW_DRIVER=y
@@ -882,7 +976,6 @@ CONFIG_MAX_RAW_DEVS=256
 # TPM devices
 #
 # CONFIG_TCG_TPM is not set
-# CONFIG_TELCLOCK is not set
 
 #
 # I2C support
@@ -947,6 +1040,7 @@ CONFIG_I2C_POWERMAC=y
 #
 # Dallas's 1-wire bus
 #
+# CONFIG_W1 is not set
 
 #
 # Hardware Monitoring support
@@ -954,15 +1048,10 @@ CONFIG_I2C_POWERMAC=y
 # CONFIG_HWMON is not set
 # CONFIG_HWMON_VID is not set
 
-#
-# Misc devices
-#
-
 #
 # Multimedia devices
 #
 # CONFIG_VIDEO_DEV is not set
-CONFIG_VIDEO_V4L2=y
 
 #
 # Digital Video Broadcasting Devices
@@ -975,6 +1064,7 @@ CONFIG_VIDEO_V4L2=y
 #
 CONFIG_FIRMWARE_EDID=y
 CONFIG_FB=y
+CONFIG_FB_DDC=y
 CONFIG_FB_CFB_FILLRECT=y
 CONFIG_FB_CFB_COPYAREA=y
 CONFIG_FB_CFB_IMAGEBLIT=y
@@ -1011,6 +1101,7 @@ CONFIG_FB_RADEON_I2C=y
 # CONFIG_FB_3DFX is not set
 # CONFIG_FB_VOODOO1 is not set
 # CONFIG_FB_TRIDENT is not set
+CONFIG_FB_IBM_GXT4500=y
 # CONFIG_FB_VIRTUAL is not set
 
 #
@@ -1158,6 +1249,11 @@ CONFIG_SND_AOA_SOUNDBUS_I2S=m
 #
 # CONFIG_SOUND_PRIME is not set
 
+#
+# HID Devices
+#
+CONFIG_HID=y
+
 #
 # USB support
 #
@@ -1173,6 +1269,7 @@ CONFIG_USB=y
 CONFIG_USB_DEVICEFS=y
 # CONFIG_USB_BANDWIDTH is not set
 # CONFIG_USB_DYNAMIC_MINORS is not set
+# CONFIG_USB_MULTITHREAD_PROBE is not set
 # CONFIG_USB_OTG is not set
 
 #
@@ -1214,13 +1311,13 @@ CONFIG_USB_STORAGE=m
 # CONFIG_USB_STORAGE_JUMPSHOT is not set
 # CONFIG_USB_STORAGE_ALAUDA is not set
 # CONFIG_USB_STORAGE_ONETOUCH is not set
+# CONFIG_USB_STORAGE_KARMA is not set
 # CONFIG_USB_LIBUSUAL is not set
 
 #
 # USB Input Devices
 #
 CONFIG_USB_HID=y
-CONFIG_USB_HIDINPUT=y
 # CONFIG_USB_HIDINPUT_POWERBOOK is not set
 # CONFIG_HID_FF is not set
 CONFIG_USB_HIDDEV=y
@@ -1250,6 +1347,7 @@ CONFIG_USB_HIDDEV=y
 # CONFIG_USB_KAWETH is not set
 # CONFIG_USB_PEGASUS is not set
 # CONFIG_USB_RTL8150 is not set
+# CONFIG_USB_USBNET_MII is not set
 # CONFIG_USB_USBNET is not set
 # CONFIG_USB_MON is not set
 
@@ -1267,6 +1365,7 @@ CONFIG_USB_HIDDEV=y
 #
 # CONFIG_USB_EMI62 is not set
 # CONFIG_USB_EMI26 is not set
+# CONFIG_USB_ADUTUX is not set
 # CONFIG_USB_AUERSWALD is not set
 # CONFIG_USB_RIO500 is not set
 # CONFIG_USB_LEGOTOWER is not set
@@ -1274,12 +1373,13 @@ CONFIG_USB_HIDDEV=y
 # CONFIG_USB_LED is not set
 # CONFIG_USB_CYPRESS_CY7C63 is not set
 # CONFIG_USB_CYTHERM is not set
-# CONFIG_USB_PHIDGETKIT is not set
-# CONFIG_USB_PHIDGETSERVO is not set
+# CONFIG_USB_PHIDGET is not set
 # CONFIG_USB_IDMOUSE is not set
+# CONFIG_USB_FTDI_ELAN is not set
 CONFIG_USB_APPLEDISPLAY=m
 # CONFIG_USB_SISUSBVGA is not set
 # CONFIG_USB_LD is not set
+# CONFIG_USB_TRANCEVIBRATOR is not set
 # CONFIG_USB_TEST is not set
 
 #
@@ -1318,6 +1418,7 @@ CONFIG_INFINIBAND=m
 CONFIG_INFINIBAND_ADDR_TRANS=y
 CONFIG_INFINIBAND_MTHCA=m
 CONFIG_INFINIBAND_MTHCA_DEBUG=y
+# CONFIG_INFINIBAND_AMSO1100 is not set
 CONFIG_INFINIBAND_IPOIB=m
 CONFIG_INFINIBAND_IPOIB_DEBUG=y
 # CONFIG_INFINIBAND_IPOIB_DEBUG_DATA is not set
@@ -1346,6 +1447,10 @@ CONFIG_INFINIBAND_ISER=m
 # DMA Devices
 #
 
+#
+# Virtualization
+#
+
 #
 # File systems
 #
@@ -1359,6 +1464,7 @@ CONFIG_EXT3_FS=y
 CONFIG_EXT3_FS_XATTR=y
 CONFIG_EXT3_FS_POSIX_ACL=y
 CONFIG_EXT3_FS_SECURITY=y
+# CONFIG_EXT4DEV_FS is not set
 CONFIG_JBD=y
 # CONFIG_JBD_DEBUG is not set
 CONFIG_FS_MBCACHE=y
@@ -1379,6 +1485,7 @@ CONFIG_XFS_FS=m
 CONFIG_XFS_SECURITY=y
 CONFIG_XFS_POSIX_ACL=y
 # CONFIG_XFS_RT is not set
+# CONFIG_GFS2_FS is not set
 # CONFIG_OCFS2_FS is not set
 # CONFIG_MINIX_FS is not set
 # CONFIG_ROMFS_FS is not set
@@ -1414,8 +1521,10 @@ CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
 #
 CONFIG_PROC_FS=y
 CONFIG_PROC_KCORE=y
+CONFIG_PROC_SYSCTL=y
 CONFIG_SYSFS=y
 CONFIG_TMPFS=y
+# CONFIG_TMPFS_POSIX_ACL is not set
 CONFIG_HUGETLBFS=y
 CONFIG_HUGETLB_PAGE=y
 CONFIG_RAMFS=y
@@ -1539,9 +1648,24 @@ CONFIG_NLS_KOI8_R=m
 CONFIG_NLS_KOI8_U=m
 CONFIG_NLS_UTF8=m
 
+#
+# Distributed Lock Manager
+#
+# CONFIG_DLM is not set
+
+#
+# iSeries device drivers
+#
+# CONFIG_VIOCONS is not set
+CONFIG_VIODASD=y
+CONFIG_VIOCD=m
+CONFIG_VIOTAPE=m
+CONFIG_VIOPATH=y
+
 #
 # Library routines
 #
+CONFIG_BITREVERSE=y
 CONFIG_CRC_CCITT=m
 # CONFIG_CRC16 is not set
 CONFIG_CRC32=y
@@ -1551,6 +1675,7 @@ CONFIG_ZLIB_DEFLATE=m
 CONFIG_TEXTSEARCH=y
 CONFIG_TEXTSEARCH_KMP=m
 CONFIG_PLIST=y
+CONFIG_IOMAP_COPY=y
 
 #
 # Instrumentation Support
@@ -1563,8 +1688,11 @@ CONFIG_OPROFILE=y
 # Kernel hacking
 #
 # CONFIG_PRINTK_TIME is not set
+CONFIG_ENABLE_MUST_CHECK=y
 CONFIG_MAGIC_SYSRQ=y
 # CONFIG_UNUSED_SYMBOLS is not set
+CONFIG_DEBUG_FS=y
+# CONFIG_HEADERS_CHECK is not set
 CONFIG_DEBUG_KERNEL=y
 CONFIG_LOG_BUF_SHIFT=17
 CONFIG_DETECT_SOFTLOCKUP=y
@@ -1578,16 +1706,19 @@ CONFIG_DEBUG_MUTEXES=y
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
 # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
 # CONFIG_DEBUG_KOBJECT is not set
+CONFIG_DEBUG_BUGVERBOSE=y
 # CONFIG_DEBUG_INFO is not set
-CONFIG_DEBUG_FS=y
 # CONFIG_DEBUG_VM is not set
+# CONFIG_DEBUG_LIST is not set
 CONFIG_FORCED_INLINING=y
 # CONFIG_RCU_TORTURE_TEST is not set
 CONFIG_DEBUG_STACKOVERFLOW=y
 CONFIG_DEBUG_STACK_USAGE=y
+# CONFIG_HCALL_STATS is not set
 CONFIG_DEBUGGER=y
 CONFIG_XMON=y
 # CONFIG_XMON_DEFAULT is not set
+CONFIG_XMON_DISASSEMBLY=y
 CONFIG_IRQSTACKS=y
 CONFIG_BOOTX_TEXT=y
 # CONFIG_PPC_EARLY_DEBUG is not set
@@ -1602,7 +1733,12 @@ CONFIG_BOOTX_TEXT=y
 # Cryptographic options
 #
 CONFIG_CRYPTO=y
+CONFIG_CRYPTO_ALGAPI=y
+CONFIG_CRYPTO_BLKCIPHER=y
+CONFIG_CRYPTO_HASH=y
+CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_HMAC=y
+# CONFIG_CRYPTO_XCBC is not set
 CONFIG_CRYPTO_NULL=m
 CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MD5=y
@@ -1611,9 +1747,14 @@ CONFIG_CRYPTO_SHA256=m
 CONFIG_CRYPTO_SHA512=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_TGR192=m
+# CONFIG_CRYPTO_GF128MUL is not set
+CONFIG_CRYPTO_ECB=m
+CONFIG_CRYPTO_CBC=y
+# CONFIG_CRYPTO_LRW is not set
 CONFIG_CRYPTO_DES=y
 CONFIG_CRYPTO_BLOWFISH=m
 CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_TWOFISH_COMMON=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_AES=m
 CONFIG_CRYPTO_CAST5=m
index e96521530d21b3add856b834d8ace4750234e42e..030d300cd71c8be2e04000b089363e4787ad9d1e 100644 (file)
@@ -303,5 +303,8 @@ int main(void)
        DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
        DEFINE(CLOCK_REALTIME_RES, TICK_NSEC);
 
+#ifdef CONFIG_BUG
+       DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry));
+#endif
        return 0;
 }
index 1a3d4de197d2bac728cc2492e3ba9565551d5717..2551c0884afcd178d957ce02304ccef119e5870d 100644 (file)
@@ -28,6 +28,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/cputable.h>
 #include <asm/firmware.h>
+#include <asm/bug.h>
 
 /*
  * System calls.
@@ -634,19 +635,15 @@ _GLOBAL(enter_rtas)
        li      r0,0
        mtcr    r0
 
+#ifdef CONFIG_BUG      
        /* There is no way it is acceptable to get here with interrupts enabled,
         * check it with the asm equivalent of WARN_ON
         */
        lbz     r0,PACASOFTIRQEN(r13)
 1:     tdnei   r0,0
-.section __bug_table,"a"
-       .llong  1b,__LINE__ + 0x1000000, 1f, 2f
-.previous
-.section .rodata,"a"
-1:     .asciz  __FILE__
-2:     .asciz "enter_rtas"
-.previous
-
+       EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
+#endif
+       
        /* Hard-disable interrupts */
        mfmsr   r6
        rldicl  r7,r6,48,1
index 89c836d548096a96110c020a0383f5a711bf40fa..1bb20d841080cb0cbdb739bfd0facd9f5ef496bd 100644 (file)
@@ -744,7 +744,8 @@ static int htlb_check_hinted_area(unsigned long addr, unsigned long len)
        struct vm_area_struct *vma;
 
        vma = find_vma(current->mm, addr);
-       if (!vma || ((addr + len) <= vma->vm_start))
+       if (TASK_SIZE - len >= addr &&
+           (!vma || ((addr + len) <= vma->vm_start)))
                return 0;
 
        return -ENOMEM;
@@ -815,6 +816,8 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
                return -EINVAL;
        if (len & ~HPAGE_MASK)
                return -EINVAL;
+       if (len > TASK_SIZE)
+               return -ENOMEM;
 
        if (!cpu_has_feature(CPU_FTR_16M_PAGE))
                return -EINVAL;
@@ -823,9 +826,6 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
        BUG_ON((addr + len)  < addr);
 
        if (test_thread_flag(TIF_32BIT)) {
-               /* Paranoia, caller should have dealt with this */
-               BUG_ON((addr + len) > 0x100000000UL);
-
                curareas = current->mm->context.low_htlb_areas;
 
                /* First see if we can use the hint address */
index eaff71e74fb0f8a783465715f73560c877a81c69..0f21bab33f6ce31f7006f487e5b55467fb7d4ff2 100644 (file)
@@ -153,6 +153,7 @@ define_machine(lite52xx) {
        .name           = "lite52xx",
        .probe          = lite52xx_probe,
        .setup_arch     = lite52xx_setup_arch,
+       .init           = mpc52xx_declare_of_platform_devices,
        .init_IRQ       = mpc52xx_init_irq,
        .get_irq        = mpc52xx_get_irq,
        .show_cpuinfo   = lite52xx_show_cpuinfo,
index 8331ff457770dde2063e5571fdae04baba19aaca..cc40889074bd1f6c88e07a619f40186a047546d3 100644 (file)
@@ -116,11 +116,12 @@ unmap_regs:
        if (xlb) iounmap(xlb);
 }
 
-static int __init
+void __init
 mpc52xx_declare_of_platform_devices(void)
 {
        /* Find every child of the SOC node and add it to of_platform */
-       return of_platform_bus_probe(NULL, NULL, NULL);
+       if (of_platform_bus_probe(NULL, NULL, NULL))
+               printk(KERN_ERR __FILE__ ": "
+                       "Error while probing of_platform bus\n");
 }
 
-device_initcall(mpc52xx_declare_of_platform_devices);
index e3e929e1b4606cada11840a13f89aed3c5108805..c1f4502a3c6a02827773db41befced91abce8d8a 100644 (file)
@@ -17,6 +17,7 @@
 
 #include <asm/system.h>
 #include <asm/paca.h>
+#include <asm/firmware.h>
 #include <asm/iseries/it_lp_queue.h>
 #include <asm/iseries/hv_lp_event.h>
 #include <asm/iseries/hv_call_event.h>
@@ -318,6 +319,9 @@ static int __init proc_lpevents_init(void)
 {
        struct proc_dir_entry *e;
 
+       if (!firmware_has_feature(FW_FEATURE_ISERIES))
+               return 0;
+
        e = create_proc_entry("iSeries/lpevents", S_IFREG|S_IRUGO, NULL);
        if (e)
                e->proc_fops = &proc_lpevents_operations;
index cff15ae24f6ba8bf1184ce7cf2ad71e5f5a063f8..1ad0e4aaad1a0d5ba514e4d1d136c9cf7d40907c 100644 (file)
@@ -38,6 +38,7 @@
 #include <asm/uaccess.h>
 #include <asm/paca.h>
 #include <asm/abs_addr.h>
+#include <asm/firmware.h>
 #include <asm/iseries/vio.h>
 #include <asm/iseries/mf.h>
 #include <asm/iseries/hv_lp_config.h>
@@ -1235,6 +1236,9 @@ static int __init mf_proc_init(void)
        char name[2];
        int i;
 
+       if (!firmware_has_feature(FW_FEATURE_ISERIES))
+               return 0;
+
        mf_proc_root = proc_mkdir("iSeries/mf", NULL);
        if (!mf_proc_root)
                return 1;
index c241413629ac155266cb7fcddb8fa68da759a3c6..b54e37101e6972ba75650692385af460012c0179 100644 (file)
@@ -24,6 +24,7 @@
 #include <asm/processor.h>
 #include <asm/time.h>
 #include <asm/lppaca.h>
+#include <asm/firmware.h>
 #include <asm/iseries/hv_call_xm.h>
 
 #include "processor_vpd.h"
 
 static int __init iseries_proc_create(void)
 {
-       struct proc_dir_entry *e = proc_mkdir("iSeries", 0);
+       struct proc_dir_entry *e;
+
+       if (!firmware_has_feature(FW_FEATURE_ISERIES))
+               return 0;
+
+       e = proc_mkdir("iSeries", 0);
        if (!e)
                return 1;
 
@@ -106,6 +112,9 @@ static int __init iseries_proc_init(void)
 {
        struct proc_dir_entry *e;
 
+       if (!firmware_has_feature(FW_FEATURE_ISERIES))
+               return 0;
+
        e = create_proc_entry("iSeries/titanTod", S_IFREG|S_IRUGO, NULL);
        if (e)
                e->proc_fops = &proc_titantod_operations;
index bdf2afbb60c1c23556e0f4c38a6e7a082ce2e580..cce7e309340c99f933390dd5cf81f65a0fb0ab39 100644 (file)
@@ -527,7 +527,8 @@ static void __init iSeries_fixup_klimit(void)
 static int __init iSeries_src_init(void)
 {
         /* clear the progress line */
-        ppc_md.progress(" ", 0xffff);
+       if (firmware_has_feature(FW_FEATURE_ISERIES))
+               ppc_md.progress(" ", 0xffff);
         return 0;
 }
 
index 84e7ee2c086f59f21bec9a99b0ce368b07b11676..a6799ed34a66c07555c71e910586328112eb8f79 100644 (file)
@@ -42,6 +42,7 @@
 #include <asm/system.h>
 #include <asm/uaccess.h>
 #include <asm/prom.h>
+#include <asm/firmware.h>
 #include <asm/iseries/hv_types.h>
 #include <asm/iseries/hv_lp_event.h>
 #include <asm/iseries/hv_lp_config.h>
@@ -183,6 +184,9 @@ static int __init vio_proc_init(void)
 {
        struct proc_dir_entry *e;
 
+       if (!firmware_has_feature(FW_FEATURE_ISERIES))
+               return 0;
+
        e = create_proc_entry("iSeries/config", 0, NULL);
        if (e)
                e->proc_fops = &proc_viopath_operations;
index f12d5c69e74dea275d772308cdcf311da4f54956..50855d4fd5a08204d1cfcabdc4bc98ada2005fd6 100644 (file)
@@ -254,7 +254,6 @@ static void __init maple_init_IRQ(void)
                printk(KERN_DEBUG "OpenPIC addr: %lx, has ISUs: %d\n",
                       openpic_addr, has_isus);
        }
-       of_node_put(root);
 
        BUG_ON(openpic_addr == 0);
 
index 89d6e295dbf7171011ffd278fbf93efc06f45eb7..bea7d1bb1a3b48e5c24c05c15068430b9c8859c2 100644 (file)
@@ -129,7 +129,6 @@ static __init void pas_init_IRQ(void)
        }
        openpic_addr = of_read_number(opprop, naddr);
        printk(KERN_DEBUG "OpenPIC addr: %lx\n", openpic_addr);
-       of_node_put(root);
 
        mpic = mpic_alloc(mpic_node, openpic_addr, MPIC_PRIMARY, 0, 0,
                          " PAS-OPIC  ");
index c00cfed7af2c0a8b2531ff8e62f620ba971e21c2..5c7e38789897b4da98d3fae627d4881e4bde39a1 100644 (file)
@@ -26,7 +26,7 @@
 BEGIN_FTR_SECTION;                                             \
        mfspr   r0,SPRN_PURR;           /* get PURR and */      \
        std     r0,STK_PARM(r6)(r1);    /* save for later */    \
-END_FTR_SECTION_IFCLR(CPU_FTR_PURR);
+END_FTR_SECTION_IFSET(CPU_FTR_PURR);
        
 /*
  * postcall is performed immediately before function return which
@@ -43,7 +43,7 @@ BEGIN_FTR_SECTION;                                            \
        mfspr   r8,SPRN_PURR;           /* PURR after */        \
        ld      r6,STK_PARM(r6)(r1);    /* PURR before */       \
        subf    r6,r6,r8;               /* delta */             \
-END_FTR_SECTION_IFCLR(CPU_FTR_PURR);                           \
+END_FTR_SECTION_IFSET(CPU_FTR_PURR);                           \
        ld      r5,STK_PARM(r5)(r1);    /* timebase before */   \
        subf    r5,r5,r7;               /* time delta */        \
                                                                \
@@ -66,7 +66,7 @@ BEGIN_FTR_SECTION;                                            \
        ld      r7,HCALL_STAT_PURR(r4); /* PURR */              \
        add     r7,r7,r6;                                       \
        std     r7,HCALL_STAT_PURR(r4);                         \
-END_FTR_SECTION_IFCLR(CPU_FTR_PURR);                           \
+END_FTR_SECTION_IFSET(CPU_FTR_PURR);                           \
 1:
 #else
 #define HCALL_INST_PRECALL
@@ -145,6 +145,7 @@ _GLOBAL(plpar_hcall9)
 
        HVSC                            /* invoke the hypervisor */
 
+       mr      r0,r12
        ld      r12,STK_PARM(r4)(r1)
        std     r4,  0(r12)
        std     r5,  8(r12)
@@ -154,7 +155,7 @@ _GLOBAL(plpar_hcall9)
        std     r9, 40(r12)
        std     r10,48(r12)
        std     r11,56(r12)
-       std     r12,64(r12)
+       std     r0, 64(r12)
 
        HCALL_INST_POSTCALL
 
index 80181c4c49ebd7c356506ca45d8625b316c2c509..3ddc04925d50d01f85266379ffe6d96a0a406aa0 100644 (file)
@@ -34,7 +34,7 @@ DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats);
  */
 static void *hc_start(struct seq_file *m, loff_t *pos)
 {
-       if ((int)*pos < HCALL_STAT_ARRAY_SIZE)
+       if ((int)*pos < (HCALL_STAT_ARRAY_SIZE-1))
                return (void *)(unsigned long)(*pos + 1);
 
        return NULL;
@@ -57,7 +57,7 @@ static int hc_show(struct seq_file *m, void *p)
        struct hcall_stats *hs = (struct hcall_stats *)m->private;
 
        if (hs[h_num].num_calls) {
-               if (!cpu_has_feature(CPU_FTR_PURR))
+               if (cpu_has_feature(CPU_FTR_PURR))
                        seq_printf(m, "%lu %lu %lu %lu\n", h_num<<2,
                                   hs[h_num].num_calls,
                                   hs[h_num].tb_total,
index b5b2b1103de8d58dd2f49b6b766fe4d429913c46..81d172d650389ee206eef128d912028a7bcd4933 100644 (file)
@@ -224,7 +224,6 @@ static void xics_unmask_irq(unsigned int virq)
 static void xics_mask_real_irq(unsigned int irq)
 {
        int call_status;
-       unsigned int server;
 
        if (irq == XICS_IPI)
                return;
@@ -236,9 +235,9 @@ static void xics_mask_real_irq(unsigned int irq)
                return;
        }
 
-       server = get_irq_server(irq);
        /* Have to set XIVE to 0xff to be able to remove a slot */
-       call_status = rtas_call(ibm_set_xive, 3, 1, NULL, irq, server, 0xff);
+       call_status = rtas_call(ibm_set_xive, 3, 1, NULL, irq,
+                               default_server, 0xff);
        if (call_status != 0) {
                printk(KERN_ERR "xics_disable_irq: irq=%u: ibm_set_xive(0xff)"
                       " returned %d\n", irq, call_status);
index 04d4917eb3035c4140d184e1c740b6fc9eb535cd..2621a7e72d2d1f0ec7839c0ca1ba7b309600a198 100644 (file)
@@ -12,7 +12,6 @@ obj-$(CONFIG_MMIO_NVRAM)      += mmio_nvram.o
 obj-$(CONFIG_FSL_SOC)          += fsl_soc.o
 obj-$(CONFIG_TSI108_BRIDGE)    += tsi108_pci.o tsi108_dev.o
 obj-$(CONFIG_QUICC_ENGINE)     += qe_lib/
-obj-$(CONFIG_MTD)              += rom.o
 
 ifeq ($(CONFIG_PPC_MERGE),y)
 obj-$(CONFIG_PPC_I8259)                += i8259.o
@@ -21,5 +20,6 @@ endif
 
 # Temporary hack until we have migrated to asm-powerpc
 ifeq ($(ARCH),powerpc)
+obj-$(CONFIG_MTD)              += rom.o
 obj-$(CONFIG_CPM2)             += cpm2_common.o cpm2_pic.o
 endif
index 4388b3309e0cbac3d3879e504d18021c32733378..eca507050e47db9e575df233afbda59b0ed45cc7 100644 (file)
@@ -164,11 +164,14 @@ startup_continue:
        srl     %r7,28
        clr     %r6,%r7                 # compare cc with last access code
        be      .Lsame-.LPG1(%r13)
-       b       .Lchkmem-.LPG1(%r13)
+       lhi     %r8,0                   # no program checks
+       b       .Lsavchk-.LPG1(%r13)
 .Lsame:
        ar      %r5,%r1                 # add 128KB to end of chunk
        bno     .Lloop-.LPG1(%r13)      # r1 < 0x80000000 -> loop
 .Lchkmem:                              # > 2GB or tprot got a program check
+       lhi     %r8,1                   # set program check flag
+.Lsavchk:
        clr     %r4,%r5                 # chunk size > 0?
        be      .Lchkloop-.LPG1(%r13)
        st      %r4,0(%r3)              # store start address of chunk
@@ -190,8 +193,15 @@ startup_continue:
        je      .Ldonemem               # if not, leave
        chi     %r10,0                  # do we have chunks left?
        je      .Ldonemem
+       chi     %r8,1                   # program check ?
+       je      .Lpgmchk
+       lr      %r4,%r5                 # potential new chunk
+       alr     %r5,%r1                 # add 128KB to end of chunk
+       j       .Llpcnt
+.Lpgmchk:
        alr     %r5,%r1                 # add 128KB to end of chunk
        lr      %r4,%r5                 # potential new chunk
+.Llpcnt:
        clr     %r5,%r9                 # should we go on?
        jl      .Lloop
 .Ldonemem:
index c526279e11239dc65e5c8c8a0b585d3ca1556a04..6ba3f4512dd1a1ad26a4d4e0993c9b6feaaf6bc6 100644 (file)
@@ -172,12 +172,15 @@ startup_continue:
        srl     %r7,28
        clr     %r6,%r7                 # compare cc with last access code
        je      .Lsame
-       j       .Lchkmem
+       lghi    %r8,0                   # no program checks
+       j       .Lsavchk
 .Lsame:
        algr    %r5,%r1                 # add 128KB to end of chunk
                                        # no need to check here,
        brc     12,.Lloop               # this is the same chunk
 .Lchkmem:                              # > 16EB or tprot got a program check
+       lghi    %r8,1                   # set program check flag
+.Lsavchk:
        clgr    %r4,%r5                 # chunk size > 0?
        je      .Lchkloop
        stg     %r4,0(%r3)              # store start address of chunk
@@ -204,8 +207,15 @@ startup_continue:
        chi     %r10, 0                 # do we have chunks left?
        je      .Ldonemem
 .Lhsaskip:
+       chi     %r8,1                   # program check ?
+       je      .Lpgmchk
+       lgr     %r4,%r5                 # potential new chunk
+       algr    %r5,%r1                 # add 128KB to end of chunk
+       j       .Llpcnt
+.Lpgmchk:
        algr    %r5,%r1                 # add 128KB to end of chunk
        lgr     %r4,%r5                 # potential new chunk
+.Llpcnt:
        clgr    %r5,%r9                 # should we go on?
        jl      .Lloop
 .Ldonemem:
index 49ef206ec8806c6a6dc4ed0b0d9e211979065cf1..5d8ee3baac147e0b153dab2d4ad0152ab22629f9 100644 (file)
@@ -476,7 +476,7 @@ static void __init setup_memory_end(void)
        int i;
 
        memory_size = real_size = 0;
-       max_phys = VMALLOC_END - VMALLOC_MIN_SIZE;
+       max_phys = VMALLOC_END_INIT - VMALLOC_MIN_SIZE;
        memory_end &= PAGE_MASK;
 
        max_mem = memory_end ? min(max_phys, memory_end) : max_phys;
index 19090f7d4f517e6799619bf1b4398d750994285c..c0cd255fddbd04ccce6895bac1cd08b940be8535 100644 (file)
@@ -794,7 +794,10 @@ static int __init topology_init(void)
        int ret;
 
        for_each_possible_cpu(cpu) {
-               ret = register_cpu(&per_cpu(cpu_devices, cpu), cpu);
+               struct cpu *c = &per_cpu(cpu_devices, cpu);
+
+               c->hotpluggable = 1;
+               ret = register_cpu(c, cpu);
                if (ret)
                        printk(KERN_WARNING "topology_init: register_cpu %d "
                               "failed (%d)\n", cpu, ret);
index 633249c3ba9180ea2aa75534b0b64458a6825191..49c3e46b406573011d591bbcbd36e6825deb0578 100644 (file)
@@ -8,6 +8,7 @@
  */
 
 #include <linux/errno.h>
+#include <linux/hardirq.h>
 #include <linux/mm.h>
 #include <asm/uaccess.h>
 #include <asm/futex.h>
@@ -18,6 +19,8 @@ static inline int __handle_fault(struct mm_struct *mm, unsigned long address,
        struct vm_area_struct *vma;
        int ret = -EFAULT;
 
+       if (in_atomic())
+               return ret;
        down_read(&mm->mmap_sem);
        vma = find_vma(mm, address);
        if (unlikely(!vma))
index bbaca66fa29356af1b0c7ea5589622f28aec4519..56a0214e9928c371c6cce379e599aaf0f61ef8cd 100644 (file)
@@ -258,8 +258,6 @@ int futex_atomic_op(int op, int __user *uaddr, int oparg, int *old)
 {
        int oldval = 0, newval, ret;
 
-       pagefault_disable();
-
        switch (op) {
        case FUTEX_OP_SET:
                __futex_atomic_op("lr %2,%5\n",
@@ -284,7 +282,6 @@ int futex_atomic_op(int op, int __user *uaddr, int oparg, int *old)
        default:
                ret = -ENOSYS;
        }
-       pagefault_enable();
        *old = oldval;
        return ret;
 }
index 829698f6d0490962c387f884f550678b90f8db19..49802f1bee9487484b066915160388d5d683234a 100644 (file)
@@ -69,6 +69,11 @@ static void nvidia_bugs(void)
 
 static void ati_bugs(void)
 {
+       if (timer_over_8254 == 1) {
+               timer_over_8254 = 0;
+               printk(KERN_INFO
+               "ATI board detected. Disabling timer routing over 8254.\n");
+       }
 }
 
 static void intel_bugs(void)
index 2a1dcd5f69c2599e3b3e4b41b355a10f672f79d7..d7bad90a5ad80d6b4d651f23d87d30e2dfec8580 100644 (file)
@@ -55,6 +55,10 @@ int sis_apic_bug; /* not actually supported, dummy for compile */
 
 static int no_timer_check;
 
+static int disable_timer_pin_1 __initdata;
+
+int timer_over_8254 __initdata = 1;
+
 /* Where if anywhere is the i8259 connect in external int mode */
 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
 
@@ -350,6 +354,29 @@ static int __init disable_ioapic_setup(char *str)
 }
 early_param("noapic", disable_ioapic_setup);
 
+/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
+static int __init disable_timer_pin_setup(char *arg)
+{
+       disable_timer_pin_1 = 1;
+       return 1;
+}
+__setup("disable_timer_pin_1", disable_timer_pin_setup);
+
+static int __init setup_disable_8254_timer(char *s)
+{
+       timer_over_8254 = -1;
+       return 1;
+}
+static int __init setup_enable_8254_timer(char *s)
+{
+       timer_over_8254 = 2;
+       return 1;
+}
+
+__setup("disable_8254_timer", setup_disable_8254_timer);
+__setup("enable_8254_timer", setup_enable_8254_timer);
+
+
 /*
  * Find the IRQ entry number of a certain pin.
  */
@@ -1568,33 +1595,10 @@ static inline void unlock_ExtINT_logic(void)
  * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
  * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
  * fanatically on his truly buggy board.
+ *
+ * FIXME: really need to revamp this for modern platforms only.
  */
-
-static int try_apic_pin(int apic, int pin, char *msg)
-{
-       apic_printk(APIC_VERBOSE, KERN_INFO
-                   "..TIMER: trying IO-APIC=%d PIN=%d %s",
-                   apic, pin, msg);
-
-       /*
-        * Ok, does IRQ0 through the IOAPIC work?
-        */
-       if (!no_timer_check && timer_irq_works()) {
-               nmi_watchdog_default();
-               if (nmi_watchdog == NMI_IO_APIC) {
-                       disable_8259A_irq(0);
-                       setup_nmi();
-                       enable_8259A_irq(0);
-               }
-               return 1;
-       }
-       clear_IO_APIC_pin(apic, pin);
-       apic_printk(APIC_QUIET, KERN_ERR " .. failed\n");
-       return 0;
-}
-
-/* The function from hell */
-static void check_timer(void)
+static inline void check_timer(void)
 {
        int apic1, pin1, apic2, pin2;
        int vector;
@@ -1615,43 +1619,61 @@ static void check_timer(void)
         */
        apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
        init_8259A(1);
+       if (timer_over_8254 > 0)
+               enable_8259A_irq(0);
 
        pin1  = find_isa_irq_pin(0, mp_INT);
        apic1 = find_isa_irq_apic(0, mp_INT);
        pin2  = ioapic_i8259.pin;
        apic2 = ioapic_i8259.apic;
 
-       /* Do this first, otherwise we get double interrupts on ATI boards */
-       if ((pin1 != -1) && try_apic_pin(apic1, pin1,"with 8259 IRQ0 disabled"))
-               return;
+       apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
+               vector, apic1, pin1, apic2, pin2);
 
-       /* Now try again with IRQ0 8259A enabled.
-          Assumes timer is on IO-APIC 0 ?!? */
-       enable_8259A_irq(0);
-       unmask_IO_APIC_irq(0);
-       if (try_apic_pin(apic1, pin1, "with 8259 IRQ0 enabled"))
-               return;
-       disable_8259A_irq(0);
-
-       /* Always try pin0 and pin2 on APIC 0 to handle buggy timer overrides
-          on Nvidia boards */
-       if (!(apic1 == 0 && pin1 == 0) &&
-           try_apic_pin(0, 0, "fallback with 8259 IRQ0 disabled"))
-               return;
-       if (!(apic1 == 0 && pin1 == 2) &&
-           try_apic_pin(0, 2, "fallback with 8259 IRQ0 disabled"))
-               return;
+       if (pin1 != -1) {
+               /*
+                * Ok, does IRQ0 through the IOAPIC work?
+                */
+               unmask_IO_APIC_irq(0);
+               if (!no_timer_check && timer_irq_works()) {
+                       nmi_watchdog_default();
+                       if (nmi_watchdog == NMI_IO_APIC) {
+                               disable_8259A_irq(0);
+                               setup_nmi();
+                               enable_8259A_irq(0);
+                       }
+                       if (disable_timer_pin_1 > 0)
+                               clear_IO_APIC_pin(0, pin1);
+                       return;
+               }
+               clear_IO_APIC_pin(apic1, pin1);
+               apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: 8254 timer not "
+                               "connected to IO-APIC\n");
+       }
 
-       /* Then try pure 8259A routing on the 8259 as reported by BIOS*/
-       enable_8259A_irq(0);
+       apic_printk(APIC_VERBOSE,KERN_INFO "...trying to set up timer (IRQ0) "
+                               "through the 8259A ... ");
        if (pin2 != -1) {
+               apic_printk(APIC_VERBOSE,"\n..... (found apic %d pin %d) ...",
+                       apic2, pin2);
+               /*
+                * legacy devices should be connected to IO APIC #0
+                */
                setup_ExtINT_IRQ0_pin(apic2, pin2, vector);
-               if (try_apic_pin(apic2,pin2,"8259A broadcast ExtINT from BIOS"))
+               if (timer_irq_works()) {
+                       apic_printk(APIC_VERBOSE," works.\n");
+                       nmi_watchdog_default();
+                       if (nmi_watchdog == NMI_IO_APIC) {
+                               setup_nmi();
+                       }
                        return;
+               }
+               /*
+                * Cleanup, just in case ...
+                */
+               clear_IO_APIC_pin(apic2, pin2);
        }
-
-       /* Tried all possibilities to go through the IO-APIC. Now come the
-          really cheesy fallbacks. */
+       apic_printk(APIC_VERBOSE," failed.\n");
 
        if (nmi_watchdog == NMI_IO_APIC) {
                printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
index 88aeccbafaaf9e46ed4678b573e0f28cc6a6f5f2..d9b651ffcdc0c21335f1c15e1e7ef7e3d86361cf 100644 (file)
@@ -321,13 +321,16 @@ static int set_lcd_status(struct backlight_device *bd)
 static unsigned long write_lcd(const char *buffer, unsigned long count)
 {
        int value;
-       int ret = count;
+       int ret;
 
        if (sscanf(buffer, " brightness : %i", &value) == 1 &&
-           value >= 0 && value < HCI_LCD_BRIGHTNESS_LEVELS)
+           value >= 0 && value < HCI_LCD_BRIGHTNESS_LEVELS) {
                ret = set_lcd(value);
-       else
+               if (ret == 0)
+                       ret = count;
+       } else {
                ret = -EINVAL;
+       }
        return ret;
 }
 
index b34e0a958d0f37f3ef799c1659a55f4558eeb298..da21552d2b1c0a1013bb5942d583c4b8038230c0 100644 (file)
@@ -381,7 +381,7 @@ config PATA_OPTI
          If unsure, say N.
 
 config PATA_OPTIDMA
-       tristate "OPTI FireStar PATA support (Veyr Experimental)"
+       tristate "OPTI FireStar PATA support (Very Experimental)"
        depends on PCI && EXPERIMENTAL
        help
          This option enables DMA/PIO support for the later OPTi
index 47082df7199e113ecae36156604f1b7a39c2549c..dfb306057cf465c9e5f48eff8abdb55e633e6f6a 100644 (file)
@@ -25,7 +25,7 @@
 #include <linux/libata.h>
 
 #define DRV_NAME       "pata_hpt37x"
-#define DRV_VERSION    "0.5.1"
+#define DRV_VERSION    "0.5.2"
 
 struct hpt_clock {
        u8      xfer_speed;
@@ -416,7 +416,7 @@ static const char *bad_ata100_5[] = {
 
 static unsigned long hpt370_filter(const struct ata_port *ap, struct ata_device *adev, unsigned long mask)
 {
-       if (adev->class != ATA_DEV_ATA) {
+       if (adev->class == ATA_DEV_ATA) {
                if (hpt_dma_blacklisted(adev, "UDMA", bad_ata33))
                        mask &= ~ATA_MASK_UDMA;
                if (hpt_dma_blacklisted(adev, "UDMA100", bad_ata100_5))
@@ -749,7 +749,7 @@ static void hpt37x_bmdma_stop(struct ata_queued_cmd *qc)
 {
        struct ata_port *ap = qc->ap;
        struct pci_dev *pdev = to_pci_dev(ap->host->dev);
-       int mscreg = 0x50 + 2 * ap->port_no;
+       int mscreg = 0x50 + 4 * ap->port_no;
        u8 bwsr_stat, msc_stat;
 
        pci_read_config_byte(pdev, 0x6A, &bwsr_stat);
index 7c95c762950fde3efa8253e134662f2db9fc739b..62462190e07e877634053cf20e41643c8ea227cf 100644 (file)
@@ -765,47 +765,34 @@ static inline struct bio *pkt_get_list_first(struct bio **list_head, struct bio
  */
 static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *cgc)
 {
-       char sense[SCSI_SENSE_BUFFERSIZE];
-       request_queue_t *q;
+       request_queue_t *q = bdev_get_queue(pd->bdev);
        struct request *rq;
-       DECLARE_COMPLETION_ONSTACK(wait);
-       int err = 0;
+       int ret = 0;
 
-       q = bdev_get_queue(pd->bdev);
+       rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ?
+                            WRITE : READ, __GFP_WAIT);
+
+       if (cgc->buflen) {
+               if (blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen, __GFP_WAIT))
+                       goto out;
+       }
+
+       rq->cmd_len = COMMAND_SIZE(rq->cmd[0]);
+       memcpy(rq->cmd, cgc->cmd, CDROM_PACKET_SIZE);
+       if (sizeof(rq->cmd) > CDROM_PACKET_SIZE)
+               memset(rq->cmd + CDROM_PACKET_SIZE, 0, sizeof(rq->cmd) - CDROM_PACKET_SIZE);
 
-       rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ? WRITE : READ,
-                            __GFP_WAIT);
-       rq->errors = 0;
-       rq->rq_disk = pd->bdev->bd_disk;
-       rq->bio = NULL;
-       rq->buffer = NULL;
        rq->timeout = 60*HZ;
-       rq->data = cgc->buffer;
-       rq->data_len = cgc->buflen;
-       rq->sense = sense;
-       memset(sense, 0, sizeof(sense));
-       rq->sense_len = 0;
        rq->cmd_type = REQ_TYPE_BLOCK_PC;
        rq->cmd_flags |= REQ_HARDBARRIER;
        if (cgc->quiet)
                rq->cmd_flags |= REQ_QUIET;
-       memcpy(rq->cmd, cgc->cmd, CDROM_PACKET_SIZE);
-       if (sizeof(rq->cmd) > CDROM_PACKET_SIZE)
-               memset(rq->cmd + CDROM_PACKET_SIZE, 0, sizeof(rq->cmd) - CDROM_PACKET_SIZE);
-       rq->cmd_len = COMMAND_SIZE(rq->cmd[0]);
-
-       rq->ref_count++;
-       rq->end_io_data = &wait;
-       rq->end_io = blk_end_sync_rq;
-       elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1);
-       generic_unplug_device(q);
-       wait_for_completion(&wait);
-
-       if (rq->errors)
-               err = -EIO;
 
+       blk_execute_rq(rq->q, pd->bdev->bd_disk, rq, 0);
+       ret = rq->errors;
+out:
        blk_put_request(rq);
-       return err;
+       return ret;
 }
 
 /*
index aeefec97fdee2ca40e706e0c4348f0fca68ca892..6bdf593081d8d23f20dc0a5692c8dfec6d29390a 100644 (file)
@@ -117,10 +117,17 @@ static struct usb_device_id blacklist_ids[] = {
 
        /* IBM/Lenovo ThinkPad with Broadcom chip */
        { USB_DEVICE(0x0a5c, 0x201e), .driver_info = HCI_WRONG_SCO_MTU },
+       { USB_DEVICE(0x0a5c, 0x2110), .driver_info = HCI_WRONG_SCO_MTU },
 
        /* ANYCOM Bluetooth USB-200 and USB-250 */
        { USB_DEVICE(0x0a5c, 0x2111), .driver_info = HCI_RESET },
 
+       /* HP laptop with Broadcom chip */
+       { USB_DEVICE(0x03f0, 0x171d), .driver_info = HCI_WRONG_SCO_MTU },
+
+       /* Dell laptop with Broadcom chip */
+       { USB_DEVICE(0x413c, 0x8126), .driver_info = HCI_WRONG_SCO_MTU },
+
        /* Microsoft Wireless Transceiver for Bluetooth 2.0 */
        { USB_DEVICE(0x045e, 0x009c), .driver_info = HCI_RESET },
 
index 5eabe47b0bc842f4abc60599b561dfa7229fbcdf..433305062fb8aa4f7c070d4c8ae7e968d766be1b 100644 (file)
@@ -606,9 +606,9 @@ static int iiDownloadAll(i2eBordStrPtr, loadHdrStrPtr, int, int);
 // code and returning.
 //
 #define COMPLETE(pB,code) \
-       if(1){ \
+       do { \
                 pB->i2eError = code; \
                 return (code == I2EE_GOOD);\
-       }
+       } while (0)
 
 #endif   // I2ELLIS_H
index 3ece6923134359a9e70c841e8e592b589ea864ab..5c9f67f98d10b43e58b2736d07ca76b8f498faac 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/init.h>
 #include <linux/connector.h>
 #include <asm/atomic.h>
+#include <asm/unaligned.h>
 
 #include <linux/cn_proc.h>
 
@@ -60,7 +61,7 @@ void proc_fork_connector(struct task_struct *task)
        ev = (struct proc_event*)msg->data;
        get_seq(&msg->seq, &ev->cpu);
        ktime_get_ts(&ts); /* get high res monotonic timestamp */
-       ev->timestamp_ns = timespec_to_ns(&ts);
+       put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns);
        ev->what = PROC_EVENT_FORK;
        ev->event_data.fork.parent_pid = task->real_parent->pid;
        ev->event_data.fork.parent_tgid = task->real_parent->tgid;
@@ -88,7 +89,7 @@ void proc_exec_connector(struct task_struct *task)
        ev = (struct proc_event*)msg->data;
        get_seq(&msg->seq, &ev->cpu);
        ktime_get_ts(&ts); /* get high res monotonic timestamp */
-       ev->timestamp_ns = timespec_to_ns(&ts);
+       put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns);
        ev->what = PROC_EVENT_EXEC;
        ev->event_data.exec.process_pid = task->pid;
        ev->event_data.exec.process_tgid = task->tgid;
@@ -124,7 +125,7 @@ void proc_id_connector(struct task_struct *task, int which_id)
                return;
        get_seq(&msg->seq, &ev->cpu);
        ktime_get_ts(&ts); /* get high res monotonic timestamp */
-       ev->timestamp_ns = timespec_to_ns(&ts);
+       put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns);
 
        memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
        msg->ack = 0; /* not used */
@@ -146,7 +147,7 @@ void proc_exit_connector(struct task_struct *task)
        ev = (struct proc_event*)msg->data;
        get_seq(&msg->seq, &ev->cpu);
        ktime_get_ts(&ts); /* get high res monotonic timestamp */
-       ev->timestamp_ns = timespec_to_ns(&ts);
+       put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns);
        ev->what = PROC_EVENT_EXIT;
        ev->event_data.exit.process_pid = task->pid;
        ev->event_data.exit.process_tgid = task->tgid;
@@ -181,7 +182,7 @@ static void cn_proc_ack(int err, int rcvd_seq, int rcvd_ack)
        ev = (struct proc_event*)msg->data;
        msg->seq = rcvd_seq;
        ktime_get_ts(&ts); /* get high res monotonic timestamp */
-       ev->timestamp_ns = timespec_to_ns(&ts);
+       put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns);
        ev->cpu = -1;
        ev->what = PROC_EVENT_NONE;
        ev->event_data.ack.err = err;
index e1989f3a268404226155f0b9ef93bcce4c5fc747..9367c4cfe936f533f9500cd10f73a2b293dfd538 100644 (file)
@@ -564,13 +564,4 @@ config I2C_PNX
          This driver can also be built as a module.  If so, the module
          will be called i2c-pnx.
 
-config I2C_PNX_EARLY
-       bool "Early initialization for I2C on PNXxxxx"
-       depends on I2C_PNX=y
-       help
-         Under certain circumstances one may need to make sure I2C on PNXxxxx
-         is initialized earlier than some other driver that depends on it
-         (for instance, that might be USB in case of PNX4008). With this
-         option turned on you can guarantee that.
-
 endmenu
index bbc8e3a7ff556c1eb8306b56169f2e36c28d0f14..490173611d6b615013b2d87a7984af9aec7b04d2 100644 (file)
@@ -529,6 +529,8 @@ mv64xxx_i2c_probe(struct platform_device *pd)
        platform_set_drvdata(pd, drv_data);
        i2c_set_adapdata(&drv_data->adapter, drv_data);
 
+       mv64xxx_i2c_hw_init(drv_data);
+
        if (request_irq(drv_data->irq, mv64xxx_i2c_intr, 0,
                        MV64XXX_I2C_CTLR_NAME, drv_data)) {
                dev_err(&drv_data->adapter.dev,
@@ -542,8 +544,6 @@ mv64xxx_i2c_probe(struct platform_device *pd)
                goto exit_free_irq;
        }
 
-       mv64xxx_i2c_hw_init(drv_data);
-
        return 0;
 
        exit_free_irq:
index de0bca77e92697acb0fb6b10aa9d4c3a63d92e18..17376feb1acc6b95c29196ac538515b9a58ead4d 100644 (file)
@@ -305,8 +305,7 @@ static int i2c_pnx_master_rcv(struct i2c_adapter *adap)
        return 0;
 }
 
-static irqreturn_t
-i2c_pnx_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+static irqreturn_t i2c_pnx_interrupt(int irq, void *dev_id)
 {
        u32 stat, ctl;
        struct i2c_adapter *adap = dev_id;
@@ -699,10 +698,6 @@ MODULE_AUTHOR("Vitaly Wool, Dennis Kovalev <source@mvista.com>");
 MODULE_DESCRIPTION("I2C driver for Philips IP3204-based I2C busses");
 MODULE_LICENSE("GPL");
 
-#ifdef CONFIG_I2C_PNX_EARLY
 /* We need to make sure I2C is initialized before USB */
 subsys_initcall(i2c_adap_pnx_init);
-#else
-mudule_init(i2c_adap_pnx_init);
-#endif
 module_exit(i2c_adap_pnx_exit);
index 420377c86422b229008e11361e079cca6556346e..3fcb646e2073649ca8d330da786259dd400df071 100644 (file)
@@ -209,6 +209,7 @@ m41t00_set(void *arg)
        buf[m41t00_chip->hour] = (buf[m41t00_chip->hour] & ~0x3f) | (hour& 0x3f);
        buf[m41t00_chip->day] = (buf[m41t00_chip->day] & ~0x3f) | (day & 0x3f);
        buf[m41t00_chip->mon] = (buf[m41t00_chip->mon] & ~0x1f) | (mon & 0x1f);
+       buf[m41t00_chip->year] = year;
 
        if (i2c_master_send(save_client, wbuf, 9) < 0)
                dev_err(&save_client->dev, "m41t00_set: Write error\n");
index 3e31f1d265c9e7d403d57a19ac87d7a4c4c443bd..b05378a3d673fb67de094c3c0e4c33e9c7836d87 100644 (file)
@@ -95,16 +95,32 @@ struct device_driver i2c_adapter_driver = {
        .bus = &i2c_bus_type,
 };
 
+/* ------------------------------------------------------------------------- */
+
+/* I2C bus adapters -- one roots each I2C or SMBUS segment */
+
 static void i2c_adapter_class_dev_release(struct class_device *dev)
 {
        struct i2c_adapter *adap = class_dev_to_i2c_adapter(dev);
        complete(&adap->class_dev_released);
 }
 
+static ssize_t i2c_adapter_show_name(struct class_device *cdev, char *buf)
+{
+       struct i2c_adapter *adap = class_dev_to_i2c_adapter(cdev);
+       return sprintf(buf, "%s\n", adap->name);
+}
+
+static struct class_device_attribute i2c_adapter_attrs[] = {
+       __ATTR(name, S_IRUGO, i2c_adapter_show_name, NULL),
+       { },
+};
+
 struct class i2c_adapter_class = {
-       .owner =        THIS_MODULE,
-       .name =         "i2c-adapter",
-       .release =      &i2c_adapter_class_dev_release,
+       .owner                  = THIS_MODULE,
+       .name                   = "i2c-adapter",
+       .class_dev_attrs        = i2c_adapter_attrs,
+       .release                = &i2c_adapter_class_dev_release,
 };
 
 static ssize_t show_adapter_name(struct device *dev, struct device_attribute *attr, char *buf)
@@ -175,8 +191,12 @@ int i2c_add_adapter(struct i2c_adapter *adap)
         * If the parent pointer is not set up,
         * we add this adapter to the host bus.
         */
-       if (adap->dev.parent == NULL)
+       if (adap->dev.parent == NULL) {
                adap->dev.parent = &platform_bus;
+               printk(KERN_WARNING "**WARNING** I2C adapter driver [%s] "
+                      "forgot to specify physical device; fix it!\n",
+                      adap->name);
+       }
        sprintf(adap->dev.bus_id, "i2c-%d", adap->nr);
        adap->dev.driver = &i2c_adapter_driver;
        adap->dev.release = &i2c_adapter_dev_release;
index ffdffb6379efa944b8972bea3c8cbea7ad6194a2..524e65de4398b73cd9e97324e7f8ecdd1b591376 100644 (file)
@@ -46,6 +46,8 @@ static atiixp_ide_timing mdma_timing[] = {
 
 static int save_mdma_mode[4];
 
+static DEFINE_SPINLOCK(atiixp_lock);
+
 /**
  *     atiixp_ratemask         -       compute rate mask for ATIIXP IDE
  *     @drive: IDE drive to compute for
@@ -105,7 +107,7 @@ static int atiixp_ide_dma_host_on(ide_drive_t *drive)
        unsigned long flags;
        u16 tmp16;
 
-       spin_lock_irqsave(&ide_lock, flags);
+       spin_lock_irqsave(&atiixp_lock, flags);
 
        pci_read_config_word(dev, ATIIXP_IDE_UDMA_CONTROL, &tmp16);
        if (save_mdma_mode[drive->dn])
@@ -114,7 +116,7 @@ static int atiixp_ide_dma_host_on(ide_drive_t *drive)
                tmp16 |= (1 << drive->dn);
        pci_write_config_word(dev, ATIIXP_IDE_UDMA_CONTROL, tmp16);
 
-       spin_unlock_irqrestore(&ide_lock, flags);
+       spin_unlock_irqrestore(&atiixp_lock, flags);
 
        return __ide_dma_host_on(drive);
 }
@@ -125,13 +127,13 @@ static int atiixp_ide_dma_host_off(ide_drive_t *drive)
        unsigned long flags;
        u16 tmp16;
 
-       spin_lock_irqsave(&ide_lock, flags);
+       spin_lock_irqsave(&atiixp_lock, flags);
 
        pci_read_config_word(dev, ATIIXP_IDE_UDMA_CONTROL, &tmp16);
        tmp16 &= ~(1 << drive->dn);
        pci_write_config_word(dev, ATIIXP_IDE_UDMA_CONTROL, tmp16);
 
-       spin_unlock_irqrestore(&ide_lock, flags);
+       spin_unlock_irqrestore(&atiixp_lock, flags);
 
        return __ide_dma_host_off(drive);
 }
@@ -152,7 +154,7 @@ static void atiixp_tuneproc(ide_drive_t *drive, u8 pio)
        u32 pio_timing_data;
        u16 pio_mode_data;
 
-       spin_lock_irqsave(&ide_lock, flags);
+       spin_lock_irqsave(&atiixp_lock, flags);
 
        pci_read_config_word(dev, ATIIXP_IDE_PIO_MODE, &pio_mode_data);
        pio_mode_data &= ~(0x07 << (drive->dn * 4));
@@ -165,7 +167,7 @@ static void atiixp_tuneproc(ide_drive_t *drive, u8 pio)
                 (pio_timing[pio].command_width << (timing_shift + 4));
        pci_write_config_dword(dev, ATIIXP_IDE_PIO_TIMING, pio_timing_data);
 
-       spin_unlock_irqrestore(&ide_lock, flags);
+       spin_unlock_irqrestore(&atiixp_lock, flags);
 }
 
 /**
@@ -189,7 +191,7 @@ static int atiixp_speedproc(ide_drive_t *drive, u8 xferspeed)
 
        speed = ide_rate_filter(atiixp_ratemask(drive), xferspeed);
 
-       spin_lock_irqsave(&ide_lock, flags);
+       spin_lock_irqsave(&atiixp_lock, flags);
 
        save_mdma_mode[drive->dn] = 0;
        if (speed >= XFER_UDMA_0) {
@@ -208,7 +210,7 @@ static int atiixp_speedproc(ide_drive_t *drive, u8 xferspeed)
                }
        }
 
-       spin_unlock_irqrestore(&ide_lock, flags);
+       spin_unlock_irqrestore(&atiixp_lock, flags);
 
        if (speed >= XFER_SW_DMA_0)
                pio = atiixp_dma_2_pio(speed);
index 61f1a9665a7f911729ae17faad0492a5b89309fe..381cc6f101ce073c9307469122230da9870fbab4 100644 (file)
@@ -123,7 +123,7 @@ struct via82cxxx_dev
 static void via_set_speed(ide_hwif_t *hwif, u8 dn, struct ide_timing *timing)
 {
        struct pci_dev *dev = hwif->pci_dev;
-       struct via82cxxx_dev *vdev = ide_get_hwifdata(hwif);
+       struct via82cxxx_dev *vdev = pci_get_drvdata(hwif->pci_dev);
        u8 t;
 
        if (~vdev->via_config->flags & VIA_BAD_AST) {
@@ -162,7 +162,7 @@ static void via_set_speed(ide_hwif_t *hwif, u8 dn, struct ide_timing *timing)
 static int via_set_drive(ide_drive_t *drive, u8 speed)
 {
        ide_drive_t *peer = HWIF(drive)->drives + (~drive->dn & 1);
-       struct via82cxxx_dev *vdev = ide_get_hwifdata(drive->hwif);
+       struct via82cxxx_dev *vdev = pci_get_drvdata(drive->hwif->pci_dev);
        struct ide_timing t, p;
        unsigned int T, UT;
 
@@ -225,7 +225,7 @@ static void via82cxxx_tune_drive(ide_drive_t *drive, u8 pio)
 static int via82cxxx_ide_dma_check (ide_drive_t *drive)
 {
        ide_hwif_t *hwif = HWIF(drive);
-       struct via82cxxx_dev *vdev = ide_get_hwifdata(hwif);
+       struct via82cxxx_dev *vdev = pci_get_drvdata(hwif->pci_dev);
        u16 w80 = hwif->udma_four;
 
        u16 speed = ide_find_best_mode(drive,
@@ -262,6 +262,53 @@ static struct via_isa_bridge *via_config_find(struct pci_dev **isa)
        return via_config;
 }
 
+/*
+ * Check and handle 80-wire cable presence
+ */
+static void __devinit via_cable_detect(struct via82cxxx_dev *vdev, u32 u)
+{
+       int i;
+
+       switch (vdev->via_config->flags & VIA_UDMA) {
+               case VIA_UDMA_66:
+                       for (i = 24; i >= 0; i -= 8)
+                               if (((u >> (i & 16)) & 8) &&
+                                   ((u >> i) & 0x20) &&
+                                    (((u >> i) & 7) < 2)) {
+                                       /*
+                                        * 2x PCI clock and
+                                        * UDMA w/ < 3T/cycle
+                                        */
+                                       vdev->via_80w |= (1 << (1 - (i >> 4)));
+                               }
+                       break;
+
+               case VIA_UDMA_100:
+                       for (i = 24; i >= 0; i -= 8)
+                               if (((u >> i) & 0x10) ||
+                                   (((u >> i) & 0x20) &&
+                                    (((u >> i) & 7) < 4))) {
+                                       /* BIOS 80-wire bit or
+                                        * UDMA w/ < 60ns/cycle
+                                        */
+                                       vdev->via_80w |= (1 << (1 - (i >> 4)));
+                               }
+                       break;
+
+               case VIA_UDMA_133:
+                       for (i = 24; i >= 0; i -= 8)
+                               if (((u >> i) & 0x10) ||
+                                   (((u >> i) & 0x20) &&
+                                    (((u >> i) & 7) < 6))) {
+                                       /* BIOS 80-wire bit or
+                                        * UDMA w/ < 60ns/cycle
+                                        */
+                                       vdev->via_80w |= (1 << (1 - (i >> 4)));
+                               }
+                       break;
+       }
+}
+
 /**
  *     init_chipset_via82cxxx  -       initialization handler
  *     @dev: PCI device
@@ -274,14 +321,22 @@ static struct via_isa_bridge *via_config_find(struct pci_dev **isa)
 static unsigned int __devinit init_chipset_via82cxxx(struct pci_dev *dev, const char *name)
 {
        struct pci_dev *isa = NULL;
+       struct via82cxxx_dev *vdev;
        struct via_isa_bridge *via_config;
        u8 t, v;
-       unsigned int u;
+       u32 u;
+
+       vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
+       if (!vdev) {
+               printk(KERN_ERR "VP_IDE: out of memory :(\n");
+               return -ENOMEM;
+       }
+       pci_set_drvdata(dev, vdev);
 
        /*
         * Find the ISA bridge to see how good the IDE is.
         */
-       via_config = via_config_find(&isa);
+       vdev->via_config = via_config = via_config_find(&isa);
 
        /* We checked this earlier so if it fails here deeep badness
           is involved */
@@ -289,16 +344,17 @@ static unsigned int __devinit init_chipset_via82cxxx(struct pci_dev *dev, const
        BUG_ON(!via_config->id);
 
        /*
-        * Setup or disable Clk66 if appropriate
+        * Detect cable and configure Clk66
         */
+       pci_read_config_dword(dev, VIA_UDMA_TIMING, &u);
+
+       via_cable_detect(vdev, u);
 
        if ((via_config->flags & VIA_UDMA) == VIA_UDMA_66) {
                /* Enable Clk66 */
-               pci_read_config_dword(dev, VIA_UDMA_TIMING, &u);
                pci_write_config_dword(dev, VIA_UDMA_TIMING, u|0x80008);
        } else if (via_config->flags & VIA_BAD_CLK66) {
                /* Would cause trouble on 596a and 686 */
-               pci_read_config_dword(dev, VIA_UDMA_TIMING, &u);
                pci_write_config_dword(dev, VIA_UDMA_TIMING, u & ~0x80008);
        }
 
@@ -367,75 +423,11 @@ static unsigned int __devinit init_chipset_via82cxxx(struct pci_dev *dev, const
        return 0;
 }
 
-/*
- * Check and handle 80-wire cable presence
- */
-static void __devinit via_cable_detect(struct pci_dev *dev, struct via82cxxx_dev *vdev)
-{
-       unsigned int u;
-       int i;
-       pci_read_config_dword(dev, VIA_UDMA_TIMING, &u);
-
-       switch (vdev->via_config->flags & VIA_UDMA) {
-
-               case VIA_UDMA_66:
-                       for (i = 24; i >= 0; i -= 8)
-                               if (((u >> (i & 16)) & 8) &&
-                                   ((u >> i) & 0x20) &&
-                                    (((u >> i) & 7) < 2)) {
-                                       /*
-                                        * 2x PCI clock and
-                                        * UDMA w/ < 3T/cycle
-                                        */
-                                       vdev->via_80w |= (1 << (1 - (i >> 4)));
-                               }
-                       break;
-
-               case VIA_UDMA_100:
-                       for (i = 24; i >= 0; i -= 8)
-                               if (((u >> i) & 0x10) ||
-                                   (((u >> i) & 0x20) &&
-                                    (((u >> i) & 7) < 4))) {
-                                       /* BIOS 80-wire bit or
-                                        * UDMA w/ < 60ns/cycle
-                                        */
-                                       vdev->via_80w |= (1 << (1 - (i >> 4)));
-                               }
-                       break;
-
-               case VIA_UDMA_133:
-                       for (i = 24; i >= 0; i -= 8)
-                               if (((u >> i) & 0x10) ||
-                                   (((u >> i) & 0x20) &&
-                                    (((u >> i) & 7) < 6))) {
-                                       /* BIOS 80-wire bit or
-                                        * UDMA w/ < 60ns/cycle
-                                        */
-                                       vdev->via_80w |= (1 << (1 - (i >> 4)));
-                               }
-                       break;
-
-       }
-}
-
 static void __devinit init_hwif_via82cxxx(ide_hwif_t *hwif)
 {
-       struct via82cxxx_dev *vdev = kmalloc(sizeof(struct via82cxxx_dev),
-               GFP_KERNEL);
-       struct pci_dev *isa = NULL;
+       struct via82cxxx_dev *vdev = pci_get_drvdata(hwif->pci_dev);
        int i;
 
-       if (vdev == NULL) {
-               printk(KERN_ERR "VP_IDE: out of memory :(\n");
-               return;
-       }
-
-       memset(vdev, 0, sizeof(struct via82cxxx_dev));
-       ide_set_hwifdata(hwif, vdev);
-
-       vdev->via_config = via_config_find(&isa);
-       via_cable_detect(hwif->pci_dev, vdev);
-
        hwif->autodma = 0;
 
        hwif->tuneproc = &via82cxxx_tune_drive;
index 533193d4e5dffa177f7747f69d33456c2cb7e764..9e0ab048c878eba5b07d57501762f83c3f1f2356 100644 (file)
@@ -1088,10 +1088,21 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
                *sin = iw_event->local_addr;
                sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr;
                *sin = iw_event->remote_addr;
-               if (iw_event->status)
-                       event.event = RDMA_CM_EVENT_REJECTED;
-               else
+               switch (iw_event->status) {
+               case 0:
                        event.event = RDMA_CM_EVENT_ESTABLISHED;
+                       break;
+               case -ECONNRESET:
+               case -ECONNREFUSED:
+                       event.event = RDMA_CM_EVENT_REJECTED;
+                       break;
+               case -ETIMEDOUT:
+                       event.event = RDMA_CM_EVENT_UNREACHABLE;
+                       break;
+               default:
+                       event.event = RDMA_CM_EVENT_CONNECT_ERROR;
+                       break;
+               }
                break;
        case IW_CM_EVENT_ESTABLISHED:
                event.event = RDMA_CM_EVENT_ESTABLISHED;
index 81a5cdc5733aa4f5372d2fbaa173489f132cd1c9..e2e8d329b44389cd04230c55853db3f55f75a07b 100644 (file)
@@ -209,10 +209,21 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,
        if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) {
                if (!ctx->backlog) {
                        ret = -EDQUOT;
+                       kfree(uevent);
                        goto out;
                }
                ctx->backlog--;
+       } else if (!ctx->uid) {
+               /*
+                * We ignore events for new connections until userspace has set
+                * their context.  This can only happen if an error occurs on a
+                * new connection before the user accepts it.  This is okay,
+                * since the accept will just fail later.
+                */
+               kfree(uevent);
+               goto out;
        }
+
        list_add_tail(&uevent->list, &ctx->file->event_list);
        wake_up_interruptible(&ctx->file->poll_wait);
 out:
index e1b618c5f685079cd1c9e8245225ee3993372902..b7be950ab47c3a700737feae370d8bd09a2d2b69 100644 (file)
@@ -50,7 +50,7 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
                                              ib_device);
        struct hipz_query_hca *rblock;
 
-       rblock = ehca_alloc_fw_ctrlblock();
+       rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
        if (!rblock) {
                ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
                return -ENOMEM;
@@ -110,7 +110,7 @@ int ehca_query_port(struct ib_device *ibdev,
                                              ib_device);
        struct hipz_query_port *rblock;
 
-       rblock = ehca_alloc_fw_ctrlblock();
+       rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
        if (!rblock) {
                ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
                return -ENOMEM;
@@ -179,7 +179,7 @@ int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
                return -EINVAL;
        }
 
-       rblock = ehca_alloc_fw_ctrlblock();
+       rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
        if (!rblock) {
                ehca_err(&shca->ib_device,  "Can't allocate rblock memory.");
                return -ENOMEM;
@@ -212,7 +212,7 @@ int ehca_query_gid(struct ib_device *ibdev, u8 port,
                return -EINVAL;
        }
 
-       rblock = ehca_alloc_fw_ctrlblock();
+       rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
        if (!rblock) {
                ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
                return -ENOMEM;
index c3ea746e9045669852266ecb17baa2b7208c8c1c..e7209afb4250c1bc7261513b897e606de53e4a12 100644 (file)
@@ -138,7 +138,7 @@ int ehca_error_data(struct ehca_shca *shca, void *data,
        u64 *rblock;
        unsigned long block_count;
 
-       rblock = ehca_alloc_fw_ctrlblock();
+       rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
        if (!rblock) {
                ehca_err(&shca->ib_device, "Cannot allocate rblock memory.");
                ret = -ENOMEM;
index 3720e3032cceed84e22a514280e36cbbcc030e62..cd7789f0d08ecbb99a1a9da168b58d4d78182fab 100644 (file)
@@ -180,10 +180,10 @@ int ehca_mmap_register(u64 physical,void **mapped,
 int ehca_munmap(unsigned long addr, size_t len);
 
 #ifdef CONFIG_PPC_64K_PAGES
-void *ehca_alloc_fw_ctrlblock(void);
+void *ehca_alloc_fw_ctrlblock(gfp_t flags);
 void ehca_free_fw_ctrlblock(void *ptr);
 #else
-#define ehca_alloc_fw_ctrlblock() ((void *) get_zeroed_page(GFP_KERNEL))
+#define ehca_alloc_fw_ctrlblock(flags) ((void *) get_zeroed_page(flags))
 #define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr))
 #endif
 
index cc47e4c13a180897fa3aee03a589d197b48367db..6574fbbaead5e8faf0f65d7e98ca3c75c4bede05 100644 (file)
@@ -106,9 +106,9 @@ static struct timer_list poll_eqs_timer;
 #ifdef CONFIG_PPC_64K_PAGES
 static struct kmem_cache *ctblk_cache = NULL;
 
-void *ehca_alloc_fw_ctrlblock(void)
+void *ehca_alloc_fw_ctrlblock(gfp_t flags)
 {
-       void *ret = kmem_cache_zalloc(ctblk_cache, GFP_KERNEL);
+       void *ret = kmem_cache_zalloc(ctblk_cache, flags);
        if (!ret)
                ehca_gen_err("Out of memory for ctblk");
        return ret;
@@ -206,7 +206,7 @@ int ehca_sense_attributes(struct ehca_shca *shca)
        u64 h_ret;
        struct hipz_query_hca *rblock;
 
-       rblock = ehca_alloc_fw_ctrlblock();
+       rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
        if (!rblock) {
                ehca_gen_err("Cannot allocate rblock memory.");
                return -ENOMEM;
@@ -258,7 +258,7 @@ static int init_node_guid(struct ehca_shca *shca)
        int ret = 0;
        struct hipz_query_hca *rblock;
 
-       rblock = ehca_alloc_fw_ctrlblock();
+       rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
        if (!rblock) {
                ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
                return -ENOMEM;
@@ -469,7 +469,7 @@ static ssize_t  ehca_show_##name(struct device *dev,                       \
                                                                           \
        shca = dev->driver_data;                                           \
                                                                           \
-       rblock = ehca_alloc_fw_ctrlblock();                                \
+       rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);                      \
        if (!rblock) {                                                     \
                dev_err(dev, "Can't allocate rblock memory.");             \
                return 0;                                                  \
index 0a5e2214cc5f72b30ed36d0e0e95384fea4d6b0f..cfb362a1029caa979394fed9bc8fc54be60812ca 100644 (file)
@@ -1013,7 +1013,7 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca,
        u32 i;
        u64 *kpage;
 
-       kpage = ehca_alloc_fw_ctrlblock();
+       kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
        if (!kpage) {
                ehca_err(&shca->ib_device, "kpage alloc failed");
                ret = -ENOMEM;
@@ -1124,7 +1124,7 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
        ehca_mrmw_map_acl(acl, &hipz_acl);
        ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl);
 
-       kpage = ehca_alloc_fw_ctrlblock();
+       kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
        if (!kpage) {
                ehca_err(&shca->ib_device, "kpage alloc failed");
                ret = -ENOMEM;
index c6c9cef203e3361fb2868222cb4a405487b84087..34b85556d01e0d55b8524f62ac1f65aef2be63e5 100644 (file)
@@ -807,7 +807,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
        unsigned long spl_flags = 0;
 
        /* do query_qp to obtain current attr values */
-       mqpcb = ehca_alloc_fw_ctrlblock();
+       mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
        if (!mqpcb) {
                ehca_err(ibqp->device, "Could not get zeroed page for mqpcb "
                         "ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num);
@@ -1273,7 +1273,7 @@ int ehca_query_qp(struct ib_qp *qp,
                return -EINVAL;
        }
 
-       qpcb = ehca_alloc_fw_ctrlblock();
+       qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
        if (!qpcb) {
                ehca_err(qp->device,"Out of memory for qpcb "
                         "ehca_qp=%p qp_num=%x", my_qp, qp->qp_num);
index 283d50b76c3ddceb274d8445381d5ba569b28884..1159c8a0f2c5211d3b9b6f9f2a2b59ea8de6872b 100644 (file)
@@ -54,6 +54,10 @@ enum {
        MTHCA_CQ_ENTRY_SIZE = 0x20
 };
 
+enum {
+       MTHCA_ATOMIC_BYTE_LEN = 8
+};
+
 /*
  * Must be packed because start is 64 bits but only aligned to 32 bits.
  */
@@ -599,11 +603,11 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
                        break;
                case MTHCA_OPCODE_ATOMIC_CS:
                        entry->opcode    = IB_WC_COMP_SWAP;
-                       entry->byte_len  = be32_to_cpu(cqe->byte_cnt);
+                       entry->byte_len  = MTHCA_ATOMIC_BYTE_LEN;
                        break;
                case MTHCA_OPCODE_ATOMIC_FA:
                        entry->opcode    = IB_WC_FETCH_ADD;
-                       entry->byte_len  = be32_to_cpu(cqe->byte_cnt);
+                       entry->byte_len  = MTHCA_ATOMIC_BYTE_LEN;
                        break;
                case MTHCA_OPCODE_BIND_MW:
                        entry->opcode    = IB_WC_BIND_MW;
index 15cc2f6eb4754fa83a497fec55f381371c466000..6b19645d946c31af877c28d5c8d8c9ddfa4ec94f 100644 (file)
@@ -232,7 +232,7 @@ void *mthca_table_find(struct mthca_icm_table *table, int obj)
 
        list_for_each_entry(chunk, &icm->chunk_list, list) {
                for (i = 0; i < chunk->npages; ++i) {
-                       if (chunk->mem[i].length >= offset) {
+                       if (chunk->mem[i].length > offset) {
                                page = chunk->mem[i].page;
                                goto out;
                        }
index d844a2569b471f1dcfc5b63ddacbc31b45db22df..5f5214c0337d18383ad6f2272b99fbd47f68a69b 100644 (file)
@@ -429,13 +429,18 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m
 {
        struct mthca_dev *dev = to_mdev(ibqp->device);
        struct mthca_qp *qp = to_mqp(ibqp);
-       int err;
-       struct mthca_mailbox *mailbox;
+       int err = 0;
+       struct mthca_mailbox *mailbox = NULL;
        struct mthca_qp_param *qp_param;
        struct mthca_qp_context *context;
        int mthca_state;
        u8 status;
 
+       if (qp->state == IB_QPS_RESET) {
+               qp_attr->qp_state = IB_QPS_RESET;
+               goto done;
+       }
+
        mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
        if (IS_ERR(mailbox))
                return PTR_ERR(mailbox);
@@ -454,7 +459,6 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m
        mthca_state = be32_to_cpu(context->flags) >> 28;
 
        qp_attr->qp_state            = to_ib_qp_state(mthca_state);
-       qp_attr->cur_qp_state        = qp_attr->qp_state;
        qp_attr->path_mtu            = context->mtu_msgmax >> 5;
        qp_attr->path_mig_state      =
                to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3);
@@ -464,11 +468,6 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m
        qp_attr->dest_qp_num         = be32_to_cpu(context->remote_qpn) & 0xffffff;
        qp_attr->qp_access_flags     =
                to_ib_qp_access_flags(be32_to_cpu(context->params2));
-       qp_attr->cap.max_send_wr     = qp->sq.max;
-       qp_attr->cap.max_recv_wr     = qp->rq.max;
-       qp_attr->cap.max_send_sge    = qp->sq.max_gs;
-       qp_attr->cap.max_recv_sge    = qp->rq.max_gs;
-       qp_attr->cap.max_inline_data = qp->max_inline_data;
 
        if (qp->transport == RC || qp->transport == UC) {
                to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
@@ -495,7 +494,16 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m
        qp_attr->retry_cnt          = (be32_to_cpu(context->params1) >> 16) & 0x7;
        qp_attr->rnr_retry          = context->pri_path.rnr_retry >> 5;
        qp_attr->alt_timeout        = context->alt_path.ackto >> 3;
-       qp_init_attr->cap           = qp_attr->cap;
+
+done:
+       qp_attr->cur_qp_state        = qp_attr->qp_state;
+       qp_attr->cap.max_send_wr     = qp->sq.max;
+       qp_attr->cap.max_recv_wr     = qp->rq.max;
+       qp_attr->cap.max_send_sge    = qp->sq.max_gs;
+       qp_attr->cap.max_recv_sge    = qp->rq.max_gs;
+       qp_attr->cap.max_inline_data = qp->max_inline_data;
+
+       qp_init_attr->cap            = qp_attr->cap;
 
 out:
        mthca_free_mailbox(dev, mailbox);
index 9b2041e25d593978c183cedd04999368dfeb140b..dd221eda3ea63c9a648146d38d5a9269172f9485 100644 (file)
@@ -177,7 +177,7 @@ iscsi_iser_mtask_xmit(struct iscsi_conn *conn,
         * - if yes, the mtask is recycled at iscsi_complete_pdu
         * - if no,  the mtask is recycled at iser_snd_completion
         */
-       if (error && error != -EAGAIN)
+       if (error && error != -ENOBUFS)
                iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
 
        return error;
@@ -241,7 +241,7 @@ iscsi_iser_ctask_xmit(struct iscsi_conn *conn,
                error = iscsi_iser_ctask_xmit_unsol_data(conn, ctask);
 
  iscsi_iser_ctask_xmit_exit:
-       if (error && error != -EAGAIN)
+       if (error && error != -ENOBUFS)
                iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
        return error;
 }
index e73c87b9be43cad3b1c0985c30040da433012912..0a7d1ab60e6d81832e70e037255dd0cbb27efa1b 100644 (file)
@@ -304,18 +304,14 @@ int iser_conn_set_full_featured_mode(struct iscsi_conn *conn)
 static int
 iser_check_xmit(struct iscsi_conn *conn, void *task)
 {
-       int rc = 0;
        struct iscsi_iser_conn *iser_conn = conn->dd_data;
 
-       write_lock_bh(conn->recv_lock);
        if (atomic_read(&iser_conn->ib_conn->post_send_buf_count) ==
            ISER_QP_MAX_REQ_DTOS) {
-               iser_dbg("%ld can't xmit task %p, suspending tx\n",jiffies,task);
-               set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
-               rc = -EAGAIN;
+               iser_dbg("%ld can't xmit task %p\n",jiffies,task);
+               return -ENOBUFS;
        }
-       write_unlock_bh(conn->recv_lock);
-       return rc;
+       return 0;
 }
 
 
@@ -340,7 +336,7 @@ int iser_send_command(struct iscsi_conn     *conn,
                return -EPERM;
        }
        if (iser_check_xmit(conn, ctask))
-               return -EAGAIN;
+               return -ENOBUFS;
 
        edtl = ntohl(hdr->data_length);
 
@@ -426,7 +422,7 @@ int iser_send_data_out(struct iscsi_conn     *conn,
        }
 
        if (iser_check_xmit(conn, ctask))
-               return -EAGAIN;
+               return -ENOBUFS;
 
        itt = ntohl(hdr->itt);
        data_seg_len = ntoh24(hdr->dlength);
@@ -498,7 +494,7 @@ int iser_send_control(struct iscsi_conn *conn,
        }
 
        if (iser_check_xmit(conn,mtask))
-               return -EAGAIN;
+               return -ENOBUFS;
 
        /* build the tx desc regd header and add it to the tx desc dto */
        mdesc->type = ISCSI_TX_CONTROL;
@@ -605,6 +601,7 @@ void iser_snd_completion(struct iser_desc *tx_desc)
        struct iscsi_iser_conn *iser_conn = ib_conn->iser_conn;
        struct iscsi_conn      *conn = iser_conn->iscsi_conn;
        struct iscsi_mgmt_task *mtask;
+       int resume_tx = 0;
 
        iser_dbg("Initiator, Data sent dto=0x%p\n", dto);
 
@@ -613,15 +610,16 @@ void iser_snd_completion(struct iser_desc *tx_desc)
        if (tx_desc->type == ISCSI_TX_DATAOUT)
                kmem_cache_free(ig.desc_cache, tx_desc);
 
+       if (atomic_read(&iser_conn->ib_conn->post_send_buf_count) ==
+           ISER_QP_MAX_REQ_DTOS)
+               resume_tx = 1;
+
        atomic_dec(&ib_conn->post_send_buf_count);
 
-       write_lock(conn->recv_lock);
-       if (conn->suspend_tx) {
+       if (resume_tx) {
                iser_dbg("%ld resuming tx\n",jiffies);
-               clear_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
                scsi_queue_work(conn->session->host, &conn->xmitwork);
        }
-       write_unlock(conn->recv_lock);
 
        if (tx_desc->type == ISCSI_TX_CONTROL) {
                /* this arithmetic is legal by libiscsi dd_data allocation */
index 100df6f38d920759291e5b442f6f0995061e4f60..91e0c75aca8f40b1402eb29c8c94140979fdcca5 100644 (file)
@@ -52,6 +52,8 @@
 #define KVM_MAX_VCPUS 1
 #define KVM_MEMORY_SLOTS 4
 #define KVM_NUM_MMU_PAGES 256
+#define KVM_MIN_FREE_MMU_PAGES 5
+#define KVM_REFILL_PAGES 25
 
 #define FX_IMAGE_SIZE 512
 #define FX_IMAGE_ALIGN 16
@@ -89,14 +91,54 @@ typedef unsigned long  hva_t;
 typedef u64            hpa_t;
 typedef unsigned long  hfn_t;
 
+#define NR_PTE_CHAIN_ENTRIES 5
+
+struct kvm_pte_chain {
+       u64 *parent_ptes[NR_PTE_CHAIN_ENTRIES];
+       struct hlist_node link;
+};
+
+/*
+ * kvm_mmu_page_role, below, is defined as:
+ *
+ *   bits 0:3 - total guest paging levels (2-4, or zero for real mode)
+ *   bits 4:7 - page table level for this shadow (1-4)
+ *   bits 8:9 - page table quadrant for 2-level guests
+ *   bit   16 - "metaphysical" - gfn is not a real page (huge page/real mode)
+ */
+union kvm_mmu_page_role {
+       unsigned word;
+       struct {
+               unsigned glevels : 4;
+               unsigned level : 4;
+               unsigned quadrant : 2;
+               unsigned pad_for_nice_hex_output : 6;
+               unsigned metaphysical : 1;
+       };
+};
+
 struct kvm_mmu_page {
        struct list_head link;
+       struct hlist_node hash_link;
+
+       /*
+        * The following two entries are used to key the shadow page in the
+        * hash table.
+        */
+       gfn_t gfn;
+       union kvm_mmu_page_role role;
+
        hpa_t page_hpa;
        unsigned long slot_bitmap; /* One bit set per slot which has memory
                                    * in this shadow page.
                                    */
        int global;              /* Set if all ptes in this page are global */
-       u64 *parent_pte;
+       int multimapped;         /* More than one parent_pte? */
+       int root_count;          /* Currently serving as active root */
+       union {
+               u64 *parent_pte;               /* !multimapped */
+               struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */
+       };
 };
 
 struct vmcs {
@@ -117,14 +159,26 @@ struct kvm_vcpu;
 struct kvm_mmu {
        void (*new_cr3)(struct kvm_vcpu *vcpu);
        int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err);
-       void (*inval_page)(struct kvm_vcpu *vcpu, gva_t gva);
        void (*free)(struct kvm_vcpu *vcpu);
        gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva);
        hpa_t root_hpa;
        int root_level;
        int shadow_root_level;
+
+       u64 *pae_root;
+};
+
+#define KVM_NR_MEM_OBJS 20
+
+struct kvm_mmu_memory_cache {
+       int nobjs;
+       void *objects[KVM_NR_MEM_OBJS];
 };
 
+/*
+ * We don't want allocation failures within the mmu code, so we preallocate
+ * enough memory for a single page fault in a kvm_mmu_memory_cache (above).
+ */
 struct kvm_guest_debug {
        int enabled;
        unsigned long bp[4];
@@ -173,6 +227,7 @@ struct kvm_vcpu {
        struct mutex mutex;
        int   cpu;
        int   launched;
+       int interrupt_window_open;
        unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
 #define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
        unsigned long irq_pending[NR_IRQ_WORDS];
@@ -184,6 +239,7 @@ struct kvm_vcpu {
        unsigned long cr3;
        unsigned long cr4;
        unsigned long cr8;
+       u64 pdptrs[4]; /* pae */
        u64 shadow_efer;
        u64 apic_base;
        int nmsrs;
@@ -194,6 +250,12 @@ struct kvm_vcpu {
        struct kvm_mmu_page page_header_buf[KVM_NUM_MMU_PAGES];
        struct kvm_mmu mmu;
 
+       struct kvm_mmu_memory_cache mmu_pte_chain_cache;
+       struct kvm_mmu_memory_cache mmu_rmap_desc_cache;
+
+       gfn_t last_pt_write_gfn;
+       int   last_pt_write_count;
+
        struct kvm_guest_debug guest_debug;
 
        char fx_buf[FX_BUF_SIZE];
@@ -231,10 +293,16 @@ struct kvm {
        spinlock_t lock; /* protects everything except vcpus */
        int nmemslots;
        struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS];
+       /*
+        * Hash table of struct kvm_mmu_page (mmu_page_hash, below).
+        */
        struct list_head active_mmu_pages;
+       int n_free_mmu_pages;
+       struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
        struct kvm_vcpu vcpus[KVM_MAX_VCPUS];
        int memory_config_version;
        int busy;
+       unsigned long rmap_overflow;
 };
 
 struct kvm_stat {
@@ -247,6 +315,9 @@ struct kvm_stat {
        u32 io_exits;
        u32 mmio_exits;
        u32 signal_exits;
+       u32 irq_window_exits;
+       u32 halt_exits;
+       u32 request_irq_exits;
        u32 irq_exits;
 };
 
@@ -279,6 +350,7 @@ struct kvm_arch_ops {
        void (*set_segment)(struct kvm_vcpu *vcpu,
                            struct kvm_segment *var, int seg);
        void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
+       void (*decache_cr0_cr4_guest_bits)(struct kvm_vcpu *vcpu);
        void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
        void (*set_cr0_no_modeswitch)(struct kvm_vcpu *vcpu,
                                      unsigned long cr0);
@@ -323,7 +395,7 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu);
 int kvm_mmu_setup(struct kvm_vcpu *vcpu);
 
 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
-void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
+void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot);
 
 hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa);
 #define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
@@ -396,6 +468,19 @@ int kvm_write_guest(struct kvm_vcpu *vcpu,
 
 unsigned long segment_base(u16 selector);
 
+void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes);
+void kvm_mmu_post_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes);
+int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
+void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
+
+static inline int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
+                                    u32 error_code)
+{
+       if (unlikely(vcpu->kvm->n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES))
+               kvm_mmu_free_some_pages(vcpu);
+       return vcpu->mmu.page_fault(vcpu, gva, error_code);
+}
+
 static inline struct page *_gfn_to_page(struct kvm *kvm, gfn_t gfn)
 {
        struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
@@ -541,19 +626,4 @@ static inline u32 get_rdx_init_val(void)
 #define TSS_REDIRECTION_SIZE (256 / 8)
 #define RMODE_TSS_SIZE (TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1)
 
-#ifdef CONFIG_X86_64
-
-/*
- * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64.  Therefore
- * we need to allocate shadow page tables in the first 4GB of memory, which
- * happens to fit the DMA32 zone.
- */
-#define GFP_KVM_MMU (GFP_KERNEL | __GFP_DMA32)
-
-#else
-
-#define GFP_KVM_MMU GFP_KERNEL
-
-#endif
-
 #endif
index ce7fe640f18dfc29b909af99bc80ac6aac1a3290..67c1154960f0a308f72e41c7a5e2f54f2afc2894 100644 (file)
@@ -58,6 +58,9 @@ static struct kvm_stats_debugfs_item {
        { "io_exits", &kvm_stat.io_exits },
        { "mmio_exits", &kvm_stat.mmio_exits },
        { "signal_exits", &kvm_stat.signal_exits },
+       { "irq_window", &kvm_stat.irq_window_exits },
+       { "halt_exits", &kvm_stat.halt_exits },
+       { "request_irq", &kvm_stat.request_irq_exits },
        { "irq_exits", &kvm_stat.irq_exits },
        { 0, 0 }
 };
@@ -227,6 +230,7 @@ static int kvm_dev_open(struct inode *inode, struct file *filp)
                struct kvm_vcpu *vcpu = &kvm->vcpus[i];
 
                mutex_init(&vcpu->mutex);
+               vcpu->kvm = kvm;
                vcpu->mmu.root_hpa = INVALID_PAGE;
                INIT_LIST_HEAD(&vcpu->free_pages);
        }
@@ -268,8 +272,8 @@ static void kvm_free_physmem(struct kvm *kvm)
 
 static void kvm_free_vcpu(struct kvm_vcpu *vcpu)
 {
-       kvm_arch_ops->vcpu_free(vcpu);
        kvm_mmu_destroy(vcpu);
+       kvm_arch_ops->vcpu_free(vcpu);
 }
 
 static void kvm_free_vcpus(struct kvm *kvm)
@@ -295,14 +299,17 @@ static void inject_gp(struct kvm_vcpu *vcpu)
        kvm_arch_ops->inject_gp(vcpu, 0);
 }
 
-static int pdptrs_have_reserved_bits_set(struct kvm_vcpu *vcpu,
-                                        unsigned long cr3)
+/*
+ * Load the pae pdptrs.  Return true if they are all valid.
+ */
+static int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
 {
        gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
-       unsigned offset = (cr3 & (PAGE_SIZE-1)) >> 5;
+       unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
        int i;
        u64 pdpte;
        u64 *pdpt;
+       int ret;
        struct kvm_memory_slot *memslot;
 
        spin_lock(&vcpu->kvm->lock);
@@ -310,16 +317,23 @@ static int pdptrs_have_reserved_bits_set(struct kvm_vcpu *vcpu,
        /* FIXME: !memslot - emulate? 0xff? */
        pdpt = kmap_atomic(gfn_to_page(memslot, pdpt_gfn), KM_USER0);
 
+       ret = 1;
        for (i = 0; i < 4; ++i) {
                pdpte = pdpt[offset + i];
-               if ((pdpte & 1) && (pdpte & 0xfffffff0000001e6ull))
-                       break;
+               if ((pdpte & 1) && (pdpte & 0xfffffff0000001e6ull)) {
+                       ret = 0;
+                       goto out;
+               }
        }
 
+       for (i = 0; i < 4; ++i)
+               vcpu->pdptrs[i] = pdpt[offset + i];
+
+out:
        kunmap_atomic(pdpt, KM_USER0);
        spin_unlock(&vcpu->kvm->lock);
 
-       return i != 4;
+       return ret;
 }
 
 void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
@@ -365,8 +379,7 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
                        }
                } else
 #endif
-               if (is_pae(vcpu) &&
-                           pdptrs_have_reserved_bits_set(vcpu, vcpu->cr3)) {
+               if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->cr3)) {
                        printk(KERN_DEBUG "set_cr0: #GP, pdptrs "
                               "reserved bits\n");
                        inject_gp(vcpu);
@@ -387,6 +400,7 @@ EXPORT_SYMBOL_GPL(set_cr0);
 
 void lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
 {
+       kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu);
        set_cr0(vcpu, (vcpu->cr0 & ~0x0ful) | (msw & 0x0f));
 }
 EXPORT_SYMBOL_GPL(lmsw);
@@ -407,7 +421,7 @@ void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
                        return;
                }
        } else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & CR4_PAE_MASK)
-                  && pdptrs_have_reserved_bits_set(vcpu, vcpu->cr3)) {
+                  && !load_pdptrs(vcpu, vcpu->cr3)) {
                printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
                inject_gp(vcpu);
        }
@@ -439,7 +453,7 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
                        return;
                }
                if (is_paging(vcpu) && is_pae(vcpu) &&
-                   pdptrs_have_reserved_bits_set(vcpu, cr3)) {
+                   !load_pdptrs(vcpu, cr3)) {
                        printk(KERN_DEBUG "set_cr3: #GP, pdptrs "
                               "reserved bits\n");
                        inject_gp(vcpu);
@@ -449,7 +463,19 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 
        vcpu->cr3 = cr3;
        spin_lock(&vcpu->kvm->lock);
-       vcpu->mmu.new_cr3(vcpu);
+       /*
+        * Does the new cr3 value map to physical memory? (Note, we
+        * catch an invalid cr3 even in real-mode, because it would
+        * cause trouble later on when we turn on paging anyway.)
+        *
+        * A real CPU would silently accept an invalid cr3 and would
+        * attempt to use it - with largely undefined (and often hard
+        * to debug) behavior on the guest side.
+        */
+       if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT)))
+               inject_gp(vcpu);
+       else
+               vcpu->mmu.new_cr3(vcpu);
        spin_unlock(&vcpu->kvm->lock);
 }
 EXPORT_SYMBOL_GPL(set_cr3);
@@ -517,7 +543,6 @@ static int kvm_dev_ioctl_create_vcpu(struct kvm *kvm, int n)
        vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE;
 
        vcpu->cpu = -1;  /* First load will set up TR */
-       vcpu->kvm = kvm;
        r = kvm_arch_ops->vcpu_create(vcpu);
        if (r < 0)
                goto out_free_vcpus;
@@ -634,6 +659,7 @@ raced:
                                                     | __GFP_ZERO);
                        if (!new.phys_mem[i])
                                goto out_free;
+                       new.phys_mem[i]->private = 0;
                }
        }
 
@@ -688,6 +714,13 @@ out:
        return r;
 }
 
+static void do_remove_write_access(struct kvm_vcpu *vcpu, int slot)
+{
+       spin_lock(&vcpu->kvm->lock);
+       kvm_mmu_slot_remove_write_access(vcpu, slot);
+       spin_unlock(&vcpu->kvm->lock);
+}
+
 /*
  * Get (and clear) the dirty memory log for a memory slot.
  */
@@ -697,6 +730,7 @@ static int kvm_dev_ioctl_get_dirty_log(struct kvm *kvm,
        struct kvm_memory_slot *memslot;
        int r, i;
        int n;
+       int cleared;
        unsigned long any = 0;
 
        spin_lock(&kvm->lock);
@@ -727,15 +761,17 @@ static int kvm_dev_ioctl_get_dirty_log(struct kvm *kvm,
 
 
        if (any) {
-               spin_lock(&kvm->lock);
-               kvm_mmu_slot_remove_write_access(kvm, log->slot);
-               spin_unlock(&kvm->lock);
-               memset(memslot->dirty_bitmap, 0, n);
+               cleared = 0;
                for (i = 0; i < KVM_MAX_VCPUS; ++i) {
                        struct kvm_vcpu *vcpu = vcpu_load(kvm, i);
 
                        if (!vcpu)
                                continue;
+                       if (!cleared) {
+                               do_remove_write_access(vcpu, log->slot);
+                               memset(memslot->dirty_bitmap, 0, n);
+                               cleared = 1;
+                       }
                        kvm_arch_ops->tlb_flush(vcpu);
                        vcpu_put(vcpu);
                }
@@ -863,6 +899,27 @@ static int emulator_read_emulated(unsigned long addr,
        }
 }
 
+static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
+                              unsigned long val, int bytes)
+{
+       struct kvm_memory_slot *m;
+       struct page *page;
+       void *virt;
+
+       if (((gpa + bytes - 1) >> PAGE_SHIFT) != (gpa >> PAGE_SHIFT))
+               return 0;
+       m = gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT);
+       if (!m)
+               return 0;
+       page = gfn_to_page(m, gpa >> PAGE_SHIFT);
+       kvm_mmu_pre_write(vcpu, gpa, bytes);
+       virt = kmap_atomic(page, KM_USER0);
+       memcpy(virt + offset_in_page(gpa), &val, bytes);
+       kunmap_atomic(virt, KM_USER0);
+       kvm_mmu_post_write(vcpu, gpa, bytes);
+       return 1;
+}
+
 static int emulator_write_emulated(unsigned long addr,
                                   unsigned long val,
                                   unsigned int bytes,
@@ -874,6 +931,9 @@ static int emulator_write_emulated(unsigned long addr,
        if (gpa == UNMAPPED_GVA)
                return X86EMUL_PROPAGATE_FAULT;
 
+       if (emulator_write_phys(vcpu, gpa, val, bytes))
+               return X86EMUL_CONTINUE;
+
        vcpu->mmio_needed = 1;
        vcpu->mmio_phys_addr = gpa;
        vcpu->mmio_size = bytes;
@@ -898,6 +958,30 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
        return emulator_write_emulated(addr, new, bytes, ctxt);
 }
 
+#ifdef CONFIG_X86_32
+
+static int emulator_cmpxchg8b_emulated(unsigned long addr,
+                                      unsigned long old_lo,
+                                      unsigned long old_hi,
+                                      unsigned long new_lo,
+                                      unsigned long new_hi,
+                                      struct x86_emulate_ctxt *ctxt)
+{
+       static int reported;
+       int r;
+
+       if (!reported) {
+               reported = 1;
+               printk(KERN_WARNING "kvm: emulating exchange8b as write\n");
+       }
+       r = emulator_write_emulated(addr, new_lo, 4, ctxt);
+       if (r != X86EMUL_CONTINUE)
+               return r;
+       return emulator_write_emulated(addr+4, new_hi, 4, ctxt);
+}
+
+#endif
+
 static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
 {
        return kvm_arch_ops->get_segment_base(vcpu, seg);
@@ -905,18 +989,15 @@ static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
 
 int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
 {
-       spin_lock(&vcpu->kvm->lock);
-       vcpu->mmu.inval_page(vcpu, address);
-       spin_unlock(&vcpu->kvm->lock);
-       kvm_arch_ops->invlpg(vcpu, address);
        return X86EMUL_CONTINUE;
 }
 
 int emulate_clts(struct kvm_vcpu *vcpu)
 {
-       unsigned long cr0 = vcpu->cr0;
+       unsigned long cr0;
 
-       cr0 &= ~CR0_TS_MASK;
+       kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu);
+       cr0 = vcpu->cr0 & ~CR0_TS_MASK;
        kvm_arch_ops->set_cr0(vcpu, cr0);
        return X86EMUL_CONTINUE;
 }
@@ -975,6 +1056,9 @@ struct x86_emulate_ops emulate_ops = {
        .read_emulated       = emulator_read_emulated,
        .write_emulated      = emulator_write_emulated,
        .cmpxchg_emulated    = emulator_cmpxchg_emulated,
+#ifdef CONFIG_X86_32
+       .cmpxchg8b_emulated  = emulator_cmpxchg8b_emulated,
+#endif
 };
 
 int emulate_instruction(struct kvm_vcpu *vcpu,
@@ -1024,6 +1108,8 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
        }
 
        if (r) {
+               if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
+                       return EMULATE_DONE;
                if (!vcpu->mmio_needed) {
                        report_emulation_failure(&emulate_ctxt);
                        return EMULATE_FAIL;
@@ -1069,6 +1155,7 @@ void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
 
 unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
 {
+       kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu);
        switch (cr) {
        case 0:
                return vcpu->cr0;
@@ -1403,6 +1490,7 @@ static int kvm_dev_ioctl_get_sregs(struct kvm *kvm, struct kvm_sregs *sregs)
        sregs->gdt.limit = dt.limit;
        sregs->gdt.base = dt.base;
 
+       kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu);
        sregs->cr0 = vcpu->cr0;
        sregs->cr2 = vcpu->cr2;
        sregs->cr3 = vcpu->cr3;
@@ -1467,11 +1555,15 @@ static int kvm_dev_ioctl_set_sregs(struct kvm *kvm, struct kvm_sregs *sregs)
 #endif
        vcpu->apic_base = sregs->apic_base;
 
+       kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu);
+
        mmu_reset_needed |= vcpu->cr0 != sregs->cr0;
        kvm_arch_ops->set_cr0_no_modeswitch(vcpu, sregs->cr0);
 
        mmu_reset_needed |= vcpu->cr4 != sregs->cr4;
        kvm_arch_ops->set_cr4(vcpu, sregs->cr4);
+       if (!is_long_mode(vcpu) && is_pae(vcpu))
+               load_pdptrs(vcpu, vcpu->cr3);
 
        if (mmu_reset_needed)
                kvm_mmu_reset_context(vcpu);
@@ -1693,12 +1785,12 @@ static long kvm_dev_ioctl(struct file *filp,
                if (copy_from_user(&kvm_run, (void *)arg, sizeof kvm_run))
                        goto out;
                r = kvm_dev_ioctl_run(kvm, &kvm_run);
-               if (r < 0)
+               if (r < 0 &&  r != -EINTR)
                        goto out;
-               r = -EFAULT;
-               if (copy_to_user((void *)arg, &kvm_run, sizeof kvm_run))
+               if (copy_to_user((void *)arg, &kvm_run, sizeof kvm_run)) {
+                       r = -EFAULT;
                        goto out;
-               r = 0;
+               }
                break;
        }
        case KVM_GET_REGS: {
@@ -1842,6 +1934,7 @@ static long kvm_dev_ioctl(struct file *filp,
                                 num_msrs_to_save * sizeof(u32)))
                        goto out;
                r = 0;
+               break;
        }
        default:
                ;
@@ -1944,17 +2037,17 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module)
                return -EEXIST;
        }
 
-       kvm_arch_ops = ops;
-
-       if (!kvm_arch_ops->cpu_has_kvm_support()) {
+       if (!ops->cpu_has_kvm_support()) {
                printk(KERN_ERR "kvm: no hardware support\n");
                return -EOPNOTSUPP;
        }
-       if (kvm_arch_ops->disabled_by_bios()) {
+       if (ops->disabled_by_bios()) {
                printk(KERN_ERR "kvm: disabled by bios\n");
                return -EOPNOTSUPP;
        }
 
+       kvm_arch_ops = ops;
+
        r = kvm_arch_ops->hardware_setup();
        if (r < 0)
            return r;
index 790423c5f23d2dab18dcf98eba55330c93177935..c6f972914f082e9af20ca6af54d155104ef5d2ba 100644 (file)
 #include "vmx.h"
 #include "kvm.h"
 
+#undef MMU_DEBUG
+
+#undef AUDIT
+
+#ifdef AUDIT
+static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg);
+#else
+static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg) {}
+#endif
+
+#ifdef MMU_DEBUG
+
+#define pgprintk(x...) do { if (dbg) printk(x); } while (0)
+#define rmap_printk(x...) do { if (dbg) printk(x); } while (0)
+
+#else
+
 #define pgprintk(x...) do { } while (0)
+#define rmap_printk(x...) do { } while (0)
+
+#endif
+
+#if defined(MMU_DEBUG) || defined(AUDIT)
+static int dbg = 1;
+#endif
 
 #define ASSERT(x)                                                      \
        if (!(x)) {                                                     \
                       __FILE__, __LINE__, #x);                         \
        }
 
-#define PT64_ENT_PER_PAGE 512
-#define PT32_ENT_PER_PAGE 1024
+#define PT64_PT_BITS 9
+#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
+#define PT32_PT_BITS 10
+#define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS)
 
 #define PT_WRITABLE_SHIFT 1
 
 #define PT_DIRECTORY_LEVEL 2
 #define PT_PAGE_TABLE_LEVEL 1
 
+#define RMAP_EXT 4
+
+struct kvm_rmap_desc {
+       u64 *shadow_ptes[RMAP_EXT];
+       struct kvm_rmap_desc *more;
+};
+
 static int is_write_protection(struct kvm_vcpu *vcpu)
 {
        return vcpu->cr0 & CR0_WP_MASK;
@@ -150,32 +183,272 @@ static int is_io_pte(unsigned long pte)
        return pte & PT_SHADOW_IO_MARK;
 }
 
+static int is_rmap_pte(u64 pte)
+{
+       return (pte & (PT_WRITABLE_MASK | PT_PRESENT_MASK))
+               == (PT_WRITABLE_MASK | PT_PRESENT_MASK);
+}
+
+static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
+                                 size_t objsize, int min)
+{
+       void *obj;
+
+       if (cache->nobjs >= min)
+               return 0;
+       while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
+               obj = kzalloc(objsize, GFP_NOWAIT);
+               if (!obj)
+                       return -ENOMEM;
+               cache->objects[cache->nobjs++] = obj;
+       }
+       return 0;
+}
+
+static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
+{
+       while (mc->nobjs)
+               kfree(mc->objects[--mc->nobjs]);
+}
+
+static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
+{
+       int r;
+
+       r = mmu_topup_memory_cache(&vcpu->mmu_pte_chain_cache,
+                                  sizeof(struct kvm_pte_chain), 4);
+       if (r)
+               goto out;
+       r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache,
+                                  sizeof(struct kvm_rmap_desc), 1);
+out:
+       return r;
+}
+
+static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
+{
+       mmu_free_memory_cache(&vcpu->mmu_pte_chain_cache);
+       mmu_free_memory_cache(&vcpu->mmu_rmap_desc_cache);
+}
+
+static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc,
+                                   size_t size)
+{
+       void *p;
+
+       BUG_ON(!mc->nobjs);
+       p = mc->objects[--mc->nobjs];
+       memset(p, 0, size);
+       return p;
+}
+
+static void mmu_memory_cache_free(struct kvm_mmu_memory_cache *mc, void *obj)
+{
+       if (mc->nobjs < KVM_NR_MEM_OBJS)
+               mc->objects[mc->nobjs++] = obj;
+       else
+               kfree(obj);
+}
+
+static struct kvm_pte_chain *mmu_alloc_pte_chain(struct kvm_vcpu *vcpu)
+{
+       return mmu_memory_cache_alloc(&vcpu->mmu_pte_chain_cache,
+                                     sizeof(struct kvm_pte_chain));
+}
+
+static void mmu_free_pte_chain(struct kvm_vcpu *vcpu,
+                              struct kvm_pte_chain *pc)
+{
+       mmu_memory_cache_free(&vcpu->mmu_pte_chain_cache, pc);
+}
+
+static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu)
+{
+       return mmu_memory_cache_alloc(&vcpu->mmu_rmap_desc_cache,
+                                     sizeof(struct kvm_rmap_desc));
+}
+
+static void mmu_free_rmap_desc(struct kvm_vcpu *vcpu,
+                              struct kvm_rmap_desc *rd)
+{
+       mmu_memory_cache_free(&vcpu->mmu_rmap_desc_cache, rd);
+}
+
+/*
+ * Reverse mapping data structures:
+ *
+ * If page->private bit zero is zero, then page->private points to the
+ * shadow page table entry that points to page_address(page).
+ *
+ * If page->private bit zero is one, (then page->private & ~1) points
+ * to a struct kvm_rmap_desc containing more mappings.
+ */
+static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte)
+{
+       struct page *page;
+       struct kvm_rmap_desc *desc;
+       int i;
+
+       if (!is_rmap_pte(*spte))
+               return;
+       page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
+       if (!page->private) {
+               rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
+               page->private = (unsigned long)spte;
+       } else if (!(page->private & 1)) {
+               rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte);
+               desc = mmu_alloc_rmap_desc(vcpu);
+               desc->shadow_ptes[0] = (u64 *)page->private;
+               desc->shadow_ptes[1] = spte;
+               page->private = (unsigned long)desc | 1;
+       } else {
+               rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
+               desc = (struct kvm_rmap_desc *)(page->private & ~1ul);
+               while (desc->shadow_ptes[RMAP_EXT-1] && desc->more)
+                       desc = desc->more;
+               if (desc->shadow_ptes[RMAP_EXT-1]) {
+                       desc->more = mmu_alloc_rmap_desc(vcpu);
+                       desc = desc->more;
+               }
+               for (i = 0; desc->shadow_ptes[i]; ++i)
+                       ;
+               desc->shadow_ptes[i] = spte;
+       }
+}
+
+static void rmap_desc_remove_entry(struct kvm_vcpu *vcpu,
+                                  struct page *page,
+                                  struct kvm_rmap_desc *desc,
+                                  int i,
+                                  struct kvm_rmap_desc *prev_desc)
+{
+       int j;
+
+       for (j = RMAP_EXT - 1; !desc->shadow_ptes[j] && j > i; --j)
+               ;
+       desc->shadow_ptes[i] = desc->shadow_ptes[j];
+       desc->shadow_ptes[j] = 0;
+       if (j != 0)
+               return;
+       if (!prev_desc && !desc->more)
+               page->private = (unsigned long)desc->shadow_ptes[0];
+       else
+               if (prev_desc)
+                       prev_desc->more = desc->more;
+               else
+                       page->private = (unsigned long)desc->more | 1;
+       mmu_free_rmap_desc(vcpu, desc);
+}
+
+static void rmap_remove(struct kvm_vcpu *vcpu, u64 *spte)
+{
+       struct page *page;
+       struct kvm_rmap_desc *desc;
+       struct kvm_rmap_desc *prev_desc;
+       int i;
+
+       if (!is_rmap_pte(*spte))
+               return;
+       page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
+       if (!page->private) {
+               printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
+               BUG();
+       } else if (!(page->private & 1)) {
+               rmap_printk("rmap_remove:  %p %llx 1->0\n", spte, *spte);
+               if ((u64 *)page->private != spte) {
+                       printk(KERN_ERR "rmap_remove:  %p %llx 1->BUG\n",
+                              spte, *spte);
+                       BUG();
+               }
+               page->private = 0;
+       } else {
+               rmap_printk("rmap_remove:  %p %llx many->many\n", spte, *spte);
+               desc = (struct kvm_rmap_desc *)(page->private & ~1ul);
+               prev_desc = NULL;
+               while (desc) {
+                       for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i)
+                               if (desc->shadow_ptes[i] == spte) {
+                                       rmap_desc_remove_entry(vcpu, page,
+                                                              desc, i,
+                                                              prev_desc);
+                                       return;
+                               }
+                       prev_desc = desc;
+                       desc = desc->more;
+               }
+               BUG();
+       }
+}
+
+static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
+{
+       struct kvm *kvm = vcpu->kvm;
+       struct page *page;
+       struct kvm_memory_slot *slot;
+       struct kvm_rmap_desc *desc;
+       u64 *spte;
+
+       slot = gfn_to_memslot(kvm, gfn);
+       BUG_ON(!slot);
+       page = gfn_to_page(slot, gfn);
+
+       while (page->private) {
+               if (!(page->private & 1))
+                       spte = (u64 *)page->private;
+               else {
+                       desc = (struct kvm_rmap_desc *)(page->private & ~1ul);
+                       spte = desc->shadow_ptes[0];
+               }
+               BUG_ON(!spte);
+               BUG_ON((*spte & PT64_BASE_ADDR_MASK) !=
+                      page_to_pfn(page) << PAGE_SHIFT);
+               BUG_ON(!(*spte & PT_PRESENT_MASK));
+               BUG_ON(!(*spte & PT_WRITABLE_MASK));
+               rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
+               rmap_remove(vcpu, spte);
+               kvm_arch_ops->tlb_flush(vcpu);
+               *spte &= ~(u64)PT_WRITABLE_MASK;
+       }
+}
+
+static int is_empty_shadow_page(hpa_t page_hpa)
+{
+       u64 *pos;
+       u64 *end;
+
+       for (pos = __va(page_hpa), end = pos + PAGE_SIZE / sizeof(u64);
+                     pos != end; pos++)
+               if (*pos != 0) {
+                       printk(KERN_ERR "%s: %p %llx\n", __FUNCTION__,
+                              pos, *pos);
+                       return 0;
+               }
+       return 1;
+}
+
 static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, hpa_t page_hpa)
 {
        struct kvm_mmu_page *page_head = page_header(page_hpa);
 
+       ASSERT(is_empty_shadow_page(page_hpa));
        list_del(&page_head->link);
        page_head->page_hpa = page_hpa;
        list_add(&page_head->link, &vcpu->free_pages);
+       ++vcpu->kvm->n_free_mmu_pages;
 }
 
-static int is_empty_shadow_page(hpa_t page_hpa)
+static unsigned kvm_page_table_hashfn(gfn_t gfn)
 {
-       u32 *pos;
-       u32 *end;
-       for (pos = __va(page_hpa), end = pos + PAGE_SIZE / sizeof(u32);
-                     pos != end; pos++)
-               if (*pos != 0)
-                       return 0;
-       return 1;
+       return gfn;
 }
 
-static hpa_t kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, u64 *parent_pte)
+static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
+                                              u64 *parent_pte)
 {
        struct kvm_mmu_page *page;
 
        if (list_empty(&vcpu->free_pages))
-               return INVALID_PAGE;
+               return NULL;
 
        page = list_entry(vcpu->free_pages.next, struct kvm_mmu_page, link);
        list_del(&page->link);
@@ -183,8 +456,239 @@ static hpa_t kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, u64 *parent_pte)
        ASSERT(is_empty_shadow_page(page->page_hpa));
        page->slot_bitmap = 0;
        page->global = 1;
+       page->multimapped = 0;
        page->parent_pte = parent_pte;
-       return page->page_hpa;
+       --vcpu->kvm->n_free_mmu_pages;
+       return page;
+}
+
+/*
+ * Record @parent_pte as one more shadow pte that points at @page.
+ *
+ * A page with a single parent keeps it in page->parent_pte.  When a second
+ * parent arrives the page is marked multimapped: the old parent is moved
+ * into a newly allocated kvm_pte_chain linked on page->parent_ptes, and
+ * every further parent takes the first free slot of a chain that is not
+ * full (a chain is treated as full when its last slot is non-NULL).  If
+ * all chains are full a new one is allocated.
+ *
+ * A NULL @parent_pte means there is nothing to record and is ignored.
+ */
+static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu,
+                                   struct kvm_mmu_page *page, u64 *parent_pte)
+{
+       struct kvm_pte_chain *pte_chain;
+       struct hlist_node *node;
+       int i;
+
+       if (!parent_pte)
+               return;
+       if (!page->multimapped) {
+               /*
+                * Save the single parent before the list head is
+                * initialised (presumably parent_pte and parent_ptes share
+                * storage -- confirm against struct kvm_mmu_page).
+                */
+               u64 *old = page->parent_pte;
+
+               if (!old) {
+                       page->parent_pte = parent_pte;
+                       return;
+               }
+               page->multimapped = 1;
+               pte_chain = mmu_alloc_pte_chain(vcpu);
+               /* same check as the allocation at the end of the function */
+               BUG_ON(!pte_chain);
+               INIT_HLIST_HEAD(&page->parent_ptes);
+               hlist_add_head(&pte_chain->link, &page->parent_ptes);
+               pte_chain->parent_ptes[0] = old;
+       }
+       hlist_for_each_entry(pte_chain, node, &page->parent_ptes, link) {
+               /* last slot in use: this chain is full, try the next one */
+               if (pte_chain->parent_ptes[NR_PTE_CHAIN_ENTRIES-1])
+                       continue;
+               for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i)
+                       if (!pte_chain->parent_ptes[i]) {
+                               pte_chain->parent_ptes[i] = parent_pte;
+                               return;
+                       }
+       }
+       /* every existing chain is full: start a new one */
+       pte_chain = mmu_alloc_pte_chain(vcpu);
+       BUG_ON(!pte_chain);
+       hlist_add_head(&pte_chain->link, &page->parent_ptes);
+       pte_chain->parent_ptes[0] = parent_pte;
+}
+
+/*
+ * Forget that @parent_pte points at @page.
+ *
+ * Single-parent pages must have exactly @parent_pte recorded (BUG
+ * otherwise).  For multimapped pages each chain is searched; entries in a
+ * chain are kept packed from index 0, so the first NULL slot ends the
+ * search of that chain.  On a match the following entries are shifted down
+ * one slot, and a chain that becomes empty is unlinked and freed.  When the
+ * last chain goes away the page reverts to single-parent mode with no
+ * parent.  Failing to find @parent_pte anywhere is a BUG.
+ */
+static void mmu_page_remove_parent_pte(struct kvm_vcpu *vcpu,
+                                      struct kvm_mmu_page *page,
+                                      u64 *parent_pte)
+{
+       struct kvm_pte_chain *pte_chain;
+       struct hlist_node *node;
+       int i;
+
+       if (!page->multimapped) {
+               BUG_ON(page->parent_pte != parent_pte);
+               page->parent_pte = NULL;
+               return;
+       }
+       hlist_for_each_entry(pte_chain, node, &page->parent_ptes, link)
+               for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) {
+                       if (!pte_chain->parent_ptes[i])
+                               break;
+                       if (pte_chain->parent_ptes[i] != parent_pte)
+                               continue;
+                       /* close the gap: slide the remaining entries down */
+                       while (i + 1 < NR_PTE_CHAIN_ENTRIES
+                               && pte_chain->parent_ptes[i + 1]) {
+                               pte_chain->parent_ptes[i]
+                                       = pte_chain->parent_ptes[i + 1];
+                               ++i;
+                       }
+                       pte_chain->parent_ptes[i] = NULL;
+                       /* i == 0 here means slot 0 was just cleared: chain empty */
+                       if (i == 0) {
+                               hlist_del(&pte_chain->link);
+                               mmu_free_pte_chain(vcpu, pte_chain);
+                               if (hlist_empty(&page->parent_ptes)) {
+                                       page->multimapped = 0;
+                                       page->parent_pte = NULL;
+                               }
+                       }
+                       return;
+               }
+       BUG();
+}
+
+/*
+ * Find a shadow page that shadows guest frame @gfn, ignoring metaphysical
+ * pages.  Returns the first match in the hash bucket, or NULL if none.
+ */
+static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm_vcpu *vcpu,
+                                               gfn_t gfn)
+{
+       unsigned slot;
+       struct hlist_head *head;
+       struct hlist_node *pos;
+       struct kvm_mmu_page *sp;
+
+       pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn);
+       slot = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
+       head = &vcpu->kvm->mmu_page_hash[slot];
+       hlist_for_each_entry(sp, pos, head, hash_link) {
+               if (sp->gfn != gfn || sp->role.metaphysical)
+                       continue;
+               pgprintk("%s: found role %x\n",
+                        __FUNCTION__, sp->role.word);
+               return sp;
+       }
+       return NULL;
+}
+
+/*
+ * Look up, or create, the shadow page for guest frame @gfn with the role
+ * described by @level, @metaphysical and the vcpu's current guest paging
+ * depth.
+ *
+ * If a page with the same gfn and role word is already hashed, @parent_pte
+ * is added to its parents and it is returned.  Otherwise a page is taken
+ * from the free list, hashed, and -- unless it is metaphysical --
+ * rmap_write_protect() is called for @gfn.
+ *
+ * Returns NULL when no free shadow page is available.
+ */
+static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
+                                            gfn_t gfn,
+                                            gva_t gaddr,
+                                            unsigned level,
+                                            int metaphysical,
+                                            u64 *parent_pte)
+{
+       union kvm_mmu_page_role role;
+       unsigned index;
+       unsigned quadrant;
+       struct hlist_head *bucket;
+       struct kvm_mmu_page *page;
+       struct hlist_node *node;
+
+       role.word = 0;
+       role.glevels = vcpu->mmu.root_level;
+       role.level = level;
+       role.metaphysical = metaphysical;
+       /*
+        * For guests whose root level is at most PT32_ROOT_LEVEL, the role
+        * also records which quadrant of the guest table this shadow page
+        * covers, derived from @gaddr.
+        */
+       if (vcpu->mmu.root_level <= PT32_ROOT_LEVEL) {
+               quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
+               quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
+               role.quadrant = quadrant;
+       }
+       pgprintk("%s: looking gfn %lx role %x\n", __FUNCTION__,
+                gfn, role.word);
+       index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
+       bucket = &vcpu->kvm->mmu_page_hash[index];
+       hlist_for_each_entry(page, node, bucket, hash_link)
+               if (page->gfn == gfn && page->role.word == role.word) {
+                       mmu_page_add_parent_pte(vcpu, page, parent_pte);
+                       pgprintk("%s: found\n", __FUNCTION__);
+                       return page;
+               }
+       page = kvm_mmu_alloc_page(vcpu, parent_pte);
+       if (!page)
+               return page;
+       pgprintk("%s: adding gfn %lx role %x\n", __FUNCTION__, gfn, role.word);
+       page->gfn = gfn;
+       page->role = role;
+       hlist_add_head(&page->hash_link, bucket);
+       if (!metaphysical)
+               rmap_write_protect(vcpu, gfn);
+       return page;
+}
+
+/*
+ * Clear every entry of shadow page @page and drop what the entries
+ * referenced.
+ *
+ * For a last-level page (PT_PAGE_TABLE_LEVEL) each present entry is removed
+ * from the reverse map and the TLB is flushed afterwards.  For higher
+ * levels each present entry's child shadow page has this entry removed
+ * from its parent list.
+ */
+static void kvm_mmu_page_unlink_children(struct kvm_vcpu *vcpu,
+                                        struct kvm_mmu_page *page)
+{
+       unsigned i;
+       u64 *pt;
+       u64 ent;
+
+       pt = __va(page->page_hpa);
+
+       if (page->role.level == PT_PAGE_TABLE_LEVEL) {
+               for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
+                       if (pt[i] & PT_PRESENT_MASK)
+                               rmap_remove(vcpu, &pt[i]);
+                       pt[i] = 0;
+               }
+               kvm_arch_ops->tlb_flush(vcpu);
+               return;
+       }
+
+       for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
+               /* keep a copy: the slot is cleared before the child is looked up */
+               ent = pt[i];
+
+               pt[i] = 0;
+               if (!(ent & PT_PRESENT_MASK))
+                       continue;
+               ent &= PT64_BASE_ADDR_MASK;
+               mmu_page_remove_parent_pte(vcpu, page_header(ent), &pt[i]);
+       }
+}
+
+/*
+ * Drop the reference that @parent_pte holds on @page; currently this is
+ * just removal of @parent_pte from the page's parent list.
+ */
+static void kvm_mmu_put_page(struct kvm_vcpu *vcpu,
+                            struct kvm_mmu_page *page,
+                            u64 *parent_pte)
+{
+       mmu_page_remove_parent_pte(vcpu, page, parent_pte);
+}
+
+/*
+ * Tear down shadow page @page.
+ *
+ * Every parent pte still pointing at the page is unlinked and cleared, then
+ * the page's own entries are unlinked from their children.  A page with a
+ * zero root_count is removed from the hash and returned to the free list;
+ * a page still in use as a root stays hashed and is moved to the head of
+ * the VM's active_mmu_pages list instead.
+ */
+static void kvm_mmu_zap_page(struct kvm_vcpu *vcpu,
+                            struct kvm_mmu_page *page)
+{
+       u64 *parent_pte;
+
+       while (page->multimapped || page->parent_pte) {
+               if (!page->multimapped)
+                       parent_pte = page->parent_pte;
+               else {
+                       struct kvm_pte_chain *chain;
+
+                       /* take the first entry of the first chain each time round */
+                       chain = container_of(page->parent_ptes.first,
+                                            struct kvm_pte_chain, link);
+                       parent_pte = chain->parent_ptes[0];
+               }
+               BUG_ON(!parent_pte);
+               kvm_mmu_put_page(vcpu, page, parent_pte);
+               *parent_pte = 0;
+       }
+       kvm_mmu_page_unlink_children(vcpu, page);
+       if (!page->root_count) {
+               hlist_del(&page->hash_link);
+               kvm_mmu_free_page(vcpu, page->page_hpa);
+       } else {
+               list_del(&page->link);
+               list_add(&page->link, &vcpu->kvm->active_mmu_pages);
+       }
+}
+
+/*
+ * Zap every non-metaphysical shadow page that shadows guest frame @gfn.
+ * Returns 1 if at least one page was zapped, 0 otherwise.
+ */
+static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+       unsigned index;
+       struct hlist_head *bucket;
+       struct kvm_mmu_page *page;
+       struct hlist_node *node, *n;
+       int r;
+
+       pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn);
+       r = 0;
+       index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
+       bucket = &vcpu->kvm->mmu_page_hash[index];
+       /* _safe variant: zapping may unhash the current page */
+       hlist_for_each_entry_safe(page, node, n, bucket, hash_link)
+               if (page->gfn == gfn && !page->role.metaphysical) {
+                       pgprintk("%s: gfn %lx role %x\n", __FUNCTION__, gfn,
+                                page->role.word);
+                       kvm_mmu_zap_page(vcpu, page);
+                       r = 1;
+               }
+       return r;
 }
 
 static void page_header_update_slot(struct kvm *kvm, void *pte, gpa_t gpa)
@@ -225,35 +729,6 @@ hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva)
        return gpa_to_hpa(vcpu, gpa);
 }
 
-
-static void release_pt_page_64(struct kvm_vcpu *vcpu, hpa_t page_hpa,
-                              int level)
-{
-       ASSERT(vcpu);
-       ASSERT(VALID_PAGE(page_hpa));
-       ASSERT(level <= PT64_ROOT_LEVEL && level > 0);
-
-       if (level == 1)
-               memset(__va(page_hpa), 0, PAGE_SIZE);
-       else {
-               u64 *pos;
-               u64 *end;
-
-               for (pos = __va(page_hpa), end = pos + PT64_ENT_PER_PAGE;
-                    pos != end; pos++) {
-                       u64 current_ent = *pos;
-
-                       *pos = 0;
-                       if (is_present_pte(current_ent))
-                               release_pt_page_64(vcpu,
-                                                 current_ent &
-                                                 PT64_BASE_ADDR_MASK,
-                                                 level - 1);
-               }
-       }
-       kvm_mmu_free_page(vcpu, page_hpa);
-}
-
 static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
 {
 }
@@ -266,52 +741,109 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
        for (; ; level--) {
                u32 index = PT64_INDEX(v, level);
                u64 *table;
+               u64 pte;
 
                ASSERT(VALID_PAGE(table_addr));
                table = __va(table_addr);
 
                if (level == 1) {
+                       pte = table[index];
+                       if (is_present_pte(pte) && is_writeble_pte(pte))
+                               return 0;
                        mark_page_dirty(vcpu->kvm, v >> PAGE_SHIFT);
                        page_header_update_slot(vcpu->kvm, table, v);
                        table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK |
                                                                PT_USER_MASK;
+                       rmap_add(vcpu, &table[index]);
                        return 0;
                }
 
                if (table[index] == 0) {
-                       hpa_t new_table = kvm_mmu_alloc_page(vcpu,
-                                                            &table[index]);
-
-                       if (!VALID_PAGE(new_table)) {
+                       struct kvm_mmu_page *new_table;
+                       gfn_t pseudo_gfn;
+
+                       pseudo_gfn = (v & PT64_DIR_BASE_ADDR_MASK)
+                               >> PAGE_SHIFT;
+                       new_table = kvm_mmu_get_page(vcpu, pseudo_gfn,
+                                                    v, level - 1,
+                                                    1, &table[index]);
+                       if (!new_table) {
                                pgprintk("nonpaging_map: ENOMEM\n");
                                return -ENOMEM;
                        }
 
-                       if (level == PT32E_ROOT_LEVEL)
-                               table[index] = new_table | PT_PRESENT_MASK;
-                       else
-                               table[index] = new_table | PT_PRESENT_MASK |
-                                               PT_WRITABLE_MASK | PT_USER_MASK;
+                       table[index] = new_table->page_hpa | PT_PRESENT_MASK
+                               | PT_WRITABLE_MASK | PT_USER_MASK;
                }
                table_addr = table[index] & PT64_BASE_ADDR_MASK;
        }
 }
 
-static void nonpaging_flush(struct kvm_vcpu *vcpu)
+static void mmu_free_roots(struct kvm_vcpu *vcpu)
 {
-       hpa_t root = vcpu->mmu.root_hpa;
+       int i;
+       struct kvm_mmu_page *page;
 
-       ++kvm_stat.tlb_flush;
-       pgprintk("nonpaging_flush\n");
-       ASSERT(VALID_PAGE(root));
-       release_pt_page_64(vcpu, root, vcpu->mmu.shadow_root_level);
-       root = kvm_mmu_alloc_page(vcpu, NULL);
-       ASSERT(VALID_PAGE(root));
-       vcpu->mmu.root_hpa = root;
-       if (is_paging(vcpu))
-               root |= (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK));
-       kvm_arch_ops->set_cr3(vcpu, root);
-       kvm_arch_ops->tlb_flush(vcpu);
+#ifdef CONFIG_X86_64
+       if (vcpu->mmu.shadow_root_level == PT64_ROOT_LEVEL) {
+               hpa_t root = vcpu->mmu.root_hpa;
+
+               ASSERT(VALID_PAGE(root));
+               page = page_header(root);
+               --page->root_count;
+               vcpu->mmu.root_hpa = INVALID_PAGE;
+               return;
+       }
+#endif
+       for (i = 0; i < 4; ++i) {
+               hpa_t root = vcpu->mmu.pae_root[i];
+
+               ASSERT(VALID_PAGE(root));
+               root &= PT64_BASE_ADDR_MASK;
+               page = page_header(root);
+               --page->root_count;
+               vcpu->mmu.pae_root[i] = INVALID_PAGE;
+       }
+       vcpu->mmu.root_hpa = INVALID_PAGE;
+}
+
+/*
+ * Obtain the shadow root page(s) for the vcpu's current cr3 and take a
+ * root_count reference on each.
+ *
+ * With a PT64_ROOT_LEVEL shadow (x86_64 only) there is a single root whose
+ * address is stored in mmu.root_hpa.  Otherwise four PT32_ROOT_LEVEL roots
+ * are placed in mmu.pae_root[] and root_hpa points at that array.  The
+ * guest frame of each PAE root comes from pdptrs[i] when the guest root
+ * level is PT32E_ROOT_LEVEL, is 0 when the guest root level is 0, and is
+ * cr3's frame otherwise.
+ *
+ * NOTE(review): kvm_mmu_get_page() returns NULL when no free shadow page
+ * is left, and the result is dereferenced unchecked here -- confirm that
+ * callers always guarantee enough free pages.
+ */
+static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
+{
+       int i;
+       gfn_t root_gfn;
+       struct kvm_mmu_page *page;
+
+       root_gfn = vcpu->cr3 >> PAGE_SHIFT;
+
+#ifdef CONFIG_X86_64
+       if (vcpu->mmu.shadow_root_level == PT64_ROOT_LEVEL) {
+               hpa_t root = vcpu->mmu.root_hpa;
+
+               ASSERT(!VALID_PAGE(root));
+               page = kvm_mmu_get_page(vcpu, root_gfn, 0,
+                                       PT64_ROOT_LEVEL, 0, NULL);
+               root = page->page_hpa;
+               ++page->root_count;
+               vcpu->mmu.root_hpa = root;
+               return;
+       }
+#endif
+       for (i = 0; i < 4; ++i) {
+               hpa_t root = vcpu->mmu.pae_root[i];
+
+               ASSERT(!VALID_PAGE(root));
+               if (vcpu->mmu.root_level == PT32E_ROOT_LEVEL)
+                       root_gfn = vcpu->pdptrs[i] >> PAGE_SHIFT;
+               else if (vcpu->mmu.root_level == 0)
+                       root_gfn = 0;
+               /* i << 30: each of the four roots covers 1GB of address space */
+               page = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
+                                       PT32_ROOT_LEVEL, !is_paging(vcpu),
+                                       NULL);
+               root = page->page_hpa;
+               ++page->root_count;
+               vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK;
+       }
+       vcpu->mmu.root_hpa = __pa(vcpu->mmu.pae_root);
 }
 
 static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
@@ -322,43 +854,29 @@ static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
 static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
                               u32 error_code)
 {
-       int ret;
        gpa_t addr = gva;
+       hpa_t paddr;
+       int r;
+
+       r = mmu_topup_memory_caches(vcpu);
+       if (r)
+               return r;
 
        ASSERT(vcpu);
        ASSERT(VALID_PAGE(vcpu->mmu.root_hpa));
 
-       for (;;) {
-            hpa_t paddr;
-
-            paddr = gpa_to_hpa(vcpu , addr & PT64_BASE_ADDR_MASK);
 
-            if (is_error_hpa(paddr))
-                    return 1;
+       paddr = gpa_to_hpa(vcpu , addr & PT64_BASE_ADDR_MASK);
 
-            ret = nonpaging_map(vcpu, addr & PAGE_MASK, paddr);
-            if (ret) {
-                    nonpaging_flush(vcpu);
-                    continue;
-            }
-            break;
-       }
-       return ret;
-}
+       if (is_error_hpa(paddr))
+               return 1;
 
-static void nonpaging_inval_page(struct kvm_vcpu *vcpu, gva_t addr)
-{
+       return nonpaging_map(vcpu, addr & PAGE_MASK, paddr);
 }
 
 static void nonpaging_free(struct kvm_vcpu *vcpu)
 {
-       hpa_t root;
-
-       ASSERT(vcpu);
-       root = vcpu->mmu.root_hpa;
-       if (VALID_PAGE(root))
-               release_pt_page_64(vcpu, root, vcpu->mmu.shadow_root_level);
-       vcpu->mmu.root_hpa = INVALID_PAGE;
+       mmu_free_roots(vcpu);
 }
 
 static int nonpaging_init_context(struct kvm_vcpu *vcpu)
@@ -367,40 +885,31 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu)
 
        context->new_cr3 = nonpaging_new_cr3;
        context->page_fault = nonpaging_page_fault;
-       context->inval_page = nonpaging_inval_page;
        context->gva_to_gpa = nonpaging_gva_to_gpa;
        context->free = nonpaging_free;
-       context->root_level = PT32E_ROOT_LEVEL;
+       context->root_level = 0;
        context->shadow_root_level = PT32E_ROOT_LEVEL;
-       context->root_hpa = kvm_mmu_alloc_page(vcpu, NULL);
+       mmu_alloc_roots(vcpu);
        ASSERT(VALID_PAGE(context->root_hpa));
        kvm_arch_ops->set_cr3(vcpu, context->root_hpa);
        return 0;
 }
 
-
 static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
 {
-       struct kvm_mmu_page *page, *npage;
-
-       list_for_each_entry_safe(page, npage, &vcpu->kvm->active_mmu_pages,
-                                link) {
-               if (page->global)
-                       continue;
-
-               if (!page->parent_pte)
-                       continue;
-
-               *page->parent_pte = 0;
-               release_pt_page_64(vcpu, page->page_hpa, 1);
-       }
        ++kvm_stat.tlb_flush;
        kvm_arch_ops->tlb_flush(vcpu);
 }
 
+/*
+ * Guest loaded a new cr3: release the current shadow roots, reclaim shadow
+ * pages if the free pool has dropped below KVM_MIN_FREE_MMU_PAGES, obtain
+ * roots for the new cr3, flush the TLB and load the new shadow root.
+ */
 static void paging_new_cr3(struct kvm_vcpu *vcpu)
 {
+       pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->cr3);
+       mmu_free_roots(vcpu);
+       if (unlikely(vcpu->kvm->n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES))
+               kvm_mmu_free_some_pages(vcpu);
+       mmu_alloc_roots(vcpu);
        kvm_mmu_flush_tlb(vcpu);
+       kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa);
 }
 
 static void mark_pagetable_nonglobal(void *shadow_pte)
@@ -412,7 +921,8 @@ static inline void set_pte_common(struct kvm_vcpu *vcpu,
                             u64 *shadow_pte,
                             gpa_t gaddr,
                             int dirty,
-                            u64 access_bits)
+                            u64 access_bits,
+                            gfn_t gfn)
 {
        hpa_t paddr;
 
@@ -420,13 +930,10 @@ static inline void set_pte_common(struct kvm_vcpu *vcpu,
        if (!dirty)
                access_bits &= ~PT_WRITABLE_MASK;
 
-       if (access_bits & PT_WRITABLE_MASK)
-               mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT);
+       paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK);
 
        *shadow_pte |= access_bits;
 
-       paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK);
-
        if (!(*shadow_pte & PT_GLOBAL_MASK))
                mark_pagetable_nonglobal(shadow_pte);
 
@@ -434,10 +941,31 @@ static inline void set_pte_common(struct kvm_vcpu *vcpu,
                *shadow_pte |= gaddr;
                *shadow_pte |= PT_SHADOW_IO_MARK;
                *shadow_pte &= ~PT_PRESENT_MASK;
-       } else {
-               *shadow_pte |= paddr;
-               page_header_update_slot(vcpu->kvm, shadow_pte, gaddr);
+               return;
+       }
+
+       *shadow_pte |= paddr;
+
+       if (access_bits & PT_WRITABLE_MASK) {
+               struct kvm_mmu_page *shadow;
+
+               shadow = kvm_mmu_lookup_page(vcpu, gfn);
+               if (shadow) {
+                       pgprintk("%s: found shadow page for %lx, marking ro\n",
+                                __FUNCTION__, gfn);
+                       access_bits &= ~PT_WRITABLE_MASK;
+                       if (is_writeble_pte(*shadow_pte)) {
+                                   *shadow_pte &= ~PT_WRITABLE_MASK;
+                                   kvm_arch_ops->tlb_flush(vcpu);
+                       }
+               }
        }
+
+       if (access_bits & PT_WRITABLE_MASK)
+               mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT);
+
+       page_header_update_slot(vcpu->kvm, shadow_pte, gaddr);
+       rmap_add(vcpu, shadow_pte);
 }
 
 static void inject_page_fault(struct kvm_vcpu *vcpu,
@@ -474,41 +1002,6 @@ static int may_access(u64 pte, int write, int user)
        return 1;
 }
 
-/*
- * Remove a shadow pte.
- */
-static void paging_inval_page(struct kvm_vcpu *vcpu, gva_t addr)
-{
-       hpa_t page_addr = vcpu->mmu.root_hpa;
-       int level = vcpu->mmu.shadow_root_level;
-
-       ++kvm_stat.invlpg;
-
-       for (; ; level--) {
-               u32 index = PT64_INDEX(addr, level);
-               u64 *table = __va(page_addr);
-
-               if (level == PT_PAGE_TABLE_LEVEL ) {
-                       table[index] = 0;
-                       return;
-               }
-
-               if (!is_present_pte(table[index]))
-                       return;
-
-               page_addr = table[index] & PT64_BASE_ADDR_MASK;
-
-               if (level == PT_DIRECTORY_LEVEL &&
-                         (table[index] & PT_SHADOW_PS_MARK)) {
-                       table[index] = 0;
-                       release_pt_page_64(vcpu, page_addr, PT_PAGE_TABLE_LEVEL);
-
-                       kvm_arch_ops->tlb_flush(vcpu);
-                       return;
-               }
-       }
-}
-
 static void paging_free(struct kvm_vcpu *vcpu)
 {
        nonpaging_free(vcpu);
@@ -522,37 +1015,40 @@ static void paging_free(struct kvm_vcpu *vcpu)
 #include "paging_tmpl.h"
 #undef PTTYPE
 
-static int paging64_init_context(struct kvm_vcpu *vcpu)
+static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
 {
        struct kvm_mmu *context = &vcpu->mmu;
 
        ASSERT(is_pae(vcpu));
        context->new_cr3 = paging_new_cr3;
        context->page_fault = paging64_page_fault;
-       context->inval_page = paging_inval_page;
        context->gva_to_gpa = paging64_gva_to_gpa;
        context->free = paging_free;
-       context->root_level = PT64_ROOT_LEVEL;
-       context->shadow_root_level = PT64_ROOT_LEVEL;
-       context->root_hpa = kvm_mmu_alloc_page(vcpu, NULL);
+       context->root_level = level;
+       context->shadow_root_level = level;
+       mmu_alloc_roots(vcpu);
        ASSERT(VALID_PAGE(context->root_hpa));
        kvm_arch_ops->set_cr3(vcpu, context->root_hpa |
                    (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK)));
        return 0;
 }
 
+static int paging64_init_context(struct kvm_vcpu *vcpu)
+{
+       return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL);
+}
+
 static int paging32_init_context(struct kvm_vcpu *vcpu)
 {
        struct kvm_mmu *context = &vcpu->mmu;
 
        context->new_cr3 = paging_new_cr3;
        context->page_fault = paging32_page_fault;
-       context->inval_page = paging_inval_page;
        context->gva_to_gpa = paging32_gva_to_gpa;
        context->free = paging_free;
        context->root_level = PT32_ROOT_LEVEL;
        context->shadow_root_level = PT32E_ROOT_LEVEL;
-       context->root_hpa = kvm_mmu_alloc_page(vcpu, NULL);
+       mmu_alloc_roots(vcpu);
        ASSERT(VALID_PAGE(context->root_hpa));
        kvm_arch_ops->set_cr3(vcpu, context->root_hpa |
                    (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK)));
@@ -561,14 +1057,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
 
 static int paging32E_init_context(struct kvm_vcpu *vcpu)
 {
-       int ret;
-
-       if ((ret = paging64_init_context(vcpu)))
-               return ret;
-
-       vcpu->mmu.root_level = PT32E_ROOT_LEVEL;
-       vcpu->mmu.shadow_root_level = PT32E_ROOT_LEVEL;
-       return 0;
+       return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL);
 }
 
 static int init_kvm_mmu(struct kvm_vcpu *vcpu)
@@ -597,41 +1086,161 @@ static void destroy_kvm_mmu(struct kvm_vcpu *vcpu)
 
+/*
+ * Tear down and rebuild the vcpu's MMU context, then top up the MMU memory
+ * caches.  Returns a negative value from init_kvm_mmu() on failure,
+ * otherwise the result of mmu_topup_memory_caches().
+ */
 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
 {
+       int r;
+
        destroy_kvm_mmu(vcpu);
-       return init_kvm_mmu(vcpu);
+       r = init_kvm_mmu(vcpu);
+       if (r < 0)
+               goto out;
+       r = mmu_topup_memory_caches(vcpu);
+out:
+       return r;
 }
 
-static void free_mmu_pages(struct kvm_vcpu *vcpu)
+/*
+ * Update the shadow page tables for a write of @bytes bytes at guest
+ * physical address @gpa.
+ *
+ * Consecutive writes to the same guest frame are counted; from the third
+ * one on the frame is treated as "flooded".  Every non-metaphysical shadow
+ * page shadowing the frame is then either zapped (write crosses a guest
+ * pte boundary, or the frame is flooded) or has the single shadow pte that
+ * corresponds to the written guest pte unlinked and cleared.
+ */
+void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes)
 {
-       while (!list_empty(&vcpu->free_pages)) {
+       gfn_t gfn = gpa >> PAGE_SHIFT;
+       struct kvm_mmu_page *page;
+       struct kvm_mmu_page *child;
+       struct hlist_node *node, *n;
+       struct hlist_head *bucket;
+       unsigned index;
+       u64 *spte;
+       u64 pte;
+       unsigned offset = offset_in_page(gpa);
+       unsigned pte_size;
+       unsigned page_offset;
+       unsigned misaligned;
+       int level;
+       int flooded = 0;
+
+       pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes);
+       if (gfn == vcpu->last_pt_write_gfn) {
+               ++vcpu->last_pt_write_count;
+               if (vcpu->last_pt_write_count >= 3)
+                       flooded = 1;
+       } else {
+               vcpu->last_pt_write_gfn = gfn;
+               vcpu->last_pt_write_count = 1;
+       }
+       index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
+       bucket = &vcpu->kvm->mmu_page_hash[index];
+       hlist_for_each_entry_safe(page, node, n, bucket, hash_link) {
+               if (page->gfn != gfn || page->role.metaphysical)
+                       continue;
+               pte_size = page->role.glevels == PT32_ROOT_LEVEL ? 4 : 8;
+               /* non-zero when first and last byte land in different ptes */
+               misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
+               if (misaligned || flooded) {
+                       /*
+                        * Misaligned accesses are too much trouble to fix
+                        * up; also, they usually indicate a page is not used
+                        * as a page table.
+                        *
+                        * If we're seeing too many writes to a page,
+                        * it may no longer be a page table, or we may be
+                        * forking, in which case it is better to unmap the
+                        * page.
+                        */
+                       pgprintk("misaligned: gpa %llx bytes %d role %x\n",
+                                gpa, bytes, page->role.word);
+                       kvm_mmu_zap_page(vcpu, page);
+                       continue;
+               }
+               page_offset = offset;
+               level = page->role.level;
+               /* 4-byte guest ptes map onto 8-byte shadow ptes */
+               if (page->role.glevels == PT32_ROOT_LEVEL) {
+                       page_offset <<= 1;          /* 32->64 */
+                       page_offset &= ~PAGE_MASK;
+               }
+               spte = __va(page->page_hpa);
+               spte += page_offset / sizeof(*spte);
+               pte = *spte;
+               if (is_present_pte(pte)) {
+                       if (level == PT_PAGE_TABLE_LEVEL)
+                               rmap_remove(vcpu, spte);
+                       else {
+                               child = page_header(pte & PT64_BASE_ADDR_MASK);
+                               mmu_page_remove_parent_pte(vcpu, child, spte);
+                       }
+               }
+               *spte = 0;
+       }
+}
+
+/* Counterpart of kvm_mmu_pre_write(); currently does nothing. */
+void kvm_mmu_post_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes)
+{
+}
+
+/*
+ * Translate guest virtual address @gva with the current MMU context and
+ * zap the shadow pages of the guest frame it maps to.  Returns the result
+ * of kvm_mmu_unprotect_page().
+ *
+ * NOTE(review): the gva_to_gpa() result is used without checking for a
+ * failed translation -- confirm what it returns for unmapped addresses.
+ */
+int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
+{
+       gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);
+
+       return kvm_mmu_unprotect_page(vcpu, gpa >> PAGE_SHIFT);
+}
+
+/*
+ * Reclaim shadow pages: zap pages from the tail of the VM's
+ * active_mmu_pages list until at least KVM_REFILL_PAGES are free.
+ *
+ * NOTE(review): the loop does not check for an empty active list, and a
+ * zapped page whose root_count is non-zero is not freed (it is moved to
+ * the list head instead) -- confirm the loop always terminates.
+ */
+void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
+{
+       while (vcpu->kvm->n_free_mmu_pages < KVM_REFILL_PAGES) {
                struct kvm_mmu_page *page;
 
+               page = container_of(vcpu->kvm->active_mmu_pages.prev,
+                                   struct kvm_mmu_page, link);
+               kvm_mmu_zap_page(vcpu, page);
+       }
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_free_some_pages);
+
+/*
+ * Release all shadow paging memory: zap every page on the VM's active
+ * list, hand each page on the vcpu's free list back to the page allocator,
+ * and free the PAE root table page.
+ *
+ * NOTE(review): the first loop only makes progress when zapping removes
+ * the page from the active list, which kvm_mmu_zap_page() does only for
+ * pages with root_count == 0 -- presumably roots have been released before
+ * this is called; confirm.
+ */
+static void free_mmu_pages(struct kvm_vcpu *vcpu)
+{
+       struct kvm_mmu_page *page;
+
+       while (!list_empty(&vcpu->kvm->active_mmu_pages)) {
+               page = container_of(vcpu->kvm->active_mmu_pages.next,
+                                   struct kvm_mmu_page, link);
+               kvm_mmu_zap_page(vcpu, page);
+       }
+       while (!list_empty(&vcpu->free_pages)) {
                page = list_entry(vcpu->free_pages.next,
                                  struct kvm_mmu_page, link);
                list_del(&page->link);
                __free_page(pfn_to_page(page->page_hpa >> PAGE_SHIFT));
                page->page_hpa = INVALID_PAGE;
        }
+       free_page((unsigned long)vcpu->mmu.pae_root);
 }
 
 static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
 {
+       struct page *page;
        int i;
 
        ASSERT(vcpu);
 
        for (i = 0; i < KVM_NUM_MMU_PAGES; i++) {
-               struct page *page;
                struct kvm_mmu_page *page_header = &vcpu->page_header_buf[i];
 
                INIT_LIST_HEAD(&page_header->link);
-               if ((page = alloc_page(GFP_KVM_MMU)) == NULL)
+               if ((page = alloc_page(GFP_KERNEL)) == NULL)
                        goto error_1;
                page->private = (unsigned long)page_header;
                page_header->page_hpa = (hpa_t)page_to_pfn(page) << PAGE_SHIFT;
                memset(__va(page_header->page_hpa), 0, PAGE_SIZE);
                list_add(&page_header->link, &vcpu->free_pages);
+               ++vcpu->kvm->n_free_mmu_pages;
        }
+
+       /*
+        * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64.
+        * Therefore we need to allocate shadow page tables in the first
+        * 4GB of memory, which happens to fit the DMA32 zone.
+        */
+       page = alloc_page(GFP_KERNEL | __GFP_DMA32);
+       if (!page)
+               goto error_1;
+       vcpu->mmu.pae_root = page_address(page);
+       for (i = 0; i < 4; ++i)
+               vcpu->mmu.pae_root[i] = INVALID_PAGE;
+
        return 0;
 
 error_1:
@@ -663,10 +1272,12 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
 
        destroy_kvm_mmu(vcpu);
        free_mmu_pages(vcpu);
+       mmu_free_memory_caches(vcpu);
 }
 
-void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
+void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot)
 {
+       struct kvm *kvm = vcpu->kvm;
        struct kvm_mmu_page *page;
 
        list_for_each_entry(page, &kvm->active_mmu_pages, link) {
@@ -679,8 +1290,169 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
                pt = __va(page->page_hpa);
                for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
                        /* avoid RMW */
-                       if (pt[i] & PT_WRITABLE_MASK)
+                       if (pt[i] & PT_WRITABLE_MASK) {
+                               rmap_remove(vcpu, &pt[i]);
                                pt[i] &= ~PT_WRITABLE_MASK;
+                       }
+       }
+}
+
+#ifdef AUDIT
+
+static const char *audit_msg;
+
+/*
+ * On CONFIG_X86_64 builds, shift the address left by 16 and back right
+ * by 16 as a signed value, so the top 16 bits become copies of the bit
+ * below them (assumes gva_t is 64 bits wide there and that the signed
+ * right shift is arithmetic -- TODO confirm).  On other builds the
+ * address is returned unchanged.
+ */
+static gva_t canonicalize(gva_t gva)
+{
+#ifdef CONFIG_X86_64
+       long long shifted = (long long)(gva << 16);
+
+       return shifted >> 16;
+#else
+       return gva;
+#endif
+}
 
+/*
+ * Recursively walk one shadow page table and check its leaf entries.
+ *
+ * page_pte: entry whose PT64_BASE_ADDR_MASK bits locate the table to scan
+ * va:       guest virtual address corresponding to the table's first entry
+ * level:    level of this table; level 1 entries are treated as leaves
+ *
+ * Each present leaf entry is compared with the host physical address
+ * obtained by translating its guest virtual address through
+ * vcpu->mmu.gva_to_gpa() and gpa_to_hpa(); a mismatch is reported with
+ * printk(KERN_ERR).
+ */
+static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
+                               gva_t va, int level)
+{
+       u64 *pt = __va(page_pte & PT64_BASE_ADDR_MASK);
+       int i;
+       /* Address span of one entry: 9 more bits per level above the leaf. */
+       gva_t va_delta = 1ul << (PAGE_SHIFT + 9 * (level - 1));
+
+       for (i = 0; i < PT64_ENT_PER_PAGE; ++i, va += va_delta) {
+               u64 ent = pt[i];
+
+               /*
+                * The parentheses matter: "!ent & PT_PRESENT_MASK" applies
+                * the mask to the 0/1 result of !ent, not to ent itself.
+                */
+               if (!(ent & PT_PRESENT_MASK))
+                       continue;
+
+               va = canonicalize(va);
+               if (level > 1)
+                       audit_mappings_page(vcpu, ent, va, level - 1);
+               else {
+                       gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, va);
+                       hpa_t hpa = gpa_to_hpa(vcpu, gpa);
+
+                       /* ent is known to be present here. */
+                       if ((ent & PT64_BASE_ADDR_MASK) != hpa)
+                               printk(KERN_ERR "audit error: (%s) levels %d"
+                                      " gva %lx gpa %llx hpa %llx ent %llx\n",
+                                      audit_msg, vcpu->mmu.root_level,
+                                      va, gpa, hpa, ent);
+               }
        }
 }
+
+/*
+ * Audit every shadow mapping reachable from the vcpu's current root:
+ * the single table at vcpu->mmu.root_hpa when root_level is 4, otherwise
+ * each present entry of the four-entry vcpu->mmu.pae_root table, where
+ * entry i is audited starting at guest virtual address i << 30.
+ */
+static void audit_mappings(struct kvm_vcpu *vcpu)
+{
+       int i;
+
+       if (vcpu->mmu.root_level == 4) {
+               audit_mappings_page(vcpu, vcpu->mmu.root_hpa, 0, 4);
+               return;
+       }
+
+       for (i = 0; i < 4; ++i) {
+               if (!(vcpu->mmu.pae_root[i] & PT_PRESENT_MASK))
+                       continue;
+               /*
+                * Widen before shifting: "i << 30" is evaluated as int and
+                * overflows for i >= 2.
+                */
+               audit_mappings_page(vcpu, vcpu->mmu.pae_root[i],
+                                   (gva_t)i << 30, 2);
+       }
+}
+
+/*
+ * Count the reverse-map entries recorded in page->private for every page
+ * of every memory slot of vcpu->kvm.
+ *
+ * A zero ->private counts as nothing.  A non-zero value with bit 0 clear
+ * counts as one entry.  With bit 0 set, the remaining bits are used as a
+ * pointer to a chain of struct kvm_rmap_desc; in each descriptor the
+ * leading non-NULL shadow_ptes[] slots are counted.
+ */
+static int count_rmaps(struct kvm_vcpu *vcpu)
+{
+       int total = 0;
+       int slot_nr, page_nr, ent_nr;
+
+       for (slot_nr = 0; slot_nr < KVM_MEMORY_SLOTS; ++slot_nr) {
+               struct kvm_memory_slot *slot = &vcpu->kvm->memslots[slot_nr];
+               struct kvm_rmap_desc *desc;
+
+               for (page_nr = 0; page_nr < slot->npages; ++page_nr) {
+                       struct page *page = slot->phys_mem[page_nr];
+
+                       if (!page->private)
+                               continue;
+
+                       if (page->private & 1) {
+                               desc = (struct kvm_rmap_desc *)
+                                       (page->private & ~1ul);
+                               for (; desc; desc = desc->more) {
+                                       /* stop at the first empty slot */
+                                       for (ent_nr = 0;
+                                            ent_nr < RMAP_EXT
+                                            && desc->shadow_ptes[ent_nr];
+                                            ++ent_nr)
+                                               ++total;
+                               }
+                       } else {
+                               ++total;
+                       }
+               }
+       }
+       return total;
+}
+
+/*
+ * Count the entries that have both PT_PRESENT_MASK and PT_WRITABLE_MASK
+ * set, over all pages on vcpu->kvm->active_mmu_pages whose role.level is
+ * PT_PAGE_TABLE_LEVEL.
+ */
+static int count_writable_mappings(struct kvm_vcpu *vcpu)
+{
+       struct kvm_mmu_page *sp;
+       int writable = 0;
+       int idx;
+
+       list_for_each_entry(sp, &vcpu->kvm->active_mmu_pages, link) {
+               u64 *spt = __va(sp->page_hpa);
+
+               /* only last-level tables are counted */
+               if (sp->role.level != PT_PAGE_TABLE_LEVEL)
+                       continue;
+
+               for (idx = 0; idx < PT64_ENT_PER_PAGE; ++idx) {
+                       u64 spte = spt[idx];
+
+                       if ((spte & PT_PRESENT_MASK)
+                           && (spte & PT_WRITABLE_MASK))
+                               ++writable;
+               }
+       }
+       return writable;
+}
+
+/*
+ * Compare the number of reverse-map entries with the number of writable
+ * last-level shadow entries and print an error if the two differ.
+ */
+static void audit_rmap(struct kvm_vcpu *vcpu)
+{
+       int tracked = count_rmaps(vcpu);
+       int writable = count_writable_mappings(vcpu);
+
+       if (tracked == writable)
+               return;
+
+       printk(KERN_ERR "%s: (%s) rmap %d actual %d\n",
+              __FUNCTION__, audit_msg, tracked, writable);
+}
+
+/*
+ * For every non-metaphysical page on vcpu->kvm->active_mmu_pages, look up
+ * the host page behind the page's gfn and print an error if that host
+ * page has a non-zero ->private, i.e. reverse-map state is recorded for
+ * a guest frame that is being shadowed.
+ */
+static void audit_write_protection(struct kvm_vcpu *vcpu)
+{
+       struct kvm_mmu_page *sp;
+
+       list_for_each_entry(sp, &vcpu->kvm->active_mmu_pages, link) {
+               gpa_t gpa;
+               hfn_t hfn;
+               struct page *host_page;
+
+               if (sp->role.metaphysical)
+                       continue;
+
+               gpa = (gpa_t)sp->gfn << PAGE_SHIFT;
+               hfn = gpa_to_hpa(vcpu, gpa) >> PAGE_SHIFT;
+               host_page = pfn_to_page(hfn);
+               if (!host_page->private)
+                       continue;
+
+               printk(KERN_ERR "%s: (%s) shadow page has writable"
+                      " mappings: gfn %lx role %x\n",
+                      __FUNCTION__, audit_msg, sp->gfn,
+                      sp->role.word);
+       }
+}
+
+/*
+ * Run all audit passes (rmap counts, write protection, mappings).
+ *
+ * msg is stored in audit_msg so the passes can tag their error output.
+ * dbg is forced to 0 while the passes run and restored afterwards
+ * (presumably to keep debug output quiet during the audit -- confirm).
+ */
+static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg)
+{
+       int saved_dbg = dbg;
+
+       audit_msg = msg;
+       dbg = 0;
+
+       audit_rmap(vcpu);
+       audit_write_protection(vcpu);
+       audit_mappings(vcpu);
+
+       dbg = saved_dbg;
+}
+
+#endif
index 09bb9b4ed12d1918d5843dfae6a53328a98500a7..2dbf4307ed9ed1e5ea7629bcd551c4b32f3eed08 100644 (file)
        #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
        #define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level)
        #define PT_PTE_COPY_MASK PT64_PTE_COPY_MASK
+       #ifdef CONFIG_X86_64
+       #define PT_MAX_FULL_LEVELS 4
+       #else
+       #define PT_MAX_FULL_LEVELS 2
+       #endif
 #elif PTTYPE == 32
        #define pt_element_t u32
        #define guest_walker guest_walker32
@@ -42,6 +47,7 @@
        #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
        #define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level)
        #define PT_PTE_COPY_MASK PT32_PTE_COPY_MASK
+       #define PT_MAX_FULL_LEVELS 2
 #else
        #error Invalid PTTYPE value
 #endif
  */
 struct guest_walker {
        int level;                      /* level the walk is currently at */
+       gfn_t table_gfn[PT_MAX_FULL_LEVELS];    /* [l - 1]: gfn of the level-l table visited */
        pt_element_t *table;            /* kmap_atomic() mapping of the current table, or NULL */
+       pt_element_t *ptep;             /* guest entry at which the walk stopped */
        pt_element_t inherited_ar;      /* user/writable bits ANDed across upper-level entries */
+       gfn_t gfn;                      /* gfn of the translation; set only when a present leaf is found */
 };
 
-static void FNAME(init_walker)(struct guest_walker *walker,
-                              struct kvm_vcpu *vcpu)
+/*
+ * Fetch a guest pte for a guest virtual address
+ *
+ * The walk starts from vcpu->cr3 or, for PTTYPE == 64 outside long mode,
+ * from the vcpu->pdptrs[] entry selected by bits 30-31 of addr.  It stops
+ * at the first non-present entry, at a PT_PAGE_TABLE_LEVEL entry, or at a
+ * PT_DIRECTORY_LEVEL entry with PT_PAGE_SIZE_MASK set; the entry reached
+ * is stored in walker->ptep.  PT_ACCESSED_MASK is set on every present
+ * entry visited.  The gfn of each table visited is recorded in
+ * walker->table_gfn[].
+ *
+ * If the pdptr is not present the function returns with walker->table
+ * still NULL; otherwise walker->table holds a kmap_atomic() mapping when
+ * the function returns.
+ *
+ * NOTE(review): slot is assigned below but not read again in this
+ * function.
+ */
+static void FNAME(walk_addr)(struct guest_walker *walker,
+                            struct kvm_vcpu *vcpu, gva_t addr)
 {
        hpa_t hpa;
        struct kvm_memory_slot *slot;
+       pt_element_t *ptep;
+       pt_element_t root;
+       gfn_t table_gfn;
 
+       pgprintk("%s: addr %lx\n", __FUNCTION__, addr);
        walker->level = vcpu->mmu.root_level;
-       slot = gfn_to_memslot(vcpu->kvm,
-                             (vcpu->cr3 & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
-       hpa = safe_gpa_to_hpa(vcpu, vcpu->cr3 & PT64_BASE_ADDR_MASK);
+       walker->table = NULL;   /* nothing mapped yet; release_walker checks this */
+       root = vcpu->cr3;
+#if PTTYPE == 64
+       if (!is_long_mode(vcpu)) {
+               walker->ptep = &vcpu->pdptrs[(addr >> 30) & 3];
+               root = *walker->ptep;
+               if (!(root & PT_PRESENT_MASK))
+                       return;
+               --walker->level;        /* the pdptr lookup consumed the top level */
+       }
+#endif
+       table_gfn = (root & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
+       walker->table_gfn[walker->level - 1] = table_gfn;
+       pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
+                walker->level - 1, table_gfn);
+       slot = gfn_to_memslot(vcpu->kvm, table_gfn);
+       hpa = safe_gpa_to_hpa(vcpu, root & PT64_BASE_ADDR_MASK);
        walker->table = kmap_atomic(pfn_to_page(hpa >> PAGE_SHIFT), KM_USER0);
 
        ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
               (vcpu->cr3 & ~(PAGE_MASK | CR3_FLAGS_MASK)) == 0);
 
-       walker->table = (pt_element_t *)( (unsigned long)walker->table |
-               (unsigned long)(vcpu->cr3 & ~(PAGE_MASK | CR3_FLAGS_MASK)) );
        walker->inherited_ar = PT_USER_MASK | PT_WRITABLE_MASK;
+
+       for (;;) {
+               int index = PT_INDEX(addr, walker->level);
+               hpa_t paddr;
+
+               ptep = &walker->table[index];
+               ASSERT(((unsigned long)walker->table & PAGE_MASK) ==
+                      ((unsigned long)ptep & PAGE_MASK));
+
+               if (is_present_pte(*ptep) && !(*ptep &  PT_ACCESSED_MASK))
+                       *ptep |= PT_ACCESSED_MASK;      /* written only when the bit is not already set */
+
+               if (!is_present_pte(*ptep))
+                       break;
+
+               if (walker->level == PT_PAGE_TABLE_LEVEL) {
+                       walker->gfn = (*ptep & PT_BASE_ADDR_MASK)
+                               >> PAGE_SHIFT;
+                       break;
+               }
+
+               if (walker->level == PT_DIRECTORY_LEVEL
+                   && (*ptep & PT_PAGE_SIZE_MASK)
+                   && (PTTYPE == 64 || is_pse(vcpu))) {
+                       walker->gfn = (*ptep & PT_DIR_BASE_ADDR_MASK)
+                               >> PAGE_SHIFT;
+                       walker->gfn += PT_INDEX(addr, PT_PAGE_TABLE_LEVEL);     /* add addr's page-table-level index */
+                       break;
+               }
+
+               if (walker->level != 3 || is_long_mode(vcpu))   /* level-3 entries outside long mode do not narrow inherited_ar */
+                       walker->inherited_ar &= walker->table[index];
+               table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
+               paddr = safe_gpa_to_hpa(vcpu, *ptep & PT_BASE_ADDR_MASK);
+               kunmap_atomic(walker->table, KM_USER0);
+               walker->table = kmap_atomic(pfn_to_page(paddr >> PAGE_SHIFT),
+                                           KM_USER0);
+               --walker->level;
+               walker->table_gfn[walker->level - 1 ] = table_gfn;
+               pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
+                        walker->level - 1, table_gfn);
+       }
+       walker->ptep = ptep;
+       pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)*ptep);
 }
 
 static void FNAME(release_walker)(struct guest_walker *walker)
 {
-       kunmap_atomic(walker->table, KM_USER0);
+       if (walker->table)      /* walk_addr leaves table NULL when it returns before mapping one */
+               kunmap_atomic(walker->table, KM_USER0);
 }
 
 static void FNAME(set_pte)(struct kvm_vcpu *vcpu, u64 guest_pte,
-                          u64 *shadow_pte, u64 access_bits)
+                          u64 *shadow_pte, u64 access_bits, gfn_t gfn)
 {
        ASSERT(*shadow_pte == 0);       /* the shadow entry must still be empty */
        access_bits &= guest_pte;       /* drop access bits the guest pte does not have */
        *shadow_pte = (guest_pte & PT_PTE_COPY_MASK);
        set_pte_common(vcpu, shadow_pte, guest_pte & PT_BASE_ADDR_MASK,
-                      guest_pte & PT_DIRTY_MASK, access_bits);
+                      guest_pte & PT_DIRTY_MASK, access_bits, gfn);    /* gfn is forwarded unchanged */
 }
 
 static void FNAME(set_pde)(struct kvm_vcpu *vcpu, u64 guest_pde,
-                          u64 *shadow_pte, u64 access_bits,
-                          int index)
+                          u64 *shadow_pte, u64 access_bits, gfn_t gfn)
 {
        gpa_t gaddr;
 
        ASSERT(*shadow_pte == 0);       /* the shadow entry must still be empty */
        access_bits &= guest_pde;       /* drop access bits the guest pde does not have */
-       gaddr = (guest_pde & PT_DIR_BASE_ADDR_MASK) + PAGE_SIZE * index;
+       gaddr = (gpa_t)gfn << PAGE_SHIFT;       /* guest physical address of the frame passed in */
        if (PTTYPE == 32 && is_cpuid_PSE36())   /* OR in the pde bits selected by PT32_DIR_PSE36_MASK */
                gaddr |= (guest_pde & PT32_DIR_PSE36_MASK) <<
                        (32 - PT32_DIR_PSE36_SHIFT);
        *shadow_pte = guest_pde & PT_PTE_COPY_MASK;
        set_pte_common(vcpu, shadow_pte, gaddr,
-                      guest_pde & PT_DIRTY_MASK, access_bits);
-}
-
-/*
- * Fetch a guest pte from a specific level in the paging hierarchy.
- */
-static pt_element_t *FNAME(fetch_guest)(struct kvm_vcpu *vcpu,
-                                       struct guest_walker *walker,
-                                       int level,
-                                       gva_t addr)
-{
-
-       ASSERT(level > 0  && level <= walker->level);
-
-       for (;;) {
-               int index = PT_INDEX(addr, walker->level);
-               hpa_t paddr;
-
-               ASSERT(((unsigned long)walker->table & PAGE_MASK) ==
-                      ((unsigned long)&walker->table[index] & PAGE_MASK));
-               if (level == walker->level ||
-                   !is_present_pte(walker->table[index]) ||
-                   (walker->level == PT_DIRECTORY_LEVEL &&
-                    (walker->table[index] & PT_PAGE_SIZE_MASK) &&
-                    (PTTYPE == 64 || is_pse(vcpu))))
-                       return &walker->table[index];
-               if (walker->level != 3 || is_long_mode(vcpu))
-                       walker->inherited_ar &= walker->table[index];
-               paddr = safe_gpa_to_hpa(vcpu, walker->table[index] & PT_BASE_ADDR_MASK);
-               kunmap_atomic(walker->table, KM_USER0);
-               walker->table = kmap_atomic(pfn_to_page(paddr >> PAGE_SHIFT),
-                                           KM_USER0);
-               --walker->level;
-       }
+                      guest_pde & PT_DIRTY_MASK, access_bits, gfn);
 }
 
 /*
@@ -150,15 +189,26 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
        hpa_t shadow_addr;
        int level;
        u64 *prev_shadow_ent = NULL;
+       pt_element_t *guest_ent = walker->ptep;
+
+       if (!is_present_pte(*guest_ent))
+               return NULL;
 
        shadow_addr = vcpu->mmu.root_hpa;
        level = vcpu->mmu.shadow_root_level;
+       if (level == PT32E_ROOT_LEVEL) {
+               shadow_addr = vcpu->mmu.pae_root[(addr >> 30) & 3];
+               shadow_addr &= PT64_BASE_ADDR_MASK;
+               --level;
+       }
 
        for (; ; level--) {
                u32 index = SHADOW_PT_INDEX(addr, level);
                u64 *shadow_ent = ((u64 *)__va(shadow_addr)) + index;
-               pt_element_t *guest_ent;
+               struct kvm_mmu_page *shadow_page;
                u64 shadow_pte;
+               int metaphysical;
+               gfn_t table_gfn;
 
                if (is_present_pte(*shadow_ent) || is_io_pte(*shadow_ent)) {
                        if (level == PT_PAGE_TABLE_LEVEL)
@@ -168,21 +218,6 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
                        continue;
                }
 
-               if (PTTYPE == 32 && level > PT32_ROOT_LEVEL) {
-                       ASSERT(level == PT32E_ROOT_LEVEL);
-                       guest_ent = FNAME(fetch_guest)(vcpu, walker,
-                                                      PT32_ROOT_LEVEL, addr);
-               } else
-                       guest_ent = FNAME(fetch_guest)(vcpu, walker,
-                                                      level, addr);
-
-               if (!is_present_pte(*guest_ent))
-                       return NULL;
-
-               /* Don't set accessed bit on PAE PDPTRs */
-               if (vcpu->mmu.root_level != 3 || walker->level != 3)
-                       *guest_ent |= PT_ACCESSED_MASK;
-
                if (level == PT_PAGE_TABLE_LEVEL) {
 
                        if (walker->level == PT_DIRECTORY_LEVEL) {
@@ -190,21 +225,30 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
                                        *prev_shadow_ent |= PT_SHADOW_PS_MARK;
                                FNAME(set_pde)(vcpu, *guest_ent, shadow_ent,
                                               walker->inherited_ar,
-                                         PT_INDEX(addr, PT_PAGE_TABLE_LEVEL));
+                                              walker->gfn);
                        } else {
                                ASSERT(walker->level == PT_PAGE_TABLE_LEVEL);
-                               FNAME(set_pte)(vcpu, *guest_ent, shadow_ent, walker->inherited_ar);
+                               FNAME(set_pte)(vcpu, *guest_ent, shadow_ent,
+                                              walker->inherited_ar,
+                                              walker->gfn);
                        }
                        return shadow_ent;
                }
 
-               shadow_addr = kvm_mmu_alloc_page(vcpu, shadow_ent);
-               if (!VALID_PAGE(shadow_addr))
-                       return ERR_PTR(-ENOMEM);
-               shadow_pte = shadow_addr | PT_PRESENT_MASK;
-               if (vcpu->mmu.root_level > 3 || level != 3)
-                       shadow_pte |= PT_ACCESSED_MASK
-                               | PT_WRITABLE_MASK | PT_USER_MASK;
+               if (level - 1 == PT_PAGE_TABLE_LEVEL
+                   && walker->level == PT_DIRECTORY_LEVEL) {
+                       metaphysical = 1;
+                       table_gfn = (*guest_ent & PT_BASE_ADDR_MASK)
+                               >> PAGE_SHIFT;
+               } else {
+                       metaphysical = 0;
+                       table_gfn = walker->table_gfn[level - 2];
+               }
+               shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
+                                              metaphysical, shadow_ent);
+               shadow_addr = shadow_page->page_hpa;
+               shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK
+                       | PT_WRITABLE_MASK | PT_USER_MASK;
                *shadow_ent = shadow_pte;
                prev_shadow_ent = shadow_ent;
        }
@@ -221,11 +265,13 @@ static int FNAME(fix_write_pf)(struct kvm_vcpu *vcpu,
                               u64 *shadow_ent,
                               struct guest_walker *walker,
                               gva_t addr,
-                              int user)
+                              int user,
+                              int *write_pt)
 {
        pt_element_t *guest_ent;
        int writable_shadow;
        gfn_t gfn;
+       struct kvm_mmu_page *page;
 
        if (is_writeble_pte(*shadow_ent))
                return 0;
@@ -250,17 +296,35 @@ static int FNAME(fix_write_pf)(struct kvm_vcpu *vcpu,
                        *shadow_ent &= ~PT_USER_MASK;
                }
 
-       guest_ent = FNAME(fetch_guest)(vcpu, walker, PT_PAGE_TABLE_LEVEL, addr);
+       guest_ent = walker->ptep;
 
        if (!is_present_pte(*guest_ent)) {
                *shadow_ent = 0;
                return 0;
        }
 
-       gfn = (*guest_ent & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
+       gfn = walker->gfn;
+
+       if (user) {
+               /*
+                * Usermode page faults won't be for page table updates.
+                */
+               while ((page = kvm_mmu_lookup_page(vcpu, gfn)) != NULL) {
+                       pgprintk("%s: zap %lx %x\n",
+                                __FUNCTION__, gfn, page->role.word);
+                       kvm_mmu_zap_page(vcpu, page);
+               }
+       } else if (kvm_mmu_lookup_page(vcpu, gfn)) {
+               pgprintk("%s: found shadow page for %lx, marking ro\n",
+                        __FUNCTION__, gfn);
+               *guest_ent |= PT_DIRTY_MASK;
+               *write_pt = 1;
+               return 0;
+       }
        mark_page_dirty(vcpu->kvm, gfn);
        *shadow_ent |= PT_WRITABLE_MASK;
        *guest_ent |= PT_DIRTY_MASK;
+       rmap_add(vcpu, shadow_ent);
 
        return 1;
 }
@@ -276,7 +340,8 @@ static int FNAME(fix_write_pf)(struct kvm_vcpu *vcpu,
  *   - normal guest page fault due to the guest pte marked not present, not
  *     writable, or not executable
  *
- *  Returns: 1 if we need to emulate the instruction, 0 otherwise
+ *  Returns: 1 if we need to emulate the instruction, 0 otherwise, or
+ *           a negative value on error.
  */
 static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
                               u32 error_code)
@@ -287,39 +352,47 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
        struct guest_walker walker;
        u64 *shadow_pte;
        int fixed;
+       int write_pt = 0;
+       int r;
+
+       pgprintk("%s: addr %lx err %x\n", __FUNCTION__, addr, error_code);
+       kvm_mmu_audit(vcpu, "pre page fault");
+
+       r = mmu_topup_memory_caches(vcpu);
+       if (r)
+               return r;
 
        /*
         * Look up the shadow pte for the faulting address.
         */
-       for (;;) {
-               FNAME(init_walker)(&walker, vcpu);
-               shadow_pte = FNAME(fetch)(vcpu, addr, &walker);
-               if (IS_ERR(shadow_pte)) {  /* must be -ENOMEM */
-                       nonpaging_flush(vcpu);
-                       FNAME(release_walker)(&walker);
-                       continue;
-               }
-               break;
-       }
+       FNAME(walk_addr)(&walker, vcpu, addr);
+       shadow_pte = FNAME(fetch)(vcpu, addr, &walker);
 
        /*
         * The page is not mapped by the guest.  Let the guest handle it.
         */
        if (!shadow_pte) {
+               pgprintk("%s: not mapped\n", __FUNCTION__);
                inject_page_fault(vcpu, addr, error_code);
                FNAME(release_walker)(&walker);
                return 0;
        }
 
+       pgprintk("%s: shadow pte %p %llx\n", __FUNCTION__,
+                shadow_pte, *shadow_pte);
+
        /*
         * Update the shadow pte.
         */
        if (write_fault)
                fixed = FNAME(fix_write_pf)(vcpu, shadow_pte, &walker, addr,
-                                           user_fault);
+                                           user_fault, &write_pt);
        else
                fixed = fix_read_pf(shadow_pte);
 
+       pgprintk("%s: updated shadow pte %p %llx\n", __FUNCTION__,
+                shadow_pte, *shadow_pte);
+
        FNAME(release_walker)(&walker);
 
        /*
@@ -331,20 +404,23 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
                pgprintk("%s: io work, no access\n", __FUNCTION__);
                inject_page_fault(vcpu, addr,
                                  error_code | PFERR_PRESENT_MASK);
+               kvm_mmu_audit(vcpu, "post page fault (io)");
                return 0;
        }
 
        /*
         * pte not present, guest page fault.
         */
-       if (pte_present && !fixed) {
+       if (pte_present && !fixed && !write_pt) {
                inject_page_fault(vcpu, addr, error_code);
+               kvm_mmu_audit(vcpu, "post page fault (guest)");
                return 0;
        }
 
        ++kvm_stat.pf_fixed;
+       kvm_mmu_audit(vcpu, "post page fault (fixed)");
 
-       return 0;
+       return write_pt;
 }
 
 static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
@@ -353,9