Merge branch 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband
[sfrench/cifs-2.6.git] / arch / i386 / kernel / paravirt.c
index e55fd05da0f521048c03bf68206f336e1cb6f94f..5c10f376bce1e36b437c6a76ba28e5a6c4eb0026 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/efi.h>
 #include <linux/bcd.h>
 #include <linux/start_kernel.h>
+#include <linux/highmem.h>
 
 #include <asm/bug.h>
 #include <asm/paravirt.h>
 #include <asm/fixmap.h>
 #include <asm/apic.h>
 #include <asm/tlbflush.h>
+#include <asm/timer.h>
 
 /* nop stub */
-static void native_nop(void)
+void _paravirt_nop(void)
 {
 }
 
@@ -53,334 +55,151 @@ char *memory_setup(void)
 #define DEF_NATIVE(name, code)                                 \
        extern const char start_##name[], end_##name[];         \
        asm("start_" #name ": " code "; end_" #name ":")
-DEF_NATIVE(cli, "cli");
-DEF_NATIVE(sti, "sti");
-DEF_NATIVE(popf, "push %eax; popf");
-DEF_NATIVE(pushf, "pushf; pop %eax");
-DEF_NATIVE(pushf_cli, "pushf; pop %eax; cli");
+
+DEF_NATIVE(irq_disable, "cli");
+DEF_NATIVE(irq_enable, "sti");
+DEF_NATIVE(restore_fl, "push %eax; popf");
+DEF_NATIVE(save_fl, "pushf; pop %eax");
 DEF_NATIVE(iret, "iret");
-DEF_NATIVE(sti_sysexit, "sti; sysexit");
+DEF_NATIVE(irq_enable_sysexit, "sti; sysexit");
+DEF_NATIVE(read_cr2, "mov %cr2, %eax");
+DEF_NATIVE(write_cr3, "mov %eax, %cr3");
+DEF_NATIVE(read_cr3, "mov %cr3, %eax");
+DEF_NATIVE(clts, "clts");
+DEF_NATIVE(read_tsc, "rdtsc");
 
-static const struct native_insns
-{
-       const char *start, *end;
-} native_insns[] = {
-       [PARAVIRT_IRQ_DISABLE] = { start_cli, end_cli },
-       [PARAVIRT_IRQ_ENABLE] = { start_sti, end_sti },
-       [PARAVIRT_RESTORE_FLAGS] = { start_popf, end_popf },
-       [PARAVIRT_SAVE_FLAGS] = { start_pushf, end_pushf },
-       [PARAVIRT_SAVE_FLAGS_IRQ_DISABLE] = { start_pushf_cli, end_pushf_cli },
-       [PARAVIRT_INTERRUPT_RETURN] = { start_iret, end_iret },
-       [PARAVIRT_STI_SYSEXIT] = { start_sti_sysexit, end_sti_sysexit },
-};
+DEF_NATIVE(ud2a, "ud2a");
 
 static unsigned native_patch(u8 type, u16 clobbers, void *insns, unsigned len)
 {
-       unsigned int insn_len;
-
-       /* Don't touch it if we don't have a replacement */
-       if (type >= ARRAY_SIZE(native_insns) || !native_insns[type].start)
-               return len;
-
-       insn_len = native_insns[type].end - native_insns[type].start;
-
-       /* Similarly if we can't fit replacement. */
-       if (len < insn_len)
-               return len;
+       const unsigned char *start, *end;
+       unsigned ret;
+
+       switch(type) {
+#define SITE(x)        case PARAVIRT_PATCH(x): start = start_##x; end = end_##x; goto patch_site
+               SITE(irq_disable);
+               SITE(irq_enable);
+               SITE(restore_fl);
+               SITE(save_fl);
+               SITE(iret);
+               SITE(irq_enable_sysexit);
+               SITE(read_cr2);
+               SITE(read_cr3);
+               SITE(write_cr3);
+               SITE(clts);
+               SITE(read_tsc);
+#undef SITE
+
+       patch_site:
+               ret = paravirt_patch_insns(insns, len, start, end);
+               break;
 
-       memcpy(insns, native_insns[type].start, insn_len);
-       return insn_len;
-}
+       case PARAVIRT_PATCH(make_pgd):
+       case PARAVIRT_PATCH(make_pte):
+       case PARAVIRT_PATCH(pgd_val):
+       case PARAVIRT_PATCH(pte_val):
+#ifdef CONFIG_X86_PAE
+       case PARAVIRT_PATCH(make_pmd):
+       case PARAVIRT_PATCH(pmd_val):
+#endif
+               /* These functions end up returning exactly what
+                  they're passed, in the same registers. */
+               ret = paravirt_patch_nop();
+               break;
 
-static fastcall unsigned long native_get_debugreg(int regno)
-{
-       unsigned long val = 0;  /* Damn you, gcc! */
-
-       switch (regno) {
-       case 0:
-               asm("movl %%db0, %0" :"=r" (val)); break;
-       case 1:
-               asm("movl %%db1, %0" :"=r" (val)); break;
-       case 2:
-               asm("movl %%db2, %0" :"=r" (val)); break;
-       case 3:
-               asm("movl %%db3, %0" :"=r" (val)); break;
-       case 6:
-               asm("movl %%db6, %0" :"=r" (val)); break;
-       case 7:
-               asm("movl %%db7, %0" :"=r" (val)); break;
        default:
-               BUG();
-       }
-       return val;
-}
-
-static fastcall void native_set_debugreg(int regno, unsigned long value)
-{
-       switch (regno) {
-       case 0:
-               asm("movl %0,%%db0"     : /* no output */ :"r" (value));
-               break;
-       case 1:
-               asm("movl %0,%%db1"     : /* no output */ :"r" (value));
-               break;
-       case 2:
-               asm("movl %0,%%db2"     : /* no output */ :"r" (value));
+               ret = paravirt_patch_default(type, clobbers, insns, len);
                break;
-       case 3:
-               asm("movl %0,%%db3"     : /* no output */ :"r" (value));
-               break;
-       case 6:
-               asm("movl %0,%%db6"     : /* no output */ :"r" (value));
-               break;
-       case 7:
-               asm("movl %0,%%db7"     : /* no output */ :"r" (value));
-               break;
-       default:
-               BUG();
        }
-}
-
-void init_IRQ(void)
-{
-       paravirt_ops.init_IRQ();
-}
-
-static fastcall void native_clts(void)
-{
-       asm volatile ("clts");
-}
-
-static fastcall unsigned long native_read_cr0(void)
-{
-       unsigned long val;
-       asm volatile("movl %%cr0,%0\n\t" :"=r" (val));
-       return val;
-}
-
-static fastcall void native_write_cr0(unsigned long val)
-{
-       asm volatile("movl %0,%%cr0": :"r" (val));
-}
-
-static fastcall unsigned long native_read_cr2(void)
-{
-       unsigned long val;
-       asm volatile("movl %%cr2,%0\n\t" :"=r" (val));
-       return val;
-}
-
-static fastcall void native_write_cr2(unsigned long val)
-{
-       asm volatile("movl %0,%%cr2": :"r" (val));
-}
-
-static fastcall unsigned long native_read_cr3(void)
-{
-       unsigned long val;
-       asm volatile("movl %%cr3,%0\n\t" :"=r" (val));
-       return val;
-}
-
-static fastcall void native_write_cr3(unsigned long val)
-{
-       asm volatile("movl %0,%%cr3": :"r" (val));
-}
-
-static fastcall unsigned long native_read_cr4(void)
-{
-       unsigned long val;
-       asm volatile("movl %%cr4,%0\n\t" :"=r" (val));
-       return val;
-}
-
-static fastcall unsigned long native_read_cr4_safe(void)
-{
-       unsigned long val;
-       /* This could fault if %cr4 does not exist */
-       asm("1: movl %%cr4, %0          \n"
-               "2:                             \n"
-               ".section __ex_table,\"a\"      \n"
-               ".long 1b,2b                    \n"
-               ".previous                      \n"
-               : "=r" (val): "0" (0));
-       return val;
-}
-
-static fastcall void native_write_cr4(unsigned long val)
-{
-       asm volatile("movl %0,%%cr4": :"r" (val));
-}
-
-static fastcall unsigned long native_save_fl(void)
-{
-       unsigned long f;
-       asm volatile("pushfl ; popl %0":"=g" (f): /* no input */);
-       return f;
-}
-
-static fastcall void native_restore_fl(unsigned long f)
-{
-       asm volatile("pushl %0 ; popfl": /* no output */
-                            :"g" (f)
-                            :"memory", "cc");
-}
-
-static fastcall void native_irq_disable(void)
-{
-       asm volatile("cli": : :"memory");
-}
-
-static fastcall void native_irq_enable(void)
-{
-       asm volatile("sti": : :"memory");
-}
-
-static fastcall void native_safe_halt(void)
-{
-       asm volatile("sti; hlt": : :"memory");
-}
-
-static fastcall void native_halt(void)
-{
-       asm volatile("hlt": : :"memory");
-}
 
-static fastcall void native_wbinvd(void)
-{
-       asm volatile("wbinvd": : :"memory");
+       return ret;
 }
 
-static fastcall unsigned long long native_read_msr(unsigned int msr, int *err)
+unsigned paravirt_patch_nop(void)
 {
-       unsigned long long val;
-
-       asm volatile("2: rdmsr ; xorl %0,%0\n"
-                    "1:\n\t"
-                    ".section .fixup,\"ax\"\n\t"
-                    "3:  movl %3,%0 ; jmp 1b\n\t"
-                    ".previous\n\t"
-                    ".section __ex_table,\"a\"\n"
-                    "   .align 4\n\t"
-                    "   .long  2b,3b\n\t"
-                    ".previous"
-                    : "=r" (*err), "=A" (val)
-                    : "c" (msr), "i" (-EFAULT));
-
-       return val;
+       return 0;
 }
 
-static fastcall int native_write_msr(unsigned int msr, unsigned long long val)
+unsigned paravirt_patch_ignore(unsigned len)
 {
-       int err;
-       asm volatile("2: wrmsr ; xorl %0,%0\n"
-                    "1:\n\t"
-                    ".section .fixup,\"ax\"\n\t"
-                    "3:  movl %4,%0 ; jmp 1b\n\t"
-                    ".previous\n\t"
-                    ".section __ex_table,\"a\"\n"
-                    "   .align 4\n\t"
-                    "   .long  2b,3b\n\t"
-                    ".previous"
-                    : "=a" (err)
-                    : "c" (msr), "0" ((u32)val), "d" ((u32)(val>>32)),
-                      "i" (-EFAULT));
-       return err;
+       return len;
 }
 
-static fastcall unsigned long long native_read_tsc(void)
+unsigned paravirt_patch_call(void *target, u16 tgt_clobbers,
+                            void *site, u16 site_clobbers,
+                            unsigned len)
 {
-       unsigned long long val;
-       asm volatile("rdtsc" : "=A" (val));
-       return val;
-}
+       unsigned char *call = site;
+       unsigned long delta = (unsigned long)target - (unsigned long)(call+5);
 
-static fastcall unsigned long long native_read_pmc(void)
-{
-       unsigned long long val;
-       asm volatile("rdpmc" : "=A" (val));
-       return val;
-}
+       if (tgt_clobbers & ~site_clobbers)
+               return len;     /* target would clobber too much for this site */
+       if (len < 5)
+               return len;     /* call too long for patch site */
 
-static fastcall void native_load_tr_desc(void)
-{
-       asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
-}
+       *call++ = 0xe8;         /* call */
+       *(unsigned long *)call = delta;
 
-static fastcall void native_load_gdt(const struct Xgt_desc_struct *dtr)
-{
-       asm volatile("lgdt %0"::"m" (*dtr));
+       return 5;
 }
 
-static fastcall void native_load_idt(const struct Xgt_desc_struct *dtr)
+unsigned paravirt_patch_jmp(void *target, void *site, unsigned len)
 {
-       asm volatile("lidt %0"::"m" (*dtr));
-}
+       unsigned char *jmp = site;
+       unsigned long delta = (unsigned long)target - (unsigned long)(jmp+5);
 
-static fastcall void native_store_gdt(struct Xgt_desc_struct *dtr)
-{
-       asm ("sgdt %0":"=m" (*dtr));
-}
+       if (len < 5)
+               return len;     /* call too long for patch site */
 
-static fastcall void native_store_idt(struct Xgt_desc_struct *dtr)
-{
-       asm ("sidt %0":"=m" (*dtr));
-}
+       *jmp++ = 0xe9;          /* jmp */
+       *(unsigned long *)jmp = delta;
 
-static fastcall unsigned long native_store_tr(void)
-{
-       unsigned long tr;
-       asm ("str %0":"=r" (tr));
-       return tr;
-}
-
-static fastcall void native_load_tls(struct thread_struct *t, unsigned int cpu)
-{
-#define C(i) get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]
-       C(0); C(1); C(2);
-#undef C
+       return 5;
 }
 
-static inline void native_write_dt_entry(void *dt, int entry, u32 entry_low, u32 entry_high)
+unsigned paravirt_patch_default(u8 type, u16 clobbers, void *site, unsigned len)
 {
-       u32 *lp = (u32 *)((char *)dt + entry*8);
-       lp[0] = entry_low;
-       lp[1] = entry_high;
-}
+       void *opfunc = *((void **)&paravirt_ops + type);
+       unsigned ret;
 
-static fastcall void native_write_ldt_entry(void *dt, int entrynum, u32 low, u32 high)
-{
-       native_write_dt_entry(dt, entrynum, low, high);
-}
+       if (opfunc == NULL)
+               /* If there's no function, patch it with a ud2a (BUG) */
+               ret = paravirt_patch_insns(site, len, start_ud2a, end_ud2a);
+       else if (opfunc == paravirt_nop)
+               /* If the operation is a nop, then nop the callsite */
+               ret = paravirt_patch_nop();
+       else if (type == PARAVIRT_PATCH(iret) ||
+                type == PARAVIRT_PATCH(irq_enable_sysexit))
+               /* If operation requires a jmp, then jmp */
+               ret = paravirt_patch_jmp(opfunc, site, len);
+       else
+               /* Otherwise call the function; assume target could
+                  clobber any caller-save reg */
+               ret = paravirt_patch_call(opfunc, CLBR_ANY,
+                                         site, clobbers, len);
 
-static fastcall void native_write_gdt_entry(void *dt, int entrynum, u32 low, u32 high)
-{
-       native_write_dt_entry(dt, entrynum, low, high);
+       return ret;
 }
 
-static fastcall void native_write_idt_entry(void *dt, int entrynum, u32 low, u32 high)
+unsigned paravirt_patch_insns(void *site, unsigned len,
+                             const char *start, const char *end)
 {
-       native_write_dt_entry(dt, entrynum, low, high);
-}
+       unsigned insn_len = end - start;
 
-static fastcall void native_load_esp0(struct tss_struct *tss,
-                                     struct thread_struct *thread)
-{
-       tss->esp0 = thread->esp0;
+       if (insn_len > len || start == NULL)
+               insn_len = len;
+       else
+               memcpy(site, start, insn_len);
 
-       /* This can only happen when SEP is enabled, no need to test "SEP"arately */
-       if (unlikely(tss->ss1 != thread->sysenter_cs)) {
-               tss->ss1 = thread->sysenter_cs;
-               wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
-       }
+       return insn_len;
 }
 
-static fastcall void native_io_delay(void)
+void init_IRQ(void)
 {
-       asm volatile("outb %al,$0x80");
+       paravirt_ops.init_IRQ();
 }
 
-static fastcall void native_flush_tlb(void)
+static void native_flush_tlb(void)
 {
        __native_flush_tlb();
 }
@@ -389,91 +208,19 @@ static fastcall void native_flush_tlb(void)
  * Global pages have to be flushed a bit differently. Not a real
  * performance problem because this does not happen often.
  */
-static fastcall void native_flush_tlb_global(void)
+static void native_flush_tlb_global(void)
 {
        __native_flush_tlb_global();
 }
 
-static fastcall void native_flush_tlb_single(u32 addr)
+static void native_flush_tlb_single(unsigned long addr)
 {
        __native_flush_tlb_single(addr);
 }
 
-#ifndef CONFIG_X86_PAE
-static fastcall void native_set_pte(pte_t *ptep, pte_t pteval)
-{
-       *ptep = pteval;
-}
-
-static fastcall void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval)
-{
-       *ptep = pteval;
-}
-
-static fastcall void native_set_pmd(pmd_t *pmdp, pmd_t pmdval)
-{
-       *pmdp = pmdval;
-}
-
-#else /* CONFIG_X86_PAE */
-
-static fastcall void native_set_pte(pte_t *ptep, pte_t pte)
-{
-       ptep->pte_high = pte.pte_high;
-       smp_wmb();
-       ptep->pte_low = pte.pte_low;
-}
-
-static fastcall void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte)
-{
-       ptep->pte_high = pte.pte_high;
-       smp_wmb();
-       ptep->pte_low = pte.pte_low;
-}
-
-static fastcall void native_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
-{
-       ptep->pte_low = 0;
-       smp_wmb();
-       ptep->pte_high = pte.pte_high;
-       smp_wmb();
-       ptep->pte_low = pte.pte_low;
-}
-
-static fastcall void native_set_pte_atomic(pte_t *ptep, pte_t pteval)
-{
-       set_64bit((unsigned long long *)ptep,pte_val(pteval));
-}
-
-static fastcall void native_set_pmd(pmd_t *pmdp, pmd_t pmdval)
-{
-       set_64bit((unsigned long long *)pmdp,pmd_val(pmdval));
-}
-
-static fastcall void native_set_pud(pud_t *pudp, pud_t pudval)
-{
-       *pudp = pudval;
-}
-
-static fastcall void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
-{
-       ptep->pte_low = 0;
-       smp_wmb();
-       ptep->pte_high = 0;
-}
-
-static fastcall void native_pmd_clear(pmd_t *pmd)
-{
-       u32 *tmp = (u32 *)pmd;
-       *tmp = 0;
-       smp_wmb();
-       *(tmp + 1) = 0;
-}
-#endif /* CONFIG_X86_PAE */
-
 /* These are in entry.S */
-extern fastcall void native_iret(void);
-extern fastcall void native_irq_enable_sysexit(void);
+extern void native_iret(void);
+extern void native_irq_enable_sysexit(void);
 
 static int __init print_banner(void)
 {
@@ -482,21 +229,19 @@ static int __init print_banner(void)
 }
 core_initcall(print_banner);
 
-/* We simply declare start_kernel to be the paravirt probe of last resort. */
-paravirt_probe(start_kernel);
-
 struct paravirt_ops paravirt_ops = {
        .name = "bare hardware",
        .paravirt_enabled = 0,
        .kernel_rpl = 0,
+       .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */
 
        .patch = native_patch,
        .banner = default_banner,
-       .arch_setup = native_nop,
+       .arch_setup = paravirt_nop,
        .memory_setup = machine_specific_memory_setup,
        .get_wallclock = native_get_wallclock,
        .set_wallclock = native_set_wallclock,
-       .time_init = time_init_hook,
+       .time_init = hpet_time_init,
        .init_IRQ = native_init_IRQ,
 
        .cpuid = native_cpuid,
@@ -519,10 +264,12 @@ struct paravirt_ops paravirt_ops = {
        .safe_halt = native_safe_halt,
        .halt = native_halt,
        .wbinvd = native_wbinvd,
-       .read_msr = native_read_msr,
-       .write_msr = native_write_msr,
+       .read_msr = native_read_msr_safe,
+       .write_msr = native_write_msr_safe,
        .read_tsc = native_read_tsc,
        .read_pmc = native_read_pmc,
+       .get_scheduled_cycles = native_read_tsc,
+       .get_cpu_khz = native_calculate_cpu_khz,
        .load_tr_desc = native_load_tr_desc,
        .set_ldt = native_set_ldt,
        .load_gdt = native_load_gdt,
@@ -531,46 +278,71 @@ struct paravirt_ops paravirt_ops = {
        .store_idt = native_store_idt,
        .store_tr = native_store_tr,
        .load_tls = native_load_tls,
-       .write_ldt_entry = native_write_ldt_entry,
-       .write_gdt_entry = native_write_gdt_entry,
-       .write_idt_entry = native_write_idt_entry,
+       .write_ldt_entry = write_dt_entry,
+       .write_gdt_entry = write_dt_entry,
+       .write_idt_entry = write_dt_entry,
        .load_esp0 = native_load_esp0,
 
        .set_iopl_mask = native_set_iopl_mask,
        .io_delay = native_io_delay,
-       .const_udelay = __const_udelay,
 
 #ifdef CONFIG_X86_LOCAL_APIC
        .apic_write = native_apic_write,
        .apic_write_atomic = native_apic_write_atomic,
        .apic_read = native_apic_read,
+       .setup_boot_clock = setup_boot_APIC_clock,
+       .setup_secondary_clock = setup_secondary_APIC_clock,
+       .startup_ipi_hook = paravirt_nop,
 #endif
+       .set_lazy_mode = paravirt_nop,
+
+       .pagetable_setup_start = native_pagetable_setup_start,
+       .pagetable_setup_done = native_pagetable_setup_done,
 
        .flush_tlb_user = native_flush_tlb,
        .flush_tlb_kernel = native_flush_tlb_global,
        .flush_tlb_single = native_flush_tlb_single,
+       .flush_tlb_others = native_flush_tlb_others,
+
+       .alloc_pt = paravirt_nop,
+       .alloc_pd = paravirt_nop,
+       .alloc_pd_clone = paravirt_nop,
+       .release_pt = paravirt_nop,
+       .release_pd = paravirt_nop,
 
        .set_pte = native_set_pte,
        .set_pte_at = native_set_pte_at,
        .set_pmd = native_set_pmd,
-       .pte_update = (void *)native_nop,
-       .pte_update_defer = (void *)native_nop,
+       .pte_update = paravirt_nop,
+       .pte_update_defer = paravirt_nop,
+
+#ifdef CONFIG_HIGHPTE
+       .kmap_atomic_pte = kmap_atomic,
+#endif
+
 #ifdef CONFIG_X86_PAE
        .set_pte_atomic = native_set_pte_atomic,
        .set_pte_present = native_set_pte_present,
        .set_pud = native_set_pud,
        .pte_clear = native_pte_clear,
        .pmd_clear = native_pmd_clear,
+
+       .pmd_val = native_pmd_val,
+       .make_pmd = native_make_pmd,
 #endif
 
+       .pte_val = native_pte_val,
+       .pgd_val = native_pgd_val,
+
+       .make_pte = native_make_pte,
+       .make_pgd = native_make_pgd,
+
        .irq_enable_sysexit = native_irq_enable_sysexit,
        .iret = native_iret,
+
+       .dup_mmap = paravirt_nop,
+       .exit_mmap = paravirt_nop,
+       .activate_mm = paravirt_nop,
 };
 
-/*
- * NOTE: CONFIG_PARAVIRT is experimental and the paravirt_ops
- * semantics are subject to change. Hence we only do this
- * internal-only export of this, until it gets sorted out and
- * all lowlevel CPU ops used by modules are separately exported.
- */
-EXPORT_SYMBOL_GPL(paravirt_ops);
+EXPORT_SYMBOL(paravirt_ops);