Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
author	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 8 Apr 2014 19:02:28 +0000 (12:02 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 8 Apr 2014 19:02:28 +0000 (12:02 -0700)
Pull second set of s390 patches from Martin Schwidefsky:
 "The second part of Heikos uaccess rework, the page table walker for
  uaccess is now a thing of the past (yay!)

   The code change to fix the theoretical TLB flush problem allows us to
   add a TLB flush optimization for zEC12; this machine has new
   instructions that allow CPU-local TLB flushes for single pages and
   for all pages of a specific address space.

  Plus the usual bug fixing and some more cleanup"

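The zEC12 optimization described above recurs throughout the pgtable.h hunks below. As a condensed sketch of the pattern (reassembled from the diff, not verbatim kernel code): mm->context.attach_count packs two 16-bit counters into one atomic_t, the low half counting CPUs attached to the address space and the high half counting flushes in flight.

	active = (mm == current->active_mm) ? 1 : 0;
	count = atomic_add_return(0x10000, &mm->context.attach_count);
	if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		__ptep_ipte_local(address, ptep);	/* CPU-local flush */
	else
		__ptep_ipte(address, ptep);		/* broadcast flush */
	atomic_sub(0x10000, &mm->context.attach_count);

The local variant is used only when no other CPU is attached to the mm and its cpumask contains just the current CPU; otherwise the flush is broadcast as before.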
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux:
  s390/uaccess: rework uaccess code - fix locking issues
  s390/mm,tlb: optimize TLB flushing for zEC12
  s390/mm,tlb: safeguard against speculative TLB creation
  s390/irq: Use defines for external interruption codes
  s390/irq: Add defines for external interruption codes
  s390/sclp: add timeout for queued requests
  kvm/s390: also set guest pages back to stable on kexec/kdump
  lcs: Add missing destroy_timer_on_stack()
  s390/tape: Add missing destroy_timer_on_stack()
  s390/tape: Use del_timer_sync()
  s390/3270: fix crash with multiple reset device requests
  s390/bitops,atomic: add missing memory barriers
  s390/zcrypt: add length check for aligned data to avoid overflow in msg-type 6

arch/s390/include/asm/irq.h
arch/s390/include/asm/pgtable.h
arch/s390/kernel/irq.c
arch/s390/kernel/smp.c
arch/s390/kvm/diag.c
arch/s390/mm/fault.c
arch/s390/mm/pgtable.c
net/iucv/iucv.c

index 35f0faab53611214347cbec4129197747e38af18,763ccdcb70ba5b2aaebe9e41a10c4869059ce5bb..c4dd400a27917b7fa38a4e6daf66d91b5e88e688
  /* This number is used when no interrupt has been assigned */
  #define NO_IRQ                0
  
+ /* External interruption codes */
+ #define EXT_IRQ_INTERRUPT_KEY 0x0040
+ #define EXT_IRQ_CLK_COMP      0x1004
+ #define EXT_IRQ_CPU_TIMER     0x1005
+ #define EXT_IRQ_WARNING_TRACK 0x1007
+ #define EXT_IRQ_MALFUNC_ALERT 0x1200
+ #define EXT_IRQ_EMERGENCY_SIG 0x1201
+ #define EXT_IRQ_EXTERNAL_CALL 0x1202
+ #define EXT_IRQ_TIMING_ALERT  0x1406
+ #define EXT_IRQ_MEASURE_ALERT 0x1407
+ #define EXT_IRQ_SERVICE_SIG   0x2401
+ #define EXT_IRQ_CP_SERVICE    0x2603
+ #define EXT_IRQ_IUCV          0x4000
  #ifndef __ASSEMBLY__
  
  #include <linux/hardirq.h>
@@@ -53,7 -67,6 +67,7 @@@ enum interruption_class 
        IRQIO_PCI,
        IRQIO_MSI,
        IRQIO_VIR,
 +      IRQIO_VAI,
        NMI_NMI,
        CPU_RST,
        NR_ARCH_IRQS
@@@ -77,8 -90,8 +91,8 @@@ struct ext_code 
  
  typedef void (*ext_int_handler_t)(struct ext_code, unsigned int, unsigned long);
  
- int register_external_interrupt(u16 code, ext_int_handler_t handler);
- int unregister_external_interrupt(u16 code, ext_int_handler_t handler);
+ int register_external_irq(u16 code, ext_int_handler_t handler);
+ int unregister_external_irq(u16 code, ext_int_handler_t handler);
  
  enum irq_subclass {
        IRQ_SUBCLASS_MEASUREMENT_ALERT = 5,
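
With the rename from register_external_interrupt() to register_external_irq() and the EXT_IRQ_* constants added above, a caller now looks roughly like this (illustrative sketch only; the handler and init function are made-up names, the API is the one used in the smp.c and iucv.c hunks below):

	static void my_ext_handler(struct ext_code ext_code,
				   unsigned int param32, unsigned long param64)
	{
		/* runs on external interruption with the registered code */
	}

	static int __init my_driver_init(void)
	{
		/* e.g. request the service-signal external interruption */
		return register_external_irq(EXT_IRQ_SERVICE_SIG, my_ext_handler);
	}
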
index 50a75d96f9394faeb60a4ca5d8ca0f1411d754c0,66d51834f2cb24a4e74fd6e98cb3541fb45461cd..12f75313e086d4695ee768bde41beac4d3418de9
@@@ -782,7 -782,6 +782,7 @@@ static inline void pgste_set_pte(pte_t 
   * @table: pointer to the page directory
   * @asce: address space control element for gmap page table
   * @crst_list: list of all crst tables used in the guest address space
 + * @pfault_enabled: defines if pfaults are applicable for the guest
   */
  struct gmap {
        struct list_head list;
        unsigned long asce;
        void *private;
        struct list_head crst_list;
 +      bool pfault_enabled;
  };
  
  /**
@@@ -1070,12 -1068,35 +1070,35 @@@ static inline void __ptep_ipte(unsigne
                : "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address));
  }
  
+ static inline void __ptep_ipte_local(unsigned long address, pte_t *ptep)
+ {
+       unsigned long pto = (unsigned long) ptep;
+ #ifndef CONFIG_64BIT
+       /* pto in ESA mode must point to the start of the segment table */
+       pto &= 0x7ffffc00;
+ #endif
+       /* Invalidation + local TLB flush for the pte */
+       asm volatile(
+               "       .insn rrf,0xb2210000,%2,%3,0,1"
+               : "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address));
+ }
  static inline void ptep_flush_direct(struct mm_struct *mm,
                                     unsigned long address, pte_t *ptep)
  {
+       int active, count;
        if (pte_val(*ptep) & _PAGE_INVALID)
                return;
-       __ptep_ipte(address, ptep);
+       active = (mm == current->active_mm) ? 1 : 0;
+       count = atomic_add_return(0x10000, &mm->context.attach_count);
+       if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
+           cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
+               __ptep_ipte_local(address, ptep);
+       else
+               __ptep_ipte(address, ptep);
+       atomic_sub(0x10000, &mm->context.attach_count);
  }
  
  static inline void ptep_flush_lazy(struct mm_struct *mm,
@@@ -1384,35 -1405,6 +1407,6 @@@ static inline pmd_t *pmd_offset(pud_t *
  #define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
  #define pte_unmap(pte) do { } while (0)
  
- static inline void __pmd_idte(unsigned long address, pmd_t *pmdp)
- {
-       unsigned long sto = (unsigned long) pmdp -
-                           pmd_index(address) * sizeof(pmd_t);
-       if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)) {
-               asm volatile(
-                       "       .insn   rrf,0xb98e0000,%2,%3,0,0"
-                       : "=m" (*pmdp)
-                       : "m" (*pmdp), "a" (sto),
-                         "a" ((address & HPAGE_MASK))
-                       : "cc"
-               );
-       }
- }
- static inline void __pmd_csp(pmd_t *pmdp)
- {
-       register unsigned long reg2 asm("2") = pmd_val(*pmdp);
-       register unsigned long reg3 asm("3") = pmd_val(*pmdp) |
-                                              _SEGMENT_ENTRY_INVALID;
-       register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5;
-       asm volatile(
-               "       csp %1,%3"
-               : "=m" (*pmdp)
-               : "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc");
- }
  #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
  static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot)
  {
@@@ -1481,18 -1473,80 +1475,80 @@@ static inline pmd_t pmd_mkwrite(pmd_t p
  }
  #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */
  
+ static inline void __pmdp_csp(pmd_t *pmdp)
+ {
+       register unsigned long reg2 asm("2") = pmd_val(*pmdp);
+       register unsigned long reg3 asm("3") = pmd_val(*pmdp) |
+                                              _SEGMENT_ENTRY_INVALID;
+       register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5;
+       asm volatile(
+               "       csp %1,%3"
+               : "=m" (*pmdp)
+               : "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc");
+ }
+ static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp)
+ {
+       unsigned long sto;
+       sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t);
+       asm volatile(
+               "       .insn   rrf,0xb98e0000,%2,%3,0,0"
+               : "=m" (*pmdp)
+               : "m" (*pmdp), "a" (sto), "a" ((address & HPAGE_MASK))
+               : "cc" );
+ }
+ static inline void __pmdp_idte_local(unsigned long address, pmd_t *pmdp)
+ {
+       unsigned long sto;
+       sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t);
+       asm volatile(
+               "       .insn   rrf,0xb98e0000,%2,%3,0,1"
+               : "=m" (*pmdp)
+               : "m" (*pmdp), "a" (sto), "a" ((address & HPAGE_MASK))
+               : "cc" );
+ }
+ static inline void pmdp_flush_direct(struct mm_struct *mm,
+                                    unsigned long address, pmd_t *pmdp)
+ {
+       int active, count;
+       if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
+               return;
+       if (!MACHINE_HAS_IDTE) {
+               __pmdp_csp(pmdp);
+               return;
+       }
+       active = (mm == current->active_mm) ? 1 : 0;
+       count = atomic_add_return(0x10000, &mm->context.attach_count);
+       if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
+           cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
+               __pmdp_idte_local(address, pmdp);
+       else
+               __pmdp_idte(address, pmdp);
+       atomic_sub(0x10000, &mm->context.attach_count);
+ }
  static inline void pmdp_flush_lazy(struct mm_struct *mm,
                                   unsigned long address, pmd_t *pmdp)
  {
        int active, count;
  
+       if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
+               return;
        active = (mm == current->active_mm) ? 1 : 0;
        count = atomic_add_return(0x10000, &mm->context.attach_count);
        if ((count & 0xffff) <= active) {
                pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
                mm->context.flush_mm = 1;
-       } else
-               __pmd_idte(address, pmdp);
+       } else if (MACHINE_HAS_IDTE)
+               __pmdp_idte(address, pmdp);
+       else
+               __pmdp_csp(pmdp);
        atomic_sub(0x10000, &mm->context.attach_count);
  }
  
@@@ -1545,7 -1599,7 +1601,7 @@@ static inline int pmdp_test_and_clear_y
        pmd_t pmd;
  
        pmd = *pmdp;
-       __pmd_idte(address, pmdp);
+       pmdp_flush_direct(vma->vm_mm, address, pmdp);
        *pmdp = pmd_mkold(pmd);
        return pmd_young(pmd);
  }
@@@ -1556,7 -1610,7 +1612,7 @@@ static inline pmd_t pmdp_get_and_clear(
  {
        pmd_t pmd = *pmdp;
  
-       __pmd_idte(address, pmdp);
+       pmdp_flush_direct(mm, address, pmdp);
        pmd_clear(pmdp);
        return pmd;
  }
@@@ -1572,7 -1626,7 +1628,7 @@@ static inline pmd_t pmdp_clear_flush(st
  static inline void pmdp_invalidate(struct vm_area_struct *vma,
                                   unsigned long address, pmd_t *pmdp)
  {
-       __pmd_idte(address, pmdp);
+       pmdp_flush_direct(vma->vm_mm, address, pmdp);
  }
  
  #define __HAVE_ARCH_PMDP_SET_WRPROTECT
@@@ -1582,7 -1636,7 +1638,7 @@@ static inline void pmdp_set_wrprotect(s
        pmd_t pmd = *pmdp;
  
        if (pmd_write(pmd)) {
-               __pmd_idte(address, pmdp);
+               pmdp_flush_direct(mm, address, pmdp);
                set_pmd_at(mm, address, pmdp, pmd_wrprotect(pmd));
        }
  }
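
A note on the opcodes in these hunks: 0xb2210000 is IPTE (invalidate page table entry) and 0xb98e0000 is IDTE (invalidate DAT table entry); the final 0-or-1 operand in the .insn rrf templates selects the local-clearing form that the zEC12 TLB_LC facility provides, so the _local variants differ from the global ones only in that last field. The resulting pmd flush strategy, condensed (local_flush_is_safe() is a hypothetical shorthand for the attach_count/cpumask test sketched after the commit message):

	static void flush_pmd(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp)
	{
		if (!MACHINE_HAS_IDTE)
			__pmdp_csp(pmdp);		/* CSP purges the whole TLB */
		else if (local_flush_is_safe(mm))
			__pmdp_idte_local(addr, pmdp);	/* zEC12: this CPU only */
		else
			__pmdp_idte(addr, pmdp);	/* broadcast IDTE */
	}
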
diff --combined arch/s390/kernel/irq.c
index d42b14cc72a4516efa4c976a8e84ad47b97cbaa2,083617d739d81df198e30e099c3f04645f87bbc9..c7463aa0014b5a499d254028c77770f4ec4c7f07
@@@ -18,7 -18,6 +18,7 @@@
  #include <linux/errno.h>
  #include <linux/slab.h>
  #include <linux/cpu.h>
 +#include <linux/irq.h>
  #include <asm/irq_regs.h>
  #include <asm/cputime.h>
  #include <asm/lowcore.h>
@@@ -85,7 -84,6 +85,7 @@@ static const struct irq_class irqclass_
        [IRQIO_PCI]  = {.name = "PCI", .desc = "[I/O] PCI Interrupt" },
        [IRQIO_MSI]  = {.name = "MSI", .desc = "[I/O] MSI Interrupt" },
        [IRQIO_VIR]  = {.name = "VIR", .desc = "[I/O] Virtual I/O Devices"},
 +      [IRQIO_VAI]  = {.name = "VAI", .desc = "[I/O] Virtual I/O Devices AI"},
        [NMI_NMI]    = {.name = "NMI", .desc = "[NMI] Machine Check"},
        [CPU_RST]    = {.name = "RST", .desc = "[CPU] CPU Restart"},
  };
@@@ -207,7 -205,7 +207,7 @@@ static inline int ext_hash(u16 code
        return (code + (code >> 9)) & (ARRAY_SIZE(ext_int_hash) - 1);
  }
  
- int register_external_interrupt(u16 code, ext_int_handler_t handler)
+ int register_external_irq(u16 code, ext_int_handler_t handler)
  {
        struct ext_int_info *p;
        unsigned long flags;
        spin_unlock_irqrestore(&ext_int_hash_lock, flags);
        return 0;
  }
- EXPORT_SYMBOL(register_external_interrupt);
+ EXPORT_SYMBOL(register_external_irq);
  
- int unregister_external_interrupt(u16 code, ext_int_handler_t handler)
+ int unregister_external_irq(u16 code, ext_int_handler_t handler)
  {
        struct ext_int_info *p;
        unsigned long flags;
        spin_unlock_irqrestore(&ext_int_hash_lock, flags);
        return 0;
  }
- EXPORT_SYMBOL(unregister_external_interrupt);
+ EXPORT_SYMBOL(unregister_external_irq);
  
  static irqreturn_t do_ext_interrupt(int irq, void *dummy)
  {
        int index;
  
        ext_code = *(struct ext_code *) &regs->int_code;
-       if (ext_code.code != 0x1004)
+       if (ext_code.code != EXT_IRQ_CLK_COMP)
                __get_cpu_var(s390_idle).nohz_delay = 1;
  
        index = ext_hash(ext_code.code);
diff --combined arch/s390/kernel/smp.c
index 5a640b395bd4d37ced17d847fd01e6869b487363,42a501d13a3b94300527ad8b8f388fb5128f5def..512ce1cde2a4ca03c88350db6520581436968ba9
@@@ -236,6 -236,9 +236,9 @@@ static void pcpu_prepare_secondary(stru
  {
        struct _lowcore *lc = pcpu->lowcore;
  
+       if (MACHINE_HAS_TLB_LC)
+               cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask);
+       cpumask_set_cpu(cpu, mm_cpumask(&init_mm));
        atomic_inc(&init_mm.context.attach_count);
        lc->cpu_nr = cpu;
        lc->percpu_offset = __per_cpu_offset[cpu];
@@@ -760,6 -763,9 +763,9 @@@ void __cpu_die(unsigned int cpu
                cpu_relax();
        pcpu_free_lowcore(pcpu);
        atomic_dec(&init_mm.context.attach_count);
+       cpumask_clear_cpu(cpu, mm_cpumask(&init_mm));
+       if (MACHINE_HAS_TLB_LC)
+               cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask);
  }
  
  void __noreturn cpu_die(void)
@@@ -785,10 -791,10 +791,10 @@@ void __init smp_fill_possible_mask(void
  void __init smp_prepare_cpus(unsigned int max_cpus)
  {
        /* request the 0x1201 emergency signal external interrupt */
-       if (register_external_interrupt(0x1201, do_ext_call_interrupt) != 0)
+       if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt))
                panic("Couldn't request external interrupt 0x1201");
        /* request the 0x1202 external call external interrupt */
-       if (register_external_interrupt(0x1202, do_ext_call_interrupt) != 0)
+       if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt))
                panic("Couldn't request external interrupt 0x1202");
        smp_detect_cpus();
  }
@@@ -1057,24 -1063,19 +1063,24 @@@ static DEVICE_ATTR(rescan, 0200, NULL, 
  
  static int __init s390_smp_init(void)
  {
 -      int cpu, rc;
 +      int cpu, rc = 0;
  
 -      hotcpu_notifier(smp_cpu_notify, 0);
  #ifdef CONFIG_HOTPLUG_CPU
        rc = device_create_file(cpu_subsys.dev_root, &dev_attr_rescan);
        if (rc)
                return rc;
  #endif
 +      cpu_notifier_register_begin();
        for_each_present_cpu(cpu) {
                rc = smp_add_present_cpu(cpu);
                if (rc)
 -                      return rc;
 +                      goto out;
        }
 -      return 0;
 +
 +      __hotcpu_notifier(smp_cpu_notify, 0);
 +
 +out:
 +      cpu_notifier_register_done();
 +      return rc;
  }
  subsys_initcall(s390_smp_init);
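
The s390_smp_init() rework above adopts the CPU notifier registration protocol introduced in this kernel cycle: walking the present CPUs and registering the hotplug notifier now happen inside one cpu_notifier_register_begin()/cpu_notifier_register_done() section, so no CPU can appear or vanish between the walk and the registration. The same idiom shows up in the net/iucv hunks at the end of this diff. In outline (a sketch; setup_cpu_state() and my_nb stand in for the real per-cpu work and notifier block):

	cpu_notifier_register_begin();		/* holds off CPU hotplug */
	for_each_present_cpu(cpu)
		setup_cpu_state(cpu);		/* hypothetical per-cpu setup */
	__register_hotcpu_notifier(&my_nb);	/* __ variant: lock already held */
	cpu_notifier_register_done();
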
diff --combined arch/s390/kvm/diag.c
index 03a05ffb662f98d426302cffb4b08cb5cade7fac,1facd9277f38648830165769a1d1e63ad8d99b54..08dfc839a6cfeeb3655f64d850ce1ed6e60d49cc
@@@ -18,7 -18,6 +18,7 @@@
  #include "kvm-s390.h"
  #include "trace.h"
  #include "trace-s390.h"
 +#include "gaccess.h"
  
  static int diag_release_pages(struct kvm_vcpu *vcpu)
  {
        return 0;
  }
  
 +static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
 +{
 +      struct prs_parm {
 +              u16 code;
 +              u16 subcode;
 +              u16 parm_len;
 +              u16 parm_version;
 +              u64 token_addr;
 +              u64 select_mask;
 +              u64 compare_mask;
 +              u64 zarch;
 +      };
 +      struct prs_parm parm;
 +      int rc;
 +      u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4;
 +      u16 ry = (vcpu->arch.sie_block->ipa & 0x0f);
 +      unsigned long hva_token = KVM_HVA_ERR_BAD;
 +
 +      if (vcpu->run->s.regs.gprs[rx] & 7)
 +              return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 +      if (copy_from_guest(vcpu, &parm, vcpu->run->s.regs.gprs[rx], sizeof(parm)))
 +              return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 +      if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258)
 +              return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 +
 +      switch (parm.subcode) {
 +      case 0: /* TOKEN */
 +              if (vcpu->arch.pfault_token != KVM_S390_PFAULT_TOKEN_INVALID) {
 +                      /*
 +                       * If the pagefault handshake is already activated,
 +                       * the token must not be changed.  We have to return
 +                       * decimal 8 instead, as mandated in SC24-6084.
 +                       */
 +                      vcpu->run->s.regs.gprs[ry] = 8;
 +                      return 0;
 +              }
 +
 +              if ((parm.compare_mask & parm.select_mask) != parm.compare_mask ||
 +                  parm.token_addr & 7 || parm.zarch != 0x8000000000000000ULL)
 +                      return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 +
 +              hva_token = gfn_to_hva(vcpu->kvm, gpa_to_gfn(parm.token_addr));
 +              if (kvm_is_error_hva(hva_token))
 +                      return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 +
 +              vcpu->arch.pfault_token = parm.token_addr;
 +              vcpu->arch.pfault_select = parm.select_mask;
 +              vcpu->arch.pfault_compare = parm.compare_mask;
 +              vcpu->run->s.regs.gprs[ry] = 0;
 +              rc = 0;
 +              break;
 +      case 1: /*
 +               * CANCEL
 +               * Specification allows to let already pending tokens survive
 +               * the cancel, therefore to reduce code complexity, we assume
 +               * all outstanding tokens are already pending.
 +               */
 +              if (parm.token_addr || parm.select_mask ||
 +                  parm.compare_mask || parm.zarch)
 +                      return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 +
 +              vcpu->run->s.regs.gprs[ry] = 0;
 +              /*
 +               * If the pfault handling was not established or is already
 +               * canceled SC24-6084 requests to return decimal 4.
 +               */
 +              if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
 +                      vcpu->run->s.regs.gprs[ry] = 4;
 +              else
 +                      vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
 +
 +              rc = 0;
 +              break;
 +      default:
 +              rc = -EOPNOTSUPP;
 +              break;
 +      }
 +
 +      return rc;
 +}
 +
  static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
  {
        VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
@@@ -167,6 -85,10 +167,10 @@@ static int __diag_ipl_functions(struct 
  
        VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode);
        switch (subcode) {
+       case 0:
+       case 1:
+               page_table_reset_pgste(current->mm, 0, TASK_SIZE);
+               return -EOPNOTSUPP;
        case 3:
                vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR;
                page_table_reset_pgste(current->mm, 0, TASK_SIZE);
@@@ -235,8 -157,6 +239,8 @@@ int kvm_s390_handle_diag(struct kvm_vcp
                return __diag_time_slice_end(vcpu);
        case 0x9c:
                return __diag_time_slice_end_directed(vcpu);
 +      case 0x258:
 +              return __diag_page_ref_service(vcpu);
        case 0x308:
                return __diag_ipl_functions(vcpu);
        case 0x500:
diff --combined arch/s390/mm/fault.c
index 88cef505453bfb14aa21fabb745dcd781906ee5e,f93e6c2d4ba5e15117c79e732ca827d57d7b79c9..19f623f1f21c4134fc5f6a803d60b4a8d47f3b32
@@@ -50,7 -50,6 +50,7 @@@
  #define VM_FAULT_BADMAP               0x020000
  #define VM_FAULT_BADACCESS    0x040000
  #define VM_FAULT_SIGNAL               0x080000
 +#define VM_FAULT_PFAULT               0x100000
  
  static unsigned long store_indication __read_mostly;
  
@@@ -106,21 -105,24 +106,24 @@@ void bust_spinlocks(int yes
   * Returns the address space associated with the fault.
   * Returns 0 for kernel space and 1 for user space.
   */
- static inline int user_space_fault(unsigned long trans_exc_code)
+ static inline int user_space_fault(struct pt_regs *regs)
  {
+       unsigned long trans_exc_code;
        /*
         * The lowest two bits of the translation exception
         * identification indicate which paging table was used.
         */
-       trans_exc_code &= 3;
-       if (trans_exc_code == 2)
-               /* Access via secondary space, set_fs setting decides */
+       trans_exc_code = regs->int_parm_long & 3;
+       if (trans_exc_code == 3) /* home space -> kernel */
+               return 0;
+       if (user_mode(regs))
+               return 1;
+       if (trans_exc_code == 2) /* secondary space -> set_fs */
                return current->thread.mm_segment.ar4;
-       /*
-        * Access via primary space or access register is from user space
-        * and access via home space is from the kernel.
-        */
-       return trans_exc_code != 3;
+       if (current->flags & PF_VCPU)
+               return 1;
+       return 0;
  }
  
  static inline void report_user_fault(struct pt_regs *regs, long signr)
@@@ -172,7 -174,7 +175,7 @@@ static noinline void do_no_context(stru
         * terminate things with extreme prejudice.
         */
        address = regs->int_parm_long & __FAIL_ADDR_MASK;
-       if (!user_space_fault(regs->int_parm_long))
+       if (!user_space_fault(regs))
                printk(KERN_ALERT "Unable to handle kernel pointer dereference"
                       " at virtual kernel address %p\n", (void *)address);
        else
@@@ -228,7 -230,6 +231,7 @@@ static noinline void do_fault_error(str
                        return;
                }
        case VM_FAULT_BADCONTEXT:
 +      case VM_FAULT_PFAULT:
                do_no_context(regs);
                break;
        case VM_FAULT_SIGNAL:
   */
  static inline int do_exception(struct pt_regs *regs, int access)
  {
 +#ifdef CONFIG_PGSTE
 +      struct gmap *gmap;
 +#endif
        struct task_struct *tsk;
        struct mm_struct *mm;
        struct vm_area_struct *vma;
         * user context.
         */
        fault = VM_FAULT_BADCONTEXT;
-       if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm))
+       if (unlikely(!user_space_fault(regs) || in_atomic() || !mm))
                goto out;
  
        address = trans_exc_code & __FAIL_ADDR_MASK;
        down_read(&mm->mmap_sem);
  
  #ifdef CONFIG_PGSTE
 -      if ((current->flags & PF_VCPU) && S390_lowcore.gmap) {
 -              address = __gmap_fault(address,
 -                                   (struct gmap *) S390_lowcore.gmap);
 +      gmap = (struct gmap *)
 +              ((current->flags & PF_VCPU) ? S390_lowcore.gmap : 0);
 +      if (gmap) {
 +              address = __gmap_fault(address, gmap);
                if (address == -EFAULT) {
                        fault = VM_FAULT_BADMAP;
                        goto out_up;
                        fault = VM_FAULT_OOM;
                        goto out_up;
                }
 +              if (gmap->pfault_enabled)
 +                      flags |= FAULT_FLAG_RETRY_NOWAIT;
        }
  #endif
  
@@@ -379,19 -374,9 +382,19 @@@ retry
                                      regs, address);
                }
                if (fault & VM_FAULT_RETRY) {
 +#ifdef CONFIG_PGSTE
 +                      if (gmap && (flags & FAULT_FLAG_RETRY_NOWAIT)) {
 +                              /* FAULT_FLAG_RETRY_NOWAIT has been set,
 +                               * mmap_sem has not been released */
 +                              current->thread.gmap_pfault = 1;
 +                              fault = VM_FAULT_PFAULT;
 +                              goto out_up;
 +                      }
 +#endif
                        /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
                         * of starvation. */
 -                      flags &= ~FAULT_FLAG_ALLOW_RETRY;
 +                      flags &= ~(FAULT_FLAG_ALLOW_RETRY |
 +                                 FAULT_FLAG_RETRY_NOWAIT);
                        flags |= FAULT_FLAG_TRIED;
                        down_read(&mm->mmap_sem);
                        goto retry;
@@@ -441,30 -426,6 +444,6 @@@ void __kprobes do_dat_exception(struct 
                do_fault_error(regs, fault);
  }
  
- int __handle_fault(unsigned long uaddr, unsigned long pgm_int_code, int write)
- {
-       struct pt_regs regs;
-       int access, fault;
-       /* Emulate a uaccess fault from kernel mode. */
-       regs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT | PSW_MASK_MCHECK;
-       if (!irqs_disabled())
-               regs.psw.mask |= PSW_MASK_IO | PSW_MASK_EXT;
-       regs.psw.addr = (unsigned long) __builtin_return_address(0);
-       regs.psw.addr |= PSW_ADDR_AMODE;
-       regs.int_code = pgm_int_code;
-       regs.int_parm_long = (uaddr & PAGE_MASK) | 2;
-       access = write ? VM_WRITE : VM_READ;
-       fault = do_exception(&regs, access);
-       /*
-        * Since the fault happened in kernel mode while performing a uaccess
-        * all we need to do now is emulating a fixup in case "fault" is not
-        * zero.
-        * For the calling uaccess functions this results always in -EFAULT.
-        */
-       return fault ? -EFAULT : 0;
- }
  #ifdef CONFIG_PFAULT 
  /*
   * 'pfault' pseudo page faults routines.
@@@ -645,7 -606,7 +624,7 @@@ static int __init pfault_irq_init(void
  {
        int rc;
  
-       rc = register_external_interrupt(0x2603, pfault_interrupt);
+       rc = register_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt);
        if (rc)
                goto out_extint;
        rc = pfault_init() == 0 ? 0 : -EOPNOTSUPP;
        return 0;
  
  out_pfault:
-       unregister_external_interrupt(0x2603, pfault_interrupt);
+       unregister_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt);
  out_extint:
        pfault_disable = 1;
        return rc;
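
Taken together, the fault.c hunks wire guest pseudo-page-faults into the host fault handler: when the faulting context is a KVM guest with the pfault handshake enabled, the host asks the core fault code not to block, and where it would have had to, it reports the new VM_FAULT_PFAULT so KVM can inject a pfault-init interruption instead of stalling the vcpu. Condensed (a restatement of the hunks above, not verbatim):

	if (gmap && gmap->pfault_enabled)
		flags |= FAULT_FLAG_RETRY_NOWAIT;	/* never sleep for the guest */
	fault = handle_mm_fault(mm, vma, address, flags);
	if ((fault & VM_FAULT_RETRY) && (flags & FAULT_FLAG_RETRY_NOWAIT)) {
		current->thread.gmap_pfault = 1;	/* KVM injects pfault-init */
		fault = VM_FAULT_PFAULT;		/* handled like BADCONTEXT */
	}
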
diff --combined arch/s390/mm/pgtable.c
index 5d8324cd866b1b089e0193769d1b11c1585d0606,b5745dc9c6b506fb951ba6bf0de33e261c770297..d7cfd57815fbe484283819a98745f7e26e6d1d47
@@@ -54,7 -54,7 +54,7 @@@ static void __crst_table_upgrade(void *
        struct mm_struct *mm = arg;
  
        if (current->active_mm == mm)
-               update_mm(mm, current);
+               update_user_asce(mm, 1);
        __tlb_flush_local();
  }
  
@@@ -107,8 -107,10 +107,10 @@@ void crst_table_downgrade(struct mm_str
  {
        pgd_t *pgd;
  
-       if (current->active_mm == mm)
+       if (current->active_mm == mm) {
+               clear_user_asce(mm, 1);
                __tlb_flush_mm(mm);
+       }
        while (mm->context.asce_limit > limit) {
                pgd = mm->pgd;
                switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
                crst_table_free(mm, (unsigned long *) pgd);
        }
        if (current->active_mm == mm)
-               update_mm(mm, current);
+               update_user_asce(mm, 1);
  }
  #endif
  
@@@ -198,7 -200,7 +200,7 @@@ static int gmap_unlink_segment(struct g
  static void gmap_flush_tlb(struct gmap *gmap)
  {
        if (MACHINE_HAS_IDTE)
-               __tlb_flush_idte((unsigned long) gmap->table |
+               __tlb_flush_asce(gmap->mm, (unsigned long) gmap->table |
                                 _ASCE_TYPE_REGION1);
        else
                __tlb_flush_global();
@@@ -217,7 -219,7 +219,7 @@@ void gmap_free(struct gmap *gmap
  
        /* Flush tlb. */
        if (MACHINE_HAS_IDTE)
-               __tlb_flush_idte((unsigned long) gmap->table |
+               __tlb_flush_asce(gmap->mm, (unsigned long) gmap->table |
                                 _ASCE_TYPE_REGION1);
        else
                __tlb_flush_global();
@@@ -505,9 -507,6 +507,9 @@@ static int gmap_connect_pgtable(unsigne
        if (!pmd_present(*pmd) &&
            __pte_alloc(mm, vma, pmd, vmaddr))
                return -ENOMEM;
 +      /* large pmds cannot yet be handled */
 +      if (pmd_large(*pmd))
 +              return -EFAULT;
        /* pmd now points to a valid segment table entry. */
        rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT);
        if (!rmap)
diff --combined net/iucv/iucv.c
index 79a0ce95799fb26e7b96402afef67f4085dd10bf,12afba10a61f61549e39b1c775dc7401c4b846d4..da787930df0ab643a81bfa1f3f554efb2cd41bee
@@@ -621,42 -621,6 +621,42 @@@ static void iucv_disable(void
        put_online_cpus();
  }
  
 +static void free_iucv_data(int cpu)
 +{
 +      kfree(iucv_param_irq[cpu]);
 +      iucv_param_irq[cpu] = NULL;
 +      kfree(iucv_param[cpu]);
 +      iucv_param[cpu] = NULL;
 +      kfree(iucv_irq_data[cpu]);
 +      iucv_irq_data[cpu] = NULL;
 +}
 +
 +static int alloc_iucv_data(int cpu)
 +{
 +      /* Note: GFP_DMA used to get memory below 2G */
 +      iucv_irq_data[cpu] = kmalloc_node(sizeof(struct iucv_irq_data),
 +                           GFP_KERNEL|GFP_DMA, cpu_to_node(cpu));
 +      if (!iucv_irq_data[cpu])
 +              goto out_free;
 +
 +      /* Allocate parameter blocks. */
 +      iucv_param[cpu] = kmalloc_node(sizeof(union iucv_param),
 +                        GFP_KERNEL|GFP_DMA, cpu_to_node(cpu));
 +      if (!iucv_param[cpu])
 +              goto out_free;
 +
 +      iucv_param_irq[cpu] = kmalloc_node(sizeof(union iucv_param),
 +                        GFP_KERNEL|GFP_DMA, cpu_to_node(cpu));
 +      if (!iucv_param_irq[cpu])
 +              goto out_free;
 +
 +      return 0;
 +
 +out_free:
 +      free_iucv_data(cpu);
 +      return -ENOMEM;
 +}
 +
  static int iucv_cpu_notify(struct notifier_block *self,
                                     unsigned long action, void *hcpu)
  {
        switch (action) {
        case CPU_UP_PREPARE:
        case CPU_UP_PREPARE_FROZEN:
 -              iucv_irq_data[cpu] = kmalloc_node(sizeof(struct iucv_irq_data),
 -                                      GFP_KERNEL|GFP_DMA, cpu_to_node(cpu));
 -              if (!iucv_irq_data[cpu])
 -                      return notifier_from_errno(-ENOMEM);
 -
 -              iucv_param[cpu] = kmalloc_node(sizeof(union iucv_param),
 -                                   GFP_KERNEL|GFP_DMA, cpu_to_node(cpu));
 -              if (!iucv_param[cpu]) {
 -                      kfree(iucv_irq_data[cpu]);
 -                      iucv_irq_data[cpu] = NULL;
 +              if (alloc_iucv_data(cpu))
                        return notifier_from_errno(-ENOMEM);
 -              }
 -              iucv_param_irq[cpu] = kmalloc_node(sizeof(union iucv_param),
 -                                      GFP_KERNEL|GFP_DMA, cpu_to_node(cpu));
 -              if (!iucv_param_irq[cpu]) {
 -                      kfree(iucv_param[cpu]);
 -                      iucv_param[cpu] = NULL;
 -                      kfree(iucv_irq_data[cpu]);
 -                      iucv_irq_data[cpu] = NULL;
 -                      return notifier_from_errno(-ENOMEM);
 -              }
                break;
        case CPU_UP_CANCELED:
        case CPU_UP_CANCELED_FROZEN:
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
 -              kfree(iucv_param_irq[cpu]);
 -              iucv_param_irq[cpu] = NULL;
 -              kfree(iucv_param[cpu]);
 -              iucv_param[cpu] = NULL;
 -              kfree(iucv_irq_data[cpu]);
 -              iucv_irq_data[cpu] = NULL;
 +              free_iucv_data(cpu);
                break;
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
@@@ -2028,7 -2016,7 +2028,7 @@@ static int __init iucv_init(void
        rc = iucv_query_maxconn();
        if (rc)
                goto out_ctl;
-       rc = register_external_interrupt(0x4000, iucv_external_interrupt);
+       rc = register_external_irq(EXT_IRQ_IUCV, iucv_external_interrupt);
        if (rc)
                goto out_ctl;
        iucv_root = root_device_register("iucv");
                goto out_int;
        }
  
 -      for_each_online_cpu(cpu) {
 -              /* Note: GFP_DMA used to get memory below 2G */
 -              iucv_irq_data[cpu] = kmalloc_node(sizeof(struct iucv_irq_data),
 -                                   GFP_KERNEL|GFP_DMA, cpu_to_node(cpu));
 -              if (!iucv_irq_data[cpu]) {
 -                      rc = -ENOMEM;
 -                      goto out_free;
 -              }
 +      cpu_notifier_register_begin();
  
 -              /* Allocate parameter blocks. */
 -              iucv_param[cpu] = kmalloc_node(sizeof(union iucv_param),
 -                                GFP_KERNEL|GFP_DMA, cpu_to_node(cpu));
 -              if (!iucv_param[cpu]) {
 -                      rc = -ENOMEM;
 -                      goto out_free;
 -              }
 -              iucv_param_irq[cpu] = kmalloc_node(sizeof(union iucv_param),
 -                                GFP_KERNEL|GFP_DMA, cpu_to_node(cpu));
 -              if (!iucv_param_irq[cpu]) {
 +      for_each_online_cpu(cpu) {
 +              if (alloc_iucv_data(cpu)) {
                        rc = -ENOMEM;
                        goto out_free;
                }
 -
        }
 -      rc = register_hotcpu_notifier(&iucv_cpu_notifier);
 +      rc = __register_hotcpu_notifier(&iucv_cpu_notifier);
        if (rc)
                goto out_free;
 +
 +      cpu_notifier_register_done();
 +
        rc = register_reboot_notifier(&iucv_reboot_notifier);
        if (rc)
                goto out_cpu;
  out_reboot:
        unregister_reboot_notifier(&iucv_reboot_notifier);
  out_cpu:
 -      unregister_hotcpu_notifier(&iucv_cpu_notifier);
 +      cpu_notifier_register_begin();
 +      __unregister_hotcpu_notifier(&iucv_cpu_notifier);
  out_free:
 -      for_each_possible_cpu(cpu) {
 -              kfree(iucv_param_irq[cpu]);
 -              iucv_param_irq[cpu] = NULL;
 -              kfree(iucv_param[cpu]);
 -              iucv_param[cpu] = NULL;
 -              kfree(iucv_irq_data[cpu]);
 -              iucv_irq_data[cpu] = NULL;
 -      }
 +      for_each_possible_cpu(cpu)
 +              free_iucv_data(cpu);
 +
 +      cpu_notifier_register_done();
 +
        root_device_unregister(iucv_root);
  out_int:
-       unregister_external_interrupt(0x4000, iucv_external_interrupt);
+       unregister_external_irq(EXT_IRQ_IUCV, iucv_external_interrupt);
  out_ctl:
        ctl_clear_bit(0, 1);
  out:
@@@ -2102,14 -2105,18 +2102,14 @@@ static void __exit iucv_exit(void
                kfree(p);
        spin_unlock_irq(&iucv_queue_lock);
        unregister_reboot_notifier(&iucv_reboot_notifier);
 -      unregister_hotcpu_notifier(&iucv_cpu_notifier);
 -      for_each_possible_cpu(cpu) {
 -              kfree(iucv_param_irq[cpu]);
 -              iucv_param_irq[cpu] = NULL;
 -              kfree(iucv_param[cpu]);
 -              iucv_param[cpu] = NULL;
 -              kfree(iucv_irq_data[cpu]);
 -              iucv_irq_data[cpu] = NULL;
 -      }
 +      cpu_notifier_register_begin();
 +      __unregister_hotcpu_notifier(&iucv_cpu_notifier);
 +      for_each_possible_cpu(cpu)
 +              free_iucv_data(cpu);
 +      cpu_notifier_register_done();
        root_device_unregister(iucv_root);
        bus_unregister(&iucv_bus);
-       unregister_external_interrupt(0x4000, iucv_external_interrupt);
+       unregister_external_irq(EXT_IRQ_IUCV, iucv_external_interrupt);
  }
  
  subsys_initcall(iucv_init);