Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 3 Sep 2015 22:46:07 +0000 (15:46 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 3 Sep 2015 22:46:07 +0000 (15:46 -0700)
Pull locking and atomic updates from Ingo Molnar:
 "Main changes in this cycle are:

   - Extend atomic primitives with coherent logic op primitives
     (atomic_{or,and,xor}()) and deprecate the old partial APIs
     (atomic_{set,clear}_mask())

      The old ops were incoherent, with incompatible signatures across
      architectures and incomplete coverage.  Now every architecture
      supports the new primitives consistently (by Peter Zijlstra)
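
      As a conversion sketch, the deprecated calls map onto the new
      primitives like this (both lines are lifted from the s390 KVM
      hunks further below; the 'was' comments are added for
      illustration):

        /* was: atomic_set_mask(...) */
        atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
        /* was: atomic_clear_mask(...) */
        atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);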

   - Generic support for 'relaxed atomics':

       - _acquire/release/relaxed() flavours of xchg(), cmpxchg() and {add,sub}_return()
       - atomic_read_acquire()
       - atomic_set_release()

      This came out of porting the qrwlock code to arm64 (by Will Deacon)
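
      An illustrative pairing (a sketch; 'nr_pending' and 'published'
      are hypothetical atomic_t variables, not from this series):

        val  = atomic_add_return_relaxed(1, &nr_pending); /* no ordering   */
        atomic_set_release(&published, 1);                /* RELEASE store */
        seen = atomic_read_acquire(&published);           /* ACQUIRE load  */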

   - Clean up the fragile static_key APIs that were causing recurring
      bugs, by introducing a new one:

       DEFINE_STATIC_KEY_TRUE(name);
       DEFINE_STATIC_KEY_FALSE(name);

      which define keys of two distinct types, with an initial true or
      false value respectively.

     Then allow:

       static_branch_likely()
       static_branch_unlikely()

      to take a key of either type and emit the right instruction for
      each case.  To know the 'type' of a static key at patching time,
      it is now encoded in the jump entry (by Peter Zijlstra)
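
      A combined sketch ('my_key' and the slow path are made up for
      illustration; the runtime flip mirrors the have_mvcos conversion
      in the s390 uaccess hunk below):

        DEFINE_STATIC_KEY_FALSE(my_key);

        if (static_branch_unlikely(&my_key))
                do_slow_path();          /* a NOP until the key is enabled */

        static_branch_enable(&my_key);   /* flip the branch at runtime */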

   - Static key self-tests (by Jason Baron)

   - qrwlock optimizations (by Waiman Long)

   - Small futex enhancements (by Davidlohr Bueso)

   - ... and misc other changes"

* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (63 commits)
  jump_label/x86: Work around asm build bug on older/backported GCCs
  locking, ARM, atomics: Define our SMP atomics in terms of _relaxed() operations
  locking, include/llist: Use linux/atomic.h instead of asm/cmpxchg.h
  locking/qrwlock: Make use of _{acquire|release|relaxed}() atomics
  locking/qrwlock: Implement queue_write_unlock() using smp_store_release()
  locking/lockref: Remove homebrew cmpxchg64_relaxed() macro definition
  locking, asm-generic: Add _{relaxed|acquire|release}() variants for 'atomic_long_t'
  locking, asm-generic: Rework atomic-long.h to avoid bulk code duplication
  locking/atomics: Add _{acquire|release|relaxed}() variants of some atomic operations
  locking, compiler.h: Cast away attributes in the WRITE_ONCE() magic
  locking/static_keys: Make verify_keys() static
  jump label, locking/static_keys: Update docs
  locking/static_keys: Provide a selftest
  jump_label: Provide a self-test
  s390/uaccess, locking/static_keys: employ static_branch_likely()
  x86, tsc, locking/static_keys: Employ static_branch_likely()
  locking/static_keys: Add selftest
  locking/static_keys: Add a new static_key interface
  locking/static_keys: Rework update logic
  locking/static_keys: Add static_key_{en,dis}able() helpers
  ...
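
The queue_write_unlock() entry above shows the acquire/release work in
miniature: the generic writer-unlock path reduces to a single
store-release of the lock word's writer byte.  A minimal sketch of that
approach (not necessarily the exact upstream code):

        static inline void queue_write_unlock(struct qrwlock *lock)
        {
                /* writers own the least significant byte of ->cnts */
                smp_store_release((u8 *)&lock->cnts, 0);
        }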

14 files changed:
Documentation/memory-barriers.txt
arch/arc/include/asm/atomic.h
arch/s390/kernel/jump_label.c
arch/s390/kernel/time.c
arch/s390/kvm/interrupt.c
arch/s390/kvm/kvm-s390.c
arch/s390/lib/uaccess.c
arch/sparc/lib/ksyms.c
arch/x86/kernel/tsc.c
arch/xtensa/include/asm/atomic.h
drivers/s390/scsi/zfcp_fsf.c
kernel/sched/core.c
lib/Kconfig.debug
lib/Makefile

diff --cc Documentation/memory-barriers.txt
Simple merge
diff --cc arch/arc/include/asm/atomic.h
Simple merge
diff --cc arch/s390/kernel/jump_label.c
Simple merge
diff --cc arch/s390/kernel/time.c
Simple merge
diff --cc arch/s390/kvm/interrupt.c
index b277d50dcf76a409072832059438e2f10ca3bc63,57309e9cdd8072d726c15b7869b48bd06e7ba852..5c2c169395c3ca61fe9e906baab078cf2966dba2
@@@ -1178,12 -1172,12 +1178,12 @@@ static int __inject_ckc(struct kvm_vcp
  {
        struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
  
 -      VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CLOCK_COMP);
 +      VCPU_EVENT(vcpu, 3, "%s", "inject: clock comparator external");
        trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP,
 -                                 0, 0, 2);
 +                                 0, 0);
  
        set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
-       atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+       atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
        return 0;
  }
  
@@@ -1191,12 -1185,12 +1191,12 @@@ static int __inject_cpu_timer(struct kv
  {
        struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
  
 -      VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CPU_TIMER);
 +      VCPU_EVENT(vcpu, 3, "%s", "inject: cpu timer external");
        trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER,
 -                                 0, 0, 2);
 +                                 0, 0);
  
        set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
-       atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+       atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
        return 0;
  }
  
diff --cc arch/s390/kvm/kvm-s390.c
index 98df53c013439836773e39f21396157eca28bdca,b73302fb05079f63a7fdadd190267b6677728270..c91eb941b444ee7cad8c5a9ea2523495e71e8f2d
@@@ -1283,79 -1198,43 +1283,79 @@@ int kvm_arch_vcpu_init(struct kvm_vcpu 
        return 0;
  }
  
 +/*
 + * Backs up the current FP/VX register save area on a particular
 + * destination.  Used to switch between different register save
 + * areas.
 + */
 +static inline void save_fpu_to(struct fpu *dst)
 +{
 +      dst->fpc = current->thread.fpu.fpc;
 +      dst->flags = current->thread.fpu.flags;
 +      dst->regs = current->thread.fpu.regs;
 +}
 +
 +/*
 + * Switches the FP/VX register save area from which to lazy
 + * restore register contents.
 + */
 +static inline void load_fpu_from(struct fpu *from)
 +{
 +      current->thread.fpu.fpc = from->fpc;
 +      current->thread.fpu.flags = from->flags;
 +      current->thread.fpu.regs = from->regs;
 +}
 +
  void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
  {
 -      save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
 -      if (test_kvm_facility(vcpu->kvm, 129))
 -              save_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
 -      else
 -              save_fp_regs(vcpu->arch.host_fpregs.fprs);
 -      save_access_regs(vcpu->arch.host_acrs);
 +      /* Save host register state */
 +      save_fpu_regs();
 +      save_fpu_to(&vcpu->arch.host_fpregs);
 +
        if (test_kvm_facility(vcpu->kvm, 129)) {
 -              restore_fp_ctl(&vcpu->run->s.regs.fpc);
 -              restore_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
 -      } else {
 -              restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
 -              restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
 -      }
 +              current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
 +              current->thread.fpu.flags = FPU_USE_VX;
 +              /*
 +               * Use the register save area in the SIE-control block
 +               * for register restore and save in kvm_arch_vcpu_put()
 +               */
 +              current->thread.fpu.vxrs =
 +                      (__vector128 *)&vcpu->run->s.regs.vrs;
 +              /* Always enable the vector extension for KVM */
 +              __ctl_set_vx();
 +      } else
 +              load_fpu_from(&vcpu->arch.guest_fpregs);
 +
 +      if (test_fp_ctl(current->thread.fpu.fpc))
 +              /* User space provided an invalid FPC, let's clear it */
 +              current->thread.fpu.fpc = 0;
 +
 +      save_access_regs(vcpu->arch.host_acrs);
        restore_access_regs(vcpu->run->s.regs.acrs);
        gmap_enable(vcpu->arch.gmap);
-       atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+       atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
  }
  
  void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
  {
-       atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+       atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
        gmap_disable(vcpu->arch.gmap);
 -      if (test_kvm_facility(vcpu->kvm, 129)) {
 -              save_fp_ctl(&vcpu->run->s.regs.fpc);
 -              save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
 -      } else {
 -              save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
 -              save_fp_regs(vcpu->arch.guest_fpregs.fprs);
 -      }
 -      save_access_regs(vcpu->run->s.regs.acrs);
 -      restore_fp_ctl(&vcpu->arch.host_fpregs.fpc);
 +
 +      save_fpu_regs();
 +
        if (test_kvm_facility(vcpu->kvm, 129))
 -              restore_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
 +              /*
 +               * kvm_arch_vcpu_load() set up the register save area to
 +               * the &vcpu->run->s.regs.vrs and, thus, the vector registers
 +               * are already saved.  Only the floating-point control must be
 +               * copied.
 +               */
 +              vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
        else
 -              restore_fp_regs(vcpu->arch.host_fpregs.fprs);
 +              save_fpu_to(&vcpu->arch.guest_fpregs);
 +      load_fpu_from(&vcpu->arch.host_fpregs);
 +
 +      save_access_regs(vcpu->run->s.regs.acrs);
        restore_access_regs(vcpu->arch.host_acrs);
  }
  
diff --cc arch/s390/lib/uaccess.c
index 0d002a746bec157b9bd38a5006564bba26bb8b04,93cb1d09493dd68729ffdb3fa12be34ab1cd776b..ae4de559e3a04288c6be111de684b3355f35109b
@@@ -370,10 -370,23 +370,10 @@@ long __strncpy_from_user(char *dst, con
  }
  EXPORT_SYMBOL(__strncpy_from_user);
  
 -/*
 - * The "old" uaccess variant without mvcos can be enforced with the
 - * uaccess_primary kernel parameter. This is mainly for debugging purposes.
 - */
 -static int uaccess_primary __initdata;
 -
 -static int __init parse_uaccess_pt(char *__unused)
 -{
 -      uaccess_primary = 1;
 -      return 0;
 -}
 -early_param("uaccess_primary", parse_uaccess_pt);
 -
  static int __init uaccess_init(void)
  {
 -      if (!uaccess_primary && test_facility(27))
 +      if (test_facility(27))
-               static_key_slow_inc(&have_mvcos);
+               static_branch_enable(&have_mvcos);
        return 0;
  }
  early_initcall(uaccess_init);
diff --cc arch/sparc/lib/ksyms.c
Simple merge
diff --cc arch/x86/kernel/tsc.c
index 79055cf2c497e8219ed9956eeb732b9234495593,b9cfd462f7e7b530458bde0c3595a8d97d862e67..c8d52cb4cb6e8b9ee9d81cfc9c0fa3603284ce0e
@@@ -284,26 -289,11 +289,19 @@@ u64 native_sched_clock(void
         *   very important for it to be as fast as the platform
         *   can achieve it. )
         */
-       if (!static_key_false(&__use_tsc)) {
-               /* No locking but a rare wrong value is not a big deal: */
-               return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
-       }
-       /* read the Time Stamp Counter: */
-       tsc_now = rdtsc();
  
-       /* return the value in ns */
-       return cycles_2_ns(tsc_now);
+       /* No locking but a rare wrong value is not a big deal: */
+       return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
  }
  
 +/*
 + * Generate a sched_clock if you already have a TSC value.
 + */
 +u64 native_sched_clock_from_tsc(u64 tsc)
 +{
 +      return cycles_2_ns(tsc);
 +}
 +
  /* We need to define a real function for sched_clock, to override the
     weak default version */
  #ifdef CONFIG_PARAVIRT
diff --cc arch/xtensa/include/asm/atomic.h
Simple merge
diff --cc drivers/s390/scsi/zfcp_fsf.c
Simple merge
diff --cc kernel/sched/core.c
Simple merge
diff --cc lib/Kconfig.debug
Simple merge
diff --cc lib/Makefile
Simple merge