Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 17 Nov 2017 22:23:52 +0000 (14:23 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 17 Nov 2017 22:23:52 +0000 (14:23 -0800)
Pull second round of s390 updates from Martin Schwidefsky:

 - rework of the vdso code to avoid the use of the access register mode

 - use perf AUX buffers for the transport of diagnostic sample data

 - add perf_regs and user stack dump support

 - enable perf call graphs for user space programs

 - add perf register support for floating-point registers

 - all remaining s390 related timer_setup conversions

 - bug fixes and cleanups

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux: (30 commits)
  s390: remove unused parameter from Makefile
  zfcp: purely mechanical update using timer API, plus blank lines
  s390/scsi: Convert timers to use timer_setup()
  s390/cpum_sf: correctly set the PID and TID in perf samples
  s390/cpum_sf: load program parameter at sampler enablement
  s390/perf: add perf register support for floating-point registers
  s390/perf: extend perf_regs support to include floating-point registers
  s390/perf: define common DWARF register string table
  s390/perf: add support for perf_regs and libdw
  s390/perf: add perf_regs support and user stack dump
  s390/cpum_sf: do not register PMU if no sampling mode is authorized
  s390/cpumf: remove raw event support in basic-only sampling mode
  s390/perf: add callback to perf to enable using AUX buffer
  s390/cpumf: enable using AUX buffer
  s390/cpumf: introduce AUX buffer for dump diagnostic sample data
  s390/disassembler: increase show_code buffer size
  s390: Remove CONFIG_HARDENED_USERCOPY
  s390: enable CPU alternatives unconditionally
  s390/nmi: remove unused code
  s390/mm: remove unused code
  ...

64 files changed:
arch/s390/Kconfig
arch/s390/configs/default_defconfig
arch/s390/configs/gcov_defconfig
arch/s390/configs/performance_defconfig
arch/s390/include/asm/alternative.h
arch/s390/include/asm/cpu_mf.h
arch/s390/include/asm/futex.h
arch/s390/include/asm/lowcore.h
arch/s390/include/asm/mmu_context.h
arch/s390/include/asm/perf_event.h
arch/s390/include/asm/processor.h
arch/s390/include/asm/ptrace.h
arch/s390/include/asm/setup.h
arch/s390/include/asm/uaccess.h
arch/s390/include/asm/vdso.h
arch/s390/include/uapi/asm/perf_regs.h [new file with mode: 0644]
arch/s390/kernel/Makefile
arch/s390/kernel/asm-offsets.c
arch/s390/kernel/dis.c
arch/s390/kernel/entry.S
arch/s390/kernel/head64.S
arch/s390/kernel/module.c
arch/s390/kernel/nmi.c
arch/s390/kernel/perf_cpum_sf.c
arch/s390/kernel/perf_regs.c [new file with mode: 0644]
arch/s390/kernel/vdso.c
arch/s390/kernel/vdso32/getcpu.S
arch/s390/kernel/vdso64/clock_gettime.S
arch/s390/kernel/vdso64/getcpu.S
arch/s390/lib/spinlock.c
arch/s390/lib/uaccess.c
arch/s390/mm/fault.c
arch/s390/mm/gmap.c
arch/s390/mm/init.c
arch/s390/mm/pgalloc.c
arch/s390/tools/Makefile
drivers/s390/char/con3215.c
drivers/s390/char/con3270.c
drivers/s390/char/sclp.c
drivers/s390/char/sclp_con.c
drivers/s390/char/sclp_tty.c
drivers/s390/char/sclp_vt220.c
drivers/s390/char/tape_core.c
drivers/s390/char/tty3270.c
drivers/s390/cio/device.c
drivers/s390/cio/device.h
drivers/s390/cio/device_fsm.c
drivers/s390/cio/eadm_sch.c
drivers/s390/cio/qdio.h
drivers/s390/cio/qdio_main.c
drivers/s390/cio/qdio_setup.c
drivers/s390/crypto/ap_bus.c
drivers/s390/crypto/ap_bus.h
drivers/s390/crypto/ap_queue.c
drivers/s390/scsi/zfcp_erp.c
drivers/s390/scsi/zfcp_ext.h
drivers/s390/scsi/zfcp_fsf.c
tools/perf/Makefile.config
tools/perf/arch/s390/include/dwarf-regs-table.h
tools/perf/arch/s390/include/perf_regs.h [new file with mode: 0644]
tools/perf/arch/s390/util/Build
tools/perf/arch/s390/util/auxtrace.c [new file with mode: 0644]
tools/perf/arch/s390/util/dwarf-regs.c
tools/perf/arch/s390/util/unwind-libdw.c [new file with mode: 0644]

index 863a62a6de3cc3f8daa92a9f9ae8c34065dff989..829c67986db7742f3d95a69ed35600fc41e75c55 100644 (file)
@@ -148,6 +148,7 @@ config S390
        select HAVE_FUNCTION_GRAPH_TRACER
        select HAVE_FUNCTION_TRACER
        select HAVE_FUTEX_CMPXCHG if FUTEX
+       select HAVE_GCC_PLUGINS
        select HAVE_KERNEL_BZIP2
        select HAVE_KERNEL_GZIP
        select HAVE_KERNEL_LZ4
@@ -158,6 +159,8 @@ config S390
        select HAVE_KRETPROBES
        select HAVE_KVM
        select HAVE_LIVEPATCH
+       select HAVE_PERF_REGS
+       select HAVE_PERF_USER_STACK_DUMP
        select HAVE_MEMBLOCK
        select HAVE_MEMBLOCK_NODE_MAP
        select HAVE_MEMBLOCK_PHYS_MAP
@@ -538,22 +541,6 @@ config ARCH_RANDOM
 
          If unsure, say Y.
 
-config ALTERNATIVES
-       def_bool y
-       prompt "Patch optimized instructions for running CPU type"
-       help
-         When enabled the kernel code is compiled with additional
-         alternative instructions blocks optimized for newer CPU types.
-         These alternative instructions blocks are patched at kernel boot
-         time when running CPU supports them. This mechanism is used to
-         optimize some critical code paths (i.e. spinlocks) for newer CPUs
-         even if kernel is build to support older machine generations.
-
-         This mechanism could be disabled by appending "noaltinstr"
-         option to the kernel command line.
-
-         If unsure, say Y.
-
 endmenu
 
 menu "Memory setup"
index 84eccc88c065083594fd71f895857813af2edcdb..5af8458951cf9db3a1802de32111d26c5a355d5b 100644 (file)
@@ -629,6 +629,7 @@ CONFIG_STACK_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
 CONFIG_FUNCTION_PROFILER=y
 CONFIG_HIST_TRIGGERS=y
+CONFIG_DMA_API_DEBUG=y
 CONFIG_LKDTM=m
 CONFIG_TEST_LIST_SORT=y
 CONFIG_TEST_SORT=y
@@ -637,14 +638,12 @@ CONFIG_RBTREE_TEST=y
 CONFIG_INTERVAL_TREE_TEST=m
 CONFIG_PERCPU_TEST=m
 CONFIG_ATOMIC64_SELFTEST=y
-CONFIG_DMA_API_DEBUG=y
 CONFIG_TEST_BPF=m
 CONFIG_BUG_ON_DATA_CORRUPTION=y
 CONFIG_S390_PTDUMP=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_SECURITY=y
 CONFIG_SECURITY_NETWORK=y
-CONFIG_HARDENED_USERCOPY=y
 CONFIG_FORTIFY_SOURCE=y
 CONFIG_SECURITY_SELINUX=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM=y
@@ -660,13 +659,11 @@ CONFIG_CRYPTO_PCRYPT=m
 CONFIG_CRYPTO_CRYPTD=m
 CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
-CONFIG_CRYPTO_CCM=m
 CONFIG_CRYPTO_GCM=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_KEYWRAP=m
-CONFIG_CRYPTO_CMAC=m
 CONFIG_CRYPTO_XCBC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_CRC32=m
index f7202358e6d74ff0e367f12ba6f83d260583fde2..d52eafe57ae86e430b96681e65e5c7dc0cc24b1a 100644 (file)
@@ -587,7 +587,6 @@ CONFIG_BIG_KEYS=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_SECURITY=y
 CONFIG_SECURITY_NETWORK=y
-CONFIG_HARDENED_USERCOPY=y
 CONFIG_SECURITY_SELINUX=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
@@ -605,13 +604,10 @@ CONFIG_CRYPTO_PCRYPT=m
 CONFIG_CRYPTO_CRYPTD=m
 CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
-CONFIG_CRYPTO_CCM=m
-CONFIG_CRYPTO_GCM=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_KEYWRAP=m
-CONFIG_CRYPTO_CMAC=m
 CONFIG_CRYPTO_XCBC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_CRC32=m
index 03100fe74ea8f8441798378d8cfd30be095e16d4..20ed149e113739afd64c6f016c2384b4aa03411a 100644 (file)
@@ -585,7 +585,6 @@ CONFIG_BIG_KEYS=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_SECURITY=y
 CONFIG_SECURITY_NETWORK=y
-CONFIG_HARDENED_USERCOPY=y
 CONFIG_SECURITY_SELINUX=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
@@ -603,13 +602,10 @@ CONFIG_CRYPTO_PCRYPT=m
 CONFIG_CRYPTO_CRYPTD=m
 CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
-CONFIG_CRYPTO_CCM=m
-CONFIG_CRYPTO_GCM=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_KEYWRAP=m
-CONFIG_CRYPTO_CMAC=m
 CONFIG_CRYPTO_XCBC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_CRC32=m
index 6c268f6a51d318003ebe42e00cf512508ba937a6..a72002056b54848103fc626338fee956c145bfc9 100644 (file)
@@ -15,14 +15,9 @@ struct alt_instr {
        u8  replacementlen;     /* length of new instruction */
 } __packed;
 
-#ifdef CONFIG_ALTERNATIVES
-extern void apply_alternative_instructions(void);
-extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
-#else
-static inline void apply_alternative_instructions(void) {};
-static inline void apply_alternatives(struct alt_instr *start,
-                                     struct alt_instr *end) {};
-#endif
+void apply_alternative_instructions(void);
+void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
+
 /*
  * |661:       |662:     |6620      |663:
  * +-----------+---------------------+
@@ -109,7 +104,6 @@ static inline void apply_alternatives(struct alt_instr *start,
        b_altinstr(num)":\n\t" altinstr "\n" e_altinstr(num) ":\n"      \
        INSTR_LEN_SANITY_CHECK(altinstr_len(num))
 
-#ifdef CONFIG_ALTERNATIVES
 /* alternative assembly primitive: */
 #define ALTERNATIVE(oldinstr, altinstr, facility) \
        ".pushsection .altinstr_replacement, \"ax\"\n"                  \
@@ -130,14 +124,6 @@ static inline void apply_alternatives(struct alt_instr *start,
        ALTINSTR_ENTRY(facility1, 1)                                    \
        ALTINSTR_ENTRY(facility2, 2)                                    \
        ".popsection\n"
-#else
-/* Alternative instructions are disabled, let's put just oldinstr in */
-#define ALTERNATIVE(oldinstr, altinstr, facility) \
-       oldinstr "\n"
-
-#define ALTERNATIVE_2(oldinstr, altinstr1, facility1, altinstr2, facility2) \
-       oldinstr "\n"
-#endif
 
 /*
  * Alternative instructions for different CPU types or capabilities.
index 05480e4cc5cabdab4e8bcc5006b8d0691b4bfef7..792cda339af1ae3ad25ce8fc0457f4db961a4b8a 100644 (file)
@@ -144,6 +144,12 @@ struct hws_trailer_entry {
        unsigned long long progusage2;   /*                                   */
 } __packed;
 
+/* Load program parameter */
+static inline void lpp(void *pp)
+{
+       asm volatile(".insn s,0xb2800000,0(%0)\n":: "a" (pp) : "memory");
+}
+
 /* Query counter information */
 static inline int qctri(struct cpumf_ctr_info *info)
 {
@@ -167,7 +173,7 @@ static inline int lcctl(u64 ctl)
                "       .insn   s,0xb2840000,%1\n"
                "       ipm     %0\n"
                "       srl     %0,28\n"
-               : "=d" (cc) : "m" (ctl) : "cc");
+               : "=d" (cc) : "Q" (ctl) : "cc");
        return cc;
 }
 
index 9b5a3469fed94fda814e64714cf196f9fbae76bf..5e97a43531470d13b63b7e4379a95d741363ebb7 100644 (file)
@@ -26,9 +26,9 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
                u32 __user *uaddr)
 {
        int oldval = 0, newval, ret;
+       mm_segment_t old_fs;
 
-       load_kernel_asce();
-
+       old_fs = enable_sacf_uaccess();
        pagefault_disable();
        switch (op) {
        case FUTEX_OP_SET:
@@ -55,6 +55,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
                ret = -ENOSYS;
        }
        pagefault_enable();
+       disable_sacf_uaccess(old_fs);
 
        if (!ret)
                *oval = oldval;
@@ -65,9 +66,10 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
 static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
                                                u32 oldval, u32 newval)
 {
+       mm_segment_t old_fs;
        int ret;
 
-       load_kernel_asce();
+       old_fs = enable_sacf_uaccess();
        asm volatile(
                "   sacf 256\n"
                "0: cs   %1,%4,0(%5)\n"
@@ -77,6 +79,7 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
                : "=d" (ret), "+d" (oldval), "=m" (*uaddr)
                : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
                : "cc", "memory");
+       disable_sacf_uaccess(old_fs);
        *uval = oldval;
        return ret;
 }
index 9eb36a1592c797a7d1e70e86b2c905a8266d4760..ec6592e8ba36e20228b710d3474ca825dc9a264a 100644 (file)
@@ -115,33 +115,28 @@ struct lowcore {
        /* Address space pointer. */
        __u64   kernel_asce;                    /* 0x0378 */
        __u64   user_asce;                      /* 0x0380 */
+       __u64   vdso_asce;                      /* 0x0388 */
 
        /*
         * The lpp and current_pid fields form a
         * 64-bit value that is set as program
         * parameter with the LPP instruction.
         */
-       __u32   lpp;                            /* 0x0388 */
-       __u32   current_pid;                    /* 0x038c */
+       __u32   lpp;                            /* 0x0390 */
+       __u32   current_pid;                    /* 0x0394 */
 
        /* SMP info area */
-       __u32   cpu_nr;                         /* 0x0390 */
-       __u32   softirq_pending;                /* 0x0394 */
-       __u64   percpu_offset;                  /* 0x0398 */
-       __u64   vdso_per_cpu_data;              /* 0x03a0 */
-       __u64   machine_flags;                  /* 0x03a8 */
-       __u32   preempt_count;                  /* 0x03b0 */
-       __u8    pad_0x03b4[0x03b8-0x03b4];      /* 0x03b4 */
-       __u64   gmap;                           /* 0x03b8 */
-       __u32   spinlock_lockval;               /* 0x03c0 */
-       __u32   spinlock_index;                 /* 0x03c4 */
-       __u32   fpu_flags;                      /* 0x03c8 */
-       __u8    pad_0x03cc[0x0400-0x03cc];      /* 0x03cc */
-
-       /* Per cpu primary space access list */
-       __u32   paste[16];                      /* 0x0400 */
-
-       __u8    pad_0x04c0[0x0e00-0x0440];      /* 0x0440 */
+       __u32   cpu_nr;                         /* 0x0398 */
+       __u32   softirq_pending;                /* 0x039c */
+       __u32   preempt_count;                  /* 0x03a0 */
+       __u32   spinlock_lockval;               /* 0x03a4 */
+       __u32   spinlock_index;                 /* 0x03a8 */
+       __u32   fpu_flags;                      /* 0x03ac */
+       __u64   percpu_offset;                  /* 0x03b0 */
+       __u64   vdso_per_cpu_data;              /* 0x03b8 */
+       __u64   machine_flags;                  /* 0x03c0 */
+       __u64   gmap;                           /* 0x03c8 */
+       __u8    pad_0x03d0[0x0e00-0x03d0];      /* 0x03d0 */
 
        /*
         * 0xe00 contains the address of the IPL Parameter Information
@@ -193,14 +188,14 @@ extern struct lowcore *lowcore_ptr[];
 
 static inline void set_prefix(__u32 address)
 {
-       asm volatile("spx %0" : : "m" (address) : "memory");
+       asm volatile("spx %0" : : "Q" (address) : "memory");
 }
 
 static inline __u32 store_prefix(void)
 {
        __u32 address;
 
-       asm volatile("stpx %0" : "=m" (address));
+       asm volatile("stpx %0" : "=Q" (address));
        return address;
 }
 
index cf4c1cb17dcd35ab9a18197e9dbeed41d1a0b376..f4a07f788f78b3160f9ae312e699805b47f67ebe 100644 (file)
@@ -73,41 +73,38 @@ static inline int init_new_context(struct task_struct *tsk,
 static inline void set_user_asce(struct mm_struct *mm)
 {
        S390_lowcore.user_asce = mm->context.asce;
-       if (current->thread.mm_segment.ar4)
-               __ctl_load(S390_lowcore.user_asce, 7, 7);
-       set_cpu_flag(CIF_ASCE_PRIMARY);
+       __ctl_load(S390_lowcore.user_asce, 1, 1);
+       clear_cpu_flag(CIF_ASCE_PRIMARY);
 }
 
 static inline void clear_user_asce(void)
 {
        S390_lowcore.user_asce = S390_lowcore.kernel_asce;
-
-       __ctl_load(S390_lowcore.user_asce, 1, 1);
-       __ctl_load(S390_lowcore.user_asce, 7, 7);
-}
-
-static inline void load_kernel_asce(void)
-{
-       unsigned long asce;
-
-       __ctl_store(asce, 1, 1);
-       if (asce != S390_lowcore.kernel_asce)
-               __ctl_load(S390_lowcore.kernel_asce, 1, 1);
+       __ctl_load(S390_lowcore.kernel_asce, 1, 1);
        set_cpu_flag(CIF_ASCE_PRIMARY);
 }
 
+mm_segment_t enable_sacf_uaccess(void);
+void disable_sacf_uaccess(mm_segment_t old_fs);
+
 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
                             struct task_struct *tsk)
 {
        int cpu = smp_processor_id();
 
-       S390_lowcore.user_asce = next->context.asce;
        if (prev == next)
                return;
+       S390_lowcore.user_asce = next->context.asce;
        cpumask_set_cpu(cpu, &next->context.cpu_attach_mask);
-       /* Clear old ASCE by loading the kernel ASCE. */
-       __ctl_load(S390_lowcore.kernel_asce, 1, 1);
-       __ctl_load(S390_lowcore.kernel_asce, 7, 7);
+       /* Clear previous user-ASCE from CR1 and CR7 */
+       if (!test_cpu_flag(CIF_ASCE_PRIMARY)) {
+               __ctl_load(S390_lowcore.kernel_asce, 1, 1);
+               set_cpu_flag(CIF_ASCE_PRIMARY);
+       }
+       if (test_cpu_flag(CIF_ASCE_SECONDARY)) {
+               __ctl_load(S390_lowcore.vdso_asce, 7, 7);
+               clear_cpu_flag(CIF_ASCE_SECONDARY);
+       }
        cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
 }
 
@@ -117,7 +114,6 @@ static inline void finish_arch_post_lock_switch(void)
        struct task_struct *tsk = current;
        struct mm_struct *mm = tsk->mm;
 
-       load_kernel_asce();
        if (mm) {
                preempt_disable();
                while (atomic_read(&mm->context.flush_count))
index 79aa6421fedbaa725a69bc7dfa20ec92bf622944..d6c9d1e0dc2d4bc0fe36a46109211f93682e5cfd 100644 (file)
@@ -64,27 +64,10 @@ struct perf_sf_sde_regs {
 #define REG_OVERFLOW           1
 #define OVERFLOW_REG(hwc)      ((hwc)->extra_reg.config)
 #define SFB_ALLOC_REG(hwc)     ((hwc)->extra_reg.alloc)
-#define RAWSAMPLE_REG(hwc)     ((hwc)->config)
 #define TEAR_REG(hwc)          ((hwc)->last_tag)
 #define SAMPL_RATE(hwc)                ((hwc)->event_base)
 #define SAMPL_FLAGS(hwc)       ((hwc)->config_base)
 #define SAMPL_DIAG_MODE(hwc)   (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE)
 #define SDB_FULL_BLOCKS(hwc)   (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FULL_BLOCKS)
 
-/* Structure for sampling data entries to be passed as perf raw sample data
- * to user space.  Note that raw sample data must be aligned and, thus, might
- * be padded with zeros.
- */
-struct sf_raw_sample {
-#define SF_RAW_SAMPLE_BASIC    PERF_CPUM_SF_BASIC_MODE
-#define SF_RAW_SAMPLE_DIAG     PERF_CPUM_SF_DIAG_MODE
-       u64                     format;
-       u32                      size;    /* Size of sf_raw_sample */
-       u16                     bsdes;    /* Basic-sampling data entry size */
-       u16                     dsdes;    /* Diagnostic-sampling data entry size */
-       struct hws_basic_entry  basic;    /* Basic-sampling data entry */
-       struct hws_diag_entry    diag;    /* Diagnostic-sampling data entry */
-       u8                  padding[];    /* Padding to next multiple of 8 */
-} __packed;
-
 #endif /* _ASM_S390_PERF_EVENT_H */
index f25bfe888933fe92dda958003b3dcad3e19ac2fc..bfbfad4822897246bebc2e0214981de37b926a9d 100644 (file)
@@ -109,9 +109,7 @@ extern void execve_tail(void);
 
 #define HAVE_ARCH_PICK_MMAP_LAYOUT
 
-typedef struct {
-        __u32 ar4;
-} mm_segment_t;
+typedef unsigned int mm_segment_t;
 
 /*
  * Thread structure
@@ -247,7 +245,7 @@ static inline unsigned short stap(void)
 {
        unsigned short cpu_address;
 
-       asm volatile("stap %0" : "=m" (cpu_address));
+       asm volatile("stap %0" : "=Q" (cpu_address));
        return cpu_address;
 }
 
index 2f84e77f1f1b9844133663842b220f842c529e80..a3788dafc0e1f2272abd0ba9c455b775e81f627a 100644 (file)
 #define PIF_SYSCALL            0       /* inside a system call */
 #define PIF_PER_TRAP           1       /* deliver sigtrap on return to user */
 #define PIF_SYSCALL_RESTART    2       /* restart the current system call */
+#define PIF_GUEST_FAULT                3       /* indicates program check in sie64a */
 
 #define _PIF_SYSCALL           _BITUL(PIF_SYSCALL)
 #define _PIF_PER_TRAP          _BITUL(PIF_PER_TRAP)
 #define _PIF_SYSCALL_RESTART   _BITUL(PIF_SYSCALL_RESTART)
+#define _PIF_GUEST_FAULT       _BITUL(PIF_GUEST_FAULT)
 
 #ifndef __ASSEMBLY__
 
index 8bc87dcb10ebdb002c52f54ead162f28c6559b22..2eb0c8a7b664817759d325c34ec8e53a9bdd6981 100644 (file)
@@ -36,7 +36,7 @@
 #define MACHINE_FLAG_SCC       _BITUL(17)
 
 #define LPP_MAGIC              _BITUL(31)
-#define LPP_PFAULT_PID_MASK    _AC(0xffffffff, UL)
+#define LPP_PID_MASK           _AC(0xffffffff, UL)
 
 #ifndef __ASSEMBLY__
 
index cdd0f0d999e2617f0ec372828da9482d5fa6b021..ad6b91013a0525d82002090788038e0c9773b34a 100644 (file)
@@ -16,7 +16,7 @@
 #include <asm/processor.h>
 #include <asm/ctl_reg.h>
 #include <asm/extable.h>
-
+#include <asm/facility.h>
 
 /*
  * The fs value determines whether argument validity checking should be
  * For historical reasons, these macros are grossly misnamed.
  */
 
-#define MAKE_MM_SEG(a)  ((mm_segment_t) { (a) })
-
-
-#define KERNEL_DS       MAKE_MM_SEG(0)
-#define USER_DS         MAKE_MM_SEG(1)
+#define KERNEL_DS      (0)
+#define KERNEL_DS_SACF (1)
+#define USER_DS                (2)
+#define USER_DS_SACF   (3)
 
 #define get_ds()        (KERNEL_DS)
 #define get_fs()        (current->thread.mm_segment)
-#define segment_eq(a,b) ((a).ar4 == (b).ar4)
+#define segment_eq(a,b) (((a) & 2) == ((b) & 2))
 
-static inline void set_fs(mm_segment_t fs)
-{
-       current->thread.mm_segment = fs;
-       if (uaccess_kernel()) {
-               set_cpu_flag(CIF_ASCE_SECONDARY);
-               __ctl_load(S390_lowcore.kernel_asce, 7, 7);
-       } else {
-               clear_cpu_flag(CIF_ASCE_SECONDARY);
-               __ctl_load(S390_lowcore.user_asce, 7, 7);
-       }
-}
+void set_fs(mm_segment_t fs);
 
 static inline int __range_ok(unsigned long addr, unsigned long size)
 {
@@ -95,7 +84,7 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n);
 
 static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
 {
-       unsigned long spec = 0x810000UL;
+       unsigned long spec = 0x010000UL;
        int rc;
 
        switch (size) {
@@ -125,7 +114,7 @@ static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
 
 static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size)
 {
-       unsigned long spec = 0x81UL;
+       unsigned long spec = 0x01UL;
        int rc;
 
        switch (size) {
index ae6261ef97d558bb8d52e38286503ed13fca8373..169d7604eb804432e179b74f2737acc8266fb92a 100644 (file)
@@ -46,6 +46,7 @@ struct vdso_per_cpu_data {
 };
 
 extern struct vdso_data *vdso_data;
+extern struct vdso_data boot_vdso_data;
 
 void vdso_alloc_boot_cpu(struct lowcore *lowcore);
 int vdso_alloc_per_cpu(struct lowcore *lowcore);
diff --git a/arch/s390/include/uapi/asm/perf_regs.h b/arch/s390/include/uapi/asm/perf_regs.h
new file mode 100644 (file)
index 0000000..7c8564f
--- /dev/null
@@ -0,0 +1,43 @@
+#ifndef _ASM_S390_PERF_REGS_H
+#define _ASM_S390_PERF_REGS_H
+
+enum perf_event_s390_regs {
+       PERF_REG_S390_R0,
+       PERF_REG_S390_R1,
+       PERF_REG_S390_R2,
+       PERF_REG_S390_R3,
+       PERF_REG_S390_R4,
+       PERF_REG_S390_R5,
+       PERF_REG_S390_R6,
+       PERF_REG_S390_R7,
+       PERF_REG_S390_R8,
+       PERF_REG_S390_R9,
+       PERF_REG_S390_R10,
+       PERF_REG_S390_R11,
+       PERF_REG_S390_R12,
+       PERF_REG_S390_R13,
+       PERF_REG_S390_R14,
+       PERF_REG_S390_R15,
+       PERF_REG_S390_FP0,
+       PERF_REG_S390_FP1,
+       PERF_REG_S390_FP2,
+       PERF_REG_S390_FP3,
+       PERF_REG_S390_FP4,
+       PERF_REG_S390_FP5,
+       PERF_REG_S390_FP6,
+       PERF_REG_S390_FP7,
+       PERF_REG_S390_FP8,
+       PERF_REG_S390_FP9,
+       PERF_REG_S390_FP10,
+       PERF_REG_S390_FP11,
+       PERF_REG_S390_FP12,
+       PERF_REG_S390_FP13,
+       PERF_REG_S390_FP14,
+       PERF_REG_S390_FP15,
+       PERF_REG_S390_MASK,
+       PERF_REG_S390_PC,
+
+       PERF_REG_S390_MAX
+};
+
+#endif /* _ASM_S390_PERF_REGS_H */
index 83bc82001c06ff1c7bd3dee875a6a6901adce115..909bce65cb2bd28cf86b0a170a183107db8ae05c 100644 (file)
@@ -59,7 +59,7 @@ obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
 obj-y  += debug.o irq.o ipl.o dis.o diag.o vdso.o als.o
 obj-y  += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o
 obj-y  += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
-obj-y  += entry.o reipl.o relocate_kernel.o kdebugfs.o
+obj-y  += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o
 
 extra-y                                += head.o head64.o vmlinux.lds
 
@@ -77,10 +77,9 @@ obj-$(CONFIG_KPROBES)                += kprobes.o
 obj-$(CONFIG_FUNCTION_TRACER)  += mcount.o ftrace.o
 obj-$(CONFIG_CRASH_DUMP)       += crash_dump.o
 obj-$(CONFIG_UPROBES)          += uprobes.o
-obj-$(CONFIG_ALTERNATIVES)     += alternative.o
 
 obj-$(CONFIG_PERF_EVENTS)      += perf_event.o perf_cpum_cf.o perf_cpum_sf.o
-obj-$(CONFIG_PERF_EVENTS)      += perf_cpum_cf_events.o
+obj-$(CONFIG_PERF_EVENTS)      += perf_cpum_cf_events.o perf_regs.o
 
 obj-$(CONFIG_TRACEPOINTS)      += trace.o
 
index 33ec80df7ed443ffc161d99ee3fc591381cec0b8..587b195b588dd092ab5ff73fbccd31993ba895d7 100644 (file)
@@ -171,6 +171,7 @@ int main(void)
        OFFSET(__LC_RESTART_DATA, lowcore, restart_data);
        OFFSET(__LC_RESTART_SOURCE, lowcore, restart_source);
        OFFSET(__LC_USER_ASCE, lowcore, user_asce);
+       OFFSET(__LC_VDSO_ASCE, lowcore, vdso_asce);
        OFFSET(__LC_LPP, lowcore, lpp);
        OFFSET(__LC_CURRENT_PID, lowcore, current_pid);
        OFFSET(__LC_PERCPU_OFFSET, lowcore, percpu_offset);
@@ -178,7 +179,6 @@ int main(void)
        OFFSET(__LC_MACHINE_FLAGS, lowcore, machine_flags);
        OFFSET(__LC_PREEMPT_COUNT, lowcore, preempt_count);
        OFFSET(__LC_GMAP, lowcore, gmap);
-       OFFSET(__LC_PASTE, lowcore, paste);
        /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */
        OFFSET(__LC_DUMP_REIPL, lowcore, ipib);
        /* hardware defined lowcore locations 0x1000 - 0x18ff */
index b811d3a8417d5614fdbb560aaa96937d4929ae5c..3be829721cf948adc5349a342f04073a90dcf20e 100644 (file)
@@ -480,7 +480,7 @@ void show_code(struct pt_regs *regs)
 {
        char *mode = user_mode(regs) ? "User" : "Krnl";
        unsigned char code[64];
-       char buffer[64], *ptr;
+       char buffer[128], *ptr;
        mm_segment_t old_fs;
        unsigned long addr;
        int start, end, opsize, hops, i;
@@ -543,7 +543,7 @@ void show_code(struct pt_regs *regs)
                start += opsize;
                pr_cont("%s", buffer);
                ptr = buffer;
-               ptr += sprintf(ptr, "\n          ");
+               ptr += sprintf(ptr, "\n\t  ");
                hops++;
        }
        pr_cont("\n");
index f498d201f98d09e527c26f815b048a8f48494499..a316cd6999ad9712defdf46db85e16eb429aebcb 100644 (file)
@@ -379,13 +379,21 @@ ENTRY(system_call)
        jg      s390_handle_mcck        # TIF bit will be cleared by handler
 
 #
-# _CIF_ASCE_PRIMARY and/or CIF_ASCE_SECONDARY set, load user space asce
+# _CIF_ASCE_PRIMARY and/or _CIF_ASCE_SECONDARY set, load user space asce
 #
 .Lsysc_asce:
+       ni      __LC_CPU_FLAGS+7,255-_CIF_ASCE_SECONDARY
+       lctlg   %c7,%c7,__LC_VDSO_ASCE          # load secondary asce
+       TSTMSK  __LC_CPU_FLAGS,_CIF_ASCE_PRIMARY
+       jz      .Lsysc_return
+#ifndef CONFIG_HAVE_MARCH_Z10_FEATURES
+       tm      __LC_STFLE_FAC_LIST+3,0x10      # has MVCOS ?
+       jnz     .Lsysc_set_fs_fixup
        ni      __LC_CPU_FLAGS+7,255-_CIF_ASCE_PRIMARY
        lctlg   %c1,%c1,__LC_USER_ASCE          # load primary asce
-       TSTMSK  __LC_CPU_FLAGS,_CIF_ASCE_SECONDARY
-       jz      .Lsysc_return
+       j       .Lsysc_return
+.Lsysc_set_fs_fixup:
+#endif
        larl    %r14,.Lsysc_return
        jg      set_fs_fixup
 
@@ -518,6 +526,7 @@ ENTRY(pgm_check_handler)
        stmg    %r8,%r15,__LC_SAVE_AREA_SYNC
        lg      %r10,__LC_LAST_BREAK
        lg      %r12,__LC_CURRENT
+       lghi    %r11,0
        larl    %r13,cleanup_critical
        lmg     %r8,%r9,__LC_PGM_OLD_PSW
        tmhh    %r8,0x0001              # test problem state bit
@@ -532,6 +541,7 @@ ENTRY(pgm_check_handler)
        ni      __SIE_PROG0C+3(%r14),0xfe       # no longer in SIE
        lctlg   %c1,%c1,__LC_USER_ASCE          # load primary asce
        larl    %r9,sie_exit                    # skip forward to sie_exit
+       lghi    %r11,_PIF_GUEST_FAULT
 #endif
 0:     tmhh    %r8,0x4000              # PER bit set in old PSW ?
        jnz     1f                      # -> enabled, can't be a double fault
@@ -549,13 +559,14 @@ ENTRY(pgm_check_handler)
        jz      3f
        mvc     __THREAD_trap_tdb(256,%r14),0(%r13)
 3:     stg     %r10,__THREAD_last_break(%r14)
-4:     la      %r11,STACK_FRAME_OVERHEAD(%r15)
+4:     lgr     %r13,%r11
+       la      %r11,STACK_FRAME_OVERHEAD(%r15)
        stmg    %r0,%r7,__PT_R0(%r11)
        mvc     __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
        stmg    %r8,%r9,__PT_PSW(%r11)
        mvc     __PT_INT_CODE(4,%r11),__LC_PGM_ILC
        mvc     __PT_INT_PARM_LONG(8,%r11),__LC_TRANS_EXC_CODE
-       xc      __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
+       stg     %r13,__PT_FLAGS(%r11)
        stg     %r10,__PT_ARGS(%r11)
        tm      __LC_PGM_ILC+3,0x80     # check for per exception
        jz      5f
@@ -738,10 +749,18 @@ ENTRY(io_int_handler)
 # _CIF_ASCE_PRIMARY and/or CIF_ASCE_SECONDARY set, load user space asce
 #
 .Lio_asce:
+       ni      __LC_CPU_FLAGS+7,255-_CIF_ASCE_SECONDARY
+       lctlg   %c7,%c7,__LC_VDSO_ASCE          # load secondary asce
+       TSTMSK  __LC_CPU_FLAGS,_CIF_ASCE_PRIMARY
+       jz      .Lio_return
+#ifndef CONFIG_HAVE_MARCH_Z10_FEATURES
+       tm      __LC_STFLE_FAC_LIST+3,0x10      # has MVCOS ?
+       jnz     .Lio_set_fs_fixup
        ni      __LC_CPU_FLAGS+7,255-_CIF_ASCE_PRIMARY
        lctlg   %c1,%c1,__LC_USER_ASCE          # load primary asce
-       TSTMSK  __LC_CPU_FLAGS,_CIF_ASCE_SECONDARY
-       jz      .Lio_return
+       j       .Lio_return
+.Lio_set_fs_fixup:
+#endif
        larl    %r14,.Lio_return
        jg      set_fs_fixup
 
index 172002da70754eb31d3d512f75db46e304fc1eb7..38a973ccf50108b423683496ee4daa28ab2382d6 100644 (file)
@@ -28,7 +28,7 @@ ENTRY(startup_continue)
        lctlg   %c0,%c15,.Lctl-.LPG1(%r13)      # load control registers
        lg      %r12,.Lparmaddr-.LPG1(%r13)     # pointer to parameter area
                                        # move IPL device to lowcore
-       lghi    %r0,__LC_PASTE
+       larl    %r0,boot_vdso_data
        stg     %r0,__LC_VDSO_PER_CPU
 #
 # Setup stack
index 6d9f73bb4142ab1b95a9dfd2dadc61853175961c..7b87991416fd6d882e7edf3f52b6f4af6fc45ad3 100644 (file)
@@ -433,16 +433,13 @@ int module_finalize(const Elf_Ehdr *hdr,
        const Elf_Shdr *s;
        char *secstrings;
 
-       if (IS_ENABLED(CONFIG_ALTERNATIVES)) {
-               secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
-               for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
-                       if (!strcmp(".altinstructions",
-                                   secstrings + s->sh_name)) {
-                               /* patch .altinstructions */
-                               void *aseg = (void *)s->sh_addr;
+       secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+       for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
+               if (!strcmp(".altinstructions", secstrings + s->sh_name)) {
+                       /* patch .altinstructions */
+                       void *aseg = (void *)s->sh_addr;
 
-                               apply_alternatives(aseg, aseg + s->sh_size);
-                       }
+                       apply_alternatives(aseg, aseg + s->sh_size);
                }
        }
 
index 3f3cda41f32a5441284da606138fec7e8c7598c0..6ff169253caeea0be88da521b84b796a72accb6e 100644 (file)
@@ -191,7 +191,6 @@ static int notrace s390_check_registers(union mci mci, int umode)
 {
        union ctlreg2 cr2;
        int kill_task;
-       void *fpt_save_area;
 
        kill_task = 0;
 
@@ -224,7 +223,6 @@ static int notrace s390_check_registers(union mci mci, int umode)
                if (!test_cpu_flag(CIF_FPU))
                        kill_task = 1;
        }
-       fpt_save_area = &S390_lowcore.floating_pt_save_area;
        if (!mci.fc) {
                /*
                 * Floating point control register can't be restored.
index bd4bbf61aaf368cba69d27edb2cd80dfac014493..227b38bd82c94f211392348ec03dd146549d19c4 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/perf_event.h>
 #include <linux/percpu.h>
+#include <linux/pid.h>
 #include <linux/notifier.h>
 #include <linux/export.h>
 #include <linux/slab.h>
@@ -77,6 +78,15 @@ struct sf_buffer {
        unsigned long    *tail;     /* last sample-data-block-table */
 };
 
+struct aux_buffer {
+       struct sf_buffer sfb;
+       unsigned long head;        /* index of SDB of buffer head */
+       unsigned long alert_mark;  /* index of SDB of alert request position */
+       unsigned long empty_mark;  /* mark of SDB not marked full */
+       unsigned long *sdb_index;  /* SDB address for fast lookup */
+       unsigned long *sdbt_index; /* SDBT address for fast lookup */
+};
+
 struct cpu_hw_sf {
        /* CPU-measurement sampling information block */
        struct hws_qsi_info_block qsi;
@@ -85,6 +95,7 @@ struct cpu_hw_sf {
        struct sf_buffer sfb;       /* Sampling buffer */
        unsigned int flags;         /* Status flags */
        struct perf_event *event;   /* Scheduled perf event */
+       struct perf_output_handle handle; /* AUX buffer output handle */
 };
 static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf);
 
@@ -341,22 +352,6 @@ static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc)
        sfb_account_allocs(num, hwc);
 }
 
-static size_t event_sample_size(struct hw_perf_event *hwc)
-{
-       struct sf_raw_sample *sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc);
-       size_t sample_size;
-
-       /* The sample size depends on the sampling function: The basic-sampling
-        * function must be always enabled, diagnostic-sampling function is
-        * optional.
-        */
-       sample_size = sfr->bsdes;
-       if (SAMPL_DIAG_MODE(hwc))
-               sample_size += sfr->dsdes;
-
-       return sample_size;
-}
-
 static void deallocate_buffers(struct cpu_hw_sf *cpuhw)
 {
        if (cpuhw->sfb.sdbt)
@@ -366,35 +361,7 @@ static void deallocate_buffers(struct cpu_hw_sf *cpuhw)
 static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
 {
        unsigned long n_sdb, freq, factor;
-       size_t sfr_size, sample_size;
-       struct sf_raw_sample *sfr;
-
-       /* Allocate raw sample buffer
-        *
-        *    The raw sample buffer is used to temporarily store sampling data
-        *    entries for perf raw sample processing.  The buffer size mainly
-        *    depends on the size of diagnostic-sampling data entries which is
-        *    machine-specific.  The exact size calculation includes:
-        *      1. The first 4 bytes of diagnostic-sampling data entries are
-        *         already reflected in the sf_raw_sample structure.  Subtract
-        *         these bytes.
-        *      2. The perf raw sample data must be 8-byte aligned (u64) and
-        *         perf's internal data size must be considered too.  So add
-        *         an additional u32 for correct alignment and subtract before
-        *         allocating the buffer.
-        *      3. Store the raw sample buffer pointer in the perf event
-        *         hardware structure.
-        */
-       sfr_size = ALIGN((sizeof(*sfr) - sizeof(sfr->diag) + cpuhw->qsi.dsdes) +
-                        sizeof(u32), sizeof(u64));
-       sfr_size -= sizeof(u32);
-       sfr = kzalloc(sfr_size, GFP_KERNEL);
-       if (!sfr)
-               return -ENOMEM;
-       sfr->size = sfr_size;
-       sfr->bsdes = cpuhw->qsi.bsdes;
-       sfr->dsdes = cpuhw->qsi.dsdes;
-       RAWSAMPLE_REG(hwc) = (unsigned long) sfr;
+       size_t sample_size;
 
        /* Calculate sampling buffers using 4K pages
         *
@@ -420,7 +387,7 @@ static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
         *       ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up
         *       to 511 SDBs).
         */
-       sample_size = event_sample_size(hwc);
+       sample_size = sizeof(struct hws_basic_entry);
        freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc));
        factor = 1;
        n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / sample_size));
@@ -619,10 +586,6 @@ static int reserve_pmc_hardware(void)
 
 static void hw_perf_event_destroy(struct perf_event *event)
 {
-       /* Free raw sample buffer */
-       if (RAWSAMPLE_REG(&event->hw))
-               kfree((void *) RAWSAMPLE_REG(&event->hw));
-
        /* Release PMC if this is the last perf event */
        if (!atomic_add_unless(&num_events, -1, 1)) {
                mutex_lock(&pmc_reserve_mutex);
@@ -642,15 +605,8 @@ static void hw_init_period(struct hw_perf_event *hwc, u64 period)
 static void hw_reset_registers(struct hw_perf_event *hwc,
                               unsigned long *sdbt_origin)
 {
-       struct sf_raw_sample *sfr;
-
        /* (Re)set to first sample-data-block-table */
        TEAR_REG(hwc) = (unsigned long) sdbt_origin;
-
-       /* (Re)set raw sampling buffer register */
-       sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc);
-       memset(&sfr->basic, 0, sizeof(sfr->basic));
-       memset(&sfr->diag, 0, sfr->dsdes);
 }
 
 static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
@@ -660,6 +616,67 @@ static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
                       si->min_sampl_rate, si->max_sampl_rate);
 }
 
+static u32 cpumsf_pid_type(struct perf_event *event,
+                          u32 pid, enum pid_type type)
+{
+       struct task_struct *tsk;
+
+       /* Idle process */
+       if (!pid)
+               goto out;
+
+       tsk = find_task_by_pid_ns(pid, &init_pid_ns);
+       pid = -1;
+       if (tsk) {
+               /*
+                * Only top level events contain the pid namespace in which
+                * they are created.
+                */
+               if (event->parent)
+                       event = event->parent;
+               pid = __task_pid_nr_ns(tsk, type, event->ns);
+               /*
+                * See also 1d953111b648
+                * "perf/core: Don't report zero PIDs for exiting tasks".
+                */
+               if (!pid && !pid_alive(tsk))
+                       pid = -1;
+       }
+out:
+       return pid;
+}
+
+static void cpumsf_output_event_pid(struct perf_event *event,
+                                   struct perf_sample_data *data,
+                                   struct pt_regs *regs)
+{
+       u32 pid;
+       struct perf_event_header header;
+       struct perf_output_handle handle;
+
+       /*
+        * Obtain the PID from the basic-sampling data entry and
+        * correct the data->tid_entry.pid value.
+        */
+       pid = data->tid_entry.pid;
+
+       /* Protect callchain buffers, tasks */
+       rcu_read_lock();
+
+       perf_prepare_sample(&header, data, event, regs);
+       if (perf_output_begin(&handle, event, header.size))
+               goto out;
+
+       /* Update the process ID (see also kernel/events/core.c) */
+       data->tid_entry.pid = cpumsf_pid_type(event, pid, __PIDTYPE_TGID);
+       data->tid_entry.tid = cpumsf_pid_type(event, pid, PIDTYPE_PID);
+
+       perf_output_sample(&handle, &header, data, event);
+       perf_output_end(&handle);
+out:
+       rcu_read_unlock();
+}
+
 static int __hw_perf_event_init(struct perf_event *event)
 {
        struct cpu_hw_sf *cpuhw;
@@ -770,6 +787,10 @@ static int __hw_perf_event_init(struct perf_event *event)
        hwc->extra_reg.reg = REG_OVERFLOW;
        OVERFLOW_REG(hwc) = 0;
 
+       /* Use AUX buffer. No need to allocate it ourselves */
+       if (attr->config == PERF_EVENT_CPUM_SF_DIAG)
+               return 0;
+
        /* Allocate the per-CPU sampling buffer using the CPU information
         * from the event.  If the event is not pinned to a particular
         * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling
@@ -789,6 +810,14 @@ static int __hw_perf_event_init(struct perf_event *event)
                                break;
                }
        }
+
+       /* If PID/TID sampling is active, replace the default overflow
+        * handler to extract and resolve the PIDs from the basic-sampling
+        * data entries.
+        */
+       if (event->attr.sample_type & PERF_SAMPLE_TID)
+               if (is_default_overflow_handler(event))
+                       event->overflow_handler = cpumsf_output_event_pid;
 out:
        return err;
 }
@@ -866,10 +895,15 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
         */
        if (cpuhw->event) {
                hwc = &cpuhw->event->hw;
-               /* Account number of overflow-designated buffer extents */
-               sfb_account_overflows(cpuhw, hwc);
-               if (sfb_has_pending_allocs(&cpuhw->sfb, hwc))
-                       extend_sampling_buffer(&cpuhw->sfb, hwc);
+               if (!(SAMPL_DIAG_MODE(hwc))) {
+                       /*
+                        * Account number of overflow-designated
+                        * buffer extents
+                        */
+                       sfb_account_overflows(cpuhw, hwc);
+                       if (sfb_has_pending_allocs(&cpuhw->sfb, hwc))
+                               extend_sampling_buffer(&cpuhw->sfb, hwc);
+               }
        }
 
        /* (Re)enable the PMU and sampling facility */
@@ -884,6 +918,9 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
                return;
        }
 
+       /* Load current program parameter */
+       lpp(&S390_lowcore.lpp);
+
        debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i "
                            "tear=%p dear=%p\n", cpuhw->lsctl.es, cpuhw->lsctl.cs,
                            cpuhw->lsctl.ed, cpuhw->lsctl.cd,
@@ -967,22 +1004,16 @@ static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs,
  *
  * Return non-zero if an event overflow occurred.
  */
-static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
+static int perf_push_sample(struct perf_event *event,
+                           struct hws_basic_entry *basic)
 {
        int overflow;
        struct pt_regs regs;
        struct perf_sf_sde_regs *sde_regs;
        struct perf_sample_data data;
-       struct perf_raw_record raw = {
-               .frag = {
-                       .size = sfr->size,
-                       .data = sfr,
-               },
-       };
 
        /* Setup perf sample */
        perf_sample_data_init(&data, 0, event->hw.last_period);
-       data.raw = &raw;
 
        /* Setup pt_regs to look like an CPU-measurement external interrupt
         * using the Program Request Alert code.  The regs.int_parm_long
@@ -994,11 +1025,11 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
        regs.int_parm = CPU_MF_INT_SF_PRA;
        sde_regs = (struct perf_sf_sde_regs *) &regs.int_parm_long;
 
-       psw_bits(regs.psw).ia   = sfr->basic.ia;
-       psw_bits(regs.psw).dat  = sfr->basic.T;
-       psw_bits(regs.psw).wait = sfr->basic.W;
-       psw_bits(regs.psw).pstate = sfr->basic.P;
-       psw_bits(regs.psw).as   = sfr->basic.AS;
+       psw_bits(regs.psw).ia   = basic->ia;
+       psw_bits(regs.psw).dat  = basic->T;
+       psw_bits(regs.psw).wait = basic->W;
+       psw_bits(regs.psw).pstate = basic->P;
+       psw_bits(regs.psw).as   = basic->AS;
 
        /*
         * Use the hardware provided configuration level to decide if the
@@ -1011,7 +1042,7 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
         * If the value differs from 0xffff (the host value), we assume to
         * be a KVM guest.
         */
-       switch (sfr->basic.CL) {
+       switch (basic->CL) {
        case 1: /* logical partition */
                sde_regs->in_guest = 0;
                break;
@@ -1019,11 +1050,17 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
                sde_regs->in_guest = 1;
                break;
        default: /* old machine, use heuristics */
-               if (sfr->basic.gpp || sfr->basic.prim_asn != 0xffff)
+               if (basic->gpp || basic->prim_asn != 0xffff)
                        sde_regs->in_guest = 1;
                break;
        }
 
+       /*
+        * Store the PID value from the sample-data-entry to be
+        * processed and resolved by cpumsf_output_event_pid().
+        */
+       data.tid_entry.pid = basic->hpp & LPP_PID_MASK;
+
        overflow = 0;
        if (perf_exclude_event(event, &regs, sde_regs))
                goto out;
@@ -1041,75 +1078,12 @@ static void perf_event_count_update(struct perf_event *event, u64 count)
        local64_add(count, &event->count);
 }
 
-static int sample_format_is_valid(struct hws_combined_entry *sample,
-                                  unsigned int flags)
-{
-       if (likely(flags & PERF_CPUM_SF_BASIC_MODE))
-               /* Only basic-sampling data entries with data-entry-format
-                * version of 0x0001 can be processed.
-                */
-               if (sample->basic.def != 0x0001)
-                       return 0;
-       if (flags & PERF_CPUM_SF_DIAG_MODE)
-               /* The data-entry-format number of diagnostic-sampling data
-                * entries can vary.  Because diagnostic data is just passed
-                * through, do only a sanity check on the DEF.
-                */
-               if (sample->diag.def < 0x8001)
-                       return 0;
-       return 1;
-}
-
-static int sample_is_consistent(struct hws_combined_entry *sample,
-                               unsigned long flags)
-{
-       /* This check applies only to basic-sampling data entries of potentially
-        * combined-sampling data entries.  Invalid entries cannot be processed
-        * by the PMU and, thus, do not deliver an associated
-        * diagnostic-sampling data entry.
-        */
-       if (unlikely(!(flags & PERF_CPUM_SF_BASIC_MODE)))
-               return 0;
-       /*
-        * Samples are skipped, if they are invalid or for which the
-        * instruction address is not predictable, i.e., the wait-state bit is
-        * set.
-        */
-       if (sample->basic.I || sample->basic.W)
-               return 0;
-       return 1;
-}
-
-static void reset_sample_slot(struct hws_combined_entry *sample,
-                             unsigned long flags)
-{
-       if (likely(flags & PERF_CPUM_SF_BASIC_MODE))
-               sample->basic.def = 0;
-       if (flags & PERF_CPUM_SF_DIAG_MODE)
-               sample->diag.def = 0;
-}
-
-static void sfr_store_sample(struct sf_raw_sample *sfr,
-                            struct hws_combined_entry *sample)
-{
-       if (likely(sfr->format & PERF_CPUM_SF_BASIC_MODE))
-               sfr->basic = sample->basic;
-       if (sfr->format & PERF_CPUM_SF_DIAG_MODE)
-               memcpy(&sfr->diag, &sample->diag, sfr->dsdes);
-}
-
-static void debug_sample_entry(struct hws_combined_entry *sample,
-                              struct hws_trailer_entry *te,
-                              unsigned long flags)
+static void debug_sample_entry(struct hws_basic_entry *sample,
+                              struct hws_trailer_entry *te)
 {
        debug_sprintf_event(sfdbg, 4, "hw_collect_samples: Found unknown "
-                           "sampling data entry: te->f=%i basic.def=%04x (%p)"
-                           " diag.def=%04x (%p)\n", te->f,
-                           sample->basic.def, &sample->basic,
-                           (flags & PERF_CPUM_SF_DIAG_MODE)
-                                       ? sample->diag.def : 0xFFFF,
-                           (flags & PERF_CPUM_SF_DIAG_MODE)
-                                       ?  &sample->diag : NULL);
+                           "sampling data entry: te->f=%i basic.def=%04x (%p)\n",
+                           te->f, sample->def, sample);
 }
 
 /* hw_collect_samples() - Walk through a sample-data-block and collect samples
@@ -1135,44 +1109,37 @@ static void debug_sample_entry(struct hws_combined_entry *sample,
 static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
                               unsigned long long *overflow)
 {
-       unsigned long flags = SAMPL_FLAGS(&event->hw);
-       struct hws_combined_entry *sample;
        struct hws_trailer_entry *te;
-       struct sf_raw_sample *sfr;
-       size_t sample_size;
-
-       /* Prepare and initialize raw sample data */
-       sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(&event->hw);
-       sfr->format = flags & PERF_CPUM_SF_MODE_MASK;
+       struct hws_basic_entry *sample;
 
-       sample_size = event_sample_size(&event->hw);
        te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
-       sample = (struct hws_combined_entry *) *sdbt;
+       sample = (struct hws_basic_entry *) *sdbt;
        while ((unsigned long *) sample < (unsigned long *) te) {
                /* Check for an empty sample */
-               if (!sample->basic.def)
+               if (!sample->def)
                        break;
 
                /* Update perf event period */
                perf_event_count_update(event, SAMPL_RATE(&event->hw));
 
-               /* Check sampling data entry */
-               if (sample_format_is_valid(sample, flags)) {
+               /* Check whether sample is valid */
+               if (sample->def == 0x0001) {
                        /* If an event overflow occurred, the PMU is stopped to
                         * throttle event delivery.  Remaining sample data is
                         * discarded.
                         */
                        if (!*overflow) {
-                               if (sample_is_consistent(sample, flags)) {
+                               /* Check whether sample is consistent */
+                               if (sample->I == 0 && sample->W == 0) {
                                        /* Deliver sample data to perf */
-                                       sfr_store_sample(sfr, sample);
-                                       *overflow = perf_push_sample(event, sfr);
+                                       *overflow = perf_push_sample(event,
+                                                                    sample);
                                }
                        } else
                                /* Count discarded samples */
                                *overflow += 1;
                } else {
-                       debug_sample_entry(sample, te, flags);
+                       debug_sample_entry(sample, te);
                        /* Sample slot is not yet written or other record.
                         *
                         * This condition can occur if the buffer was reused
@@ -1188,8 +1155,8 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
                }
 
                /* Reset sample slot and advance to next sample */
-               reset_sample_slot(sample, flags);
-               sample += sample_size;
+               sample->def = 0;
+               sample++;
        }
 }
 
@@ -1215,6 +1182,13 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
        unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags;
        int done;
 
+       /*
+        * AUX buffer is used when in diagnostic sampling mode.
+        * No perf events/samples are created.
+        */
+       if (SAMPL_DIAG_MODE(&event->hw))
+               return;
+
        if (flush_all && SDB_FULL_BLOCKS(hwc))
                flush_all = 0;
 
@@ -1291,6 +1265,439 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
                                    sampl_overflow, event_overflow);
 }
 
+#define AUX_SDB_INDEX(aux, i) ((i) % aux->sfb.num_sdb)
+#define AUX_SDB_NUM(aux, start, end) (end >= start ? end - start + 1 : 0)
+#define AUX_SDB_NUM_ALERT(aux) AUX_SDB_NUM(aux, aux->head, aux->alert_mark)
+#define AUX_SDB_NUM_EMPTY(aux) AUX_SDB_NUM(aux, aux->head, aux->empty_mark)
+
+/*
+ * Get trailer entry by index of SDB.
+ */
+static struct hws_trailer_entry *aux_sdb_trailer(struct aux_buffer *aux,
+                                                unsigned long index)
+{
+       unsigned long sdb;
+
+       index = AUX_SDB_INDEX(aux, index);
+       sdb = aux->sdb_index[index];
+       return (struct hws_trailer_entry *)trailer_entry_ptr(sdb);
+}
+
+/*
+ * Finish sampling on the cpu. Called by cpumsf_pmu_del() with pmu
+ * disabled. Collect the full SDBs in AUX buffer which have not reached
+ * the point of alert indicator, and ignore the SDBs which are not
+ * full.
+ *
+ * 1. Scan SDBs to see how much data is there and consume them.
+ * 2. Remove alert indicator in the buffer.
+ */
+static void aux_output_end(struct perf_output_handle *handle)
+{
+       unsigned long i, range_scan, idx;
+       struct aux_buffer *aux;
+       struct hws_trailer_entry *te;
+
+       aux = perf_get_aux(handle);
+       if (!aux)
+               return;
+
+       range_scan = AUX_SDB_NUM_ALERT(aux);
+       for (i = 0, idx = aux->head; i < range_scan; i++, idx++) {
+               te = aux_sdb_trailer(aux, idx);
+               if (!(te->flags & SDB_TE_BUFFER_FULL_MASK))
+                       break;
+       }
+       /* i is num of SDBs which are full */
+       perf_aux_output_end(handle, i << PAGE_SHIFT);
+
+       /* Remove alert indicators in the buffer */
+       te = aux_sdb_trailer(aux, aux->alert_mark);
+       te->flags &= ~SDB_TE_ALERT_REQ_MASK;
+
+       debug_sprintf_event(sfdbg, 6, "aux_output_end: collect %lx SDBs\n", i);
+}
+
+/*
+ * Start sampling on the CPU. Called by cpumsf_pmu_add() when an event
+ * is first added to the CPU or rescheduled again to the CPU. It is called
+ * with pmu disabled.
+ *
+ * 1. Reset the trailer of SDBs to get ready for new data.
+ * 2. Tell the hardware where to put the data by resetting the SDB buffer
+ *    head (tear/dear).
+ */
+static int aux_output_begin(struct perf_output_handle *handle,
+                           struct aux_buffer *aux,
+                           struct cpu_hw_sf *cpuhw)
+{
+       unsigned long range;
+       unsigned long i, range_scan, idx;
+       unsigned long head, base, offset;
+       struct hws_trailer_entry *te;
+
+       if (WARN_ON_ONCE(handle->head & ~PAGE_MASK))
+               return -EINVAL;
+
+       aux->head = handle->head >> PAGE_SHIFT;
+       range = (handle->size + 1) >> PAGE_SHIFT;
+       if (range <= 1)
+               return -ENOMEM;
+
+       /*
+        * SDBs between aux->head and aux->empty_mark are already ready
+        * for new data. range_scan is num of SDBs not within them.
+        */
+       if (range > AUX_SDB_NUM_EMPTY(aux)) {
+               range_scan = range - AUX_SDB_NUM_EMPTY(aux);
+               idx = aux->empty_mark + 1;
+               for (i = 0; i < range_scan; i++, idx++) {
+                       te = aux_sdb_trailer(aux, idx);
+                       te->flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK;
+                       te->flags = te->flags & ~SDB_TE_ALERT_REQ_MASK;
+                       te->overflow = 0;
+               }
+               /* Save the position of empty SDBs */
+               aux->empty_mark = aux->head + range - 1;
+       }
+
+       /* Set alert indicator */
+       aux->alert_mark = aux->head + range/2 - 1;
+       te = aux_sdb_trailer(aux, aux->alert_mark);
+       te->flags = te->flags | SDB_TE_ALERT_REQ_MASK;
+
+       /* Reset hardware buffer head */
+       head = AUX_SDB_INDEX(aux, aux->head);
+       base = aux->sdbt_index[head / CPUM_SF_SDB_PER_TABLE];
+       offset = head % CPUM_SF_SDB_PER_TABLE;
+       cpuhw->lsctl.tear = base + offset * sizeof(unsigned long);
+       cpuhw->lsctl.dear = aux->sdb_index[head];
+
+       debug_sprintf_event(sfdbg, 6, "aux_output_begin: "
+                           "head->alert_mark->empty_mark (num_alert, range)"
+                           "[%lx -> %lx -> %lx] (%lx, %lx) "
+                           "tear index %lx, tear %lx dear %lx\n",
+                           aux->head, aux->alert_mark, aux->empty_mark,
+                           AUX_SDB_NUM_ALERT(aux), range,
+                           head / CPUM_SF_SDB_PER_TABLE,
+                           cpuhw->lsctl.tear,
+                           cpuhw->lsctl.dear);
+
+       return 0;
+}
+
+/*
+ * Set alert indicator on SDB at index @alert_index while sampler is running.
+ *
+ * Return true if successful.
+ * Return false if full indicator is already set by hardware sampler.
+ */
+static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
+                         unsigned long long *overflow)
+{
+       unsigned long long orig_overflow, orig_flags, new_flags;
+       struct hws_trailer_entry *te;
+
+       te = aux_sdb_trailer(aux, alert_index);
+       do {
+               orig_flags = te->flags;
+               orig_overflow = te->overflow;
+               *overflow = orig_overflow;
+               if (orig_flags & SDB_TE_BUFFER_FULL_MASK) {
+                       /*
+                        * SDB is already set by hardware.
+                        * Abort and try to set somewhere
+                        * behind.
+                        */
+                       return false;
+               }
+               new_flags = orig_flags | SDB_TE_ALERT_REQ_MASK;
+       } while (!cmpxchg_double(&te->flags, &te->overflow,
+                                orig_flags, orig_overflow,
+                                new_flags, 0ULL));
+       return true;
+}
+
+/*
+ * aux_reset_buffer() - Scan and setup SDBs for new samples
+ * @aux:       The AUX buffer to set
+ * @range:     The range of SDBs to scan started from aux->head
+ * @overflow:  Set to overflow count
+ *
+ * Set alert indicator on the SDB at index of aux->alert_mark. If this SDB is
+ * marked as empty, check if it is already set full by the hardware sampler.
+ * If yes, that means new data is already there before we can set an alert
+ * indicator. Caller should try to set alert indicator to some position behind.
+ *
+ * Scan the SDBs in AUX buffer from behind aux->empty_mark. They are used
+ * previously and have already been consumed by user space. Reset these SDBs
+ * (clear full indicator and alert indicator) for new data.
+ * If aux->alert_mark falls in this area, just set it. Overflow count is
+ * recorded while scanning.
+ *
+ * SDBs between aux->head and aux->empty_mark were already reset last time
+ * and are ready for new samples, so scanning this area can be skipped.
+ *
+ * Return true if alert indicator is set successfully and false if not.
+ */
+static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
+                            unsigned long long *overflow)
+{
+       unsigned long long orig_overflow, orig_flags, new_flags;
+       unsigned long i, range_scan, idx;
+       struct hws_trailer_entry *te;
+
+       if (range <= AUX_SDB_NUM_EMPTY(aux))
+               /*
+                * No need to scan. All SDBs in range are marked as empty.
+                * Just set alert indicator. Should check race with hardware
+                * sampler.
+                */
+               return aux_set_alert(aux, aux->alert_mark, overflow);
+
+       if (aux->alert_mark <= aux->empty_mark)
+               /*
+                * Set alert indicator on empty SDB. Should check race
+                * with hardware sampler.
+                */
+               if (!aux_set_alert(aux, aux->alert_mark, overflow))
+                       return false;
+
+       /*
+        * Scan the SDBs to clear full and alert indicator used previously.
+        * Start scanning from one SDB behind empty_mark. If the new alert
+        * indicator falls into this range, set it.
+        */
+       range_scan = range - AUX_SDB_NUM_EMPTY(aux);
+       idx = aux->empty_mark + 1;
+       for (i = 0; i < range_scan; i++, idx++) {
+               te = aux_sdb_trailer(aux, idx);
+               do {
+                       orig_flags = te->flags;
+                       orig_overflow = te->overflow;
+                       new_flags = orig_flags & ~SDB_TE_BUFFER_FULL_MASK;
+                       if (idx == aux->alert_mark)
+                               new_flags |= SDB_TE_ALERT_REQ_MASK;
+                       else
+                               new_flags &= ~SDB_TE_ALERT_REQ_MASK;
+               } while (!cmpxchg_double(&te->flags, &te->overflow,
+                                        orig_flags, orig_overflow,
+                                        new_flags, 0ULL));
+               *overflow += orig_overflow;
+       }
+
+       /* Update empty_mark to new position */
+       aux->empty_mark = aux->head + range - 1;
+
+       return true;
+}
+
+/*
+ * Measurement alert handler for diagnostic mode sampling.
+ */
+static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
+{
+       struct aux_buffer *aux;
+       int done = 0;
+       unsigned long range = 0, size;
+       unsigned long long overflow = 0;
+       struct perf_output_handle *handle = &cpuhw->handle;
+       unsigned long num_sdb;
+
+       aux = perf_get_aux(handle);
+       if (WARN_ON_ONCE(!aux))
+               return;
+
+       /* Inform user space that new data has arrived */
+       size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT;
+       perf_aux_output_end(handle, size);
+       num_sdb = aux->sfb.num_sdb;
+
+       while (!done) {
+               /* Get an output handle */
+               aux = perf_aux_output_begin(handle, cpuhw->event);
+               if (handle->size == 0) {
+                       pr_err("The AUX buffer with %lu pages for the "
+                              "diagnostic-sampling mode is full\n",
+                               num_sdb);
+                       debug_sprintf_event(sfdbg, 1, "AUX buffer used up\n");
+                       break;
+               }
+               if (WARN_ON_ONCE(!aux))
+                       return;
+
+               /* Update head and alert_mark to new position */
+               aux->head = handle->head >> PAGE_SHIFT;
+               range = (handle->size + 1) >> PAGE_SHIFT;
+               if (range == 1)
+                       aux->alert_mark = aux->head;
+               else
+                       aux->alert_mark = aux->head + range/2 - 1;
+
+               if (aux_reset_buffer(aux, range, &overflow)) {
+                       if (!overflow) {
+                               done = 1;
+                               break;
+                       }
+                       size = range << PAGE_SHIFT;
+                       perf_aux_output_end(&cpuhw->handle, size);
+                       pr_err("Sample data caused the AUX buffer with %lu "
+                              "pages to overflow\n", num_sdb);
+                       debug_sprintf_event(sfdbg, 1, "head %lx range %lx "
+                                           "overflow %llx\n",
+                                           aux->head, range, overflow);
+               } else {
+                       size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT;
+                       perf_aux_output_end(&cpuhw->handle, size);
+                       debug_sprintf_event(sfdbg, 6, "head %lx alert %lx "
+                                           "already full, try another\n",
+                                           aux->head, aux->alert_mark);
+               }
+       }
+
+       if (done)
+               debug_sprintf_event(sfdbg, 6, "aux_reset_buffer: "
+                                   "[%lx -> %lx -> %lx] (%lx, %lx)\n",
+                                   aux->head, aux->alert_mark, aux->empty_mark,
+                                   AUX_SDB_NUM_ALERT(aux), range);
+}
+
+/*
+ * aux_buffer_free() - Callback invoked when an AUX buffer is freed
+ * @data:      The struct aux_buffer to release; NULL is accepted and ignored
+ *
+ * Releases every SDBT page recorded in sdbt_index, then the two index
+ * arrays, and finally the aux_buffer structure itself.
+ */
+static void aux_buffer_free(void *data)
+{
+       struct aux_buffer *aux = data;
+       unsigned long i, num_sdbt;
+
+       if (!aux)
+               return;
+
+       /*
+        * Free the SDBTs; the SDBs are freed by the caller. The count is
+        * kept in a local because it is still logged after aux is freed.
+        */
+       num_sdbt = aux->sfb.num_sdbt;
+       for (i = 0; i < num_sdbt; i++)
+               free_page(aux->sdbt_index[i]);
+
+       kfree(aux->sdbt_index);
+       kfree(aux->sdb_index);
+       kfree(aux);
+
+       debug_sprintf_event(sfdbg, 4, "aux_buffer_free: free "
+                           "%lu SDBTs\n", num_sdbt);
+}
+
+/*
+ * aux_buffer_setup() - Set up the AUX buffer for diagnostic-mode sampling
+ * @cpu:       CPU on which to allocate, -1 means current (not referenced
+ *             by this implementation)
+ * @pages:     Array of pointers to buffer pages passed from perf core
+ * @nr_pages:  Total number of pages in @pages
+ * @snapshot:  Flag for snapshot mode (not referenced by this implementation)
+ *
+ * This is the callback invoked when an event that uses an AUX buffer is set
+ * up. The perf tool can trigger it by an additional mmap() call on the
+ * event. Unlike the buffer for basic samples, the AUX buffer belongs to the
+ * event. It is scheduled with the task among online CPUs when it is a
+ * per-thread event.
+ *
+ * Every page in @pages becomes one SDB entry in a chain of SDBTs that is
+ * allocated here; the last link entry points back to the first SDBT, so
+ * the chain is circular. Two index arrays (sdbt_index, sdb_index) record
+ * the addresses of all SDBTs and SDBs.
+ *
+ * Return: the private AUX buffer structure on success, or NULL if
+ * @nr_pages is zero or outside the allowed range, @pages is NULL, or an
+ * allocation fails.
+ */
+static void *aux_buffer_setup(int cpu, void **pages, int nr_pages,
+                             bool snapshot)
+{
+       struct sf_buffer *sfb;
+       struct aux_buffer *aux;
+       unsigned long *new, *tail;
+       int i, n_sdbt;
+
+       if (!nr_pages || !pages)
+               return NULL;
+
+       if (nr_pages > CPUM_SF_MAX_SDB * CPUM_SF_SDB_DIAG_FACTOR) {
+               pr_err("AUX buffer size (%i pages) is larger than the "
+                      "maximum sampling buffer limit\n",
+                      nr_pages);
+               return NULL;
+       } else if (nr_pages < CPUM_SF_MIN_SDB * CPUM_SF_SDB_DIAG_FACTOR) {
+               pr_err("AUX buffer size (%i pages) is less than the "
+                      "minimum sampling buffer limit\n",
+                      nr_pages);
+               return NULL;
+       }
+
+       /*
+        * Allocate the aux_buffer struct for the event.
+        * NOTE(review): kmalloc() leaves the structure uninitialized and
+        * this function does not set aux->head or aux->alert_mark; confirm
+        * they are always written before their first use.
+        */
+       aux = kmalloc(sizeof(struct aux_buffer), GFP_KERNEL);
+       if (!aux)
+               goto no_aux;
+       sfb = &aux->sfb;
+
+       /*
+        * Allocate sdbt_index for fast reference: one slot per SDBT,
+        * with the table count rounded up from nr_pages.
+        */
+       n_sdbt = (nr_pages + CPUM_SF_SDB_PER_TABLE - 1) / CPUM_SF_SDB_PER_TABLE;
+       aux->sdbt_index = kmalloc_array(n_sdbt, sizeof(void *), GFP_KERNEL);
+       if (!aux->sdbt_index)
+               goto no_sdbt_index;
+
+       /* Allocate sdb_index for fast reference: one slot per page */
+       aux->sdb_index = kmalloc_array(nr_pages, sizeof(void *), GFP_KERNEL);
+       if (!aux->sdb_index)
+               goto no_sdb_index;
+
+       /*
+        * Allocate the first SDBT. num_sdbt is zeroed beforehand so that
+        * the no_sdbt unwind path frees only the tables recorded so far
+        * (none, if this very allocation fails).
+        */
+       sfb->num_sdbt = 0;
+       sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL);
+       if (!sfb->sdbt)
+               goto no_sdbt;
+       aux->sdbt_index[sfb->num_sdbt++] = (unsigned long)sfb->sdbt;
+       tail = sfb->tail = sfb->sdbt;
+
+       /*
+        * Link the provided pages of the AUX buffer into the SDBT chain.
+        * When require_table_link() reports that the current slot must
+        * hold a link, allocate another SDBT, store the link in that slot
+        * and continue filling from the start of the new table.
+        */
+       for (i = 0; i < nr_pages; i++, tail++) {
+               if (require_table_link(tail)) {
+                       new = (unsigned long *) get_zeroed_page(GFP_KERNEL);
+                       if (!new)
+                               goto no_sdbt;
+                       aux->sdbt_index[sfb->num_sdbt++] = (unsigned long)new;
+                       /*
+                        * Link the new SDBT to the tail of the chain; the
+                        * + 1 presumably flags the entry as a table link
+                        * rather than an SDB address (TODO confirm).
+                        */
+                       *tail = (unsigned long)(void *) new + 1;
+                       tail = new;
+               }
+               /* Tail is an SDB entry in an SDBT; also index the page */
+               *tail = (unsigned long)pages[i];
+               aux->sdb_index[i] = (unsigned long)pages[i];
+       }
+       sfb->num_sdb = nr_pages;
+
+       /* Link the last entry in the SDBT to the first SDBT */
+       *tail = (unsigned long) sfb->sdbt + 1;
+       sfb->tail = tail;
+
+       /*
+        * Initially all SDBs are zeroed, so mark the buffer as empty by
+        * setting empty_mark to the index of the last SDB. There is then
+        * no need to clear the full indicator when this event is first
+        * added.
+        */
+       aux->empty_mark = sfb->num_sdb - 1;
+
+       debug_sprintf_event(sfdbg, 4, "aux_buffer_setup: setup %lu SDBTs"
+                           " and %lu SDBs\n",
+                           sfb->num_sdbt, sfb->num_sdb);
+
+       return aux;
+
+no_sdbt:
+       /*
+        * SDBs (AUX buffer pages) are freed by the caller; only the SDBTs
+        * recorded in sdbt_index so far are released here.
+        */
+       for (i = 0; i < sfb->num_sdbt; i++)
+               free_page(aux->sdbt_index[i]);
+       kfree(aux->sdb_index);
+no_sdb_index:
+       kfree(aux->sdbt_index);
+no_sdbt_index:
+       kfree(aux);
+no_aux:
+       return NULL;
+}
+
 static void cpumsf_pmu_read(struct perf_event *event)
 {
        /* Nothing to do ... updates are interrupt-driven */
@@ -1342,12 +1749,13 @@ static void cpumsf_pmu_stop(struct perf_event *event, int flags)
 static int cpumsf_pmu_add(struct perf_event *event, int flags)
 {
        struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
+       struct aux_buffer *aux;
        int err;
 
        if (cpuhw->flags & PMU_F_IN_USE)
                return -EAGAIN;
 
-       if (!cpuhw->sfb.sdbt)
+       if (!SAMPL_DIAG_MODE(&event->hw) && !cpuhw->sfb.sdbt)
                return -EINVAL;
 
        err = 0;
@@ -1362,10 +1770,12 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags)
         */
        cpuhw->lsctl.s = 0;
        cpuhw->lsctl.h = 1;
-       cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt;
-       cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt;
        cpuhw->lsctl.interval = SAMPL_RATE(&event->hw);
-       hw_reset_registers(&event->hw, cpuhw->sfb.sdbt);
+       if (!SAMPL_DIAG_MODE(&event->hw)) {
+               cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt;
+               cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt;
+               hw_reset_registers(&event->hw, cpuhw->sfb.sdbt);
+       }
 
        /* Ensure sampling functions are in the disabled state.  If disabled,
         * switch on sampling enable control. */
@@ -1373,9 +1783,18 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags)
                err = -EAGAIN;
                goto out;
        }
-       cpuhw->lsctl.es = 1;
-       if (SAMPL_DIAG_MODE(&event->hw))
+       if (SAMPL_DIAG_MODE(&event->hw)) {
+               aux = perf_aux_output_begin(&cpuhw->handle, event);
+               if (!aux) {
+                       err = -EINVAL;
+                       goto out;
+               }
+               err = aux_output_begin(&cpuhw->handle, aux, cpuhw);
+               if (err)
+                       goto out;
                cpuhw->lsctl.ed = 1;
+       }
+       cpuhw->lsctl.es = 1;
 
        /* Set in_use flag and store event */
        cpuhw->event = event;
@@ -1401,6 +1820,8 @@ static void cpumsf_pmu_del(struct perf_event *event, int flags)
        cpuhw->flags &= ~PMU_F_IN_USE;
        cpuhw->event = NULL;
 
+       if (SAMPL_DIAG_MODE(&event->hw))
+               aux_output_end(&cpuhw->handle);
        perf_event_update_userpage(event);
        perf_pmu_enable(event->pmu);
 }
@@ -1448,6 +1869,9 @@ static struct pmu cpumf_sampling = {
        .read         = cpumsf_pmu_read,
 
        .attr_groups  = cpumsf_pmu_attr_groups,
+
+       .setup_aux    = aux_buffer_setup,
+       .free_aux     = aux_buffer_free,
 };
 
 static void cpumf_measurement_alert(struct ext_code ext_code,
@@ -1471,7 +1895,10 @@ static void cpumf_measurement_alert(struct ext_code ext_code,
        /* Program alert request */
        if (alert & CPU_MF_INT_SF_PRA) {
                if (cpuhw->flags & PMU_F_IN_USE)
-                       hw_perf_event_update(cpuhw->event, 0);
+                       if (SAMPL_DIAG_MODE(&cpuhw->event->hw))
+                               hw_collect_aux(cpuhw);
+                       else
+                               hw_perf_event_update(cpuhw->event, 0);
                else
                        WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE));
        }
@@ -1590,6 +2017,9 @@ static int __init init_cpum_sampling_pmu(void)
                return -ENODEV;
        }
 
+       if (!si.as && !si.ad)
+               return -ENODEV;
+
        if (si.bsdes != sizeof(struct hws_basic_entry)) {
                pr_cpumsf_err(RS_INIT_FAILURE_BSDES);
                return -EINVAL;
diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c
new file mode 100644 (file)
index 0000000..f8603eb
--- /dev/null
@@ -0,0 +1,70 @@
+#include <linux/perf_event.h>
+#include <linux/perf_regs.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/bug.h>
+#include <asm/ptrace.h>
+#include <asm/fpu/api.h>
+#include <asm/fpu/types.h>
+
+/*
+ * perf_reg_value() - Return the value of one perf register
+ * @regs:      Register set to read general registers and the PSW from
+ * @idx:       PERF_REG_S390_* index of the requested register
+ *
+ * An index outside [0, PERF_REG_S390_MAX) triggers a one-time warning and
+ * yields 0. Floating-point registers are read from current->thread.fpu,
+ * not from @regs, and yield 0 unless @regs is a user-mode register set.
+ */
+u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+       freg_t fp;
+
+       /* The unsigned cast makes negative indexes fail the range check */
+       if (WARN_ON_ONCE((u32)idx >= PERF_REG_S390_MAX))
+               return 0;
+
+       if (idx >= PERF_REG_S390_R0 && idx <= PERF_REG_S390_R15)
+               return regs->gprs[idx];
+
+       if (idx >= PERF_REG_S390_FP0 && idx <= PERF_REG_S390_FP15) {
+               if (!user_mode(regs))
+                       return 0;
+
+               /*
+                * Rebase to a 0..15 register number. With the vector
+                * facility the value is read from the start of vxrs[idx],
+                * otherwise from fprs[idx].
+                */
+               idx -= PERF_REG_S390_FP0;
+               fp = MACHINE_HAS_VX ? *(freg_t *)(current->thread.fpu.vxrs + idx)
+                                   : current->thread.fpu.fprs[idx];
+               return fp.ui;
+       }
+
+       if (idx == PERF_REG_S390_MASK)
+               return regs->psw.mask;
+       if (idx == PERF_REG_S390_PC)
+               return regs->psw.addr;
+
+       /*
+        * NOTE(review): only reached for an index below PERF_REG_S390_MAX
+        * that matches none of the cases above; gprs[] would then be
+        * indexed with a non-GPR index. Confirm the enum has no such gap.
+        */
+       return regs->gprs[idx];
+}
+
+/* Mask of all bits at or above PERF_REG_S390_MAX, i.e. bits naming no register */
+#define REG_RESERVED (~((1UL << PERF_REG_S390_MAX) - 1))
+
+/*
+ * perf_reg_validate() - Check a requested register mask
+ * @mask:      Bit mask of PERF_REG_S390_* registers
+ *
+ * Return: -EINVAL if @mask is empty or has any REG_RESERVED bit set,
+ * 0 otherwise.
+ */
+int perf_reg_validate(u64 mask)
+{
+       if (!mask || mask & REG_RESERVED)
+               return -EINVAL;
+
+       return 0;
+}
+
+/*
+ * perf_reg_abi() - Report the register ABI of a task
+ * @task:      Task to inspect
+ *
+ * Return: PERF_SAMPLE_REGS_ABI_32 if @task has TIF_31BIT set,
+ * PERF_SAMPLE_REGS_ABI_64 otherwise.
+ */
+u64 perf_reg_abi(struct task_struct *task)
+{
+       if (test_tsk_thread_flag(task, TIF_31BIT))
+               return PERF_SAMPLE_REGS_ABI_32;
+
+       return PERF_SAMPLE_REGS_ABI_64;
+}
+
+/*
+ * perf_get_regs_user() - Fill in the user register set for a perf sample
+ * @regs_user:       Output; receives the register pointer and the ABI
+ * @regs:            Not referenced by this implementation
+ * @regs_user_copy:  Not referenced by this implementation
+ */
+void perf_get_regs_user(struct perf_regs *regs_user,
+                       struct pt_regs *regs,
+                       struct pt_regs *regs_user_copy)
+{
+       /*
+        * Use the regs from the first interruption and let
+        * perf_sample_regs_intr() handle interrupts (regs == get_irq_regs()).
+        *
+        * Also save FPU registers for user-space tasks only; presumably
+        * this makes current->thread.fpu current for later
+        * perf_reg_value() reads (TODO confirm).
+        */
+       regs_user->regs = task_pt_regs(current);
+       if (user_mode(regs_user->regs))
+               save_fpu_regs();
+       regs_user->abi = perf_reg_abi(current);
+}
index 0520854a4dab27f7ac0feef52311a765686aebf1..39a218703c50add3defe9aa2ce5a85f88188e776 100644 (file)
@@ -158,16 +158,9 @@ int vdso_alloc_per_cpu(struct lowcore *lowcore)
 {
        unsigned long segment_table, page_table, page_frame;
        struct vdso_per_cpu_data *vd;
-       u32 *psal, *aste;
-       int i;
-
-       lowcore->vdso_per_cpu_data = __LC_PASTE;
-
-       if (!vdso_enabled)
-               return 0;
 
        segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER);
-       page_table = get_zeroed_page(GFP_KERNEL | GFP_DMA);
+       page_table = get_zeroed_page(GFP_KERNEL);
        page_frame = get_zeroed_page(GFP_KERNEL);
        if (!segment_table || !page_table || !page_frame)
                goto out;
@@ -179,25 +172,15 @@ int vdso_alloc_per_cpu(struct lowcore *lowcore)
        vd->cpu_nr = lowcore->cpu_nr;
        vd->node_id = cpu_to_node(vd->cpu_nr);
 
-       /* Set up access register mode page table */
+       /* Set up page table for the vdso address space */
        memset64((u64 *)segment_table, _SEGMENT_ENTRY_EMPTY, _CRST_ENTRIES);
        memset64((u64 *)page_table, _PAGE_INVALID, PTRS_PER_PTE);
 
        *(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table;
        *(unsigned long *) page_table = _PAGE_PROTECT + page_frame;
 
-       psal = (u32 *) (page_table + 256*sizeof(unsigned long));
-       aste = psal + 32;
-
-       for (i = 4; i < 32; i += 4)
-               psal[i] = 0x80000000;
-
-       lowcore->paste[4] = (u32)(addr_t) psal;
-       psal[0] = 0x02000000;
-       psal[2] = (u32)(addr_t) aste;
-       *(unsigned long *) (aste + 2) = segment_table +
+       lowcore->vdso_asce = segment_table +
                _ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT;
-       aste[4] = (u32)(addr_t) psal;
        lowcore->vdso_per_cpu_data = page_frame;
 
        return 0;
@@ -212,14 +195,8 @@ out:
 void vdso_free_per_cpu(struct lowcore *lowcore)
 {
        unsigned long segment_table, page_table, page_frame;
-       u32 *psal, *aste;
-
-       if (!vdso_enabled)
-               return;
 
-       psal = (u32 *)(addr_t) lowcore->paste[4];
-       aste = (u32 *)(addr_t) psal[2];
-       segment_table = *(unsigned long *)(aste + 2) & PAGE_MASK;
+       segment_table = lowcore->vdso_asce & PAGE_MASK;
        page_table = *(unsigned long *) segment_table;
        page_frame = *(unsigned long *) page_table;
 
@@ -228,16 +205,6 @@ void vdso_free_per_cpu(struct lowcore *lowcore)
        free_pages(segment_table, SEGMENT_ORDER);
 }
 
-static void vdso_init_cr5(void)
-{
-       unsigned long cr5;
-
-       if (!vdso_enabled)
-               return;
-       cr5 = offsetof(struct lowcore, paste);
-       __ctl_load(cr5, 5, 5);
-}
-
 /*
  * This is called from binfmt_elf, we create the special vma for the
  * vDSO and insert it into the mm struct tree
@@ -314,8 +281,6 @@ static int __init vdso_init(void)
 {
        int i;
 
-       if (!vdso_enabled)
-               return 0;
        vdso_init_data(vdso_data);
 #ifdef CONFIG_COMPAT
        /* Calculate the size of the 32 bit vDSO */
@@ -354,7 +319,6 @@ static int __init vdso_init(void)
        vdso64_pagelist[vdso64_pages] = NULL;
        if (vdso_alloc_per_cpu(&S390_lowcore))
                BUG();
-       vdso_init_cr5();
 
        get_page(virt_to_page(vdso_data));
 
index 6e30769dd017654550617efa39f02a07814dd5ca..5477a2c112fb800e3c1013cb17a03a1d6aa47228 100644 (file)
        .type  __kernel_getcpu,@function
 __kernel_getcpu:
        .cfi_startproc
-       ear     %r1,%a4
-       lhi     %r4,1
-       sll     %r4,24
-       sar     %a4,%r4
        la      %r4,0
-       epsw    %r0,0
-       sacf    512
+       sacf    256
        l       %r5,__VDSO_CPU_NR(%r4)
        l       %r4,__VDSO_NODE_ID(%r4)
-       tml     %r0,0x4000
-       jo      1f
-       tml     %r0,0x8000
-       jno     0f
-       sacf    256
-       j       1f
-0:     sacf    0
-1:     sar     %a4,%r1
+       sacf    0
        ltr     %r2,%r2
        jz      2f
        st      %r5,0(%r2)
index 9c3b12626dbae06a77a12b4e186a4bb0eb253e09..5d7b56b49458d03ba885f105a9c07bcdecea019a 100644 (file)
@@ -114,23 +114,12 @@ __kernel_clock_gettime:
        br      %r14
 
        /* CPUCLOCK_VIRT for this thread */
-9:     icm     %r0,15,__VDSO_ECTG_OK(%r5)
+9:     lghi    %r4,0
+       icm     %r0,15,__VDSO_ECTG_OK(%r5)
        jz      12f
-       ear     %r2,%a4
-       llilh   %r4,0x0100
-       sar     %a4,%r4
-       lghi    %r4,0
-       epsw    %r5,0
-       sacf    512                             /* Magic ectg instruction */
+       sacf    256                             /* Magic ectg instruction */
        .insn   ssf,0xc80100000000,__VDSO_ECTG_BASE(4),__VDSO_ECTG_USER(4),4
-       tml     %r5,0x4000
-       jo      11f
-       tml     %r5,0x8000
-       jno     10f
-       sacf    256
-       j       11f
-10:    sacf    0
-11:    sar     %a4,%r2
+       sacf    0
        algr    %r1,%r0                         /* r1 = cputime as TOD value */
        mghi    %r1,1000                        /* convert to nanoseconds */
        srlg    %r1,%r1,12                      /* r1 = cputime in nanosec */
index 43983764b959827951ddb121ff8158dabbfa1a12..e9c34364d97b1aafdef2b0fa24c3ece518e3fe53 100644 (file)
        .type  __kernel_getcpu,@function
 __kernel_getcpu:
        .cfi_startproc
-       ear     %r1,%a4
-       llilh   %r4,0x0100
-       sar     %a4,%r4
        la      %r4,0
-       epsw    %r0,0
-       sacf    512
+       sacf    256
        l       %r5,__VDSO_CPU_NR(%r4)
        l       %r4,__VDSO_NODE_ID(%r4)
-       tml     %r0,0x4000
-       jo      1f
-       tml     %r0,0x8000
-       jno     0f
-       sacf    256
-       j       1f
-0:     sacf    0
-1:     sar     %a4,%r1
+       sacf    0
        ltgr    %r2,%r2
        jz      2f
        st      %r5,0(%r2)
index 84c0faeaf7ea7c6dab061e6cc4993fcf642cc140..30a7c8c299646db0cf228d1509b6313bbf59b51e 100644 (file)
@@ -78,7 +78,7 @@ static inline int arch_load_niai4(int *lock)
                ALTERNATIVE("", ".long 0xb2fa0040", 49) /* NIAI 4 */
                "       l       %0,%1\n"
                : "=d" (owner) : "Q" (*lock) : "memory");
-       return owner;
+       return owner;
 }
 
 static inline int arch_cmpxchg_niai8(int *lock, int old, int new)
@@ -226,9 +226,10 @@ static inline void arch_spin_lock_classic(arch_spinlock_t *lp)
                /* Try to get the lock if it is free. */
                if (!owner) {
                        new = (old & _Q_TAIL_MASK) | lockval;
-                       if (arch_cmpxchg_niai8(&lp->lock, old, new))
+                       if (arch_cmpxchg_niai8(&lp->lock, old, new)) {
                                /* Got the lock */
-                              return;
+                               return;
+                       }
                        continue;
                }
                if (count-- >= 0)
index 802903c50de125f54f8b4f3713e243c0b85a5b6e..cae5a1e16cbd2d9ac5cc7b2fd1f67443919b8f80 100644 (file)
@@ -40,10 +40,67 @@ static inline int copy_with_mvcos(void)
 }
 #endif
 
+/*
+ * set_fs() - Set the current thread's mm_segment and load matching ASCEs
+ * @fs: New segment value; stored in current->thread.mm_segment
+ *
+ * Control register 1 is loaded with the user ASCE only for USER_DS and
+ * with the kernel ASCE for every other value. If bit 0 of @fs is set,
+ * control register 7 is loaded as well: with the user ASCE for
+ * USER_DS_SACF, with the kernel ASCE otherwise. The CIF_ASCE_PRIMARY and
+ * CIF_ASCE_SECONDARY CPU flags are updated alongside the loads.
+ */
+void set_fs(mm_segment_t fs)
+{
+       current->thread.mm_segment = fs;
+       if (fs == USER_DS) {
+               __ctl_load(S390_lowcore.user_asce, 1, 1);
+               clear_cpu_flag(CIF_ASCE_PRIMARY);
+       } else {
+               __ctl_load(S390_lowcore.kernel_asce, 1, 1);
+               set_cpu_flag(CIF_ASCE_PRIMARY);
+       }
+       /* Bit 0 of the segment value selects the additional CR7 setup */
+       if (fs & 1) {
+               if (fs == USER_DS_SACF)
+                       __ctl_load(S390_lowcore.user_asce, 7, 7);
+               else
+                       __ctl_load(S390_lowcore.kernel_asce, 7, 7);
+               set_cpu_flag(CIF_ASCE_SECONDARY);
+       }
+}
+EXPORT_SYMBOL(set_fs);
+
+/*
+ * enable_sacf_uaccess() - Prepare control registers for sacf-based uaccess
+ *
+ * If bit 0 of the current mm_segment is already set, nothing is changed.
+ * Otherwise bit 0 is set, and control register 7 is made to hold the user
+ * ASCE when the previous segment was USER_DS, or the kernel ASCE for any
+ * other previous value. In the USER_DS case control register 1 is also
+ * made to hold the kernel ASCE. Registers are only reloaded, and the
+ * matching CIF_ASCE_* flag only set, when their content differs.
+ *
+ * Return: the mm_segment value found on entry; the callers in this file
+ * hand it to disable_sacf_uaccess() afterwards.
+ */
+mm_segment_t enable_sacf_uaccess(void)
+{
+       mm_segment_t old_fs;
+       unsigned long asce, cr;
+
+       old_fs = current->thread.mm_segment;
+       if (old_fs & 1)
+               return old_fs;
+       current->thread.mm_segment |= 1;
+       /* Default CR7 target; replaced by the user ASCE for USER_DS below */
+       asce = S390_lowcore.kernel_asce;
+       if (likely(old_fs == USER_DS)) {
+               __ctl_store(cr, 1, 1);
+               if (cr != S390_lowcore.kernel_asce) {
+                       __ctl_load(S390_lowcore.kernel_asce, 1, 1);
+                       set_cpu_flag(CIF_ASCE_PRIMARY);
+               }
+               asce = S390_lowcore.user_asce;
+       }
+       __ctl_store(cr, 7, 7);
+       if (cr != asce) {
+               __ctl_load(asce, 7, 7);
+               set_cpu_flag(CIF_ASCE_SECONDARY);
+       }
+       return old_fs;
+}
+EXPORT_SYMBOL(enable_sacf_uaccess);
+
+/*
+ * disable_sacf_uaccess() - Restore the mm_segment saved by the enable call
+ * @old_fs:    Value previously returned by enable_sacf_uaccess()
+ *
+ * When @old_fs is USER_DS and facility 27 is installed, control register 1
+ * is reloaded with the user ASCE and CIF_ASCE_PRIMARY is cleared.
+ * Control register 7 is not touched here. Facility 27 is presumably the
+ * one the mvcos copy variants depend on (TODO confirm).
+ */
+void disable_sacf_uaccess(mm_segment_t old_fs)
+{
+       if (old_fs == USER_DS && test_facility(27)) {
+               __ctl_load(S390_lowcore.user_asce, 1, 1);
+               clear_cpu_flag(CIF_ASCE_PRIMARY);
+       }
+       current->thread.mm_segment = old_fs;
+}
+EXPORT_SYMBOL(disable_sacf_uaccess);
+
 static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr,
                                                 unsigned long size)
 {
-       register unsigned long reg0 asm("0") = 0x81UL;
+       register unsigned long reg0 asm("0") = 0x01UL;
        unsigned long tmp1, tmp2;
 
        tmp1 = -4096UL;
@@ -74,8 +131,9 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
                                                unsigned long size)
 {
        unsigned long tmp1, tmp2;
+       mm_segment_t old_fs;
 
-       load_kernel_asce();
+       old_fs = enable_sacf_uaccess();
        tmp1 = -256UL;
        asm volatile(
                "   sacf  0\n"
@@ -102,6 +160,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
                EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b)
                : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
                : : "cc", "memory");
+       disable_sacf_uaccess(old_fs);
        return size;
 }
 
@@ -116,7 +175,7 @@ EXPORT_SYMBOL(raw_copy_from_user);
 static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x,
                                               unsigned long size)
 {
-       register unsigned long reg0 asm("0") = 0x810000UL;
+       register unsigned long reg0 asm("0") = 0x010000UL;
        unsigned long tmp1, tmp2;
 
        tmp1 = -4096UL;
@@ -147,8 +206,9 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x,
                                              unsigned long size)
 {
        unsigned long tmp1, tmp2;
+       mm_segment_t old_fs;
 
-       load_kernel_asce();
+       old_fs = enable_sacf_uaccess();
        tmp1 = -256UL;
        asm volatile(
                "   sacf  0\n"
@@ -175,6 +235,7 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x,
                EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b)
                : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
                : : "cc", "memory");
+       disable_sacf_uaccess(old_fs);
        return size;
 }
 
@@ -189,7 +250,7 @@ EXPORT_SYMBOL(raw_copy_to_user);
 static inline unsigned long copy_in_user_mvcos(void __user *to, const void __user *from,
                                               unsigned long size)
 {
-       register unsigned long reg0 asm("0") = 0x810081UL;
+       register unsigned long reg0 asm("0") = 0x010001UL;
        unsigned long tmp1, tmp2;
 
        tmp1 = -4096UL;
@@ -212,9 +273,10 @@ static inline unsigned long copy_in_user_mvcos(void __user *to, const void __use
 static inline unsigned long copy_in_user_mvc(void __user *to, const void __user *from,
                                             unsigned long size)
 {
+       mm_segment_t old_fs;
        unsigned long tmp1;
 
-       load_kernel_asce();
+       old_fs = enable_sacf_uaccess();
        asm volatile(
                "   sacf  256\n"
                "   aghi  %0,-1\n"
@@ -238,6 +300,7 @@ static inline unsigned long copy_in_user_mvc(void __user *to, const void __user
                EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b)
                : "+a" (size), "+a" (to), "+a" (from), "=a" (tmp1)
                : : "cc", "memory");
+       disable_sacf_uaccess(old_fs);
        return size;
 }
 
@@ -251,7 +314,7 @@ EXPORT_SYMBOL(raw_copy_in_user);
 
 static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size)
 {
-       register unsigned long reg0 asm("0") = 0x810000UL;
+       register unsigned long reg0 asm("0") = 0x010000UL;
        unsigned long tmp1, tmp2;
 
        tmp1 = -4096UL;
@@ -279,9 +342,10 @@ static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size
 
 static inline unsigned long clear_user_xc(void __user *to, unsigned long size)
 {
+       mm_segment_t old_fs;
        unsigned long tmp1, tmp2;
 
-       load_kernel_asce();
+       old_fs = enable_sacf_uaccess();
        asm volatile(
                "   sacf  256\n"
                "   aghi  %0,-1\n"
@@ -310,6 +374,7 @@ static inline unsigned long clear_user_xc(void __user *to, unsigned long size)
                EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b)
                : "+a" (size), "+a" (to), "=a" (tmp1), "=a" (tmp2)
                : : "cc", "memory");
+       disable_sacf_uaccess(old_fs);
        return size;
 }
 
@@ -345,10 +410,15 @@ static inline unsigned long strnlen_user_srst(const char __user *src,
 
 unsigned long __strnlen_user(const char __user *src, unsigned long size)
 {
+       mm_segment_t old_fs;
+       unsigned long len;
+
        if (unlikely(!size))
                return 0;
-       load_kernel_asce();
-       return strnlen_user_srst(src, size);
+       old_fs = enable_sacf_uaccess();
+       len = strnlen_user_srst(src, size);
+       disable_sacf_uaccess(old_fs);
+       return len;
 }
 EXPORT_SYMBOL(__strnlen_user);
 
index 242b78c0a9ec84ba7a956d197cdd887bf86c9ebd..93faeca522841ba4e1aec2164f52e476b8c265bf 100644 (file)
 #define VM_FAULT_SIGNAL                0x080000
 #define VM_FAULT_PFAULT                0x100000
 
+enum fault_type {
+       KERNEL_FAULT,   /* access went through the kernel ASCE */
+       USER_FAULT,     /* access went through the user ASCE */
+       VDSO_FAULT,     /* access went through the vdso ASCE */
+       GMAP_FAULT,     /* access went through the ASCE of a gmap */
+};
+
 static unsigned long store_indication __read_mostly;
 
 static int __init fault_init(void)
@@ -99,27 +106,34 @@ void bust_spinlocks(int yes)
 }
 
 /*
- * Returns the address space associated with the fault.
- * Returns 0 for kernel space and 1 for user space.
+ * Find out which address space caused the exception. Access register
+ * mode (space == 1) is impossible; space == 3 is the home space.
  */
-static inline int user_space_fault(struct pt_regs *regs)
+static inline enum fault_type get_fault_type(struct pt_regs *regs)
 {
        unsigned long trans_exc_code;
 
-       /*
-        * The lowest two bits of the translation exception
-        * identification indicate which paging table was used.
-        */
        trans_exc_code = regs->int_parm_long & 3;
-       if (trans_exc_code == 3) /* home space -> kernel */
-               return 0;
-       if (user_mode(regs))
-               return 1;
-       if (trans_exc_code == 2) /* secondary space -> set_fs */
-               return current->thread.mm_segment.ar4;
-       if (current->flags & PF_VCPU)
-               return 1;
-       return 0;
+       if (likely(trans_exc_code == 0)) {
+               /* primary space exception */
+               if (IS_ENABLED(CONFIG_PGSTE) &&
+                   test_pt_regs_flag(regs, PIF_GUEST_FAULT))
+                       return GMAP_FAULT;
+               if (current->thread.mm_segment == USER_DS)
+                       return USER_FAULT;
+               return KERNEL_FAULT;
+       }
+       if (trans_exc_code == 2) {
+               /* secondary space exception */
+               if (current->thread.mm_segment & 1) {
+                       if (current->thread.mm_segment == USER_DS_SACF)
+                               return USER_FAULT;
+                       return KERNEL_FAULT;
+               }
+               return VDSO_FAULT;
+       }
+       /* home space exception -> access via kernel ASCE */
+       return KERNEL_FAULT;
 }
 
 static int bad_address(void *p)
@@ -204,20 +218,23 @@ static void dump_fault_info(struct pt_regs *regs)
                break;
        }
        pr_cont("mode while using ");
-       if (!user_space_fault(regs)) {
-               asce = S390_lowcore.kernel_asce;
-               pr_cont("kernel ");
-       }
-#ifdef CONFIG_PGSTE
-       else if ((current->flags & PF_VCPU) && S390_lowcore.gmap) {
-               struct gmap *gmap = (struct gmap *)S390_lowcore.gmap;
-               asce = gmap->asce;
-               pr_cont("gmap ");
-       }
-#endif
-       else {
+       switch (get_fault_type(regs)) {
+       case USER_FAULT:
                asce = S390_lowcore.user_asce;
                pr_cont("user ");
+               break;
+       case VDSO_FAULT:
+               asce = S390_lowcore.vdso_asce;
+               pr_cont("vdso ");
+               break;
+       case GMAP_FAULT:
+               asce = ((struct gmap *) S390_lowcore.gmap)->asce;
+               pr_cont("gmap ");
+               break;
+       case KERNEL_FAULT:
+               asce = S390_lowcore.kernel_asce;
+               pr_cont("kernel ");
+               break;
        }
        pr_cont("ASCE.\n");
        dump_pagetable(asce, regs->int_parm_long & __FAIL_ADDR_MASK);
@@ -273,7 +290,7 @@ static noinline void do_no_context(struct pt_regs *regs)
         * Oops. The kernel tried to access some bad page. We'll have to
         * terminate things with extreme prejudice.
         */
-       if (!user_space_fault(regs))
+       if (get_fault_type(regs) == KERNEL_FAULT)
                printk(KERN_ALERT "Unable to handle kernel pointer dereference"
                       " in virtual kernel address space\n");
        else
@@ -395,12 +412,11 @@ static noinline void do_fault_error(struct pt_regs *regs, int access, int fault)
  */
 static inline int do_exception(struct pt_regs *regs, int access)
 {
-#ifdef CONFIG_PGSTE
        struct gmap *gmap;
-#endif
        struct task_struct *tsk;
        struct mm_struct *mm;
        struct vm_area_struct *vma;
+       enum fault_type type;
        unsigned long trans_exc_code;
        unsigned long address;
        unsigned int flags;
@@ -425,8 +441,19 @@ static inline int do_exception(struct pt_regs *regs, int access)
         * user context.
         */
        fault = VM_FAULT_BADCONTEXT;
-       if (unlikely(!user_space_fault(regs) || faulthandler_disabled() || !mm))
+       type = get_fault_type(regs);
+       switch (type) {
+       case KERNEL_FAULT:
+               goto out;
+       case VDSO_FAULT:
+               fault = VM_FAULT_BADMAP;
                goto out;
+       case USER_FAULT:
+       case GMAP_FAULT:
+               if (faulthandler_disabled() || !mm)
+                       goto out;
+               break;
+       }
 
        address = trans_exc_code & __FAIL_ADDR_MASK;
        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
@@ -437,10 +464,9 @@ static inline int do_exception(struct pt_regs *regs, int access)
                flags |= FAULT_FLAG_WRITE;
        down_read(&mm->mmap_sem);
 
-#ifdef CONFIG_PGSTE
-       gmap = (current->flags & PF_VCPU) ?
-               (struct gmap *) S390_lowcore.gmap : NULL;
-       if (gmap) {
+       gmap = NULL;
+       if (IS_ENABLED(CONFIG_PGSTE) && type == GMAP_FAULT) {
+               gmap = (struct gmap *) S390_lowcore.gmap;
                current->thread.gmap_addr = address;
                current->thread.gmap_write_flag = !!(flags & FAULT_FLAG_WRITE);
                current->thread.gmap_int_code = regs->int_code & 0xffff;
@@ -452,7 +478,6 @@ static inline int do_exception(struct pt_regs *regs, int access)
                if (gmap->pfault_enabled)
                        flags |= FAULT_FLAG_RETRY_NOWAIT;
        }
-#endif
 
 retry:
        fault = VM_FAULT_BADMAP;
@@ -507,15 +532,14 @@ retry:
                                      regs, address);
                }
                if (fault & VM_FAULT_RETRY) {
-#ifdef CONFIG_PGSTE
-                       if (gmap && (flags & FAULT_FLAG_RETRY_NOWAIT)) {
+                       if (IS_ENABLED(CONFIG_PGSTE) && gmap &&
+                           (flags & FAULT_FLAG_RETRY_NOWAIT)) {
                                /* FAULT_FLAG_RETRY_NOWAIT has been set,
                                 * mmap_sem has not been released */
                                current->thread.gmap_pfault = 1;
                                fault = VM_FAULT_PFAULT;
                                goto out_up;
                        }
-#endif
                        /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
                         * of starvation. */
                        flags &= ~(FAULT_FLAG_ALLOW_RETRY |
@@ -525,8 +549,7 @@ retry:
                        goto retry;
                }
        }
-#ifdef CONFIG_PGSTE
-       if (gmap) {
+       if (IS_ENABLED(CONFIG_PGSTE) && gmap) {
                address =  __gmap_link(gmap, current->thread.gmap_addr,
                                       address);
                if (address == -EFAULT) {
@@ -538,7 +561,6 @@ retry:
                        goto out_up;
                }
        }
-#endif
        fault = 0;
 out_up:
        up_read(&mm->mmap_sem);
@@ -706,7 +728,7 @@ static void pfault_interrupt(struct ext_code ext_code,
                return;
        inc_irq_stat(IRQEXT_PFL);
        /* Get the token (= pid of the affected task). */
-       pid = param64 & LPP_PFAULT_PID_MASK;
+       pid = param64 & LPP_PID_MASK;
        rcu_read_lock();
        tsk = find_task_by_pid_ns(pid, &init_pid_ns);
        if (tsk)
index 2f66290c9b9273b2bf0295742bb5e56c0d51f1d6..b2c140193b0af72273ffcbafd78a4ee38417ca42 100644 (file)
@@ -1187,12 +1187,11 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
 static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
                                unsigned long *sgt)
 {
-       unsigned long asce, *pgt;
+       unsigned long *pgt;
        struct page *page;
        int i;
 
        BUG_ON(!gmap_is_shadow(sg));
-       asce = (unsigned long) sgt | _ASCE_TYPE_SEGMENT;
        for (i = 0; i < _CRST_ENTRIES; i++, raddr += _SEGMENT_SIZE) {
                if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN))
                        continue;
@@ -1245,12 +1244,11 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
 static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
                                unsigned long *r3t)
 {
-       unsigned long asce, *sgt;
+       unsigned long *sgt;
        struct page *page;
        int i;
 
        BUG_ON(!gmap_is_shadow(sg));
-       asce = (unsigned long) r3t | _ASCE_TYPE_REGION3;
        for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION3_SIZE) {
                if (!(r3t[i] & _REGION_ENTRY_ORIGIN))
                        continue;
@@ -1303,12 +1301,11 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
 static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
                                unsigned long *r2t)
 {
-       unsigned long asce, *r3t;
+       unsigned long *r3t;
        struct page *page;
        int i;
 
        BUG_ON(!gmap_is_shadow(sg));
-       asce = (unsigned long) r2t | _ASCE_TYPE_REGION2;
        for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION2_SIZE) {
                if (!(r2t[i] & _REGION_ENTRY_ORIGIN))
                        continue;
index 817c9e16e83e5d3fe5e75b7e3b440cea5b23fe95..671535e64abab615afca53c4b57ce89863981f9a 100644 (file)
@@ -95,6 +95,7 @@ void __init paging_init(void)
        }
        init_mm.context.asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits;
        S390_lowcore.kernel_asce = init_mm.context.asce;
+       S390_lowcore.user_asce = S390_lowcore.kernel_asce;
        crst_table_init((unsigned long *) init_mm.pgd, pgd_type);
        vmem_map_init();
 
index 4ad4c4f77b4d9fa0362ed42a6e8474e443285e26..434a9564917beceadeffd0f34d041683b717af1c 100644 (file)
@@ -71,10 +71,8 @@ static void __crst_table_upgrade(void *arg)
 {
        struct mm_struct *mm = arg;
 
-       if (current->active_mm == mm) {
-               clear_user_asce();
+       if (current->active_mm == mm)
                set_user_asce(mm);
-       }
        __tlb_flush_local();
 }
 
index 2ebf2872cc16b6bc84990e30100501da87682ce5..2e70e25de07aea8de3d405b3c04d14299329f7ee 100644 (file)
@@ -21,4 +21,4 @@ include/generated/facilities.h: $(obj)/gen_facilities FORCE
        $(call filechk,facilities.h)
 
 include/generated/dis.h: $(obj)/gen_opcode_table FORCE
-       $(call filechk,dis.h,__FUN)
+       $(call filechk,dis.h)
index 353f0bebcf8c55e9d7aaa4b5be1eeb93e7831598..8c9d412b6d33bd14b873446260b26caacd31642c 100644 (file)
@@ -282,9 +282,9 @@ static void raw3215_start_io(struct raw3215_info *raw)
 /*
  * Function to start a delayed output after RAW3215_TIMEOUT seconds
  */
-static void raw3215_timeout(unsigned long __data)
+static void raw3215_timeout(struct timer_list *t)
 {
-       struct raw3215_info *raw = (struct raw3215_info *) __data;
+       struct raw3215_info *raw = from_timer(raw, t, timer);
        unsigned long flags;
 
        spin_lock_irqsave(get_ccwdev_lock(raw->cdev), flags);
@@ -670,7 +670,7 @@ static struct raw3215_info *raw3215_alloc_info(void)
                return NULL;
        }
 
-       setup_timer(&info->timer, raw3215_timeout, (unsigned long)info);
+       timer_setup(&info->timer, raw3215_timeout, 0);
        init_waitqueue_head(&info->empty_wait);
        tasklet_init(&info->tlet, raw3215_wakeup, (unsigned long)info);
        tty_port_init(&info->port);
index be3e3c1206c242bc849b732d1ad09f1e8f9a8afc..fd2146bcc0add9aae3b71ba4cc88b788b7702591 100644 (file)
@@ -69,7 +69,7 @@ static struct con3270 *condev;
 #define CON_UPDATE_STATUS      4       /* Update status line. */
 #define CON_UPDATE_ALL         8       /* Recreate screen. */
 
-static void con3270_update(struct con3270 *);
+static void con3270_update(struct timer_list *);
 
 /*
  * Setup timeout for a device. On timeout trigger an update.
@@ -205,8 +205,9 @@ con3270_write_callback(struct raw3270_request *rq, void *data)
  * Update console display.
  */
 static void
-con3270_update(struct con3270 *cp)
+con3270_update(struct timer_list *t)
 {
+       struct con3270 *cp = from_timer(cp, t, timer);
        struct raw3270_request *wrq;
        char wcc, prolog[6];
        unsigned long flags;
@@ -552,7 +553,7 @@ con3270_flush(void)
        con3270_update_status(cp);
        while (cp->update_flags != 0) {
                spin_unlock_irqrestore(&cp->view.lock, flags);
-               con3270_update(cp);
+               con3270_update(&cp->timer);
                spin_lock_irqsave(&cp->view.lock, flags);
                con3270_wait_write(cp);
        }
@@ -623,8 +624,7 @@ con3270_init(void)
 
        INIT_LIST_HEAD(&condev->lines);
        INIT_LIST_HEAD(&condev->update);
-       setup_timer(&condev->timer, (void (*)(unsigned long)) con3270_update,
-                   (unsigned long) condev);
+       timer_setup(&condev->timer, con3270_update, 0);
        tasklet_init(&condev->readlet, 
                     (void (*)(unsigned long)) con3270_read_tasklet,
                     (unsigned long) condev->read);
index 41d8aa96801f2922e399e8a7cc0a16059c1ea27f..9b4c61c1e3097e8888e9e4b6fbacad76e30df2b8 100644 (file)
@@ -136,6 +136,7 @@ static enum sclp_suspend_state_t {
 #define SCLP_BUSY_INTERVAL     10
 #define SCLP_RETRY_INTERVAL    30
 
+static void sclp_request_timeout(bool force_restart);
 static void sclp_process_queue(void);
 static void __sclp_make_read_req(void);
 static int sclp_init_mask(int calculate);
@@ -154,25 +155,32 @@ __sclp_queue_read_req(void)
 
 /* Set up request retry timer. Called while sclp_lock is locked. */
 static inline void
-__sclp_set_request_timer(unsigned long time, void (*function)(unsigned long),
-                        unsigned long data)
+__sclp_set_request_timer(unsigned long time, void (*cb)(struct timer_list *))
 {
        del_timer(&sclp_request_timer);
-       sclp_request_timer.function = function;
-       sclp_request_timer.data = data;
+       sclp_request_timer.function = (TIMER_FUNC_TYPE)cb;
        sclp_request_timer.expires = jiffies + time;
        add_timer(&sclp_request_timer);
 }
 
-/* Request timeout handler. Restart the request queue. If DATA is non-zero,
+static void sclp_request_timeout_restart(struct timer_list *unused)
+{
+       sclp_request_timeout(true);
+}
+
+static void sclp_request_timeout_normal(struct timer_list *unused)
+{
+       sclp_request_timeout(false);
+}
+
+/* Request timeout handler. Restart the request queue. If force_restart,
  * force restart of running request. */
-static void
-sclp_request_timeout(unsigned long data)
+static void sclp_request_timeout(bool force_restart)
 {
        unsigned long flags;
 
        spin_lock_irqsave(&sclp_lock, flags);
-       if (data) {
+       if (force_restart) {
                if (sclp_running_state == sclp_running_state_running) {
                        /* Break running state and queue NOP read event request
                         * to get a defined interface state. */
@@ -181,7 +189,7 @@ sclp_request_timeout(unsigned long data)
                }
        } else {
                __sclp_set_request_timer(SCLP_BUSY_INTERVAL * HZ,
-                                        sclp_request_timeout, 0);
+                                        sclp_request_timeout_normal);
        }
        spin_unlock_irqrestore(&sclp_lock, flags);
        sclp_process_queue();
@@ -239,7 +247,7 @@ out:
  * invokes callback. This timer can be set per request in situations where
  * waiting too long would be harmful to the system, e.g. during SE reboot.
  */
-static void sclp_req_queue_timeout(unsigned long data)
+static void sclp_req_queue_timeout(struct timer_list *unused)
 {
        unsigned long flags, expires_next;
        struct sclp_req *req;
@@ -276,12 +284,12 @@ __sclp_start_request(struct sclp_req *req)
                req->status = SCLP_REQ_RUNNING;
                sclp_running_state = sclp_running_state_running;
                __sclp_set_request_timer(SCLP_RETRY_INTERVAL * HZ,
-                                        sclp_request_timeout, 1);
+                                        sclp_request_timeout_restart);
                return 0;
        } else if (rc == -EBUSY) {
                /* Try again later */
                __sclp_set_request_timer(SCLP_BUSY_INTERVAL * HZ,
-                                        sclp_request_timeout, 0);
+                                        sclp_request_timeout_normal);
                return 0;
        }
        /* Request failed */
@@ -315,7 +323,7 @@ sclp_process_queue(void)
                        /* Cannot abort already submitted request - could still
                         * be active at the SCLP */
                        __sclp_set_request_timer(SCLP_BUSY_INTERVAL * HZ,
-                                                sclp_request_timeout, 0);
+                                                sclp_request_timeout_normal);
                        break;
                }
 do_post:
@@ -558,7 +566,7 @@ sclp_sync_wait(void)
                if (timer_pending(&sclp_request_timer) &&
                    get_tod_clock_fast() > timeout &&
                    del_timer(&sclp_request_timer))
-                       sclp_request_timer.function(sclp_request_timer.data);
+                       sclp_request_timer.function((TIMER_DATA_TYPE)&sclp_request_timer);
                cpu_relax();
        }
        local_irq_disable();
@@ -915,7 +923,7 @@ static void sclp_check_handler(struct ext_code ext_code,
 
 /* Initial init mask request timed out. Modify request state to failed. */
 static void
-sclp_check_timeout(unsigned long data)
+sclp_check_timeout(struct timer_list *unused)
 {
        unsigned long flags;
 
@@ -954,7 +962,7 @@ sclp_check_interface(void)
                sclp_init_req.status = SCLP_REQ_RUNNING;
                sclp_running_state = sclp_running_state_running;
                __sclp_set_request_timer(SCLP_RETRY_INTERVAL * HZ,
-                                        sclp_check_timeout, 0);
+                                        sclp_check_timeout);
                spin_unlock_irqrestore(&sclp_lock, flags);
                /* Enable service-signal interruption - needs to happen
                 * with IRQs enabled. */
@@ -1159,9 +1167,8 @@ sclp_init(void)
        INIT_LIST_HEAD(&sclp_req_queue);
        INIT_LIST_HEAD(&sclp_reg_list);
        list_add(&sclp_state_change_event.list, &sclp_reg_list);
-       init_timer(&sclp_request_timer);
-       init_timer(&sclp_queue_timer);
-       sclp_queue_timer.function = sclp_req_queue_timeout;
+       timer_setup(&sclp_request_timer, NULL, 0);
+       timer_setup(&sclp_queue_timer, sclp_req_queue_timeout, 0);
        /* Check interface */
        spin_unlock_irqrestore(&sclp_lock, flags);
        rc = sclp_check_interface();
index 7027e61a6931028a2f2e5230110493f9659eb305..8966a1c1b5489ab37d823e5710838ea8bef7b912 100644 (file)
@@ -125,7 +125,7 @@ static void sclp_console_sync_queue(void)
  * temporary write buffer without further waiting on a final new line.
  */
 static void
-sclp_console_timeout(unsigned long data)
+sclp_console_timeout(struct timer_list *unused)
 {
        sclp_conbuf_emit();
 }
@@ -211,7 +211,6 @@ sclp_console_write(struct console *console, const char *message,
        /* Setup timer to output current console buffer after 1/10 second */
        if (sclp_conbuf != NULL && sclp_chars_in_buffer(sclp_conbuf) != 0 &&
            !timer_pending(&sclp_con_timer)) {
-               setup_timer(&sclp_con_timer, sclp_console_timeout, 0UL);
                mod_timer(&sclp_con_timer, jiffies + HZ / 10);
        }
 out:
@@ -332,7 +331,7 @@ sclp_console_init(void)
        INIT_LIST_HEAD(&sclp_con_outqueue);
        spin_lock_init(&sclp_con_lock);
        sclp_conbuf = NULL;
-       init_timer(&sclp_con_timer);
+       timer_setup(&sclp_con_timer, sclp_console_timeout, 0);
 
        /* Set output format */
        if (MACHINE_IS_VM)
index 1cceefdc03e08850b9cbfecd9a7d5126ecf112b5..9f7b87d6d4349f25c07a2fe421f1a3130efe3903 100644 (file)
@@ -151,7 +151,7 @@ __sclp_ttybuf_emit(struct sclp_buffer *buffer)
  * temporary write buffer.
  */
 static void
-sclp_tty_timeout(unsigned long data)
+sclp_tty_timeout(struct timer_list *unused)
 {
        unsigned long flags;
        struct sclp_buffer *buf;
@@ -218,7 +218,6 @@ static int sclp_tty_write_string(const unsigned char *str, int count, int may_fa
        /* Setup timer to output current console buffer after 1/10 second */
        if (sclp_ttybuf && sclp_chars_in_buffer(sclp_ttybuf) &&
            !timer_pending(&sclp_tty_timer)) {
-               setup_timer(&sclp_tty_timer, sclp_tty_timeout, 0UL);
                mod_timer(&sclp_tty_timer, jiffies + HZ / 10);
        }
        spin_unlock_irqrestore(&sclp_tty_lock, flags);
@@ -526,7 +525,7 @@ sclp_tty_init(void)
        }
        INIT_LIST_HEAD(&sclp_tty_outqueue);
        spin_lock_init(&sclp_tty_lock);
-       init_timer(&sclp_tty_timer);
+       timer_setup(&sclp_tty_timer, sclp_tty_timeout, 0);
        sclp_ttybuf = NULL;
        sclp_tty_buffer_count = 0;
        if (MACHINE_IS_VM) {
index e84395d713896c0271825642b047f441452f93e0..3f9a6ef650fac41c938cc797ced3a62d1b10fa81 100644 (file)
@@ -357,7 +357,7 @@ sclp_vt220_add_msg(struct sclp_vt220_request *request,
  * Emit buffer after having waited long enough for more data to arrive.
  */
 static void
-sclp_vt220_timeout(unsigned long data)
+sclp_vt220_timeout(struct timer_list *unused)
 {
        sclp_vt220_emit_current();
 }
@@ -454,8 +454,6 @@ __sclp_vt220_write(const unsigned char *buf, int count, int do_schedule,
        /* Setup timer to output current console buffer after some time */
        if (sclp_vt220_current_request != NULL &&
            !timer_pending(&sclp_vt220_timer) && do_schedule) {
-               sclp_vt220_timer.function = sclp_vt220_timeout;
-               sclp_vt220_timer.data = 0UL;
                sclp_vt220_timer.expires = jiffies + BUFFER_MAX_DELAY;
                add_timer(&sclp_vt220_timer);
        }
@@ -699,7 +697,7 @@ static int __init __sclp_vt220_init(int num_pages)
        spin_lock_init(&sclp_vt220_lock);
        INIT_LIST_HEAD(&sclp_vt220_empty);
        INIT_LIST_HEAD(&sclp_vt220_outqueue);
-       init_timer(&sclp_vt220_timer);
+       timer_setup(&sclp_vt220_timer, sclp_vt220_timeout, 0);
        tty_port_init(&sclp_vt220_port);
        sclp_vt220_current_request = NULL;
        sclp_vt220_buffered_chars = 0;
index 9dd4534823b399fefff52511315500d465fce490..32503a60ee851698049c2fc1221ce01c581ebb6a 100644 (file)
@@ -32,7 +32,7 @@
 
 static void __tape_do_irq (struct ccw_device *, unsigned long, struct irb *);
 static void tape_delayed_next_request(struct work_struct *);
-static void tape_long_busy_timeout(unsigned long data);
+static void tape_long_busy_timeout(struct timer_list *t);
 
 /*
  * One list to contain all tape devices of all disciplines, so
@@ -381,8 +381,7 @@ tape_generic_online(struct tape_device *device,
                return -EINVAL;
        }
 
-       init_timer(&device->lb_timeout);
-       device->lb_timeout.function = tape_long_busy_timeout;
+       timer_setup(&device->lb_timeout, tape_long_busy_timeout, 0);
 
        /* Let the discipline have a go at the device. */
        device->discipline = discipline;
@@ -867,18 +866,16 @@ tape_delayed_next_request(struct work_struct *work)
        spin_unlock_irq(get_ccwdev_lock(device->cdev));
 }
 
-static void tape_long_busy_timeout(unsigned long data)
+static void tape_long_busy_timeout(struct timer_list *t)
 {
+       struct tape_device *device = from_timer(device, t, lb_timeout);
        struct tape_request *request;
-       struct tape_device *device;
 
-       device = (struct tape_device *) data;
        spin_lock_irq(get_ccwdev_lock(device->cdev));
        request = list_entry(device->req_queue.next, struct tape_request, list);
        BUG_ON(request->status != TAPE_REQUEST_LONG_BUSY);
        DBF_LH(6, "%08x: Long busy timeout.\n", device->cdev_id);
        __tape_start_next_request(device);
-       device->lb_timeout.data = 0UL;
        tape_put_device(device);
        spin_unlock_irq(get_ccwdev_lock(device->cdev));
 }
@@ -1157,7 +1154,6 @@ __tape_do_irq (struct ccw_device *cdev, unsigned long intparm, struct irb *irb)
                if (req->status == TAPE_REQUEST_LONG_BUSY) {
                        DBF_EVENT(3, "(%08x): del timer\n", device->cdev_id);
                        if (del_timer(&device->lb_timeout)) {
-                               device->lb_timeout.data = 0UL;
                                tape_put_device(device);
                                __tape_start_next_request(device);
                        }
@@ -1212,8 +1208,6 @@ __tape_do_irq (struct ccw_device *cdev, unsigned long intparm, struct irb *irb)
                case TAPE_IO_PENDING:
                        break;
                case TAPE_IO_LONG_BUSY:
-                       device->lb_timeout.data =
-                               (unsigned long) tape_get_device(device);
                        device->lb_timeout.expires = jiffies +
                                LONG_BUSY_TIMEOUT * HZ;
                        DBF_EVENT(3, "(%08x): add timer\n", device->cdev_id);
index e5ebe2fbee2353435002ec357542efd336c47f12..e417ccd9e299891560b2b2c1e67565f0eb46df47 100644 (file)
@@ -118,7 +118,7 @@ struct tty3270 {
 #define TTY_UPDATE_STATUS      8       /* Update status line. */
 #define TTY_UPDATE_ALL         16      /* Recreate screen. */
 
-static void tty3270_update(struct tty3270 *);
+static void tty3270_update(struct timer_list *);
 static void tty3270_resize_work(struct work_struct *work);
 
 /*
@@ -361,8 +361,9 @@ tty3270_write_callback(struct raw3270_request *rq, void *data)
  * Update 3270 display.
  */
 static void
-tty3270_update(struct tty3270 *tp)
+tty3270_update(struct timer_list *t)
 {
+       struct tty3270 *tp = from_timer(tp, t, timer);
        static char invalid_sba[2] = { 0xff, 0xff };
        struct raw3270_request *wrq;
        unsigned long updated;
@@ -748,8 +749,7 @@ tty3270_alloc_view(void)
                goto out_reset;
 
        tty_port_init(&tp->port);
-       setup_timer(&tp->timer, (void (*)(unsigned long)) tty3270_update,
-                   (unsigned long) tp);
+       timer_setup(&tp->timer, tty3270_update, 0);
        tasklet_init(&tp->readlet,
                     (void (*)(unsigned long)) tty3270_read_tasklet,
                     (unsigned long) tp->read);
index e5c32f4b5287ebc4da569d403475d5624bac43ef..318d8269f5dee10c56114224b4a08b8b617a96da 100644 (file)
@@ -142,7 +142,7 @@ static void io_subchannel_shutdown(struct subchannel *);
 static int io_subchannel_sch_event(struct subchannel *, int);
 static int io_subchannel_chp_event(struct subchannel *, struct chp_link *,
                                   int);
-static void recovery_func(unsigned long data);
+static void recovery_func(struct timer_list *unused);
 
 static struct css_device_id io_subchannel_ids[] = {
        { .match_flags = 0x1, .type = SUBCHANNEL_TYPE_IO, },
@@ -194,7 +194,7 @@ int __init io_subchannel_init(void)
 {
        int ret;
 
-       setup_timer(&recovery_timer, recovery_func, 0);
+       timer_setup(&recovery_timer, recovery_func, 0);
        ret = bus_register(&ccw_bus_type);
        if (ret)
                return ret;
@@ -726,7 +726,7 @@ static int io_subchannel_initialize_dev(struct subchannel *sch,
        INIT_WORK(&priv->todo_work, ccw_device_todo);
        INIT_LIST_HEAD(&priv->cmb_list);
        init_waitqueue_head(&priv->wait_q);
-       init_timer(&priv->timer);
+       timer_setup(&priv->timer, ccw_device_timeout, 0);
 
        atomic_set(&priv->onoff, 0);
        cdev->ccwlock = sch->lock;
@@ -1271,7 +1271,7 @@ static void recovery_work_func(struct work_struct *unused)
 
 static DECLARE_WORK(recovery_work, recovery_work_func);
 
-static void recovery_func(unsigned long data)
+static void recovery_func(struct timer_list *unused)
 {
        /*
         * We can't do our recovery in softirq context and it's not
index b37c22adcc7af1796fb507930f857c837690563b..f5c427ec24b12998e59ea5fd84111cfcf280730e 100644 (file)
@@ -4,6 +4,7 @@
 
 #include <asm/ccwdev.h>
 #include <linux/atomic.h>
+#include <linux/timer.h>
 #include <linux/wait.h>
 #include <linux/notifier.h>
 #include <linux/kernel_stat.h>
@@ -134,6 +135,7 @@ int ccw_device_notify(struct ccw_device *, int);
 void ccw_device_set_disconnected(struct ccw_device *cdev);
 void ccw_device_set_notoper(struct ccw_device *cdev);
 
+void ccw_device_timeout(struct timer_list *t);
 void ccw_device_set_timeout(struct ccw_device *, int);
 void ccw_device_schedule_recovery(void);
 
index f98ea674c3d8054390a5486802fedd482f0dbe16..dd7d79d30edc440662a02432a3ad3ce822503225 100644 (file)
@@ -91,12 +91,12 @@ static void ccw_timeout_log(struct ccw_device *cdev)
 /*
  * Timeout function. It just triggers a DEV_EVENT_TIMEOUT.
  */
-static void
-ccw_device_timeout(unsigned long data)
+void
+ccw_device_timeout(struct timer_list *t)
 {
-       struct ccw_device *cdev;
+       struct ccw_device_private *priv = from_timer(priv, t, timer);
+       struct ccw_device *cdev = priv->cdev;
 
-       cdev = (struct ccw_device *) data;
        spin_lock_irq(cdev->ccwlock);
        if (timeout_log_enabled)
                ccw_timeout_log(cdev);
@@ -118,8 +118,6 @@ ccw_device_set_timeout(struct ccw_device *cdev, int expires)
                if (mod_timer(&cdev->private->timer, jiffies + expires))
                        return;
        }
-       cdev->private->timer.function = ccw_device_timeout;
-       cdev->private->timer.data = (unsigned long) cdev;
        cdev->private->timer.expires = jiffies + expires;
        add_timer(&cdev->private->timer);
 }
index d14795f7110b4021a2fa58b08b332af34573a735..ce16e4f45d440fd25538d9223db5e07566ad22f5 100644 (file)
@@ -94,9 +94,10 @@ static int eadm_subchannel_clear(struct subchannel *sch)
        return 0;
 }
 
-static void eadm_subchannel_timeout(unsigned long data)
+static void eadm_subchannel_timeout(struct timer_list *t)
 {
-       struct subchannel *sch = (struct subchannel *) data;
+       struct eadm_private *private = from_timer(private, t, timer);
+       struct subchannel *sch = private->sch;
 
        spin_lock_irq(sch->lock);
        EADM_LOG(1, "timeout");
@@ -118,8 +119,6 @@ static void eadm_subchannel_set_timeout(struct subchannel *sch, int expires)
                if (mod_timer(&private->timer, jiffies + expires))
                        return;
        }
-       private->timer.function = eadm_subchannel_timeout;
-       private->timer.data = (unsigned long) sch;
        private->timer.expires = jiffies + expires;
        add_timer(&private->timer);
 }
@@ -224,7 +223,7 @@ static int eadm_subchannel_probe(struct subchannel *sch)
                return -ENOMEM;
 
        INIT_LIST_HEAD(&private->head);
-       init_timer(&private->timer);
+       timer_setup(&private->timer, eadm_subchannel_timeout, 0);
 
        spin_lock_irq(sch->lock);
        set_eadm_private(sch, private);
index 29d6b5222f1cdb6420c9632f9825597bb524f295..a6f7c2986b94f7a4a7ad24141ea546538ef8455d 100644 (file)
@@ -393,7 +393,7 @@ int test_nonshared_ind(struct qdio_irq *);
 /* prototypes for setup */
 void qdio_inbound_processing(unsigned long data);
 void qdio_outbound_processing(unsigned long data);
-void qdio_outbound_timer(unsigned long data);
+void qdio_outbound_timer(struct timer_list *t);
 void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm,
                      struct irb *irb);
 int qdio_allocate_qs(struct qdio_irq *irq_ptr, int nr_input_qs,
index a4ad39ba3873f64911802e567b8c19a6abbad5c6..ed4852fab44b5737fa5edae05ddd640067486304 100644 (file)
@@ -894,9 +894,9 @@ void qdio_outbound_processing(unsigned long data)
        __qdio_outbound_processing(q);
 }
 
-void qdio_outbound_timer(unsigned long data)
+void qdio_outbound_timer(struct timer_list *t)
 {
-       struct qdio_q *q = (struct qdio_q *)data;
+       struct qdio_q *q = from_timer(q, t, u.out.timer);
 
        qdio_tasklet_schedule(q);
 }
index 48b3866a9ded31401d986ea79a6d3f1629c08f74..9ae1380cbc31300f5e251f03e6027ad903b2d666 100644 (file)
@@ -252,8 +252,7 @@ static void setup_queues(struct qdio_irq *irq_ptr,
 
                tasklet_init(&q->tasklet, qdio_outbound_processing,
                             (unsigned long) q);
-               setup_timer(&q->u.out.timer, (void(*)(unsigned long))
-                           &qdio_outbound_timer, (unsigned long)q);
+               timer_setup(&q->u.out.timer, qdio_outbound_timer, 0);
        }
 }
 
index 8b5658b0bec368784f140d3bbe3b7f4428cfc8f3..faeba9db3d95999526fdf2ab0667751cd82ab1e0 100644 (file)
@@ -374,13 +374,13 @@ void ap_wait(enum ap_wait wait)
 
 /**
  * ap_request_timeout(): Handling of request timeouts
- * @data: Holds the AP device.
+ * @t: timer making this callback
  *
  * Handles request timeouts.
  */
-void ap_request_timeout(unsigned long data)
+void ap_request_timeout(struct timer_list *t)
 {
-       struct ap_queue *aq = (struct ap_queue *) data;
+       struct ap_queue *aq = from_timer(aq, t, timeout);
 
        if (ap_suspend_flag)
                return;
@@ -1203,7 +1203,7 @@ out:
        mod_timer(&ap_config_timer, jiffies + ap_config_time * HZ);
 }
 
-static void ap_config_timeout(unsigned long ptr)
+static void ap_config_timeout(struct timer_list *unused)
 {
        if (ap_suspend_flag)
                return;
@@ -1306,7 +1306,7 @@ int __init ap_module_init(void)
                goto out_bus;
 
        /* Setup the AP bus rescan timer. */
-       setup_timer(&ap_config_timer, ap_config_timeout, 0);
+       timer_setup(&ap_config_timer, ap_config_timeout, 0);
 
        /*
-        * Setup the high resultion poll timer.
+        * Setup the high resolution poll timer.
index 3a0e19d87e7cebf295dca38e22910430440f5435..7e45c4d08cad40e9124913abd79b715312cbd990 100644 (file)
@@ -241,7 +241,7 @@ void ap_flush_queue(struct ap_queue *aq);
 
 void *ap_airq_ptr(void);
 void ap_wait(enum ap_wait wait);
-void ap_request_timeout(unsigned long data);
+void ap_request_timeout(struct timer_list *t);
 void ap_bus_force_rescan(void);
 
 void ap_queue_init_reply(struct ap_queue *aq, struct ap_message *ap_msg);
index a550d40921e7b6e4c2636377140705230bc0134d..ba3a2e13b0ebe252effaa866910421234518e57d 100644 (file)
@@ -634,7 +634,7 @@ struct ap_queue *ap_queue_create(ap_qid_t qid, int device_type)
        INIT_LIST_HEAD(&aq->list);
        INIT_LIST_HEAD(&aq->pendingq);
        INIT_LIST_HEAD(&aq->requestq);
-       setup_timer(&aq->timeout, ap_request_timeout, (unsigned long) aq);
+       timer_setup(&aq->timeout, ap_request_timeout, 0);
 
        return aq;
 }
index cbb8156bf5e0c4979896b3f1edd8004c93837cfb..1d91a32db08ef33bfe8e571375f243fa4871ef2c 100644 (file)
@@ -564,21 +564,24 @@ void zfcp_erp_notify(struct zfcp_erp_action *erp_action, unsigned long set_mask)
  * zfcp_erp_timeout_handler - Trigger ERP action from timed out ERP request
- * @data: ERP action (from timer data)
+ * @t: timer embedded in the FSF request whose ERP action timed out
  */
-void zfcp_erp_timeout_handler(unsigned long data)
+void zfcp_erp_timeout_handler(struct timer_list *t)
 {
-       struct zfcp_erp_action *act = (struct zfcp_erp_action *) data;
+       struct zfcp_fsf_req *fsf_req = from_timer(fsf_req, t, timer);
+       struct zfcp_erp_action *act = fsf_req->erp_action;
+
        zfcp_erp_notify(act, ZFCP_STATUS_ERP_TIMEDOUT);
 }
 
-static void zfcp_erp_memwait_handler(unsigned long data)
+static void zfcp_erp_memwait_handler(struct timer_list *t)
 {
-       zfcp_erp_notify((struct zfcp_erp_action *)data, 0);
+       struct zfcp_erp_action *act = from_timer(act, t, timer);
+
+       zfcp_erp_notify(act, 0);
 }
 
 static void zfcp_erp_strategy_memwait(struct zfcp_erp_action *erp_action)
 {
-       setup_timer(&erp_action->timer, zfcp_erp_memwait_handler,
-                   (unsigned long) erp_action);
+       timer_setup(&erp_action->timer, zfcp_erp_memwait_handler, 0);
        erp_action->timer.expires = jiffies + HZ;
        add_timer(&erp_action->timer);
 }
index 8ca2ab7deaa9e471a2ce0afb3a3ed31127974b86..bf8ea4df2bb8c9fa621da9061795dd99c56ee091 100644 (file)
@@ -69,7 +69,7 @@ extern int  zfcp_erp_thread_setup(struct zfcp_adapter *);
 extern void zfcp_erp_thread_kill(struct zfcp_adapter *);
 extern void zfcp_erp_wait(struct zfcp_adapter *);
 extern void zfcp_erp_notify(struct zfcp_erp_action *, unsigned long);
-extern void zfcp_erp_timeout_handler(unsigned long);
+extern void zfcp_erp_timeout_handler(struct timer_list *t);
 
 /* zfcp_fc.c */
 extern struct kmem_cache *zfcp_fc_req_cache;
index 00fb98f7b2cd0efa4c85a2604742e9b354e6eb6b..51b81c0a06520bfaa55446aa443b3d394f4c120e 100644 (file)
 
 struct kmem_cache *zfcp_fsf_qtcb_cache;
 
-static void zfcp_fsf_request_timeout_handler(unsigned long data)
+static void zfcp_fsf_request_timeout_handler(struct timer_list *t)
 {
-       struct zfcp_adapter *adapter = (struct zfcp_adapter *) data;
+       struct zfcp_fsf_req *fsf_req = from_timer(fsf_req, t, timer);
+       struct zfcp_adapter *adapter = fsf_req->adapter;
+
        zfcp_qdio_siosl(adapter);
        zfcp_erp_adapter_reopen(adapter, ZFCP_STATUS_COMMON_ERP_FAILED,
                                "fsrth_1");
@@ -32,8 +34,7 @@ static void zfcp_fsf_request_timeout_handler(unsigned long data)
 static void zfcp_fsf_start_timer(struct zfcp_fsf_req *fsf_req,
                                 unsigned long timeout)
 {
-       fsf_req->timer.function = zfcp_fsf_request_timeout_handler;
-       fsf_req->timer.data = (unsigned long) fsf_req->adapter;
+       fsf_req->timer.function = (TIMER_FUNC_TYPE)zfcp_fsf_request_timeout_handler;
        fsf_req->timer.expires = jiffies + timeout;
        add_timer(&fsf_req->timer);
 }
@@ -41,8 +42,7 @@ static void zfcp_fsf_start_timer(struct zfcp_fsf_req *fsf_req,
 static void zfcp_fsf_start_erp_timer(struct zfcp_fsf_req *fsf_req)
 {
        BUG_ON(!fsf_req->erp_action);
-       fsf_req->timer.function = zfcp_erp_timeout_handler;
-       fsf_req->timer.data = (unsigned long) fsf_req->erp_action;
+       fsf_req->timer.function = (TIMER_FUNC_TYPE)zfcp_erp_timeout_handler;
        fsf_req->timer.expires = jiffies + 30 * HZ;
        add_timer(&fsf_req->timer);
 }
@@ -692,7 +692,7 @@ static struct zfcp_fsf_req *zfcp_fsf_req_create(struct zfcp_qdio *qdio,
                adapter->req_no++;
 
        INIT_LIST_HEAD(&req->list);
-       init_timer(&req->timer);
+       timer_setup(&req->timer, NULL, 0);
        init_completion(&req->completion);
 
        req->adapter = adapter;
index 63f534a0902f29e5682570d70751ccb6035d13bb..ed65e82f034efe0e76cef718fdb8a9bd07511edb 100644 (file)
@@ -53,6 +53,10 @@ ifeq ($(SRCARCH),arm64)
   LIBUNWIND_LIBS = -lunwind -lunwind-aarch64
 endif
 
+ifeq ($(ARCH),s390)
+  NO_PERF_REGS := 0
+endif
+
 ifeq ($(NO_PERF_REGS),0)
   $(call detected,CONFIG_PERF_REGS)
 endif
@@ -61,7 +65,7 @@ endif
 # Disable it on all other architectures in case libdw unwind
 # support is detected in system. Add supported architectures
 # to the check.
-ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm powerpc))
+ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm powerpc s390))
   NO_LIBDW_DWARF_UNWIND := 1
 endif
 
index 792d4c2772250c27ad22d55e95e7636fa97ee80a..671553525f41548e1e87477109874f7af31ba893 100644 (file)
@@ -1,9 +1,72 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifdef DEFINE_DWARF_REGSTR_TABLE
-/* This is included in perf/util/dwarf-regs.c */
+#ifndef S390_DWARF_REGS_TABLE_H
+#define S390_DWARF_REGS_TABLE_H
 
-static const char * const s390_regstr_tbl[] = {
+#define REG_DWARFNUM_NAME(reg, idx)    [idx] = "%" #reg
+
+/*
+ * For reference, see DWARF register mapping:
+ * http://refspecs.linuxfoundation.org/ELF/zSeries/lzsabi0_s390/x1542.html
+ */
+static const char * const s390_dwarf_regs[] = {
        "%r0", "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
        "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
+       REG_DWARFNUM_NAME(f0, 16),
+       REG_DWARFNUM_NAME(f1, 20),
+       REG_DWARFNUM_NAME(f2, 17),
+       REG_DWARFNUM_NAME(f3, 21),
+       REG_DWARFNUM_NAME(f4, 18),
+       REG_DWARFNUM_NAME(f5, 22),
+       REG_DWARFNUM_NAME(f6, 19),
+       REG_DWARFNUM_NAME(f7, 23),
+       REG_DWARFNUM_NAME(f8, 24),
+       REG_DWARFNUM_NAME(f9, 28),
+       REG_DWARFNUM_NAME(f10, 25),
+       REG_DWARFNUM_NAME(f11, 29),
+       REG_DWARFNUM_NAME(f12, 26),
+       REG_DWARFNUM_NAME(f13, 30),
+       REG_DWARFNUM_NAME(f14, 27),
+       REG_DWARFNUM_NAME(f15, 31),
+       REG_DWARFNUM_NAME(c0, 32),
+       REG_DWARFNUM_NAME(c1, 33),
+       REG_DWARFNUM_NAME(c2, 34),
+       REG_DWARFNUM_NAME(c3, 35),
+       REG_DWARFNUM_NAME(c4, 36),
+       REG_DWARFNUM_NAME(c5, 37),
+       REG_DWARFNUM_NAME(c6, 38),
+       REG_DWARFNUM_NAME(c7, 39),
+       REG_DWARFNUM_NAME(c8, 40),
+       REG_DWARFNUM_NAME(c9, 41),
+       REG_DWARFNUM_NAME(c10, 42),
+       REG_DWARFNUM_NAME(c11, 43),
+       REG_DWARFNUM_NAME(c12, 44),
+       REG_DWARFNUM_NAME(c13, 45),
+       REG_DWARFNUM_NAME(c14, 46),
+       REG_DWARFNUM_NAME(c15, 47),
+       REG_DWARFNUM_NAME(a0, 48),
+       REG_DWARFNUM_NAME(a1, 49),
+       REG_DWARFNUM_NAME(a2, 50),
+       REG_DWARFNUM_NAME(a3, 51),
+       REG_DWARFNUM_NAME(a4, 52),
+       REG_DWARFNUM_NAME(a5, 53),
+       REG_DWARFNUM_NAME(a6, 54),
+       REG_DWARFNUM_NAME(a7, 55),
+       REG_DWARFNUM_NAME(a8, 56),
+       REG_DWARFNUM_NAME(a9, 57),
+       REG_DWARFNUM_NAME(a10, 58),
+       REG_DWARFNUM_NAME(a11, 59),
+       REG_DWARFNUM_NAME(a12, 60),
+       REG_DWARFNUM_NAME(a13, 61),
+       REG_DWARFNUM_NAME(a14, 62),
+       REG_DWARFNUM_NAME(a15, 63),
+       REG_DWARFNUM_NAME(pswm, 64),
+       REG_DWARFNUM_NAME(pswa, 65),
 };
-#endif
+
+#ifdef DEFINE_DWARF_REGSTR_TABLE
+/* This is included in perf/util/dwarf-regs.c */
+
+#define s390_regstr_tbl s390_dwarf_regs
+
+#endif /* DEFINE_DWARF_REGSTR_TABLE */
+#endif /* S390_DWARF_REGS_TABLE_H */
diff --git a/tools/perf/arch/s390/include/perf_regs.h b/tools/perf/arch/s390/include/perf_regs.h
new file mode 100644 (file)
index 0000000..d2df54a
--- /dev/null
@@ -0,0 +1,95 @@
+#ifndef ARCH_PERF_REGS_H
+#define ARCH_PERF_REGS_H
+
+#include <stdlib.h>
+#include <linux/types.h>
+#include <../../../../arch/s390/include/uapi/asm/perf_regs.h>
+
+void perf_regs_load(u64 *regs);
+
+#define PERF_REGS_MASK ((1ULL << PERF_REG_S390_MAX) - 1)
+#define PERF_REGS_MAX PERF_REG_S390_MAX
+#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_64
+
+#define PERF_REG_IP PERF_REG_S390_PC
+#define PERF_REG_SP PERF_REG_S390_R15
+
+/*
+ * Translate a PERF_REG_S390_* register id into its printable name.
+ * Ids without an entry in the table, negative ids and ids past the
+ * end of the table all yield NULL.
+ */
+static inline const char *perf_reg_name(int id)
+{
+       static const char * const reg_names[] = {
+               [PERF_REG_S390_R0]   = "R0",
+               [PERF_REG_S390_R1]   = "R1",
+               [PERF_REG_S390_R2]   = "R2",
+               [PERF_REG_S390_R3]   = "R3",
+               [PERF_REG_S390_R4]   = "R4",
+               [PERF_REG_S390_R5]   = "R5",
+               [PERF_REG_S390_R6]   = "R6",
+               [PERF_REG_S390_R7]   = "R7",
+               [PERF_REG_S390_R8]   = "R8",
+               [PERF_REG_S390_R9]   = "R9",
+               [PERF_REG_S390_R10]  = "R10",
+               [PERF_REG_S390_R11]  = "R11",
+               [PERF_REG_S390_R12]  = "R12",
+               [PERF_REG_S390_R13]  = "R13",
+               [PERF_REG_S390_R14]  = "R14",
+               [PERF_REG_S390_R15]  = "R15",
+               [PERF_REG_S390_FP0]  = "FP0",
+               [PERF_REG_S390_FP1]  = "FP1",
+               [PERF_REG_S390_FP2]  = "FP2",
+               [PERF_REG_S390_FP3]  = "FP3",
+               [PERF_REG_S390_FP4]  = "FP4",
+               [PERF_REG_S390_FP5]  = "FP5",
+               [PERF_REG_S390_FP6]  = "FP6",
+               [PERF_REG_S390_FP7]  = "FP7",
+               [PERF_REG_S390_FP8]  = "FP8",
+               [PERF_REG_S390_FP9]  = "FP9",
+               [PERF_REG_S390_FP10] = "FP10",
+               [PERF_REG_S390_FP11] = "FP11",
+               [PERF_REG_S390_FP12] = "FP12",
+               [PERF_REG_S390_FP13] = "FP13",
+               [PERF_REG_S390_FP14] = "FP14",
+               [PERF_REG_S390_FP15] = "FP15",
+               [PERF_REG_S390_MASK] = "MASK",
+               [PERF_REG_S390_PC]   = "PC",
+       };
+
+       if (id < 0)
+               return NULL;
+       if ((size_t)id >= sizeof(reg_names) / sizeof(reg_names[0]))
+               return NULL;
+
+       /* Gaps between designated entries are NULL, like the old default */
+       return reg_names[id];
+}
+
+#endif /* ARCH_PERF_REGS_H */
index 5bd7b9260cc0858c36730ee367aecc56df6c91bb..4a233683c6848115f8619253aaa1109a92522f54 100644 (file)
@@ -2,5 +2,8 @@ libperf-y += header.o
 libperf-y += kvm-stat.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
+libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
 
 libperf-y += machine.o
+
+libperf-$(CONFIG_AUXTRACE) += auxtrace.o
diff --git a/tools/perf/arch/s390/util/auxtrace.c b/tools/perf/arch/s390/util/auxtrace.c
new file mode 100644 (file)
index 0000000..6cb48e4
--- /dev/null
@@ -0,0 +1,118 @@
+#include <stdbool.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
+
+#include "../../util/evlist.h"
+#include "../../util/auxtrace.h"
+#include "../../util/evsel.h"
+
+#define PERF_EVENT_CPUM_SF             0xB0000 /* Event: Basic-sampling */
+#define PERF_EVENT_CPUM_SF_DIAG                0xBD000 /* Event: Combined-sampling */
+#define DEFAULT_AUX_PAGES              128
+#define DEFAULT_FREQ                   4000
+
+static void cpumsf_free(struct auxtrace_record *itr)
+{
+       free(itr);      /* releases the record itself; nothing else is freed here */
+}
+
+static size_t cpumsf_info_priv_size(struct auxtrace_record *itr __maybe_unused,
+                                   struct perf_evlist *evlist __maybe_unused)
+{
+       return 0;       /* both arguments are ignored; the reported size is always 0 */
+}
+
+static int
+cpumsf_info_fill(struct auxtrace_record *itr __maybe_unused,
+                struct perf_session *session __maybe_unused,
+                struct auxtrace_info_event *auxtrace_info __maybe_unused,
+                size_t priv_size __maybe_unused)
+{
+       return 0;       /* auxtrace_info is left untouched; always returns 0 */
+}
+
+static unsigned long
+cpumsf_reference(struct auxtrace_record *itr __maybe_unused)
+{
+       return 0;       /* constant value; the record is not consulted */
+}
+
+/*
+ * Adjust the record options for diagnostic sampling: request a full
+ * AUX trace and, when no AUX buffer size was given, choose one that
+ * scales with the requested sampling frequency.
+ *
+ * Only opts is used. Always returns 0.
+ */
+static int
+cpumsf_recording_options(struct auxtrace_record *ar __maybe_unused,
+                        struct perf_evlist *evlist __maybe_unused,
+                        struct record_opts *opts)
+{
+       unsigned int factor = 1;
+       unsigned int pages;
+
+       opts->full_auxtrace = true;
+
+       /*
+        * The AUX buffer size should be set properly to avoid
+        * overflow of samples if it is not set explicitly.
+        * DEFAULT_AUX_PAGES is a proper size when the sampling frequency
+        * is DEFAULT_FREQ. It is expected to hold about 1/2 second
+        * of sampling data. The size used for AUX buffer will scale
+        * according to the specified frequency and DEFAULT_FREQ.
+        */
+       if (!opts->auxtrace_mmap_pages) {
+               if (opts->user_freq != UINT_MAX) {
+                       /*
+                        * Round up without computing
+                        * user_freq + DEFAULT_FREQ - 1, which can wrap
+                        * for very large frequencies.
+                        */
+                       factor = opts->user_freq / DEFAULT_FREQ;
+                       if (opts->user_freq % DEFAULT_FREQ)
+                               factor++;
+                       /*
+                        * A frequency of 0 would give 0 pages, and
+                        * roundup_pow_of_two(0) is undefined. Never go
+                        * below the default size.
+                        */
+                       if (!factor)
+                               factor = 1;
+               }
+               pages = DEFAULT_AUX_PAGES * factor;
+               opts->auxtrace_mmap_pages = roundup_pow_of_two(pages);
+       }
+
+       return 0;
+}
+
+static int
+cpumsf_parse_snapshot_options(struct auxtrace_record *itr __maybe_unused,
+                             struct record_opts *opts __maybe_unused,
+                             const char *str __maybe_unused)
+{
+       return 0;       /* str is not parsed and opts is not modified; always 0 */
+}
+
+/*
+ * auxtrace_record__init is called by perf record to check whether the
+ * selected events really need an AUX trace.
+ *
+ * Returns a newly allocated record with the cpumsf_* callbacks installed
+ * when the event list contains an event whose config is
+ * PERF_EVENT_CPUM_SF_DIAG, and NULL otherwise.
+ *
+ * *err is set to 0, or to -ENOMEM when the record cannot be allocated.
+ * NULL alone does not indicate an error; callers must look at *err.
+ */
+struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist,
+                                             int *err)
+{
+       struct auxtrace_record *aux;
+       struct perf_evsel *pos;
+       int diagnose = 0;
+
+       /* NULL also means "no AUX trace needed", so always report a status */
+       *err = 0;
+
+       if (evlist->nr_entries == 0)
+               return NULL;
+
+       evlist__for_each_entry(evlist, pos) {
+               if (pos->attr.config == PERF_EVENT_CPUM_SF_DIAG) {
+                       diagnose = 1;
+                       break;
+               }
+       }
+
+       if (!diagnose)
+               return NULL;
+
+       /* sampling in diagnose mode. alloc aux buffer */
+       aux = zalloc(sizeof(*aux));
+       if (aux == NULL) {
+               *err = -ENOMEM;
+               return NULL;
+       }
+
+       aux->parse_snapshot_options = cpumsf_parse_snapshot_options;
+       aux->recording_options = cpumsf_recording_options;
+       aux->info_priv_size = cpumsf_info_priv_size;
+       aux->info_fill = cpumsf_info_fill;
+       aux->free = cpumsf_free;
+       aux->reference = cpumsf_reference;
+
+       return aux;
+}
index 0dff5b2ed1e5093d3d8df16210061415630a1f95..f47576ce13ea9da3d6220894c8c1d191a5fe889b 100644 (file)
@@ -9,15 +9,10 @@
 
 #include <stddef.h>
 #include <dwarf-regs.h>
-
-#define NUM_GPRS 16
-
-static const char *gpr_names[NUM_GPRS] = {
-       "%r0", "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
-       "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
-};
+#include <linux/kernel.h>
+#include "dwarf-regs-table.h"
 
 const char *get_arch_regstr(unsigned int n)
 {
-       return (n >= NUM_GPRS) ? NULL : gpr_names[n];
+       return (n >= ARRAY_SIZE(s390_dwarf_regs)) ? NULL : s390_dwarf_regs[n];
 }
diff --git a/tools/perf/arch/s390/util/unwind-libdw.c b/tools/perf/arch/s390/util/unwind-libdw.c
new file mode 100644 (file)
index 0000000..387c698
--- /dev/null
@@ -0,0 +1,63 @@
+#include <linux/kernel.h>
+#include <elfutils/libdwfl.h>
+#include "../../util/unwind-libdw.h"
+#include "../../util/perf_regs.h"
+#include "../../util/event.h"
+#include "dwarf-regs-table.h"
+
+
+bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
+{
+       struct unwind_info *ui = arg;   /* arg is used as a struct unwind_info */
+       struct regs_dump *user_regs = &ui->sample->user_regs;
+       Dwarf_Word dwarf_regs[ARRAY_SIZE(s390_dwarf_regs)];
+
+#define REG(r) ({                                              \
+       Dwarf_Word val = 0;                                     \
+       perf_reg_value(&val, user_regs, PERF_REG_S390_##r);     \
+       val;                                                    \
+})
+       /*
+        * Fill dwarf_regs[], indexed by DWARF register number, from the
+        * user registers of the sample and pass the result to libdw.
+        * REG() starts from 0 and ignores the perf_reg_value() return
+        * value, so a register that cannot be read presumably stays 0
+        * (confirm against perf_reg_value()).
+        *
+        * For DWARF register mapping details,
+        * see also perf/arch/s390/include/dwarf-regs-table.h
+        */
+       dwarf_regs[0]  = REG(R0);
+       dwarf_regs[1]  = REG(R1);
+       dwarf_regs[2]  = REG(R2);
+       dwarf_regs[3]  = REG(R3);
+       dwarf_regs[4]  = REG(R4);
+       dwarf_regs[5]  = REG(R5);
+       dwarf_regs[6]  = REG(R6);
+       dwarf_regs[7]  = REG(R7);
+       dwarf_regs[8]  = REG(R8);
+       dwarf_regs[9]  = REG(R9);
+       dwarf_regs[10] = REG(R10);
+       dwarf_regs[11] = REG(R11);
+       dwarf_regs[12] = REG(R12);
+       dwarf_regs[13] = REG(R13);
+       dwarf_regs[14] = REG(R14);
+       dwarf_regs[15] = REG(R15);
+
+       dwarf_regs[16] = REG(FP0);      /* FP slots are not in FP-number order */
+       dwarf_regs[17] = REG(FP2);
+       dwarf_regs[18] = REG(FP4);
+       dwarf_regs[19] = REG(FP6);
+       dwarf_regs[20] = REG(FP1);
+       dwarf_regs[21] = REG(FP3);
+       dwarf_regs[22] = REG(FP5);
+       dwarf_regs[23] = REG(FP7);
+       dwarf_regs[24] = REG(FP8);
+       dwarf_regs[25] = REG(FP10);
+       dwarf_regs[26] = REG(FP12);
+       dwarf_regs[27] = REG(FP14);
+       dwarf_regs[28] = REG(FP9);
+       dwarf_regs[29] = REG(FP11);
+       dwarf_regs[30] = REG(FP13);
+       dwarf_regs[31] = REG(FP15);
+
+       dwarf_regs[64] = REG(MASK);     /* slots 32..63 are never written */
+       dwarf_regs[65] = REG(PC);
+
+       dwfl_thread_state_register_pc(thread, dwarf_regs[65]); /* PC is taken from slot 65 */
+       return dwfl_thread_state_registers(thread, 0, 32, dwarf_regs); /* called with 0 and 32: the GPR and FP slots filled above */
+}