Merge branch 'sh/stable-updates'
author Paul Mundt <lethal@linux-sh.org>
Sat, 15 Aug 2009 03:59:42 +0000 (12:59 +0900)
committer Paul Mundt <lethal@linux-sh.org>
Sat, 15 Aug 2009 03:59:42 +0000 (12:59 +0900)
45 files changed:
Makefile
arch/sh/boards/mach-se/7724/setup.c
arch/sh/kernel/cpu/shmobile/sleep.S
arch/x86/Kconfig
arch/x86/kernel/cpu/amd.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/mcheck/therm_throt.c
arch/x86/kernel/cpu/perf_counter.c
arch/x86/kernel/reboot.c
drivers/clocksource/sh_cmt.c
drivers/md/md.c
drivers/md/md.h
drivers/md/raid5.c
drivers/video/sh_mobile_lcdcfb.c
fs/ocfs2/alloc.c
fs/ocfs2/aops.c
fs/ocfs2/dcache.c
fs/ocfs2/dcache.h
fs/ocfs2/dlm/dlmast.c
fs/ocfs2/dlm/dlmrecovery.c
fs/ocfs2/file.c
fs/ocfs2/journal.c
fs/ocfs2/journal.h
fs/ocfs2/ocfs2.h
fs/ocfs2/quota.h
fs/ocfs2/quota_global.c
fs/ocfs2/quota_local.c
fs/ocfs2/stack_o2cb.c
fs/ocfs2/super.c
fs/ocfs2/xattr.c
include/linux/perf_counter.h
include/linux/wait.h
kernel/futex.c
kernel/futex_compat.c
kernel/irq/manage.c
kernel/perf_counter.c
kernel/wait.c
net/socket.c
tools/perf/Makefile
tools/perf/builtin-list.c
tools/perf/builtin-record.c
tools/perf/builtin-report.c
tools/perf/util/parse-events.c
tools/perf/util/symbol.c
tools/perf/util/symbol.h

index 0d46615bffe5c95704a83cfb7cdf82d2fa792c48..abcfa85f8f825392890bc26da007e5af3fd94d6c 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 31
-EXTRAVERSION = -rc5
+EXTRAVERSION = -rc6
 NAME = Man-Eating Seals of Antiquity
 
 # *DOCUMENTATION*
index 36a4ca3a90008eb33c99dde97608424506f3e2a8..9162081504ef43ccf61e61d2765a08710b8fbd8f 100644
--- a/arch/sh/boards/mach-se/7724/setup.c
+++ b/arch/sh/boards/mach-se/7724/setup.c
@@ -246,7 +246,7 @@ static struct platform_device ceu1_device = {
        },
 };
 
-/* KEYSC */
+/* KEYSC in SoC (Needs SW33-2 set to ON) */
 static struct sh_keysc_info keysc_info = {
        .mode = SH_KEYSC_MODE_1,
        .scan_timing = 10,
@@ -263,12 +263,13 @@ static struct sh_keysc_info keysc_info = {
 
 static struct resource keysc_resources[] = {
        [0] = {
-               .start  = 0x1a204000,
-               .end    = 0x1a20400f,
+               .name   = "KEYSC",
+               .start  = 0x044b0000,
+               .end    = 0x044b000f,
                .flags  = IORESOURCE_MEM,
        },
        [1] = {
-               .start  = IRQ0_KEY,
+               .start  = 79,
                .flags  = IORESOURCE_IRQ,
        },
 };
index 5d888ef53d82b02e5eec2ebe1b29d63f2f7afa46..baf2d7d46b05ba35fe936d2788128d0c1ebe9df5 100644
--- a/arch/sh/kernel/cpu/shmobile/sleep.S
+++ b/arch/sh/kernel/cpu/shmobile/sleep.S
@@ -26,8 +26,30 @@ ENTRY(sh_mobile_standby)
 
        tst     #SUSP_SH_SF, r0
        bt      skip_set_sf
+#ifdef CONFIG_CPU_SUBTYPE_SH7724
+       /* DBSC: put memory in self-refresh mode */
 
-       /* SDRAM: disable power down and put in self-refresh mode */
+       mov.l   dben_reg, r4
+       mov.l   dben_data0, r1
+       mov.l   r1, @r4
+
+       mov.l   dbrfpdn0_reg, r4
+       mov.l   dbrfpdn0_data0, r1
+       mov.l   r1, @r4
+
+       mov.l   dbcmdcnt_reg, r4
+       mov.l   dbcmdcnt_data0, r1
+       mov.l   r1, @r4
+
+       mov.l   dbcmdcnt_reg, r4
+       mov.l   dbcmdcnt_data1, r1
+       mov.l   r1, @r4
+
+       mov.l   dbrfpdn0_reg, r4
+       mov.l   dbrfpdn0_data1, r1
+       mov.l   r1, @r4
+#else
+       /* SBSC: disable power down and put in self-refresh mode */
        mov.l   1f, r4
        mov.l   2f, r1
        mov.l   @r4, r2
@@ -35,6 +57,7 @@ ENTRY(sh_mobile_standby)
        mov.l   3f, r3
        and     r3, r2
        mov.l   r2, @r4
+#endif
 
 skip_set_sf:
        tst     #SUSP_SH_SLEEP, r0
@@ -84,7 +107,36 @@ done_sleep:
        tst     #SUSP_SH_SF, r0
        bt      skip_restore_sf
 
-       /* SDRAM: set auto-refresh mode */
+#ifdef CONFIG_CPU_SUBTYPE_SH7724
+       /* DBSC: put memory in auto-refresh mode */
+
+       mov.l   dbrfpdn0_reg, r4
+       mov.l   dbrfpdn0_data0, r1
+       mov.l   r1, @r4
+
+       /* sleep 140 ns */
+       nop
+       nop
+       nop
+       nop
+
+       mov.l   dbcmdcnt_reg, r4
+       mov.l   dbcmdcnt_data0, r1
+       mov.l   r1, @r4
+
+       mov.l   dbcmdcnt_reg, r4
+       mov.l   dbcmdcnt_data1, r1
+       mov.l   r1, @r4
+
+       mov.l   dben_reg, r4
+       mov.l   dben_data1, r1
+       mov.l   r1, @r4
+
+       mov.l   dbrfpdn0_reg, r4
+       mov.l   dbrfpdn0_data2, r1
+       mov.l   r1, @r4
+#else
+       /* SBSC: set auto-refresh mode */
        mov.l   1f, r4
        mov.l   @r4, r2
        mov.l   4f, r3
@@ -98,15 +150,29 @@ done_sleep:
        add     r4, r3
        or      r2, r3
        mov.l   r3, @r1
+#endif
 skip_restore_sf:
        rts
         nop
 
        .balign 4
+#ifdef CONFIG_CPU_SUBTYPE_SH7724
+dben_reg:      .long   0xfd000010 /* DBEN */
+dben_data0:    .long   0
+dben_data1:    .long   1
+dbrfpdn0_reg:  .long   0xfd000040 /* DBRFPDN0 */
+dbrfpdn0_data0:        .long   0
+dbrfpdn0_data1:        .long   1
+dbrfpdn0_data2:        .long   0x00010000
+dbcmdcnt_reg:  .long   0xfd000014 /* DBCMDCNT */
+dbcmdcnt_data0:        .long   2
+dbcmdcnt_data1:        .long   4
+#else
 1:     .long   0xfe400008 /* SDCR0 */
 2:     .long   0x00000400
 3:     .long   0xffff7fff
 4:     .long   0xfffffbff
+#endif
 5:     .long   0xa4150020 /* STBCR */
 6:     .long   0xfe40001c /* RTCOR */
 7:     .long   0xfe400018 /* RTCNT */
index 738bdc6b0f8b8dcd938eddefa79beb4ed77e16f2..13ffa5df37d75120e6a3965102b24a05d3ee0fb7 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -24,6 +24,7 @@ config X86
        select HAVE_UNSTABLE_SCHED_CLOCK
        select HAVE_IDE
        select HAVE_OPROFILE
+       select HAVE_PERF_COUNTERS if (!M386 && !M486)
        select HAVE_IOREMAP_PROT
        select HAVE_KPROBES
        select ARCH_WANT_OPTIONAL_GPIOLIB
@@ -742,7 +743,6 @@ config X86_UP_IOAPIC
 config X86_LOCAL_APIC
        def_bool y
        depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC
-       select HAVE_PERF_COUNTERS if (!M386 && !M486)
 
 config X86_IO_APIC
        def_bool y
index e2485b03f1cf2649341c9c8d9412495f894eeaa4..63fddcd082cdf2a3b4cc261d54608ec2dac07b64 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -400,6 +400,13 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
                level = cpuid_eax(1);
                if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
                        set_cpu_cap(c, X86_FEATURE_REP_GOOD);
+
+               /*
+                * Some BIOSes incorrectly force this feature, but only K8
+                * revision D (model = 0x14) and later actually support it.
+                */
+               if (c->x86_model < 0x14)
+                       clear_cpu_cap(c, X86_FEATURE_LAHF_LM);
        }
        if (c->x86 == 0x10 || c->x86 == 0x11)
                set_cpu_cap(c, X86_FEATURE_REP_GOOD);
index f1961c07af9a684b0fd15ad1636ef21dbf1d31ac..5ce60a88027b16a46edf3b1afb3430d550cd5012 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -59,7 +59,30 @@ void __init setup_cpu_local_masks(void)
        alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
 }
 
-static const struct cpu_dev *this_cpu __cpuinitdata;
+static void __cpuinit default_init(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_X86_64
+       display_cacheinfo(c);
+#else
+       /* Not much we can do here... */
+       /* Check if at least it has cpuid */
+       if (c->cpuid_level == -1) {
+               /* No cpuid. It must be an ancient CPU */
+               if (c->x86 == 4)
+                       strcpy(c->x86_model_id, "486");
+               else if (c->x86 == 3)
+                       strcpy(c->x86_model_id, "386");
+       }
+#endif
+}
+
+static const struct cpu_dev __cpuinitconst default_cpu = {
+       .c_init         = default_init,
+       .c_vendor       = "Unknown",
+       .c_x86_vendor   = X86_VENDOR_UNKNOWN,
+};
+
+static const struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
 
 DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
 #ifdef CONFIG_X86_64
@@ -332,29 +355,6 @@ void switch_to_new_gdt(int cpu)
 
 static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {};
 
-static void __cpuinit default_init(struct cpuinfo_x86 *c)
-{
-#ifdef CONFIG_X86_64
-       display_cacheinfo(c);
-#else
-       /* Not much we can do here... */
-       /* Check if at least it has cpuid */
-       if (c->cpuid_level == -1) {
-               /* No cpuid. It must be an ancient CPU */
-               if (c->x86 == 4)
-                       strcpy(c->x86_model_id, "486");
-               else if (c->x86 == 3)
-                       strcpy(c->x86_model_id, "386");
-       }
-#endif
-}
-
-static const struct cpu_dev __cpuinitconst default_cpu = {
-       .c_init = default_init,
-       .c_vendor = "Unknown",
-       .c_x86_vendor = X86_VENDOR_UNKNOWN,
-};
-
 static void __cpuinit get_model_name(struct cpuinfo_x86 *c)
 {
        unsigned int *v;
index bff8dd191dd5c9ae5f3235ad22a6239bf8e8de74..8bc64cfbe9368fdc204b4ae32874f175dc5696cb 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -36,6 +36,7 @@
 
 static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
 static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
+static DEFINE_PER_CPU(bool, thermal_throttle_active);
 
 static atomic_t therm_throt_en         = ATOMIC_INIT(0);
 
@@ -96,24 +97,27 @@ static int therm_throt_process(int curr)
 {
        unsigned int cpu = smp_processor_id();
        __u64 tmp_jiffs = get_jiffies_64();
+       bool was_throttled = __get_cpu_var(thermal_throttle_active);
+       bool is_throttled = __get_cpu_var(thermal_throttle_active) = curr;
 
-       if (curr)
+       if (is_throttled)
                __get_cpu_var(thermal_throttle_count)++;
 
-       if (time_before64(tmp_jiffs, __get_cpu_var(next_check)))
+       if (!(was_throttled ^ is_throttled) &&
+           time_before64(tmp_jiffs, __get_cpu_var(next_check)))
                return 0;
 
        __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL;
 
        /* if we just entered the thermal event */
-       if (curr) {
+       if (is_throttled) {
                printk(KERN_CRIT "CPU%d: Temperature above threshold, "
-                      "cpu clock throttled (total events = %lu)\n", cpu,
-                      __get_cpu_var(thermal_throttle_count));
+                      "cpu clock throttled (total events = %lu)\n",
+                      cpu, __get_cpu_var(thermal_throttle_count));
 
                add_taint(TAINT_MACHINE_CHECK);
-       } else {
-               printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu);
+       } else if (was_throttled) {
+               printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
        }
 
        return 1;
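
The therm_throt.c hunk above replaces the unconditional "normal" message with per-CPU state tracking, so messages are only emitted when the throttle state actually changes. A minimal user-space sketch of that transition logic follows; it is illustrative only (not code from this commit), drops the per-CPU variables and the next_check rate limiting, and uses made-up names:

#include <stdbool.h>
#include <stdio.h>

static bool active;            /* models per-CPU thermal_throttle_active */
static unsigned long events;   /* models per-CPU thermal_throttle_count */

static int process(bool curr)
{
	bool was = active;
	bool is = active = curr;

	if (is)
		events++;

	/* no state change: nothing to report (the kernel also re-reports
	 * a persisting condition once per CHECK_INTERVAL, omitted here) */
	if (!(was ^ is))
		return 0;

	if (is)
		printf("throttled (total events = %lu)\n", events);
	else
		printf("temperature/speed normal\n");
	return 1;
}

int main(void)
{
	bool samples[] = { 0, 1, 1, 0, 0, 1 };
	for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		process(samples[i]);
	return 0;
}

With the sample input above, only the three transitions produce output, which is the behaviour the patch is after.
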
index a7aa8f900954811e046f397547a19e88291aaeb3..900332b800f870d9f3f9e03bf2c5fa12505bc914 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -55,6 +55,7 @@ struct x86_pmu {
        int             num_counters_fixed;
        int             counter_bits;
        u64             counter_mask;
+       int             apic;
        u64             max_period;
        u64             intel_ctrl;
 };
@@ -72,8 +73,8 @@ static const u64 p6_perfmon_event_map[] =
 {
   [PERF_COUNT_HW_CPU_CYCLES]           = 0x0079,
   [PERF_COUNT_HW_INSTRUCTIONS]         = 0x00c0,
-  [PERF_COUNT_HW_CACHE_REFERENCES]     = 0x0000,
-  [PERF_COUNT_HW_CACHE_MISSES]         = 0x0000,
+  [PERF_COUNT_HW_CACHE_REFERENCES]     = 0x0f2e,
+  [PERF_COUNT_HW_CACHE_MISSES]         = 0x012e,
   [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]  = 0x00c4,
   [PERF_COUNT_HW_BRANCH_MISSES]                = 0x00c5,
   [PERF_COUNT_HW_BUS_CYCLES]           = 0x0062,
@@ -613,6 +614,7 @@ static DEFINE_MUTEX(pmc_reserve_mutex);
 
 static bool reserve_pmc_hardware(void)
 {
+#ifdef CONFIG_X86_LOCAL_APIC
        int i;
 
        if (nmi_watchdog == NMI_LOCAL_APIC)
@@ -627,9 +629,11 @@ static bool reserve_pmc_hardware(void)
                if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
                        goto eventsel_fail;
        }
+#endif
 
        return true;
 
+#ifdef CONFIG_X86_LOCAL_APIC
 eventsel_fail:
        for (i--; i >= 0; i--)
                release_evntsel_nmi(x86_pmu.eventsel + i);
@@ -644,10 +648,12 @@ perfctr_fail:
                enable_lapic_nmi_watchdog();
 
        return false;
+#endif
 }
 
 static void release_pmc_hardware(void)
 {
+#ifdef CONFIG_X86_LOCAL_APIC
        int i;
 
        for (i = 0; i < x86_pmu.num_counters; i++) {
@@ -657,6 +663,7 @@ static void release_pmc_hardware(void)
 
        if (nmi_watchdog == NMI_LOCAL_APIC)
                enable_lapic_nmi_watchdog();
+#endif
 }
 
 static void hw_perf_counter_destroy(struct perf_counter *counter)
@@ -748,6 +755,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
                hwc->sample_period = x86_pmu.max_period;
                hwc->last_period = hwc->sample_period;
                atomic64_set(&hwc->period_left, hwc->sample_period);
+       } else {
+               /*
+                * If we have a PMU initialized but no APIC
+                * interrupts, we cannot sample hardware
+                * counters (user-space has to fall back and
+                * sample via a hrtimer based software counter):
+                */
+               if (!x86_pmu.apic)
+                       return -EOPNOTSUPP;
        }
 
        counter->destroy = hw_perf_counter_destroy;
@@ -1449,18 +1465,22 @@ void smp_perf_pending_interrupt(struct pt_regs *regs)
 
 void set_perf_counter_pending(void)
 {
+#ifdef CONFIG_X86_LOCAL_APIC
        apic->send_IPI_self(LOCAL_PENDING_VECTOR);
+#endif
 }
 
 void perf_counters_lapic_init(void)
 {
-       if (!x86_pmu_initialized())
+#ifdef CONFIG_X86_LOCAL_APIC
+       if (!x86_pmu.apic || !x86_pmu_initialized())
                return;
 
        /*
         * Always use NMI for PMU
         */
        apic_write(APIC_LVTPC, APIC_DM_NMI);
+#endif
 }
 
 static int __kprobes
@@ -1484,7 +1504,9 @@ perf_counter_nmi_handler(struct notifier_block *self,
 
        regs = args->regs;
 
+#ifdef CONFIG_X86_LOCAL_APIC
        apic_write(APIC_LVTPC, APIC_DM_NMI);
+#endif
        /*
         * Can't rely on the handled return value to say it was our NMI, two
         * counters could trigger 'simultaneously' raising two back-to-back NMIs.
@@ -1515,6 +1537,7 @@ static struct x86_pmu p6_pmu = {
        .event_map              = p6_pmu_event_map,
        .raw_event              = p6_pmu_raw_event,
        .max_events             = ARRAY_SIZE(p6_perfmon_event_map),
+       .apic                   = 1,
        .max_period             = (1ULL << 31) - 1,
        .version                = 0,
        .num_counters           = 2,
@@ -1541,6 +1564,7 @@ static struct x86_pmu intel_pmu = {
        .event_map              = intel_pmu_event_map,
        .raw_event              = intel_pmu_raw_event,
        .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
+       .apic                   = 1,
        /*
         * Intel PMCs cannot be accessed sanely above 32 bit width,
         * so we install an artificial 1<<31 period regardless of
@@ -1564,6 +1588,7 @@ static struct x86_pmu amd_pmu = {
        .num_counters           = 4,
        .counter_bits           = 48,
        .counter_mask           = (1ULL << 48) - 1,
+       .apic                   = 1,
        /* use highest bit to detect overflow */
        .max_period             = (1ULL << 47) - 1,
 };
@@ -1589,13 +1614,14 @@ static int p6_pmu_init(void)
                return -ENODEV;
        }
 
+       x86_pmu = p6_pmu;
+
        if (!cpu_has_apic) {
-               pr_info("no Local APIC, try rebooting with lapic");
-               return -ENODEV;
+               pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
+               pr_info("no hardware sampling interrupt available.\n");
+               x86_pmu.apic = 0;
        }
 
-       x86_pmu                         = p6_pmu;
-
        return 0;
 }
 
index 9eb897603705724ce9daaedaf62ba778aff193dc..a06e8d1018449dd70e4aac4b1098b6b06c4b9bd1 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -418,20 +418,20 @@ static int __init set_pci_reboot(const struct dmi_system_id *d)
 }
 
 static struct dmi_system_id __initdata pci_reboot_dmi_table[] = {
-       {       /* Handle problems with rebooting on Apple MacBook5,2 */
+       {       /* Handle problems with rebooting on Apple MacBook5 */
                .callback = set_pci_reboot,
-               .ident = "Apple MacBook",
+               .ident = "Apple MacBook5",
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5,2"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5"),
                },
        },
-       {       /* Handle problems with rebooting on Apple MacBookPro5,1 */
+       {       /* Handle problems with rebooting on Apple MacBookPro5 */
                .callback = set_pci_reboot,
-               .ident = "Apple MacBookPro5,1",
+               .ident = "Apple MacBookPro5",
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5,1"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5"),
                },
        },
        { }
index 2964f5f4a7ef3348104044f008992d22b08b55b1..6b3e0c2f33e2b193838fbecc0dda72aa9f0db5ef 100644
--- a/drivers/clocksource/sh_cmt.c
+++ b/drivers/clocksource/sh_cmt.c
@@ -40,6 +40,7 @@ struct sh_cmt_priv {
        struct platform_device *pdev;
 
        unsigned long flags;
+       unsigned long flags_suspend;
        unsigned long match_value;
        unsigned long next_match_value;
        unsigned long max_match_value;
@@ -667,11 +668,38 @@ static int __devexit sh_cmt_remove(struct platform_device *pdev)
        return -EBUSY; /* cannot unregister clockevent and clocksource */
 }
 
+static int sh_cmt_suspend(struct device *dev)
+{
+       struct platform_device *pdev = to_platform_device(dev);
+       struct sh_cmt_priv *p = platform_get_drvdata(pdev);
+
+       /* save flag state and stop CMT channel */
+       p->flags_suspend = p->flags;
+       sh_cmt_stop(p, p->flags);
+       return 0;
+}
+
+static int sh_cmt_resume(struct device *dev)
+{
+       struct platform_device *pdev = to_platform_device(dev);
+       struct sh_cmt_priv *p = platform_get_drvdata(pdev);
+
+       /* start CMT channel from saved state */
+       sh_cmt_start(p, p->flags_suspend);
+       return 0;
+}
+
+static struct dev_pm_ops sh_cmt_dev_pm_ops = {
+       .suspend = sh_cmt_suspend,
+       .resume = sh_cmt_resume,
+};
+
 static struct platform_driver sh_cmt_device_driver = {
        .probe          = sh_cmt_probe,
        .remove         = __devexit_p(sh_cmt_remove),
        .driver         = {
                .name   = "sh_cmt",
+               .pm     = &sh_cmt_dev_pm_ops,
        }
 };
 
index 5b98bea4ff9b36ccd132398dd9f8cc5e5568b8f4..103f2d33fa8978bc5a01724846cf6628b260d0be 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -359,6 +359,7 @@ static mddev_t * mddev_find(dev_t unit)
        else
                new->md_minor = MINOR(unit) >> MdpMinorShift;
 
+       mutex_init(&new->open_mutex);
        mutex_init(&new->reconfig_mutex);
        INIT_LIST_HEAD(&new->disks);
        INIT_LIST_HEAD(&new->all_mddevs);
@@ -1974,17 +1975,14 @@ repeat:
                /* otherwise we have to go forward and ... */
                mddev->events ++;
                if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */
-                       /* .. if the array isn't clean, insist on an odd 'events' */
-                       if ((mddev->events&1)==0) {
-                               mddev->events++;
+                       /* .. if the array isn't clean, an 'even' event must also go
+                        * to spares. */
+                       if ((mddev->events&1)==0)
                                nospares = 0;
-                       }
                } else {
-                       /* otherwise insist on an even 'events' (for clean states) */
-                       if ((mddev->events&1)) {
-                               mddev->events++;
+                       /* otherwise an 'odd' event must go to spares */
+                       if ((mddev->events&1))
                                nospares = 0;
-                       }
                }
        }
 
@@ -3601,6 +3599,7 @@ max_sync_store(mddev_t *mddev, const char *buf, size_t len)
                if (max < mddev->resync_min)
                        return -EINVAL;
                if (max < mddev->resync_max &&
+                   mddev->ro == 0 &&
                    test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
                        return -EBUSY;
 
@@ -4304,12 +4303,11 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
        struct gendisk *disk = mddev->gendisk;
        mdk_rdev_t *rdev;
 
+       mutex_lock(&mddev->open_mutex);
        if (atomic_read(&mddev->openers) > is_open) {
                printk("md: %s still in use.\n",mdname(mddev));
-               return -EBUSY;
-       }
-
-       if (mddev->pers) {
+               err = -EBUSY;
+       } else if (mddev->pers) {
 
                if (mddev->sync_thread) {
                        set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
@@ -4367,7 +4365,10 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
                        set_disk_ro(disk, 1);
                clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
        }
-
+out:
+       mutex_unlock(&mddev->open_mutex);
+       if (err)
+               return err;
        /*
         * Free resources if final stop
         */
@@ -4433,7 +4434,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
        blk_integrity_unregister(disk);
        md_new_event(mddev);
        sysfs_notify_dirent(mddev->sysfs_state);
-out:
        return err;
 }
 
@@ -5518,12 +5518,12 @@ static int md_open(struct block_device *bdev, fmode_t mode)
        }
        BUG_ON(mddev != bdev->bd_disk->private_data);
 
-       if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1)))
+       if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
                goto out;
 
        err = 0;
        atomic_inc(&mddev->openers);
-       mddev_unlock(mddev);
+       mutex_unlock(&mddev->open_mutex);
 
        check_disk_change(bdev);
  out:
index 78f03168baf93a8949ca534aa2c13ceb34edf69b..f8fc188bc762d4b6fe1ffb5dadc9ac43a36d6dff 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -223,6 +223,16 @@ struct mddev_s
                                                            * so we don't loop trying */
 
        int                             in_sync;        /* know to not need resync */
+       /* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so
+        * that we are never stopping an array while it is open.
+        * 'reconfig_mutex' protects all other reconfiguration.
+        * These locks are separate due to conflicting interactions
+        * with bdev->bd_mutex.
+        * Lock ordering is:
+        *  reconfig_mutex -> bd_mutex : e.g. do_md_run -> revalidate_disk
+        *  bd_mutex -> open_mutex:  e.g. __blkdev_get -> md_open
+        */
+       struct mutex                    open_mutex;
        struct mutex                    reconfig_mutex;
        atomic_t                        active;         /* general refcount */
        atomic_t                        openers;        /* number of active opens */
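
The comment added to struct mddev_s above documents the lock ordering that makes a separate open_mutex necessary. The sketch below is a minimal pthread model of that ordering, not md code: the paths and names are made up, and it only shows that open_mutex always nests innermost, so the open and stop paths can serialize on it without ever holding reconfig_mutex under bd_mutex.

#include <pthread.h>

static pthread_mutex_t reconfig_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t bd_mutex       = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t open_mutex     = PTHREAD_MUTEX_INITIALIZER;

static void open_path(void)    /* models __blkdev_get -> md_open */
{
	pthread_mutex_lock(&bd_mutex);
	pthread_mutex_lock(&open_mutex);
	/* ... count the opener ... */
	pthread_mutex_unlock(&open_mutex);
	pthread_mutex_unlock(&bd_mutex);
}

static void run_path(void)     /* models do_md_run -> revalidate_disk */
{
	pthread_mutex_lock(&reconfig_mutex);
	pthread_mutex_lock(&bd_mutex);
	/* ... revalidate the disk ... */
	pthread_mutex_unlock(&bd_mutex);
	pthread_mutex_unlock(&reconfig_mutex);
}

static void stop_path(void)    /* models do_md_stop checking openers */
{
	pthread_mutex_lock(&open_mutex);
	/* ... refuse to stop while openers > is_open, else tear down ... */
	pthread_mutex_unlock(&open_mutex);
}

int main(void)
{
	open_path();
	run_path();
	stop_path();
	return 0;
}
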
index 2b521ee67dfa0c86923ff2c0e1ba2790977cc63a..b8a2c5dc67ba7753d575e27ce0036f440451664d 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3785,7 +3785,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
                    conf->reshape_progress < raid5_size(mddev, 0, 0)) {
                        sector_nr = raid5_size(mddev, 0, 0)
                                - conf->reshape_progress;
-               } else if (mddev->delta_disks > 0 &&
+               } else if (mddev->delta_disks >= 0 &&
                           conf->reshape_progress > 0)
                        sector_nr = conf->reshape_progress;
                sector_div(sector_nr, new_data_disks);
@@ -4509,7 +4509,26 @@ static int run(mddev_t *mddev)
                           (old_disks-max_degraded));
                /* here_old is the first stripe that we might need to read
                 * from */
-               if (here_new >= here_old) {
+               if (mddev->delta_disks == 0) {
+                       /* We cannot be sure it is safe to start an in-place
+                        * reshape.  It is only safe if user-space is monitoring
+                        * and taking constant backups.
+                        * mdadm always starts a situation like this in
+                        * readonly mode so it can take control before
+                        * allowing any writes.  So just check for that.
+                        */
+                       if ((here_new * mddev->new_chunk_sectors != 
+                            here_old * mddev->chunk_sectors) ||
+                           mddev->ro == 0) {
+                               printk(KERN_ERR "raid5: in-place reshape must be started"
+                                      " in read-only mode - aborting\n");
+                               return -EINVAL;
+                       }
+               } else if (mddev->delta_disks < 0
+                   ? (here_new * mddev->new_chunk_sectors <=
+                      here_old * mddev->chunk_sectors)
+                   : (here_new * mddev->new_chunk_sectors >=
+                      here_old * mddev->chunk_sectors)) {
                        /* Reading from the same stripe as writing to - bad */
                        printk(KERN_ERR "raid5: reshape_position too early for "
                               "auto-recovery - aborting.\n");
@@ -5078,8 +5097,15 @@ static void raid5_finish_reshape(mddev_t *mddev)
                                        mddev->degraded--;
                        for (d = conf->raid_disks ;
                             d < conf->raid_disks - mddev->delta_disks;
-                            d++)
-                               raid5_remove_disk(mddev, d);
+                            d++) {
+                               mdk_rdev_t *rdev = conf->disks[d].rdev;
+                               if (rdev && raid5_remove_disk(mddev, d) == 0) {
+                                       char nm[20];
+                                       sprintf(nm, "rd%d", rdev->raid_disk);
+                                       sysfs_remove_link(&mddev->kobj, nm);
+                                       rdev->raid_disk = -1;
+                               }
+                       }
                }
                mddev->layout = conf->algorithm;
                mddev->chunk_sectors = conf->chunk_sectors;
index cff406de3d15285b330e06248b7227c034fc6e27..fc3f9662ceae8d778f2790307050b04db8f5226d 100644
--- a/drivers/video/sh_mobile_lcdcfb.c
+++ b/drivers/video/sh_mobile_lcdcfb.c
@@ -477,6 +477,9 @@ static int sh_mobile_lcdc_start(struct sh_mobile_lcdc_priv *priv)
        /* tell the board code to enable the panel */
        for (k = 0; k < ARRAY_SIZE(priv->ch); k++) {
                ch = &priv->ch[k];
+               if (!ch->enabled)
+                       continue;
+
                board_cfg = &ch->cfg.board_cfg;
                if (board_cfg->display_on)
                        board_cfg->display_on(board_cfg->board_data);
@@ -494,6 +497,8 @@ static void sh_mobile_lcdc_stop(struct sh_mobile_lcdc_priv *priv)
        /* clean up deferred io and ask board code to disable panel */
        for (k = 0; k < ARRAY_SIZE(priv->ch); k++) {
                ch = &priv->ch[k];
+               if (!ch->enabled)
+                       continue;
 
                /* deferred io mode:
                 * flush frame, and wait for frame end interrupt
index 9edcde4974aa22a37841d8b22aac86c7e435cea6..f9a3e8942669f436ad64fa00b550a46f34cd44be 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1914,7 +1914,8 @@ static void ocfs2_adjust_adjacent_records(struct ocfs2_extent_rec *left_rec,
         * immediately to their right.
         */
        left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos);
-       if (ocfs2_is_empty_extent(&right_child_el->l_recs[0])) {
+       if (!ocfs2_rec_clusters(right_child_el, &right_child_el->l_recs[0])) {
+               BUG_ON(right_child_el->l_tree_depth);
                BUG_ON(le16_to_cpu(right_child_el->l_next_free_rec) <= 1);
                left_clusters = le32_to_cpu(right_child_el->l_recs[1].e_cpos);
        }
@@ -2476,15 +2477,37 @@ out_ret_path:
        return ret;
 }
 
-static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
-                                     struct ocfs2_path *path)
+static int ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
+                                    int subtree_index, struct ocfs2_path *path)
 {
-       int i, idx;
+       int i, idx, ret;
        struct ocfs2_extent_rec *rec;
        struct ocfs2_extent_list *el;
        struct ocfs2_extent_block *eb;
        u32 range;
 
+       /*
+        * In normal tree rotation process, we will never touch the
+        * tree branch above subtree_index and ocfs2_extend_rotate_transaction
+        * doesn't reserve the credits for them either.
+        *
+        * But we do have a special case here which will update the rightmost
+        * records for all the bh in the path.
+        * So we have to allocate extra credits and access them.
+        */
+       ret = ocfs2_extend_trans(handle,
+                                handle->h_buffer_credits + subtree_index);
+       if (ret) {
+               mlog_errno(ret);
+               goto out;
+       }
+
+       ret = ocfs2_journal_access_path(inode, handle, path);
+       if (ret) {
+               mlog_errno(ret);
+               goto out;
+       }
+
        /* Path should always be rightmost. */
        eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data;
        BUG_ON(eb->h_next_leaf_blk != 0ULL);
@@ -2505,6 +2528,8 @@ static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
 
                ocfs2_journal_dirty(handle, path->p_node[i].bh);
        }
+out:
+       return ret;
 }
 
 static void ocfs2_unlink_path(struct inode *inode, handle_t *handle,
@@ -2717,7 +2742,12 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
        if (del_right_subtree) {
                ocfs2_unlink_subtree(inode, handle, left_path, right_path,
                                     subtree_index, dealloc);
-               ocfs2_update_edge_lengths(inode, handle, left_path);
+               ret = ocfs2_update_edge_lengths(inode, handle, subtree_index,
+                                               left_path);
+               if (ret) {
+                       mlog_errno(ret);
+                       goto out;
+               }
 
                eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
                ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
@@ -3034,7 +3064,12 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
 
                ocfs2_unlink_subtree(inode, handle, left_path, path,
                                     subtree_index, dealloc);
-               ocfs2_update_edge_lengths(inode, handle, left_path);
+               ret = ocfs2_update_edge_lengths(inode, handle, subtree_index,
+                                               left_path);
+               if (ret) {
+                       mlog_errno(ret);
+                       goto out;
+               }
 
                eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
                ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
index b2c52b3a1484f1c57c4b098bf9706fe21c186ac0..b401654011a2b64c4e55f87c7916e3badc07f68e 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -193,6 +193,7 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock,
                             (unsigned long long)OCFS2_I(inode)->ip_blkno);
                        mlog(ML_ERROR, "Size %llu, clusters %u\n", (unsigned long long)i_size_read(inode), OCFS2_I(inode)->ip_clusters);
                        dump_stack();
+                       goto bail;
                }
 
                past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
@@ -894,18 +895,17 @@ struct ocfs2_write_cluster_desc {
         */
        unsigned        c_new;
        unsigned        c_unwritten;
+       unsigned        c_needs_zero;
 };
 
-static inline int ocfs2_should_zero_cluster(struct ocfs2_write_cluster_desc *d)
-{
-       return d->c_new || d->c_unwritten;
-}
-
 struct ocfs2_write_ctxt {
        /* Logical cluster position / len of write */
        u32                             w_cpos;
        u32                             w_clen;
 
+       /* First cluster allocated in a nonsparse extend */
+       u32                             w_first_new_cpos;
+
        struct ocfs2_write_cluster_desc w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE];
 
        /*
@@ -983,6 +983,7 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,
                return -ENOMEM;
 
        wc->w_cpos = pos >> osb->s_clustersize_bits;
+       wc->w_first_new_cpos = UINT_MAX;
        cend = (pos + len - 1) >> osb->s_clustersize_bits;
        wc->w_clen = cend - wc->w_cpos + 1;
        get_bh(di_bh);
@@ -1217,20 +1218,18 @@ out:
  */
 static int ocfs2_write_cluster(struct address_space *mapping,
                               u32 phys, unsigned int unwritten,
+                              unsigned int should_zero,
                               struct ocfs2_alloc_context *data_ac,
                               struct ocfs2_alloc_context *meta_ac,
                               struct ocfs2_write_ctxt *wc, u32 cpos,
                               loff_t user_pos, unsigned user_len)
 {
-       int ret, i, new, should_zero = 0;
+       int ret, i, new;
        u64 v_blkno, p_blkno;
        struct inode *inode = mapping->host;
        struct ocfs2_extent_tree et;
 
        new = phys == 0 ? 1 : 0;
-       if (new || unwritten)
-               should_zero = 1;
-
        if (new) {
                u32 tmp_pos;
 
@@ -1301,7 +1300,7 @@ static int ocfs2_write_cluster(struct address_space *mapping,
                if (tmpret) {
                        mlog_errno(tmpret);
                        if (ret == 0)
-                               tmpret = ret;
+                               ret = tmpret;
                }
        }
 
@@ -1341,7 +1340,9 @@ static int ocfs2_write_cluster_by_desc(struct address_space *mapping,
                        local_len = osb->s_clustersize - cluster_off;
 
                ret = ocfs2_write_cluster(mapping, desc->c_phys,
-                                         desc->c_unwritten, data_ac, meta_ac,
+                                         desc->c_unwritten,
+                                         desc->c_needs_zero,
+                                         data_ac, meta_ac,
                                          wc, desc->c_cpos, pos, local_len);
                if (ret) {
                        mlog_errno(ret);
@@ -1391,14 +1392,14 @@ static void ocfs2_set_target_boundaries(struct ocfs2_super *osb,
                 * newly allocated cluster.
                 */
                desc = &wc->w_desc[0];
-               if (ocfs2_should_zero_cluster(desc))
+               if (desc->c_needs_zero)
                        ocfs2_figure_cluster_boundaries(osb,
                                                        desc->c_cpos,
                                                        &wc->w_target_from,
                                                        NULL);
 
                desc = &wc->w_desc[wc->w_clen - 1];
-               if (ocfs2_should_zero_cluster(desc))
+               if (desc->c_needs_zero)
                        ocfs2_figure_cluster_boundaries(osb,
                                                        desc->c_cpos,
                                                        NULL,
@@ -1466,13 +1467,28 @@ static int ocfs2_populate_write_desc(struct inode *inode,
                        phys++;
                }
 
+               /*
+                * If w_first_new_cpos is < UINT_MAX, we have a non-sparse
+                * file that got extended.  w_first_new_cpos tells us
+                * where the newly allocated clusters are so we can
+                * zero them.
+                */
+               if (desc->c_cpos >= wc->w_first_new_cpos) {
+                       BUG_ON(phys == 0);
+                       desc->c_needs_zero = 1;
+               }
+
                desc->c_phys = phys;
                if (phys == 0) {
                        desc->c_new = 1;
+                       desc->c_needs_zero = 1;
                        *clusters_to_alloc = *clusters_to_alloc + 1;
                }
-               if (ext_flags & OCFS2_EXT_UNWRITTEN)
+
+               if (ext_flags & OCFS2_EXT_UNWRITTEN) {
                        desc->c_unwritten = 1;
+                       desc->c_needs_zero = 1;
+               }
 
                num_clusters--;
        }
@@ -1632,10 +1648,13 @@ static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
        if (newsize <= i_size_read(inode))
                return 0;
 
-       ret = ocfs2_extend_no_holes(inode, newsize, newsize - len);
+       ret = ocfs2_extend_no_holes(inode, newsize, pos);
        if (ret)
                mlog_errno(ret);
 
+       wc->w_first_new_cpos =
+               ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode));
+
        return ret;
 }
 
@@ -1644,7 +1663,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
                             struct page **pagep, void **fsdata,
                             struct buffer_head *di_bh, struct page *mmap_page)
 {
-       int ret, credits = OCFS2_INODE_UPDATE_CREDITS;
+       int ret, cluster_of_pages, credits = OCFS2_INODE_UPDATE_CREDITS;
        unsigned int clusters_to_alloc, extents_to_split;
        struct ocfs2_write_ctxt *wc;
        struct inode *inode = mapping->host;
@@ -1722,8 +1741,19 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
 
        }
 
-       ocfs2_set_target_boundaries(osb, wc, pos, len,
-                                   clusters_to_alloc + extents_to_split);
+       /*
+        * We have to zero sparse allocated clusters, unwritten extent clusters,
+        * and non-sparse clusters we just extended.  For non-sparse writes,
+        * we know zeros will only be needed in the first and/or last cluster.
+        */
+       if (clusters_to_alloc || extents_to_split ||
+           wc->w_desc[0].c_needs_zero ||
+           wc->w_desc[wc->w_clen - 1].c_needs_zero)
+               cluster_of_pages = 1;
+       else
+               cluster_of_pages = 0;
+
+       ocfs2_set_target_boundaries(osb, wc, pos, len, cluster_of_pages);
 
        handle = ocfs2_start_trans(osb, credits);
        if (IS_ERR(handle)) {
@@ -1756,8 +1786,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
         * extent.
         */
        ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos,
-                                        clusters_to_alloc + extents_to_split,
-                                        mmap_page);
+                                        cluster_of_pages, mmap_page);
        if (ret) {
                mlog_errno(ret);
                goto out_quota;
index b574431a031d5cd9721de53c46a80825f752bb9d..2f28b7de2c8d2cae30d052e54fc12fa7e91c740f 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -310,22 +310,19 @@ out_attach:
        return ret;
 }
 
-static DEFINE_SPINLOCK(dentry_list_lock);
+DEFINE_SPINLOCK(dentry_list_lock);
 
 /* We limit the number of dentry locks to drop in one go. We have
  * this limit so that we don't starve other users of ocfs2_wq. */
 #define DL_INODE_DROP_COUNT 64
 
 /* Drop inode references from dentry locks */
-void ocfs2_drop_dl_inodes(struct work_struct *work)
+static void __ocfs2_drop_dl_inodes(struct ocfs2_super *osb, int drop_count)
 {
-       struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
-                                              dentry_lock_work);
        struct ocfs2_dentry_lock *dl;
-       int drop_count = DL_INODE_DROP_COUNT;
 
        spin_lock(&dentry_list_lock);
-       while (osb->dentry_lock_list && drop_count--) {
+       while (osb->dentry_lock_list && (drop_count < 0 || drop_count--)) {
                dl = osb->dentry_lock_list;
                osb->dentry_lock_list = dl->dl_next;
                spin_unlock(&dentry_list_lock);
@@ -333,11 +330,32 @@ void ocfs2_drop_dl_inodes(struct work_struct *work)
                kfree(dl);
                spin_lock(&dentry_list_lock);
        }
-       if (osb->dentry_lock_list)
+       spin_unlock(&dentry_list_lock);
+}
+
+void ocfs2_drop_dl_inodes(struct work_struct *work)
+{
+       struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
+                                              dentry_lock_work);
+
+       __ocfs2_drop_dl_inodes(osb, DL_INODE_DROP_COUNT);
+       /*
+        * Don't queue dropping if umount is in progress. We flush the
+        * list in ocfs2_dismount_volume
+        */
+       spin_lock(&dentry_list_lock);
+       if (osb->dentry_lock_list &&
+           !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED))
                queue_work(ocfs2_wq, &osb->dentry_lock_work);
        spin_unlock(&dentry_list_lock);
 }
 
+/* Flush the whole work queue */
+void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb)
+{
+       __ocfs2_drop_dl_inodes(osb, -1);
+}
+
 /*
  * ocfs2_dentry_iput() and friends.
  *
@@ -368,7 +386,8 @@ static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb,
        /* We leave dropping of inode reference to ocfs2_wq as that can
         * possibly lead to inode deletion which gets tricky */
        spin_lock(&dentry_list_lock);
-       if (!osb->dentry_lock_list)
+       if (!osb->dentry_lock_list &&
+           !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED))
                queue_work(ocfs2_wq, &osb->dentry_lock_work);
        dl->dl_next = osb->dentry_lock_list;
        osb->dentry_lock_list = dl;
index faa12e75f98d117a9cf2f7f0bf519a05b83c5401..f5dd1789acf1b4527373217ce7c6b0374695eb17 100644
--- a/fs/ocfs2/dcache.h
+++ b/fs/ocfs2/dcache.h
@@ -49,10 +49,13 @@ struct ocfs2_dentry_lock {
 int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode,
                             u64 parent_blkno);
 
+extern spinlock_t dentry_list_lock;
+
 void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
                           struct ocfs2_dentry_lock *dl);
 
 void ocfs2_drop_dl_inodes(struct work_struct *work);
+void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb);
 
 struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno,
                                      int skip_unhashed);
index d07ddbe4b2836d3e3e6d0f002808d8c36ef6c591..81eff8e583222747cc824ce02d6edde94378e711 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -103,7 +103,6 @@ static void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
                     lock->ast_pending, lock->ml.type);
                BUG();
        }
-       BUG_ON(!list_empty(&lock->ast_list));
        if (lock->ast_pending)
                mlog(0, "lock has an ast getting flushed right now\n");
 
index bcb9260c37359a832c601f0c00e718094063af25..43e6e328056902a0fdd8cb98c7961b76a912c3dd 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1118,7 +1118,7 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
 
        mlog(0, "%s:%.*s: sending mig lockres (%s) to %u\n",
             dlm->name, res->lockname.len, res->lockname.name,
-            orig_flags & DLM_MRES_MIGRATION ? "migrate" : "recovery",
+            orig_flags & DLM_MRES_MIGRATION ? "migration" : "recovery",
             send_to);
 
        /* send it */
index 62442e413a001cfbe57607c43e06d408418cf13e..aa501d3f93f1bcec3af38a24361ab61cda2cc052 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1851,6 +1851,7 @@ relock:
                if (ret)
                        goto out_dio;
 
+               count = ocount;
                ret = generic_write_checks(file, ppos, &count,
                                           S_ISBLK(inode->i_mode));
                if (ret)
@@ -1918,8 +1919,10 @@ out_sems:
 
        mutex_unlock(&inode->i_mutex);
 
+       if (written)
+               ret = written;
        mlog_exit(ret);
-       return written ? written : ret;
+       return ret;
 }
 
 static int ocfs2_splice_to_file(struct pipe_inode_info *pipe,
index f033760ecbeaae0dde0944d190828aedb7b9c5d0..c48b93ac6b65c55bc2b90daf8b3efa1397b9b6e4 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1954,10 +1954,16 @@ void ocfs2_orphan_scan_init(struct ocfs2_super *osb)
        os->os_osb = osb;
        os->os_count = 0;
        os->os_seqno = 0;
-       os->os_scantime = CURRENT_TIME;
        mutex_init(&os->os_lock);
        INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work);
+}
 
+void ocfs2_orphan_scan_start(struct ocfs2_super *osb)
+{
+       struct ocfs2_orphan_scan *os;
+
+       os = &osb->osb_orphan_scan;
+       os->os_scantime = CURRENT_TIME;
        if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
                atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
        else {
index 5432c7f79cc6a959d3c39cd2cbcb41663ef599b5..2c3222aec6228a83b9acac2122e42beea94042fb 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -145,6 +145,7 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb,
 
 /* Exported only for the journal struct init code in super.c. Do not call. */
 void ocfs2_orphan_scan_init(struct ocfs2_super *osb);
+void ocfs2_orphan_scan_start(struct ocfs2_super *osb);
 void ocfs2_orphan_scan_stop(struct ocfs2_super *osb);
 void ocfs2_orphan_scan_exit(struct ocfs2_super *osb);
 
@@ -329,20 +330,27 @@ int                  ocfs2_journal_dirty(handle_t *handle,
 /* extended attribute block update */
 #define OCFS2_XATTR_BLOCK_UPDATE_CREDITS 1
 
+/* Update of a single quota block */
+#define OCFS2_QUOTA_BLOCK_UPDATE_CREDITS 1
+
 /* global quotafile inode update, data block */
-#define OCFS2_QINFO_WRITE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
+#define OCFS2_QINFO_WRITE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + \
+                                  OCFS2_QUOTA_BLOCK_UPDATE_CREDITS)
 
+#define OCFS2_LOCAL_QINFO_WRITE_CREDITS OCFS2_QUOTA_BLOCK_UPDATE_CREDITS
 /*
  * The two writes below can accidentally see global info dirty due
  * to set_info() quotactl so make them prepared for the writes.
  */
 /* quota data block, global info */
 /* Write to local quota file */
-#define OCFS2_QWRITE_CREDITS (OCFS2_QINFO_WRITE_CREDITS + 1)
+#define OCFS2_QWRITE_CREDITS (OCFS2_QINFO_WRITE_CREDITS + \
+                             OCFS2_QUOTA_BLOCK_UPDATE_CREDITS)
 
 /* global quota data block, local quota data block, global quota inode,
  * global quota info */
-#define OCFS2_QSYNC_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 3)
+#define OCFS2_QSYNC_CREDITS (OCFS2_QINFO_WRITE_CREDITS + \
+                            2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS)
 
 static inline int ocfs2_quota_trans_credits(struct super_block *sb)
 {
@@ -355,11 +363,6 @@ static inline int ocfs2_quota_trans_credits(struct super_block *sb)
        return credits;
 }
 
-/* Number of credits needed for removing quota structure from file */
-int ocfs2_calc_qdel_credits(struct super_block *sb, int type);
-/* Number of credits needed for initialization of new quota structure */
-int ocfs2_calc_qinit_credits(struct super_block *sb, int type);
-
 /* group extend. inode update and last group update. */
 #define OCFS2_GROUP_EXTEND_CREDITS     (OCFS2_INODE_UPDATE_CREDITS + 1)
 
index c9345ebb849343873adf6b2b67ae226f8181f65e..39e1d5a39505221a458846ab37950c922d6da76b 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -224,10 +224,12 @@ enum ocfs2_mount_options
        OCFS2_MOUNT_GRPQUOTA = 1 << 10, /* We support group quotas */
 };
 
-#define OCFS2_OSB_SOFT_RO      0x0001
-#define OCFS2_OSB_HARD_RO      0x0002
-#define OCFS2_OSB_ERROR_FS     0x0004
-#define OCFS2_DEFAULT_ATIME_QUANTUM    60
+#define OCFS2_OSB_SOFT_RO                      0x0001
+#define OCFS2_OSB_HARD_RO                      0x0002
+#define OCFS2_OSB_ERROR_FS                     0x0004
+#define OCFS2_OSB_DROP_DENTRY_LOCK_IMMED       0x0008
+
+#define OCFS2_DEFAULT_ATIME_QUANTUM            60
 
 struct ocfs2_journal;
 struct ocfs2_slot_info;
@@ -490,6 +492,18 @@ static inline void ocfs2_set_osb_flag(struct ocfs2_super *osb,
        spin_unlock(&osb->osb_lock);
 }
 
+
+static inline unsigned long  ocfs2_test_osb_flag(struct ocfs2_super *osb,
+                                                unsigned long flag)
+{
+       unsigned long ret;
+
+       spin_lock(&osb->osb_lock);
+       ret = osb->osb_flags & flag;
+       spin_unlock(&osb->osb_lock);
+       return ret;
+}
+
 static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb,
                                     int hard)
 {
index 7365e2e08706ef6ea07b5f034310f5fec6fd9e4a..3fb96fcd4c81cd185db0e7b5132f831c322dedca 100644
--- a/fs/ocfs2/quota.h
+++ b/fs/ocfs2/quota.h
@@ -50,7 +50,6 @@ struct ocfs2_mem_dqinfo {
        unsigned int dqi_chunks;        /* Number of chunks in local quota file */
        unsigned int dqi_blocks;        /* Number of blocks allocated for local quota file */
        unsigned int dqi_syncms;        /* How often should we sync with other nodes */
-       unsigned int dqi_syncjiff;      /* Precomputed dqi_syncms in jiffies */
        struct list_head dqi_chunk;     /* List of chunks */
        struct inode *dqi_gqinode;      /* Global quota file inode */
        struct ocfs2_lock_res dqi_gqlock;       /* Lock protecting quota information structure */
index edfa60cd155c18e9ac67a1fe6ae1476900fed8b7..bf7742d0ee3bba05a7ceabab948aff62f27482f0 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -69,6 +69,7 @@ static void ocfs2_global_mem2diskdqb(void *dp, struct dquot *dquot)
        d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
        d->dqb_btime = cpu_to_le64(m->dqb_btime);
        d->dqb_itime = cpu_to_le64(m->dqb_itime);
+       d->dqb_pad1 = d->dqb_pad2 = 0;
 }
 
 static int ocfs2_global_is_id(void *dp, struct dquot *dquot)
@@ -211,14 +212,13 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
 
        mutex_lock_nested(&gqinode->i_mutex, I_MUTEX_QUOTA);
        if (gqinode->i_size < off + len) {
-               down_write(&OCFS2_I(gqinode)->ip_alloc_sem);
-               err = ocfs2_extend_no_holes(gqinode, off + len, off);
-               up_write(&OCFS2_I(gqinode)->ip_alloc_sem);
-               if (err < 0)
-                       goto out;
+               loff_t rounded_end =
+                               ocfs2_align_bytes_to_blocks(sb, off + len);
+
+               /* Space is already allocated in ocfs2_global_read_dquot() */
                err = ocfs2_simple_size_update(gqinode,
                                               oinfo->dqi_gqi_bh,
-                                              off + len);
+                                              rounded_end);
                if (err < 0)
                        goto out;
                new = 1;
@@ -234,7 +234,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
        }
        if (err) {
                mlog_errno(err);
-               return err;
+               goto out;
        }
        lock_buffer(bh);
        if (new)
@@ -342,7 +342,6 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
        info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
        info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
        oinfo->dqi_syncms = le32_to_cpu(dinfo.dqi_syncms);
-       oinfo->dqi_syncjiff = msecs_to_jiffies(oinfo->dqi_syncms);
        oinfo->dqi_gi.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
        oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
        oinfo->dqi_gi.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
@@ -352,7 +351,7 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
        oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi);
        INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn);
        queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work,
-                          oinfo->dqi_syncjiff);
+                          msecs_to_jiffies(oinfo->dqi_syncms));
 
 out_err:
        mlog_exit(status);
@@ -402,13 +401,36 @@ int ocfs2_global_write_info(struct super_block *sb, int type)
        return err;
 }
 
+static int ocfs2_global_qinit_alloc(struct super_block *sb, int type)
+{
+       struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
+
+       /*
+        * We may need to allocate tree blocks and a leaf block but not the
+        * root block
+        */
+       return oinfo->dqi_gi.dqi_qtree_depth;
+}
+
+static int ocfs2_calc_global_qinit_credits(struct super_block *sb, int type)
+{
+       /* We modify all the allocated blocks, tree root, and info block */
+       return (ocfs2_global_qinit_alloc(sb, type) + 2) *
+                       OCFS2_QUOTA_BLOCK_UPDATE_CREDITS;
+}
+
 /* Read in information from global quota file and acquire a reference to it.
  * dquot_acquire() has already started the transaction and locked quota file */
 int ocfs2_global_read_dquot(struct dquot *dquot)
 {
        int err, err2, ex = 0;
-       struct ocfs2_mem_dqinfo *info =
-                       sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
+       struct super_block *sb = dquot->dq_sb;
+       int type = dquot->dq_type;
+       struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv;
+       struct ocfs2_super *osb = OCFS2_SB(sb);
+       struct inode *gqinode = info->dqi_gqinode;
+       int need_alloc = ocfs2_global_qinit_alloc(sb, type);
+       handle_t *handle = NULL;
 
        err = ocfs2_qinfo_lock(info, 0);
        if (err < 0)
@@ -419,14 +441,33 @@ int ocfs2_global_read_dquot(struct dquot *dquot)
        OCFS2_DQUOT(dquot)->dq_use_count++;
        OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
        OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;
+       ocfs2_qinfo_unlock(info, 0);
+
        if (!dquot->dq_off) {   /* No real quota entry? */
-               /* Upgrade to exclusive lock for allocation */
-               ocfs2_qinfo_unlock(info, 0);
-               err = ocfs2_qinfo_lock(info, 1);
-               if (err < 0)
-                       goto out_qlock;
                ex = 1;
+               /*
+                * Add blocks to quota file before we start a transaction since
+                * locking allocators ranks above a transaction start
+                */
+               WARN_ON(journal_current_handle());
+               down_write(&OCFS2_I(gqinode)->ip_alloc_sem);
+               err = ocfs2_extend_no_holes(gqinode,
+                       gqinode->i_size + (need_alloc << sb->s_blocksize_bits),
+                       gqinode->i_size);
+               up_write(&OCFS2_I(gqinode)->ip_alloc_sem);
+               if (err < 0)
+                       goto out;
        }
+
+       handle = ocfs2_start_trans(osb,
+                                  ocfs2_calc_global_qinit_credits(sb, type));
+       if (IS_ERR(handle)) {
+               err = PTR_ERR(handle);
+               goto out;
+       }
+       err = ocfs2_qinfo_lock(info, ex);
+       if (err < 0)
+               goto out_trans;
        err = qtree_write_dquot(&info->dqi_gi, dquot);
        if (ex && info_dirty(sb_dqinfo(dquot->dq_sb, dquot->dq_type))) {
                err2 = __ocfs2_global_write_info(dquot->dq_sb, dquot->dq_type);
@@ -438,6 +479,9 @@ out_qlock:
                ocfs2_qinfo_unlock(info, 1);
        else
                ocfs2_qinfo_unlock(info, 0);
+out_trans:
+       if (handle)
+               ocfs2_commit_trans(osb, handle);
 out:
        if (err < 0)
                mlog_errno(err);
@@ -607,7 +651,7 @@ static void qsync_work_fn(struct work_struct *work)
 
        dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type);
        queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work,
-                          oinfo->dqi_syncjiff);
+                          msecs_to_jiffies(oinfo->dqi_syncms));
 }
 
 /*
@@ -635,20 +679,18 @@ out:
        return status;
 }
 
-int ocfs2_calc_qdel_credits(struct super_block *sb, int type)
+static int ocfs2_calc_qdel_credits(struct super_block *sb, int type)
 {
-       struct ocfs2_mem_dqinfo *oinfo;
-       int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
-                                   OCFS2_FEATURE_RO_COMPAT_GRPQUOTA };
-
-       if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type]))
-               return 0;
-
-       oinfo = sb_dqinfo(sb, type)->dqi_priv;
-       /* We modify tree, leaf block, global info, local chunk header,
-        * global and local inode */
-       return oinfo->dqi_gi.dqi_qtree_depth + 2 + 1 +
-              2 * OCFS2_INODE_UPDATE_CREDITS;
+       struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
+       /*
+        * We modify tree, leaf block, global info, local chunk header,
+        * global and local inode; OCFS2_QINFO_WRITE_CREDITS already
+        * accounts for inode update
+        */
+       return (oinfo->dqi_gi.dqi_qtree_depth + 2) *
+              OCFS2_QUOTA_BLOCK_UPDATE_CREDITS +
+              OCFS2_QINFO_WRITE_CREDITS +
+              OCFS2_INODE_UPDATE_CREDITS;
 }
 
 static int ocfs2_release_dquot(struct dquot *dquot)
@@ -680,33 +722,10 @@ out:
        return status;
 }
 
-int ocfs2_calc_qinit_credits(struct super_block *sb, int type)
-{
-       struct ocfs2_mem_dqinfo *oinfo;
-       int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
-                                   OCFS2_FEATURE_RO_COMPAT_GRPQUOTA };
-       struct ocfs2_dinode *lfe, *gfe;
-
-       if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type]))
-               return 0;
-
-       oinfo = sb_dqinfo(sb, type)->dqi_priv;
-       gfe = (struct ocfs2_dinode *)oinfo->dqi_gqi_bh->b_data;
-       lfe = (struct ocfs2_dinode *)oinfo->dqi_lqi_bh->b_data;
-       /* We can extend local file + global file. In local file we
-        * can modify info, chunk header block and dquot block. In
-        * global file we can modify info, tree and leaf block */
-       return ocfs2_calc_extend_credits(sb, &lfe->id2.i_list, 0) +
-              ocfs2_calc_extend_credits(sb, &gfe->id2.i_list, 0) +
-              3 + oinfo->dqi_gi.dqi_qtree_depth + 2;
-}
-
 static int ocfs2_acquire_dquot(struct dquot *dquot)
 {
-       handle_t *handle;
        struct ocfs2_mem_dqinfo *oinfo =
                        sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
-       struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
        int status = 0;
 
        mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type);
@@ -715,16 +734,7 @@ static int ocfs2_acquire_dquot(struct dquot *dquot)
        status = ocfs2_lock_global_qf(oinfo, 1);
        if (status < 0)
                goto out;
-       handle = ocfs2_start_trans(osb,
-               ocfs2_calc_qinit_credits(dquot->dq_sb, dquot->dq_type));
-       if (IS_ERR(handle)) {
-               status = PTR_ERR(handle);
-               mlog_errno(status);
-               goto out_ilock;
-       }
        status = dquot_acquire(dquot);
-       ocfs2_commit_trans(osb, handle);
-out_ilock:
        ocfs2_unlock_global_qf(oinfo, 1);
 out:
        mlog_exit(status);
index 5a460fa8255384f615d2dd7ca8836d3091fc58b5..bdb09cb6e1fef216821b7e7a8d94503ce7d097fd 100644 (file)
@@ -20,6 +20,7 @@
 #include "sysfile.h"
 #include "dlmglue.h"
 #include "quota.h"
+#include "uptodate.h"
 
 /* Number of local quota structures per block */
 static inline unsigned int ol_quota_entries_per_block(struct super_block *sb)
@@ -100,7 +101,8 @@ static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh,
        handle_t *handle;
        int status;
 
-       handle = ocfs2_start_trans(OCFS2_SB(sb), 1);
+       handle = ocfs2_start_trans(OCFS2_SB(sb),
+                                  OCFS2_QUOTA_BLOCK_UPDATE_CREDITS);
        if (IS_ERR(handle)) {
                status = PTR_ERR(handle);
                mlog_errno(status);
@@ -610,7 +612,8 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
                        goto out_bh;
                /* Mark quota file as clean if we are recovering quota file of
                 * some other node. */
-               handle = ocfs2_start_trans(osb, 1);
+               handle = ocfs2_start_trans(osb,
+                                          OCFS2_LOCAL_QINFO_WRITE_CREDITS);
                if (IS_ERR(handle)) {
                        status = PTR_ERR(handle);
                        mlog_errno(status);
@@ -940,7 +943,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
        struct ocfs2_local_disk_chunk *dchunk;
        int status;
        handle_t *handle;
-       struct buffer_head *bh = NULL;
+       struct buffer_head *bh = NULL, *dbh = NULL;
        u64 p_blkno;
 
        /* We are protected by dqio_sem so no locking needed */
@@ -964,32 +967,35 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
                mlog_errno(status);
                goto out;
        }
+       /* Local quota info and two new blocks we initialize */
+       handle = ocfs2_start_trans(OCFS2_SB(sb),
+                       OCFS2_LOCAL_QINFO_WRITE_CREDITS +
+                       2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS);
+       if (IS_ERR(handle)) {
+               status = PTR_ERR(handle);
+               mlog_errno(status);
+               goto out;
+       }
 
+       /* Initialize chunk header */
        down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
        status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks,
                                             &p_blkno, NULL, NULL);
        up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
        if (status < 0) {
                mlog_errno(status);
-               goto out;
+               goto out_trans;
        }
        bh = sb_getblk(sb, p_blkno);
        if (!bh) {
                status = -ENOMEM;
                mlog_errno(status);
-               goto out;
+               goto out_trans;
        }
        dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data;
-
-       handle = ocfs2_start_trans(OCFS2_SB(sb), 2);
-       if (IS_ERR(handle)) {
-               status = PTR_ERR(handle);
-               mlog_errno(status);
-               goto out;
-       }
-
+       ocfs2_set_new_buffer_uptodate(lqinode, bh);
        status = ocfs2_journal_access_dq(handle, lqinode, bh,
-                                        OCFS2_JOURNAL_ACCESS_WRITE);
+                                        OCFS2_JOURNAL_ACCESS_CREATE);
        if (status < 0) {
                mlog_errno(status);
                goto out_trans;
@@ -999,7 +1005,6 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
        memset(dchunk->dqc_bitmap, 0,
               sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) -
               OCFS2_QBLK_RESERVED_SPACE);
-       set_buffer_uptodate(bh);
        unlock_buffer(bh);
        status = ocfs2_journal_dirty(handle, bh);
        if (status < 0) {
@@ -1007,6 +1012,38 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
                goto out_trans;
        }
 
+       /* Initialize new block with structures */
+       down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
+       status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks + 1,
+                                            &p_blkno, NULL, NULL);
+       up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
+       if (status < 0) {
+               mlog_errno(status);
+               goto out_trans;
+       }
+       dbh = sb_getblk(sb, p_blkno);
+       if (!dbh) {
+               status = -ENOMEM;
+               mlog_errno(status);
+               goto out_trans;
+       }
+       ocfs2_set_new_buffer_uptodate(lqinode, dbh);
+       status = ocfs2_journal_access_dq(handle, lqinode, dbh,
+                                        OCFS2_JOURNAL_ACCESS_CREATE);
+       if (status < 0) {
+               mlog_errno(status);
+               goto out_trans;
+       }
+       lock_buffer(dbh);
+       memset(dbh->b_data, 0, sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE);
+       unlock_buffer(dbh);
+       status = ocfs2_journal_dirty(handle, dbh);
+       if (status < 0) {
+               mlog_errno(status);
+               goto out_trans;
+       }
+
+       /* Update local quotafile info */
        oinfo->dqi_blocks += 2;
        oinfo->dqi_chunks++;
        status = ocfs2_local_write_info(sb, type);
@@ -1031,6 +1068,7 @@ out_trans:
        ocfs2_commit_trans(OCFS2_SB(sb), handle);
 out:
        brelse(bh);
+       brelse(dbh);
        kmem_cache_free(ocfs2_qf_chunk_cachep, chunk);
        return ERR_PTR(status);
 }
@@ -1048,6 +1086,8 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
        struct ocfs2_local_disk_chunk *dchunk;
        int epb = ol_quota_entries_per_block(sb);
        unsigned int chunk_blocks;
+       struct buffer_head *bh;
+       u64 p_blkno;
        int status;
        handle_t *handle;
 
@@ -1075,12 +1115,49 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
                mlog_errno(status);
                goto out;
        }
-       handle = ocfs2_start_trans(OCFS2_SB(sb), 2);
+
+       /* Get buffer from the just added block */
+       down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
+       status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks,
+                                            &p_blkno, NULL, NULL);
+       up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
+       if (status < 0) {
+               mlog_errno(status);
+               goto out;
+       }
+       bh = sb_getblk(sb, p_blkno);
+       if (!bh) {
+               status = -ENOMEM;
+               mlog_errno(status);
+               goto out;
+       }
+       ocfs2_set_new_buffer_uptodate(lqinode, bh);
+
+       /* Local quota info, chunk header and the new block we initialize */
+       handle = ocfs2_start_trans(OCFS2_SB(sb),
+                       OCFS2_LOCAL_QINFO_WRITE_CREDITS +
+                       2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS);
        if (IS_ERR(handle)) {
                status = PTR_ERR(handle);
                mlog_errno(status);
                goto out;
        }
+       /* Zero created block */
+       status = ocfs2_journal_access_dq(handle, lqinode, bh,
+                                OCFS2_JOURNAL_ACCESS_CREATE);
+       if (status < 0) {
+               mlog_errno(status);
+               goto out_trans;
+       }
+       lock_buffer(bh);
+       memset(bh->b_data, 0, sb->s_blocksize);
+       unlock_buffer(bh);
+       status = ocfs2_journal_dirty(handle, bh);
+       if (status < 0) {
+               mlog_errno(status);
+               goto out_trans;
+       }
+       /* Update chunk header */
        status = ocfs2_journal_access_dq(handle, lqinode, chunk->qc_headerbh,
                                 OCFS2_JOURNAL_ACCESS_WRITE);
        if (status < 0) {
@@ -1097,6 +1174,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
                mlog_errno(status);
                goto out_trans;
        }
+       /* Update file header */
        oinfo->dqi_blocks++;
        status = ocfs2_local_write_info(sb, type);
        if (status < 0) {
index 3f661376a2ded76e08769f1766de3c1523d448d9..e49c41050264dfa9d36afc48e742b626ee9d2755 100644 (file)
@@ -17,6 +17,7 @@
  * General Public License for more details.
  */
 
+#include <linux/kernel.h>
 #include <linux/crc32.h>
 #include <linux/module.h>
 
@@ -153,7 +154,7 @@ static int status_map[] = {
 
 static int dlm_status_to_errno(enum dlm_status status)
 {
-       BUG_ON(status > (sizeof(status_map) / sizeof(status_map[0])));
+       BUG_ON(status < 0 || status >= ARRAY_SIZE(status_map));
 
        return status_map[status];
 }
index 7efb349fb9bdafe0678bd63786db244bba410b4a..b0ee0fdf799a514c9555b34f4983c734825badde 100644 (file)
@@ -777,6 +777,7 @@ static int ocfs2_sb_probe(struct super_block *sb,
                }
                di = (struct ocfs2_dinode *) (*bh)->b_data;
                memset(stats, 0, sizeof(struct ocfs2_blockcheck_stats));
+               spin_lock_init(&stats->b_lock);
                status = ocfs2_verify_volume(di, *bh, blksize, stats);
                if (status >= 0)
                        goto bail;
@@ -1182,7 +1183,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
        wake_up(&osb->osb_mount_event);
 
        /* Start this when the mount is almost sure of being successful */
-       ocfs2_orphan_scan_init(osb);
+       ocfs2_orphan_scan_start(osb);
 
        mlog_exit(status);
        return status;
@@ -1213,14 +1214,27 @@ static int ocfs2_get_sb(struct file_system_type *fs_type,
                           mnt);
 }
 
+static void ocfs2_kill_sb(struct super_block *sb)
+{
+       struct ocfs2_super *osb = OCFS2_SB(sb);
+
+       /* Prevent further queueing of inode drop events */
+       spin_lock(&dentry_list_lock);
+       ocfs2_set_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED);
+       spin_unlock(&dentry_list_lock);
+       /* Wait for work to finish and/or remove it */
+       cancel_work_sync(&osb->dentry_lock_work);
+
+       kill_block_super(sb);
+}
+
 static struct file_system_type ocfs2_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "ocfs2",
        .get_sb         = ocfs2_get_sb, /* is this called when we mount
                                        * the fs? */
-       .kill_sb        = kill_block_super, /* set to the generic one
-                                            * right now, but do we
-                                            * need to change that? */
+       .kill_sb        = ocfs2_kill_sb,
+
        .fs_flags       = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE,
        .next           = NULL
 };
@@ -1819,6 +1833,12 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
 
        debugfs_remove(osb->osb_ctxt);
 
+       /*
+        * Flush inode dropping work queue so that deletes are
+        * performed while the filesystem is still working
+        */
+       ocfs2_drop_all_dl_inodes(osb);
+
        /* Orphan scan should be stopped as early as possible */
        ocfs2_orphan_scan_stop(osb);
 
@@ -1981,6 +2001,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
        snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u",
                 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
 
+       ocfs2_orphan_scan_init(osb);
+
        status = ocfs2_recovery_init(osb);
        if (status) {
                mlog(ML_ERROR, "Unable to initialize recovery state\n");
index ba320e250747fd01dedcd06a85df0d2e867dbe44..d1a27cda984f10811fb66fe7a7a53e76f6036d34 100644 (file)
@@ -1052,7 +1052,8 @@ static int ocfs2_xattr_block_get(struct inode *inode,
        struct ocfs2_xattr_block *xb;
        struct ocfs2_xattr_value_root *xv;
        size_t size;
-       int ret = -ENODATA, name_offset, name_len, block_off, i;
+       int ret = -ENODATA, name_offset, name_len, i;
+       int uninitialized_var(block_off);
 
        xs->bucket = ocfs2_xattr_bucket_new(inode);
        if (!xs->bucket) {
index a9d823a93fe89e9bbf11db6a48169ede758af39e..b53f7006cc4e0bb5e1ad5f09ae76249d150d2ca4 100644 (file)
@@ -115,7 +115,7 @@ enum perf_counter_sample_format {
        PERF_SAMPLE_TID                         = 1U << 1,
        PERF_SAMPLE_TIME                        = 1U << 2,
        PERF_SAMPLE_ADDR                        = 1U << 3,
-       PERF_SAMPLE_GROUP                       = 1U << 4,
+       PERF_SAMPLE_READ                        = 1U << 4,
        PERF_SAMPLE_CALLCHAIN                   = 1U << 5,
        PERF_SAMPLE_ID                          = 1U << 6,
        PERF_SAMPLE_CPU                         = 1U << 7,
@@ -127,16 +127,32 @@ enum perf_counter_sample_format {
 };
 
 /*
- * Bits that can be set in attr.read_format to request that
- * reads on the counter should return the indicated quantities,
- * in increasing order of bit value, after the counter value.
+ * The format of the data returned by read() on a perf counter fd,
+ * as specified by attr.read_format:
+ *
+ * struct read_format {
+ *     { u64           value;
+ *       { u64         time_enabled; } && PERF_FORMAT_ENABLED
+ *       { u64         time_running; } && PERF_FORMAT_RUNNING
+ *       { u64         id;           } && PERF_FORMAT_ID
+ *     } && !PERF_FORMAT_GROUP
+ *
+ *     { u64           nr;
+ *       { u64         time_enabled; } && PERF_FORMAT_ENABLED
+ *       { u64         time_running; } && PERF_FORMAT_RUNNING
+ *       { u64         value;
+ *         { u64       id;           } && PERF_FORMAT_ID
+ *       }             cntr[nr];
+ *     } && PERF_FORMAT_GROUP
+ * };
  */
 enum perf_counter_read_format {
        PERF_FORMAT_TOTAL_TIME_ENABLED          = 1U << 0,
        PERF_FORMAT_TOTAL_TIME_RUNNING          = 1U << 1,
        PERF_FORMAT_ID                          = 1U << 2,
+       PERF_FORMAT_GROUP                       = 1U << 3,
 
-       PERF_FORMAT_MAX = 1U << 3,              /* non-ABI */
+       PERF_FORMAT_MAX = 1U << 4,              /* non-ABI */
 };
 
 #define PERF_ATTR_SIZE_VER0    64      /* sizeof first published struct */
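To make the layout documented above concrete, here is a minimal user-space sketch of parsing the !PERF_FORMAT_GROUP case. It is illustrative only, not part of this patch, and assumes a counter fd already opened with read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID; the struct and helper names are made up:

#include <stdio.h>
#include <unistd.h>
#include <linux/types.h>

struct read_single {
	__u64 value;
	__u64 time_enabled;
	__u64 time_running;
	__u64 id;
};

static int read_counter(int fd)
{
	struct read_single rs;

	if (read(fd, &rs, sizeof(rs)) != (ssize_t)sizeof(rs))
		return -1;
	/* scale for time-multiplexed counters: value * enabled / running */
	printf("id %llu: value %llu (enabled %llu, running %llu)\n",
	       (unsigned long long)rs.id,
	       (unsigned long long)rs.value,
	       (unsigned long long)rs.time_enabled,
	       (unsigned long long)rs.time_running);
	return 0;
}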
@@ -343,10 +359,8 @@ enum perf_event_type {
         * struct {
         *      struct perf_event_header        header;
         *      u32                             pid, tid;
-        *      u64                             value;
-        *      { u64           time_enabled;   } && PERF_FORMAT_ENABLED
-        *      { u64           time_running;   } && PERF_FORMAT_RUNNING
-        *      { u64           parent_id;      } && PERF_FORMAT_ID
+        *
+        *      struct read_format              values;
         * };
         */
        PERF_EVENT_READ                 = 8,
@@ -364,11 +378,22 @@ enum perf_event_type {
         *      { u32                   cpu, res; } && PERF_SAMPLE_CPU
         *      { u64                   period;   } && PERF_SAMPLE_PERIOD
         *
-        *      { u64                   nr;
-        *        { u64 id, val; }      cnt[nr];  } && PERF_SAMPLE_GROUP
+        *      { struct read_format    values;   } && PERF_SAMPLE_READ
         *
         *      { u64                   nr,
         *        u64                   ips[nr];  } && PERF_SAMPLE_CALLCHAIN
+        *
+        *      #
+        *      # The RAW record below is opaque data wrt the ABI
+        *      #
+        *      # That is, the ABI doesn't make any promises wrt
+        *      # the stability of its content; it may vary depending
+        *      # on event, hardware, kernel version and phase of
+        *      # the moon.
+        *      #
+        *      # In other words, PERF_SAMPLE_RAW contents are not an ABI.
+        *      #
+        *
         *      { u32                   size;
         *        char                  data[size];}&& PERF_SAMPLE_RAW
         * };
@@ -694,6 +719,8 @@ struct perf_sample_data {
 
 extern int perf_counter_overflow(struct perf_counter *counter, int nmi,
                                 struct perf_sample_data *data);
+extern void perf_counter_output(struct perf_counter *counter, int nmi,
+                               struct perf_sample_data *data);
 
 /*
  * Return 1 for a software counter, 0 for a hardware counter
index 6788e1a4d4ca63e8cfc3b288f87d456334064c29..cf3c2f5dba51834215b137e5385b2654f220fc42 100644 (file)
@@ -77,7 +77,14 @@ struct task_struct;
 #define __WAIT_BIT_KEY_INITIALIZER(word, bit)                          \
        { .flags = word, .bit_nr = bit, }
 
-extern void init_waitqueue_head(wait_queue_head_t *q);
+extern void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *);
+
+#define init_waitqueue_head(q)                         \
+       do {                                            \
+               static struct lock_class_key __key;     \
+                                                       \
+               __init_waitqueue_head((q), &__key);     \
+       } while (0)
 
 #ifdef CONFIG_LOCKDEP
 # define __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) \
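The point of the macro form is that every init_waitqueue_head() call site now carries its own static struct lock_class_key, so lockdep can tell waitqueue locks initialized from different call sites apart. A minimal sketch of the effect (illustrative module code, not from this patch; the names are made up):

#include <linux/module.h>
#include <linux/wait.h>

static wait_queue_head_t foo_wq;
static wait_queue_head_t bar_wq;

static int __init wq_key_example_init(void)
{
	init_waitqueue_head(&foo_wq);	/* lockdep class keyed to this call site */
	init_waitqueue_head(&bar_wq);	/* a distinct class for this call site */
	return 0;
}
module_init(wq_key_example_init);
MODULE_LICENSE("GPL");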
index 0672ff88f159f3041206750b3427fdd68aa0afc1..e18cfbdc71904d0260d505bd73a41d06a38982d9 100644 (file)
@@ -1010,15 +1010,19 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
  * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
  * q:  the futex_q
  * key:        the key of the requeue target futex
+ * hb:  the hash_bucket of the requeue target futex
  *
  * During futex_requeue, with requeue_pi=1, it is possible to acquire the
  * target futex if it is uncontended or via a lock steal.  Set the futex_q key
  * to the requeue target futex so the waiter can detect the wakeup on the right
  * futex, but remove it from the hb and NULL the rt_waiter so it can detect
- * atomic lock acquisition.  Must be called with the q->lock_ptr held.
+ * atomic lock acquisition.  Set the q->lock_ptr to the requeue target hb->lock
+ * to protect access to the pi_state to fixup the owner later.  Must be called
+ * with both q->lock_ptr and hb->lock held.
  */
 static inline
-void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key)
+void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
+                          struct futex_hash_bucket *hb)
 {
        drop_futex_key_refs(&q->key);
        get_futex_key_refs(key);
@@ -1030,6 +1034,11 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key)
        WARN_ON(!q->rt_waiter);
        q->rt_waiter = NULL;
 
+       q->lock_ptr = &hb->lock;
+#ifdef CONFIG_DEBUG_PI_LIST
+       q->list.plist.lock = &hb->lock;
+#endif
+
        wake_up_state(q->task, TASK_NORMAL);
 }
 
@@ -1088,7 +1097,7 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
        ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
                                   set_waiters);
        if (ret == 1)
-               requeue_pi_wake_futex(top_waiter, key2);
+               requeue_pi_wake_futex(top_waiter, key2, hb2);
 
        return ret;
 }
@@ -1247,8 +1256,15 @@ retry_private:
                if (!match_futex(&this->key, &key1))
                        continue;
 
-               WARN_ON(!requeue_pi && this->rt_waiter);
-               WARN_ON(requeue_pi && !this->rt_waiter);
+               /*
+                * FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI should always
+                * be paired with each other and no other futex ops.
+                */
+               if ((requeue_pi && !this->rt_waiter) ||
+                   (!requeue_pi && this->rt_waiter)) {
+                       ret = -EINVAL;
+                       break;
+               }
 
                /*
                 * Wake nr_wake waiters.  For requeue_pi, if we acquired the
@@ -1273,7 +1289,7 @@ retry_private:
                                                        this->task, 1);
                        if (ret == 1) {
                                /* We got the lock. */
-                               requeue_pi_wake_futex(this, &key2);
+                               requeue_pi_wake_futex(this, &key2, hb2);
                                continue;
                        } else if (ret) {
                                /* -EDEADLK */
index d607a5b9ee29923617d462288e9853327c7d1527..235716556bf16ed9b97c19a9578ef24dec19edb1 100644 (file)
@@ -180,7 +180,8 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
        int cmd = op & FUTEX_CMD_MASK;
 
        if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
-                     cmd == FUTEX_WAIT_BITSET)) {
+                     cmd == FUTEX_WAIT_BITSET ||
+                     cmd == FUTEX_WAIT_REQUEUE_PI)) {
                if (get_compat_timespec(&ts, utime))
                        return -EFAULT;
                if (!timespec_valid(&ts))
@@ -191,7 +192,8 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
                        t = ktime_add_safe(ktime_get(), t);
                tp = &t;
        }
-       if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE)
+       if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
+           cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
                val2 = (int) (unsigned long) utime;
 
        return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
index 61c679db4687a853c52f7d5b23b5ba5e267b4c17..d222515a5a062db18a1e38e5b75f8fd5cd38f745 100644 (file)
@@ -761,7 +761,6 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 {
        struct irq_desc *desc = irq_to_desc(irq);
        struct irqaction *action, **action_ptr;
-       struct task_struct *irqthread;
        unsigned long flags;
 
        WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq);
@@ -809,9 +808,6 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
                        desc->chip->disable(irq);
        }
 
-       irqthread = action->thread;
-       action->thread = NULL;
-
        spin_unlock_irqrestore(&desc->lock, flags);
 
        unregister_handler_proc(irq, action);
@@ -819,12 +815,6 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
        /* Make sure it's not being used on another CPU: */
        synchronize_irq(irq);
 
-       if (irqthread) {
-               if (!test_bit(IRQTF_DIED, &action->thread_flags))
-                       kthread_stop(irqthread);
-               put_task_struct(irqthread);
-       }
-
 #ifdef CONFIG_DEBUG_SHIRQ
        /*
         * It's a shared IRQ -- the driver ought to be prepared for an IRQ
@@ -840,6 +830,13 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
                local_irq_restore(flags);
        }
 #endif
+
+       if (action->thread) {
+               if (!test_bit(IRQTF_DIED, &action->thread_flags))
+                       kthread_stop(action->thread);
+               put_task_struct(action->thread);
+       }
+
        return action;
 }
 
index b0b20a07f3942422f614a4d0f429a23d19db396e..534e20d14d631b44cefc136f99200b2257af378f 100644 (file)
@@ -88,6 +88,7 @@ void __weak hw_perf_disable(void)             { barrier(); }
 void __weak hw_perf_enable(void)               { barrier(); }
 
 void __weak hw_perf_counter_setup(int cpu)     { barrier(); }
+void __weak hw_perf_counter_setup_online(int cpu)      { barrier(); }
 
 int __weak
 hw_perf_group_sched_in(struct perf_counter *group_leader,
@@ -306,6 +307,10 @@ counter_sched_out(struct perf_counter *counter,
                return;
 
        counter->state = PERF_COUNTER_STATE_INACTIVE;
+       if (counter->pending_disable) {
+               counter->pending_disable = 0;
+               counter->state = PERF_COUNTER_STATE_OFF;
+       }
        counter->tstamp_stopped = ctx->time;
        counter->pmu->disable(counter);
        counter->oncpu = -1;
@@ -1691,7 +1696,32 @@ static int perf_release(struct inode *inode, struct file *file)
        return 0;
 }
 
-static u64 perf_counter_read_tree(struct perf_counter *counter)
+static int perf_counter_read_size(struct perf_counter *counter)
+{
+       int entry = sizeof(u64); /* value */
+       int size = 0;
+       int nr = 1;
+
+       if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+               size += sizeof(u64);
+
+       if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+               size += sizeof(u64);
+
+       if (counter->attr.read_format & PERF_FORMAT_ID)
+               entry += sizeof(u64);
+
+       if (counter->attr.read_format & PERF_FORMAT_GROUP) {
+               nr += counter->group_leader->nr_siblings;
+               size += sizeof(u64);
+       }
+
+       size += entry * nr;
+
+       return size;
+}
+
+static u64 perf_counter_read_value(struct perf_counter *counter)
 {
        struct perf_counter *child;
        u64 total = 0;
@@ -1703,14 +1733,96 @@ static u64 perf_counter_read_tree(struct perf_counter *counter)
        return total;
 }
 
+static int perf_counter_read_entry(struct perf_counter *counter,
+                                  u64 read_format, char __user *buf)
+{
+       int n = 0, count = 0;
+       u64 values[2];
+
+       values[n++] = perf_counter_read_value(counter);
+       if (read_format & PERF_FORMAT_ID)
+               values[n++] = primary_counter_id(counter);
+
+       count = n * sizeof(u64);
+
+       if (copy_to_user(buf, values, count))
+               return -EFAULT;
+
+       return count;
+}
+
+static int perf_counter_read_group(struct perf_counter *counter,
+                                  u64 read_format, char __user *buf)
+{
+       struct perf_counter *leader = counter->group_leader, *sub;
+       int n = 0, size = 0, err = -EFAULT;
+       u64 values[3];
+
+       values[n++] = 1 + leader->nr_siblings;
+       if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+               values[n++] = leader->total_time_enabled +
+                       atomic64_read(&leader->child_total_time_enabled);
+       }
+       if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+               values[n++] = leader->total_time_running +
+                       atomic64_read(&leader->child_total_time_running);
+       }
+
+       size = n * sizeof(u64);
+
+       if (copy_to_user(buf, values, size))
+               return -EFAULT;
+
+       err = perf_counter_read_entry(leader, read_format, buf + size);
+       if (err < 0)
+               return err;
+
+       size += err;
+
+       list_for_each_entry(sub, &leader->sibling_list, list_entry) {
+               err = perf_counter_read_entry(sub, read_format,
+                               buf + size);
+               if (err < 0)
+                       return err;
+
+               size += err;
+       }
+
+       return size;
+}
+
+static int perf_counter_read_one(struct perf_counter *counter,
+                                u64 read_format, char __user *buf)
+{
+       u64 values[4];
+       int n = 0;
+
+       values[n++] = perf_counter_read_value(counter);
+       if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+               values[n++] = counter->total_time_enabled +
+                       atomic64_read(&counter->child_total_time_enabled);
+       }
+       if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+               values[n++] = counter->total_time_running +
+                       atomic64_read(&counter->child_total_time_running);
+       }
+       if (read_format & PERF_FORMAT_ID)
+               values[n++] = primary_counter_id(counter);
+
+       if (copy_to_user(buf, values, n * sizeof(u64)))
+               return -EFAULT;
+
+       return n * sizeof(u64);
+}
+
 /*
  * Read the performance counter - simple non blocking version for now
  */
 static ssize_t
 perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
 {
-       u64 values[4];
-       int n;
+       u64 read_format = counter->attr.read_format;
+       int ret;
 
        /*
         * Return end-of-file for a read on a counter that is in
@@ -1720,28 +1832,18 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
        if (counter->state == PERF_COUNTER_STATE_ERROR)
                return 0;
 
+       if (count < perf_counter_read_size(counter))
+               return -ENOSPC;
+
        WARN_ON_ONCE(counter->ctx->parent_ctx);
        mutex_lock(&counter->child_mutex);
-       values[0] = perf_counter_read_tree(counter);
-       n = 1;
-       if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
-               values[n++] = counter->total_time_enabled +
-                       atomic64_read(&counter->child_total_time_enabled);
-       if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
-               values[n++] = counter->total_time_running +
-                       atomic64_read(&counter->child_total_time_running);
-       if (counter->attr.read_format & PERF_FORMAT_ID)
-               values[n++] = primary_counter_id(counter);
+       if (read_format & PERF_FORMAT_GROUP)
+               ret = perf_counter_read_group(counter, read_format, buf);
+       else
+               ret = perf_counter_read_one(counter, read_format, buf);
        mutex_unlock(&counter->child_mutex);
 
-       if (count < n * sizeof(u64))
-               return -EINVAL;
-       count = n * sizeof(u64);
-
-       if (copy_to_user(buf, values, count))
-               return -EFAULT;
-
-       return count;
+       return ret;
 }
 
 static ssize_t
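For the PERF_FORMAT_GROUP path added above, the consumer sees one length-prefixed array. A hedged user-space sketch of decoding it, assuming read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID and a buffer already filled by a single read() (types and names are illustrative, not part of this patch):

#include <stdio.h>
#include <linux/types.h>

struct group_entry {
	__u64 value;
	__u64 id;	/* present because PERF_FORMAT_ID is set */
};

static void parse_group_read(const __u64 *buf)
{
	__u64 nr = buf[0];	/* 1 + nr_siblings, leader first */
	const struct group_entry *e = (const struct group_entry *)(buf + 1);
	__u64 i;

	for (i = 0; i < nr; i++)
		printf("counter id %llu: value %llu\n",
		       (unsigned long long)e[i].id,
		       (unsigned long long)e[i].value);
}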
@@ -2245,7 +2347,7 @@ static void perf_pending_counter(struct perf_pending_entry *entry)
 
        if (counter->pending_disable) {
                counter->pending_disable = 0;
-               perf_counter_disable(counter);
+               __perf_counter_disable(counter);
        }
 
        if (counter->pending_wakeup) {
@@ -2630,7 +2732,80 @@ static u32 perf_counter_tid(struct perf_counter *counter, struct task_struct *p)
        return task_pid_nr_ns(p, counter->ns);
 }
 
-static void perf_counter_output(struct perf_counter *counter, int nmi,
+static void perf_output_read_one(struct perf_output_handle *handle,
+                                struct perf_counter *counter)
+{
+       u64 read_format = counter->attr.read_format;
+       u64 values[4];
+       int n = 0;
+
+       values[n++] = atomic64_read(&counter->count);
+       if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+               values[n++] = counter->total_time_enabled +
+                       atomic64_read(&counter->child_total_time_enabled);
+       }
+       if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+               values[n++] = counter->total_time_running +
+                       atomic64_read(&counter->child_total_time_running);
+       }
+       if (read_format & PERF_FORMAT_ID)
+               values[n++] = primary_counter_id(counter);
+
+       perf_output_copy(handle, values, n * sizeof(u64));
+}
+
+/*
+ * XXX PERF_FORMAT_GROUP vs inherited counters seems difficult.
+ */
+static void perf_output_read_group(struct perf_output_handle *handle,
+                           struct perf_counter *counter)
+{
+       struct perf_counter *leader = counter->group_leader, *sub;
+       u64 read_format = counter->attr.read_format;
+       u64 values[5];
+       int n = 0;
+
+       values[n++] = 1 + leader->nr_siblings;
+
+       if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+               values[n++] = leader->total_time_enabled;
+
+       if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+               values[n++] = leader->total_time_running;
+
+       if (leader != counter)
+               leader->pmu->read(leader);
+
+       values[n++] = atomic64_read(&leader->count);
+       if (read_format & PERF_FORMAT_ID)
+               values[n++] = primary_counter_id(leader);
+
+       perf_output_copy(handle, values, n * sizeof(u64));
+
+       list_for_each_entry(sub, &leader->sibling_list, list_entry) {
+               n = 0;
+
+               if (sub != counter)
+                       sub->pmu->read(sub);
+
+               values[n++] = atomic64_read(&sub->count);
+               if (read_format & PERF_FORMAT_ID)
+                       values[n++] = primary_counter_id(sub);
+
+               perf_output_copy(handle, values, n * sizeof(u64));
+       }
+}
+
+static void perf_output_read(struct perf_output_handle *handle,
+                            struct perf_counter *counter)
+{
+       if (counter->attr.read_format & PERF_FORMAT_GROUP)
+               perf_output_read_group(handle, counter);
+       else
+               perf_output_read_one(handle, counter);
+}
+
+void perf_counter_output(struct perf_counter *counter, int nmi,
                                struct perf_sample_data *data)
 {
        int ret;
@@ -2641,10 +2816,6 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
        struct {
                u32 pid, tid;
        } tid_entry;
-       struct {
-               u64 id;
-               u64 counter;
-       } group_entry;
        struct perf_callchain_entry *callchain = NULL;
        int callchain_size = 0;
        u64 time;
@@ -2699,10 +2870,8 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
        if (sample_type & PERF_SAMPLE_PERIOD)
                header.size += sizeof(u64);
 
-       if (sample_type & PERF_SAMPLE_GROUP) {
-               header.size += sizeof(u64) +
-                       counter->nr_siblings * sizeof(group_entry);
-       }
+       if (sample_type & PERF_SAMPLE_READ)
+               header.size += perf_counter_read_size(counter);
 
        if (sample_type & PERF_SAMPLE_CALLCHAIN) {
                callchain = perf_callchain(data->regs);
@@ -2759,26 +2928,8 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
        if (sample_type & PERF_SAMPLE_PERIOD)
                perf_output_put(&handle, data->period);
 
-       /*
-        * XXX PERF_SAMPLE_GROUP vs inherited counters seems difficult.
-        */
-       if (sample_type & PERF_SAMPLE_GROUP) {
-               struct perf_counter *leader, *sub;
-               u64 nr = counter->nr_siblings;
-
-               perf_output_put(&handle, nr);
-
-               leader = counter->group_leader;
-               list_for_each_entry(sub, &leader->sibling_list, list_entry) {
-                       if (sub != counter)
-                               sub->pmu->read(sub);
-
-                       group_entry.id = primary_counter_id(sub);
-                       group_entry.counter = atomic64_read(&sub->count);
-
-                       perf_output_put(&handle, group_entry);
-               }
-       }
+       if (sample_type & PERF_SAMPLE_READ)
+               perf_output_read(&handle, counter);
 
        if (sample_type & PERF_SAMPLE_CALLCHAIN) {
                if (callchain)
@@ -2817,8 +2968,6 @@ struct perf_read_event {
 
        u32                             pid;
        u32                             tid;
-       u64                             value;
-       u64                             format[3];
 };
 
 static void
@@ -2830,34 +2979,20 @@ perf_counter_read_event(struct perf_counter *counter,
                .header = {
                        .type = PERF_EVENT_READ,
                        .misc = 0,
-                       .size = sizeof(event) - sizeof(event.format),
+                       .size = sizeof(event) + perf_counter_read_size(counter),
                },
                .pid = perf_counter_pid(counter, task),
                .tid = perf_counter_tid(counter, task),
-               .value = atomic64_read(&counter->count),
        };
-       int ret, i = 0;
-
-       if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
-               event.header.size += sizeof(u64);
-               event.format[i++] = counter->total_time_enabled;
-       }
-
-       if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
-               event.header.size += sizeof(u64);
-               event.format[i++] = counter->total_time_running;
-       }
-
-       if (counter->attr.read_format & PERF_FORMAT_ID) {
-               event.header.size += sizeof(u64);
-               event.format[i++] = primary_counter_id(counter);
-       }
+       int ret;
 
        ret = perf_output_begin(&handle, counter, event.header.size, 0, 0);
        if (ret)
                return;
 
-       perf_output_copy(&handle, &event, event.header.size);
+       perf_output_put(&handle, event);
+       perf_output_read(&handle, counter);
+
        perf_output_end(&handle);
 }
 
@@ -2893,10 +3028,10 @@ static void perf_counter_task_output(struct perf_counter *counter,
                return;
 
        task_event->event.pid = perf_counter_pid(counter, task);
-       task_event->event.ppid = perf_counter_pid(counter, task->real_parent);
+       task_event->event.ppid = perf_counter_pid(counter, current);
 
        task_event->event.tid = perf_counter_tid(counter, task);
-       task_event->event.ptid = perf_counter_tid(counter, task->real_parent);
+       task_event->event.ptid = perf_counter_tid(counter, current);
 
        perf_output_put(&handle, task_event->event);
        perf_output_end(&handle);
@@ -3443,40 +3578,32 @@ static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
 
 static int perf_swcounter_is_counting(struct perf_counter *counter)
 {
-       struct perf_counter_context *ctx;
-       unsigned long flags;
-       int count;
-
+       /*
+        * The counter is active, we're good!
+        */
        if (counter->state == PERF_COUNTER_STATE_ACTIVE)
                return 1;
 
+       /*
+        * The counter is off/error, not counting.
+        */
        if (counter->state != PERF_COUNTER_STATE_INACTIVE)
                return 0;
 
        /*
-        * If the counter is inactive, it could be just because
-        * its task is scheduled out, or because it's in a group
-        * which could not go on the PMU.  We want to count in
-        * the first case but not the second.  If the context is
-        * currently active then an inactive software counter must
-        * be the second case.  If it's not currently active then
-        * we need to know whether the counter was active when the
-        * context was last active, which we can determine by
-        * comparing counter->tstamp_stopped with ctx->time.
-        *
-        * We are within an RCU read-side critical section,
-        * which protects the existence of *ctx.
+        * The counter is inactive, if the context is active
+        * we're part of a group that didn't make it on the 'pmu',
+        * not counting.
         */
-       ctx = counter->ctx;
-       spin_lock_irqsave(&ctx->lock, flags);
-       count = 1;
-       /* Re-check state now we have the lock */
-       if (counter->state < PERF_COUNTER_STATE_INACTIVE ||
-           counter->ctx->is_active ||
-           counter->tstamp_stopped < ctx->time)
-               count = 0;
-       spin_unlock_irqrestore(&ctx->lock, flags);
-       return count;
+       if (counter->ctx->is_active)
+               return 0;
+
+       /*
+        * We're inactive and the context is too, this means the
+        * task is scheduled out, we're counting events that happen
+        * to us, like migration events.
+        */
+       return 1;
 }
 
 static int perf_swcounter_match(struct perf_counter *counter,
@@ -3928,9 +4055,9 @@ perf_counter_alloc(struct perf_counter_attr *attr,
        atomic64_set(&hwc->period_left, hwc->sample_period);
 
        /*
-        * we currently do not support PERF_SAMPLE_GROUP on inherited counters
+        * we currently do not support PERF_FORMAT_GROUP on inherited counters
         */
-       if (attr->inherit && (attr->sample_type & PERF_SAMPLE_GROUP))
+       if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP))
                goto done;
 
        switch (attr->type) {
@@ -4592,6 +4719,11 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
                perf_counter_init_cpu(cpu);
                break;
 
+       case CPU_ONLINE:
+       case CPU_ONLINE_FROZEN:
+               hw_perf_counter_setup_online(cpu);
+               break;
+
        case CPU_DOWN_PREPARE:
        case CPU_DOWN_PREPARE_FROZEN:
                perf_counter_exit_cpu(cpu);
@@ -4616,6 +4748,8 @@ void __init perf_counter_init(void)
 {
        perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE,
                        (void *)(long)smp_processor_id());
+       perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE,
+                       (void *)(long)smp_processor_id());
        register_cpu_notifier(&perf_cpu_nb);
 }
 
index ea7c3b4275cf362f75cee939aa7861f7673b95cc..c4bd3d825f35c8c8dd7ed4831cd5fee5fe6a6b9a 100644 (file)
 #include <linux/wait.h>
 #include <linux/hash.h>
 
-void init_waitqueue_head(wait_queue_head_t *q)
+void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *key)
 {
        spin_lock_init(&q->lock);
+       lockdep_set_class(&q->lock, key);
        INIT_LIST_HEAD(&q->task_list);
 }
 
-EXPORT_SYMBOL(init_waitqueue_head);
+EXPORT_SYMBOL(__init_waitqueue_head);
 
 void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
 {
index 791d71a36a93dfec5166fe05e2e0cb394cfa904b..6d47165590473daa4990bf69b0435d5c49b41302 100644 (file)
@@ -736,7 +736,7 @@ static ssize_t sock_sendpage(struct file *file, struct page *page,
        if (more)
                flags |= MSG_MORE;
 
-       return sock->ops->sendpage(sock, page, offset, size, flags);
+       return kernel_sendpage(sock, page, offset, size, flags);
 }
 
 static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
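The reason kernel_sendpage() is safer here is that it refuses to jump through a missing ->sendpage operation. Roughly what it does in this era of net/socket.c, paraphrased as a sketch rather than quoted verbatim:

int kernel_sendpage(struct socket *sock, struct page *page, int offset,
		    size_t size, int flags)
{
	if (sock->ops->sendpage)
		return sock->ops->sendpage(sock, page, offset, size, flags);

	/* no protocol sendpage op: fall back to the sendmsg-based path */
	return sock_no_sendpage(sock, page, offset, size, flags);
}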
index 60411e94113be47ee778f2d5f3e54b396945850e..c045b4271e57250fc7c983f810b77aeaced80525 100644 (file)
@@ -382,22 +382,29 @@ endif
 ifdef NO_DEMANGLE
        BASIC_CFLAGS += -DNO_DEMANGLE
 else
-
        has_bfd := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd > /dev/null 2>&1 && echo y")
 
-       has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty > /dev/null 2>&1 && echo y")
-
-       has_bfd_iberty_z := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty -lz > /dev/null 2>&1 && echo y")
-
        ifeq ($(has_bfd),y)
                EXTLIBS += -lbfd
-       else ifeq ($(has_bfd_iberty),y)
-               EXTLIBS += -lbfd -liberty
-       else ifeq ($(has_bfd_iberty_z),y)
-               EXTLIBS += -lbfd -liberty -lz
        else
-               msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling)
-               BASIC_CFLAGS += -DNO_DEMANGLE
+               has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty > /dev/null 2>&1 && echo y")
+               ifeq ($(has_bfd_iberty),y)
+                       EXTLIBS += -lbfd -liberty
+               else
+                       has_bfd_iberty_z := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty -lz > /dev/null 2>&1 && echo y")
+                       ifeq ($(has_bfd_iberty_z),y)
+                               EXTLIBS += -lbfd -liberty -lz
+                       else
+                               has_cplus_demangle := $(shell sh -c "(echo 'extern char *cplus_demangle(const char *, int);'; echo 'int main(void) { cplus_demangle(0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -liberty > /dev/null 2>&1 && echo y")
+                               ifeq ($(has_cplus_demangle),y)
+                                       EXTLIBS += -liberty
+                                       BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE
+                               else
+                                       msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling)
+                                       BASIC_CFLAGS += -DNO_DEMANGLE
+                               endif
+                       endif
+               endif
        endif
 endif
 
index f990fa8a35c9d3312a20f3dcc1498203574c9407..d88c6961274cf2e961eec9d1b420dc0621965a6a 100644 (file)
 
 #include "perf.h"
 
-#include "util/parse-options.h"
 #include "util/parse-events.h"
+#include "util/cache.h"
 
 int cmd_list(int argc __used, const char **argv __used, const char *prefix __used)
 {
+       setup_pager();
        print_events();
        return 0;
 }
index 0345aad8eba50c35422062f4337c51f234d66b7a..3d051b9cf25ff18790edbb1cd33a7f644ece2dc0 100644 (file)
@@ -34,7 +34,9 @@ static int                    output;
 static const char              *output_name                    = "perf.data";
 static int                     group                           = 0;
 static unsigned int            realtime_prio                   = 0;
+static int                     raw_samples                     = 0;
 static int                     system_wide                     = 0;
+static int                     profile_cpu                     = -1;
 static pid_t                   target_pid                      = -1;
 static int                     inherit                         = 1;
 static int                     force                           = 0;
@@ -203,46 +205,48 @@ static void sig_atexit(void)
        kill(getpid(), signr);
 }
 
-static void pid_synthesize_comm_event(pid_t pid, int full)
+static pid_t pid_synthesize_comm_event(pid_t pid, int full)
 {
        struct comm_event comm_ev;
        char filename[PATH_MAX];
        char bf[BUFSIZ];
-       int fd;
-       size_t size;
-       char *field, *sep;
+       FILE *fp;
+       size_t size = 0;
        DIR *tasks;
        struct dirent dirent, *next;
+       pid_t tgid = 0;
 
-       snprintf(filename, sizeof(filename), "/proc/%d/stat", pid);
+       snprintf(filename, sizeof(filename), "/proc/%d/status", pid);
 
-       fd = open(filename, O_RDONLY);
-       if (fd < 0) {
+       fp = fopen(filename, "r");
+       if (fp == NULL) {
                /*
                 * We raced with a task exiting - just return:
                 */
                if (verbose)
                        fprintf(stderr, "couldn't open %s\n", filename);
-               return;
-       }
-       if (read(fd, bf, sizeof(bf)) < 0) {
-               fprintf(stderr, "couldn't read %s\n", filename);
-               exit(EXIT_FAILURE);
+               return 0;
        }
-       close(fd);
 
-       /* 9027 (cat) R 6747 9027 6747 34816 9027 ... */
        memset(&comm_ev, 0, sizeof(comm_ev));
-       field = strchr(bf, '(');
-       if (field == NULL)
-               goto out_failure;
-       sep = strchr(++field, ')');
-       if (sep == NULL)
-               goto out_failure;
-       size = sep - field;
-       memcpy(comm_ev.comm, field, size++);
-
-       comm_ev.pid = pid;
+       while (!comm_ev.comm[0] || !comm_ev.pid) {
+               if (fgets(bf, sizeof(bf), fp) == NULL)
+                       goto out_failure;
+
+               if (memcmp(bf, "Name:", 5) == 0) {
+                       char *name = bf + 5;
+                       while (*name && isspace(*name))
+                               ++name;
+                       size = strlen(name) - 1;
+                       memcpy(comm_ev.comm, name, size++);
+               } else if (memcmp(bf, "Tgid:", 5) == 0) {
+                       char *tgids = bf + 5;
+                       while (*tgids && isspace(*tgids))
+                               ++tgids;
+                       tgid = comm_ev.pid = atoi(tgids);
+               }
+       }
+
        comm_ev.header.type = PERF_EVENT_COMM;
        size = ALIGN(size, sizeof(u64));
        comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
@@ -251,7 +255,7 @@ static void pid_synthesize_comm_event(pid_t pid, int full)
                comm_ev.tid = pid;
 
                write_output(&comm_ev, comm_ev.header.size);
-               return;
+               goto out_fclose;
        }
 
        snprintf(filename, sizeof(filename), "/proc/%d/task", pid);
@@ -268,7 +272,10 @@ static void pid_synthesize_comm_event(pid_t pid, int full)
                write_output(&comm_ev, comm_ev.header.size);
        }
        closedir(tasks);
-       return;
+
+out_fclose:
+       fclose(fp);
+       return tgid;
 
 out_failure:
        fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n",
@@ -276,7 +283,7 @@ out_failure:
        exit(EXIT_FAILURE);
 }
 
-static void pid_synthesize_mmap_samples(pid_t pid)
+static void pid_synthesize_mmap_samples(pid_t pid, pid_t tgid)
 {
        char filename[PATH_MAX];
        FILE *fp;
@@ -328,7 +335,7 @@ static void pid_synthesize_mmap_samples(pid_t pid)
                        mmap_ev.len -= mmap_ev.start;
                        mmap_ev.header.size = (sizeof(mmap_ev) -
                                               (sizeof(mmap_ev.filename) - size));
-                       mmap_ev.pid = pid;
+                       mmap_ev.pid = tgid;
                        mmap_ev.tid = pid;
 
                        write_output(&mmap_ev, mmap_ev.header.size);
@@ -347,14 +354,14 @@ static void synthesize_all(void)
 
        while (!readdir_r(proc, &dirent, &next) && next) {
                char *end;
-               pid_t pid;
+               pid_t pid, tgid;
 
                pid = strtol(dirent.d_name, &end, 10);
                if (*end) /* only interested in proper numerical dirents */
                        continue;
 
-               pid_synthesize_comm_event(pid, 1);
-               pid_synthesize_mmap_samples(pid);
+               tgid = pid_synthesize_comm_event(pid, 1);
+               pid_synthesize_mmap_samples(pid, tgid);
        }
 
        closedir(proc);
@@ -392,7 +399,7 @@ static void create_counter(int counter, int cpu, pid_t pid)
                                  PERF_FORMAT_TOTAL_TIME_RUNNING |
                                  PERF_FORMAT_ID;
 
-       attr->sample_type       = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
+       attr->sample_type       |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
 
        if (freq) {
                attr->sample_type       |= PERF_SAMPLE_PERIOD;
@@ -412,6 +419,8 @@ static void create_counter(int counter, int cpu, pid_t pid)
        if (call_graph)
                attr->sample_type       |= PERF_SAMPLE_CALLCHAIN;
 
+       if (raw_samples)
+               attr->sample_type       |= PERF_SAMPLE_RAW;
 
        attr->mmap              = track;
        attr->comm              = track;
@@ -426,6 +435,8 @@ try_again:
 
                if (err == EPERM)
                        die("Permission error - are you root?\n");
+               else if (err ==  ENODEV && profile_cpu != -1)
+                       die("No such device - did you specify an out-of-range profile CPU?\n");
 
                /*
                 * If it's cycles then fall back to hrtimer
@@ -559,16 +570,22 @@ static int __cmd_record(int argc, const char **argv)
                if (pid == -1)
                        pid = getpid();
 
-               open_counters(-1, pid);
-       } else for (i = 0; i < nr_cpus; i++)
-               open_counters(i, target_pid);
+               open_counters(profile_cpu, pid);
+       } else {
+               if (profile_cpu != -1) {
+                       open_counters(profile_cpu, target_pid);
+               } else {
+                       for (i = 0; i < nr_cpus; i++)
+                               open_counters(i, target_pid);
+               }
+       }
 
        if (file_new)
                perf_header__write(header, output);
 
        if (!system_wide) {
-               pid_synthesize_comm_event(pid, 0);
-               pid_synthesize_mmap_samples(pid);
+               pid_t tgid = pid_synthesize_comm_event(pid, 0);
+               pid_synthesize_mmap_samples(pid, tgid);
        } else
                synthesize_all();
 
@@ -636,10 +653,14 @@ static const struct option options[] = {
                    "record events on existing pid"),
        OPT_INTEGER('r', "realtime", &realtime_prio,
                    "collect data with this RT SCHED_FIFO priority"),
+       OPT_BOOLEAN('R', "raw-samples", &raw_samples,
+                   "collect raw sample records from all opened counters"),
        OPT_BOOLEAN('a', "all-cpus", &system_wide,
                            "system-wide collection from all CPUs"),
        OPT_BOOLEAN('A', "append", &append_file,
                            "append to the output file to do incremental profiling"),
+       OPT_INTEGER('C', "profile_cpu", &profile_cpu,
+                           "CPU to profile on"),
        OPT_BOOLEAN('f', "force", &force,
                        "overwrite existing data file"),
        OPT_LONG('c', "count", &default_interval,
index 99274cec0adb7515e00802000445bb548778ee04..b53a60fc12de77115218a275235df61fdb61da09 100644 (file)
@@ -1526,11 +1526,11 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
                more_data += sizeof(u64);
        }
 
-       dprintf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d: %p period: %Ld\n",
+       dprintf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n",
                (void *)(offset + head),
                (void *)(long)(event->header.size),
                event->header.misc,
-               event->ip.pid,
+               event->ip.pid, event->ip.tid,
                (void *)(long)ip,
                (long long)period);
 
@@ -1590,10 +1590,11 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
        if (show & show_mask) {
                struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip);
 
-               if (dso_list && dso && dso->name && !strlist__has_entry(dso_list, dso->name))
+               if (dso_list && (!dso || !dso->name ||
+                                !strlist__has_entry(dso_list, dso->name)))
                        return 0;
 
-               if (sym_list && sym && !strlist__has_entry(sym_list, sym->name))
+               if (sym_list && (!sym || !strlist__has_entry(sym_list, sym->name)))
                        return 0;
 
                if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) {
@@ -1612,10 +1613,11 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
        struct thread *thread = threads__findnew(event->mmap.pid);
        struct map *map = map__new(&event->mmap);
 
-       dprintf("%p [%p]: PERF_EVENT_MMAP %d: [%p(%p) @ %p]: %s\n",
+       dprintf("%p [%p]: PERF_EVENT_MMAP %d/%d: [%p(%p) @ %p]: %s\n",
                (void *)(offset + head),
                (void *)(long)(event->header.size),
                event->mmap.pid,
+               event->mmap.tid,
                (void *)(long)event->mmap.start,
                (void *)(long)event->mmap.len,
                (void *)(long)event->mmap.pgoff,
index 4858d83b3b6746ea50756e58ffcdb90b61f8a5d4..044178408783004c66429f48f75e937d61e32e8a 100644 (file)
@@ -379,6 +379,7 @@ static int parse_tracepoint_event(const char **strp,
                                    struct perf_counter_attr *attr)
 {
        const char *evt_name;
+       char *flags;
        char sys_name[MAX_EVENT_LENGTH];
        char id_buf[4];
        int fd;
@@ -400,6 +401,15 @@ static int parse_tracepoint_event(const char **strp,
        strncpy(sys_name, *strp, sys_length);
        sys_name[sys_length] = '\0';
        evt_name = evt_name + 1;
+
+       flags = strchr(evt_name, ':');
+       if (flags) {
+               *flags = '\0';
+               flags++;
+               if (!strncmp(flags, "record", strlen(flags)))
+                       attr->sample_type |= PERF_SAMPLE_RAW;
+       }
+
        evt_length = strlen(evt_name);
        if (evt_length >= MAX_EVENT_LENGTH)
                return 0;
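With this parsing in place an event specifier may carry a modifier after a second colon, e.g. (hypothetical invocation, assuming the sched:sched_switch tracepoint exists on the running kernel):

	perf record -e sched:sched_switch:record -a sleep 1

The "record" flag only sets PERF_SAMPLE_RAW on that counter, so the raw tracepoint payload ends up in perf.data.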
index f1dcede1430753c6b61f5a9d2b696da206dc16c6..5c0f42e6b33b90ee8960ccb91845ae74da6b725e 100644 (file)
@@ -7,23 +7,8 @@
 #include <gelf.h>
 #include <elf.h>
 
-#ifndef NO_DEMANGLE
-#include <bfd.h>
-#else
-static inline
-char *bfd_demangle(void __used *v, const char __used *c, int __used i)
-{
-       return NULL;
-}
-#endif
-
 const char *sym_hist_filter;
 
-#ifndef DMGL_PARAMS
-#define DMGL_PARAMS      (1 << 0)       /* Include function args */
-#define DMGL_ANSI        (1 << 1)       /* Include const, volatile, etc */
-#endif
-
 enum dso_origin {
        DSO__ORIG_KERNEL = 0,
        DSO__ORIG_JAVA_JIT,
@@ -816,6 +801,8 @@ more:
        }
 out:
        free(name);
+       if (ret < 0 && strstr(self->name, " (deleted)") != NULL)
+               return 0;
        return ret;
 }
 
index 1e003ec2f4b104ad4ce7c55aee22a36bacf0cea5..b53bf0125c1bcaff4d88d2ddc9dd7c766f7dfb57 100644 (file)
@@ -7,6 +7,30 @@
 #include <linux/rbtree.h>
 #include "module.h"
 
+#ifdef HAVE_CPLUS_DEMANGLE
+extern char *cplus_demangle(const char *, int);
+
+static inline char *bfd_demangle(void __used *v, const char *c, int i)
+{
+       return cplus_demangle(c, i);
+}
+#else
+#ifdef NO_DEMANGLE
+static inline char *bfd_demangle(void __used *v, const char __used *c,
+                                int __used i)
+{
+       return NULL;
+}
+#else
+#include <bfd.h>
+#endif
+#endif
+
+#ifndef DMGL_PARAMS
+#define DMGL_PARAMS      (1 << 0)       /* Include function args */
+#define DMGL_ANSI        (1 << 1)       /* Include const, volatile, etc */
+#endif
+
 struct symbol {
        struct rb_node  rb_node;
        u64             start;