sched: re-tune balancing
authorIngo Molnar <mingo@elte.hu>
Wed, 5 Nov 2008 15:52:08 +0000 (16:52 +0100)
committerIngo Molnar <mingo@elte.hu>
Wed, 5 Nov 2008 17:04:38 +0000 (18:04 +0100)
Impact: improve wakeup affinity on NUMA systems, tweak SMP systems

Given the fixes+tweaks to the wakeup-buddy code, re-tweak the domain
balancing defaults on NUMA and SMP systems.

Turn on SD_WAKE_AFFINE which was off on x86 NUMA - there's no reason
why we would not want to have wakeup affinity across nodes as well.
(we already do this in the standard NUMA template.)

lat_ctx on a NUMA box is particularly happy about this change:

before:

 |   phoenix:~/l> ./lat_ctx -s 0 2
 |   "size=0k ovr=2.60
 |   2 5.70

after:

 |   phoenix:~/l> ./lat_ctx -s 0 2
 |   "size=0k ovr=2.65
 |   2 2.07

a 2.75x speedup.

pipe-test is similarly happy about it too:

 |  phoenix:~/sched-tests> ./pipe-test
 |   18.26 usecs/loop.
 |   14.70 usecs/loop.
 |   14.38 usecs/loop.
 |   10.55 usecs/loop.              # +WAKE_AFFINE on domain0+domain1
 |   8.63 usecs/loop.
 |   8.59 usecs/loop.
 |   9.03 usecs/loop.
 |   8.94 usecs/loop.
 |   8.96 usecs/loop.
 |   8.63 usecs/loop.

Also:

 - disable SD_BALANCE_NEWIDLE on NUMA and SMP domains (keep it for siblings)
 - enable SD_WAKE_BALANCE on SMP domains

Sysbench+postgresql improves all around the board, quite significantly:

           .28-rc3-11474e2c  .28-rc3-11474e2c-tune
-------------------------------------------------
    1:             571              688    +17.08%
    2:            1236             1206    -2.55%
    4:            2381             2642    +9.89%
    8:            4958             5164    +3.99%
   16:            9580             9574    -0.07%
   32:            7128             8118    +12.20%
   64:            7342             8266    +11.18%
  128:            7342             8064    +8.95%
  256:            7519             7884    +4.62%
  512:            7350             7731    +4.93%
-------------------------------------------------
  SUM:           55412            59341    +6.62%

So it's a win both for the runup portion, the peak area and the tail.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
arch/x86/include/asm/topology.h
include/linux/topology.h

index 90ac7718469a5b416bfd90350b24ade716698925..4850e4b02b61b255c14db559feb136e80a2f32c9 100644 (file)
@@ -154,7 +154,7 @@ extern unsigned long node_remap_size[];
 
 #endif
 
-/* sched_domains SD_NODE_INIT for NUMAQ machines */
+/* sched_domains SD_NODE_INIT for NUMA machines */
 #define SD_NODE_INIT (struct sched_domain) {           \
        .min_interval           = 8,                    \
        .max_interval           = 32,                   \
@@ -169,8 +169,9 @@ extern unsigned long node_remap_size[];
        .flags                  = SD_LOAD_BALANCE       \
                                | SD_BALANCE_EXEC       \
                                | SD_BALANCE_FORK       \
-                               | SD_SERIALIZE          \
-                               | SD_WAKE_BALANCE,      \
+                               | SD_WAKE_AFFINE        \
+                               | SD_WAKE_BALANCE       \
+                               | SD_SERIALIZE,         \
        .last_balance           = jiffies,              \
        .balance_interval       = 1,                    \
 }
index 2158fc0d5a56448001fa8a0cb569b66790cdd60a..34a7ee0ebed2968e3a4833e18bcb92c720e51b32 100644 (file)
@@ -146,10 +146,10 @@ void arch_update_cpu_topology(void);
        .wake_idx               = 1,                    \
        .forkexec_idx           = 1,                    \
        .flags                  = SD_LOAD_BALANCE       \
-                               | SD_BALANCE_NEWIDLE    \
-                               | SD_BALANCE_FORK       \
                                | SD_BALANCE_EXEC       \
+                               | SD_BALANCE_FORK       \
                                | SD_WAKE_AFFINE        \
+                               | SD_WAKE_BALANCE       \
                                | BALANCE_FOR_PKG_POWER,\
        .last_balance           = jiffies,              \
        .balance_interval       = 1,                    \