sched/numa: Do statistics calculation using local variables only
author Rik van Riel <riel@redhat.com>
Mon, 27 Jan 2014 22:03:46 +0000 (17:03 -0500)
committer Ingo Molnar <mingo@kernel.org>
Tue, 28 Jan 2014 14:03:17 +0000 (15:03 +0100)
The current code in task_numa_placement calculates the difference
between the old and the new value, but also temporarily stores half
of the old value in the per-process variables.

The NUMA balancing code looks at those per-process variables, and
having other tasks temporarily see halved statistics could lead to
unwanted NUMA migrations. This can be avoided by doing all the math
in local variables.

This change also simplifies the code a little.

Signed-off-by: Rik van Riel <riel@redhat.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Chegu Vinod <chegu_vinod@hp.com>
Link: http://lkml.kernel.org/r/1390860228-21539-8-git-send-email-riel@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
kernel/sched/fair.c

index 8fc3a823481744514b6678957689ac19fc8030ee..4c449907a10e3661e2e481a7b123ea427f91424f 100644 (file)
@@ -1513,12 +1513,9 @@ static void task_numa_placement(struct task_struct *p)
                        long diff, f_diff, f_weight;
 
                        i = task_faults_idx(nid, priv);
-                       diff = -p->numa_faults_memory[i];
-                       f_diff = -p->numa_faults_cpu[i];
 
                        /* Decay existing window, copy faults since last scan */
-                       p->numa_faults_memory[i] >>= 1;
-                       p->numa_faults_memory[i] += p->numa_faults_buffer_memory[i];
+                       diff = p->numa_faults_buffer_memory[i] - p->numa_faults_memory[i] / 2;
                        fault_types[priv] += p->numa_faults_buffer_memory[i];
                        p->numa_faults_buffer_memory[i] = 0;
 
@@ -1532,13 +1529,12 @@ static void task_numa_placement(struct task_struct *p)
                        f_weight = div64_u64(runtime << 16, period + 1);
                        f_weight = (f_weight * p->numa_faults_buffer_cpu[i]) /
                                   (total_faults + 1);
-                       p->numa_faults_cpu[i] >>= 1;
-                       p->numa_faults_cpu[i] += f_weight;
+                       f_diff = f_weight - p->numa_faults_cpu[i] / 2;
                        p->numa_faults_buffer_cpu[i] = 0;
 
+                       p->numa_faults_memory[i] += diff;
+                       p->numa_faults_cpu[i] += f_diff;
                        faults += p->numa_faults_memory[i];
-                       diff += p->numa_faults_memory[i];
-                       f_diff += p->numa_faults_cpu[i];
                        p->total_numa_faults += diff;
                        if (p->numa_group) {
                                /* safe because we can only change our own group */