kernel/irq/affinity.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Copyright (C) 2016 Thomas Gleixner.
   4  * Copyright (C) 2016-2017 Christoph Hellwig.
   5  */
   6 #include <linux/interrupt.h>
   7 #include <linux/kernel.h>
   8 #include <linux/slab.h>
   9 #include <linux/cpu.h>
  10
  11 static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk,
  12                                 unsigned int cpus_per_vec)
  13 {
  14         const struct cpumask *siblmsk;
  15         int cpu, sibl;
  16
  17         for ( ; cpus_per_vec > 0; ) {
  18                 cpu = cpumask_first(nmsk);
  19
  20                 /* Should not happen, but I'm too lazy to think about it */
  21                 if (cpu >= nr_cpu_ids)
  22                         return;
  23
  24                 cpumask_clear_cpu(cpu, nmsk);
  25                 cpumask_set_cpu(cpu, irqmsk);
  26                 cpus_per_vec--;
  27
  28                 /* If the cpu has siblings, use them first */
  29                 siblmsk = topology_sibling_cpumask(cpu);
  30                 for (sibl = -1; cpus_per_vec > 0; ) {
  31                         sibl = cpumask_next(sibl, siblmsk);
  32                         if (sibl >= nr_cpu_ids)
  33                                 break;
  34                         if (!cpumask_test_and_clear_cpu(sibl, nmsk))
  35                                 continue;
  36                         cpumask_set_cpu(sibl, irqmsk);
  37                         cpus_per_vec--;
  38                 }
  39         }
  40 }
  41
  42 static cpumask_var_t *alloc_node_to_cpumask(void)
  43 {
  44         cpumask_var_t *masks;
  45         int node;
  46
  47         masks = kcalloc(nr_node_ids, sizeof(cpumask_var_t), GFP_KERNEL);
  48         if (!masks)
  49                 return NULL;
  50
  51         for (node = 0; node < nr_node_ids; node++) {
  52                 if (!zalloc_cpumask_var(&masks[node], GFP_KERNEL))
  53                         goto out_unwind;
  54         }
  55
  56         return masks;
  57
  58 out_unwind:
  59         while (--node >= 0)
  60                 free_cpumask_var(masks[node]);
  61         kfree(masks);
  62         return NULL;
  63 }
  64
  65 static void free_node_to_cpumask(cpumask_var_t *masks)
  66 {
  67         int node;
  68
  69         for (node = 0; node < nr_node_ids; node++)
  70                 free_cpumask_var(masks[node]);
  71         kfree(masks);
  72 }
  73
  74 static void build_node_to_cpumask(cpumask_var_t *masks)
  75 {
  76         int cpu;
  77
  78         for_each_possible_cpu(cpu)
  79                 cpumask_set_cpu(cpu, masks[cpu_to_node(cpu)]);
  80 }
  81
  82 static int get_nodes_in_cpumask(cpumask_var_t *node_to_cpumask,
  83                                 const struct cpumask *mask, nodemask_t *nodemsk)
  84 {
  85         int n, nodes = 0;
  86
  87         /* Calculate the number of nodes in the supplied affinity mask */
  88         for_each_node(n) {
  89                 if (cpumask_intersects(mask, node_to_cpumask[n])) {
  90                         node_set(n, *nodemsk);
  91                         nodes++;
  92                 }
  93         }
  94         return nodes;
  95 }
  96
  97 static int __irq_build_affinity_masks(const struct irq_affinity *affd,
  98                                       unsigned int startvec,
  99                                       unsigned int numvecs,
 100                                       unsigned int firstvec,
 101                                       cpumask_var_t *node_to_cpumask,
 102                                       const struct cpumask *cpu_mask,
 103                                       struct cpumask *nmsk,
 104                                       struct irq_affinity_desc *masks)
 105 {
 106         unsigned int n, nodes, cpus_per_vec, extra_vecs, done = 0;
 107         unsigned int last_affv = firstvec + numvecs;
 108         unsigned int curvec = startvec;
 109         nodemask_t nodemsk = NODE_MASK_NONE;
 110
 111         if (!cpumask_weight(cpu_mask))
 112                 return 0;
 113
 114         nodes = get_nodes_in_cpumask(node_to_cpumask, cpu_mask, &nodemsk);
 115
 116         /*
 117          * If the number of nodes in the mask is greater than or equal the
 118          * number of vectors we just spread the vectors across the nodes.
 119          */
 120         if (numvecs <= nodes) {
 121                 for_each_node_mask(n, nodemsk) {
 122                         cpumask_or(&masks[curvec].mask, &masks[curvec].mask,
 123                                    node_to_cpumask[n]);
 124                         if (++curvec == last_affv)
 125                                 curvec = firstvec;
 126                 }
 127                 return numvecs;
 128         }
 129
 130         for_each_node_mask(n, nodemsk) {
 131                 unsigned int ncpus, v, vecs_to_assign, vecs_per_node;
 132
 133                 /* Spread the vectors per node */
 134                 vecs_per_node = (numvecs - (curvec - firstvec)) / nodes;
 135
 136                 /* Get the cpus on this node which are in the mask */
 137                 cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]);
 138
 139                 /* Calculate the number of cpus per vector */
 140                 ncpus = cpumask_weight(nmsk);
 141                 vecs_to_assign = min(vecs_per_node, ncpus);
 142
 143                 /* Account for rounding errors */
 144                 extra_vecs = ncpus - vecs_to_assign * (ncpus / vecs_to_assign);
 145
 146                 for (v = 0; curvec < last_affv && v < vecs_to_assign;
 147                      curvec++, v++) {
 148                         cpus_per_vec = ncpus / vecs_to_assign;
 149
 150                         /* Account for extra vectors to compensate rounding errors */
 151                         if (extra_vecs) {
 152                                 cpus_per_vec++;
 153                                 --extra_vecs;
 154                         }
 155                         irq_spread_init_one(&masks[curvec].mask, nmsk,
 156                                                 cpus_per_vec);
 157                 }
 158
 159                 done += v;
 160                 if (done >= numvecs)
 161                         break;
 162                 if (curvec >= last_affv)
 163                         curvec = firstvec;
 164                 --nodes;
 165         }
 166         return done;
 167 }
 168
 169 /*
 170  * build affinity in two stages:
 171  *      1) spread present CPU on these vectors
 172  *      2) spread other possible CPUs on these vectors
 173  */
 174 static int irq_build_affinity_masks(const struct irq_affinity *affd,
 175                                     unsigned int startvec, unsigned int numvecs,
 176                                     unsigned int firstvec,
 177                                     struct irq_affinity_desc *masks)
 178 {
 179         unsigned int curvec = startvec, nr_present, nr_others;
 180         cpumask_var_t *node_to_cpumask;
 181         cpumask_var_t nmsk, npresmsk;
 182         int ret = -ENOMEM;
 183
 184         if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
 185                 return ret;
 186
 187         if (!zalloc_cpumask_var(&npresmsk, GFP_KERNEL))
 188                 goto fail_nmsk;
 189
 190         node_to_cpumask = alloc_node_to_cpumask();
 191         if (!node_to_cpumask)
 192                 goto fail_npresmsk;
 193
 194         ret = 0;
 195         /* Stabilize the cpumasks */
 196         get_online_cpus();
 197         build_node_to_cpumask(node_to_cpumask);
 198
 199         /* Spread on present CPUs starting from affd->pre_vectors */
 200         nr_present = __irq_build_affinity_masks(affd, curvec, numvecs,
 201                                                 firstvec, node_to_cpumask,
 202                                                 cpu_present_mask, nmsk, masks);
 203
 204         /*
 205          * Spread on non present CPUs starting from the next vector to be
 206          * handled. If the spreading of present CPUs already exhausted the
 207          * vector space, assign the non present CPUs to the already spread
 208          * out vectors.
 209          */
 210         if (nr_present >= numvecs)
 211                 curvec = firstvec;
 212         else
 213                 curvec = firstvec + nr_present;
 214         cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask);
 215         nr_others = __irq_build_affinity_masks(affd, curvec, numvecs,
 216                                                firstvec, node_to_cpumask,
 217                                                npresmsk, nmsk, masks);
 218         put_online_cpus();
 219
 220         if (nr_present < numvecs)
 221                 WARN_ON(nr_present + nr_others < numvecs);
 222
 223         free_node_to_cpumask(node_to_cpumask);
 224
 225  fail_npresmsk:
 226         free_cpumask_var(npresmsk);
 227
 228  fail_nmsk:
 229         free_cpumask_var(nmsk);
 230         return ret;
 231 }
 232
 233 static void default_calc_sets(struct irq_affinity *affd, unsigned int affvecs)
 234 {
 235         affd->nr_sets = 1;
 236         affd->set_size[0] = affvecs;
 237 }
 238
 239 /**
 240  * irq_create_affinity_masks - Create affinity masks for multiqueue spreading
 241  * @nvecs:      The total number of vectors
 242  * @affd:       Description of the affinity requirements
 243  *
 244  * Returns the irq_affinity_desc pointer or NULL if allocation failed.
 245  */
 246 struct irq_affinity_desc *
 247 irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
 248 {
 249         unsigned int affvecs, curvec, usedvecs, i;
 250         struct irq_affinity_desc *masks = NULL;
 251
 252         /*
 253          * Determine the number of vectors which need interrupt affinities
 254          * assigned. If the pre/post request exhausts the available vectors
 255          * then nothing to do here except for invoking the calc_sets()
 256          * callback so the device driver can adjust to the situation. If there
 257          * is only a single vector, then managing the queue is pointless as
 258          * well.
 259          */
 260         if (nvecs > 1 && nvecs > affd->pre_vectors + affd->post_vectors)
 261                 affvecs = nvecs - affd->pre_vectors - affd->post_vectors;
 262         else
 263                 affvecs = 0;
 264
 265         /*
 266          * Simple invocations do not provide a calc_sets() callback. Install
 267          * the generic one.
 268          */
 269         if (!affd->calc_sets)
 270                 affd->calc_sets = default_calc_sets;
 271
 272         /* Recalculate the sets */
 273         affd->calc_sets(affd, affvecs);
 274
 275         if (WARN_ON_ONCE(affd->nr_sets > IRQ_AFFINITY_MAX_SETS))
 276                 return NULL;
 277
 278         /* Nothing to assign? */
 279         if (!affvecs)
 280                 return NULL;
 281
 282         masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL);
 283         if (!masks)
 284                 return NULL;
 285
 286         /* Fill out vectors at the beginning that don't need affinity */
 287         for (curvec = 0; curvec < affd->pre_vectors; curvec++)
 288                 cpumask_copy(&masks[curvec].mask, irq_default_affinity);
 289
 290         /*
 291          * Spread on present CPUs starting from affd->pre_vectors. If we
 292          * have multiple sets, build each sets affinity mask separately.
 293          */
 294         for (i = 0, usedvecs = 0; i < affd->nr_sets; i++) {
 295                 unsigned int this_vecs = affd->set_size[i];
 296                 int ret;
 297
 298                 ret = irq_build_affinity_masks(affd, curvec, this_vecs,
 299                                                curvec, masks);
 300                 if (ret) {
 301                         kfree(masks);
 302                         return NULL;
 303                 }
 304                 curvec += this_vecs;
 305                 usedvecs += this_vecs;
 306         }
 307
 308         /* Fill out vectors at the end that don't need affinity */
 309         if (usedvecs >= affvecs)
 310                 curvec = affd->pre_vectors + affvecs;
 311         else
 312                 curvec = affd->pre_vectors + usedvecs;
 313         for (; curvec < nvecs; curvec++)
 314                 cpumask_copy(&masks[curvec].mask, irq_default_affinity);
 315
 316         /* Mark the managed interrupts */
 317         for (i = affd->pre_vectors; i < nvecs - affd->post_vectors; i++)
 318                 masks[i].is_managed = 1;
 319
 320         return masks;
 321 }
 322
 323 /**
 324  * irq_calc_affinity_vectors - Calculate the optimal number of vectors
 325  * @minvec:     The minimum number of vectors available
 326  * @maxvec:     The maximum number of vectors available
 327  * @affd:       Description of the affinity requirements
 328  */
 329 unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec,
 330                                        const struct irq_affinity *affd)
 331 {
 332         unsigned int resv = affd->pre_vectors + affd->post_vectors;
 333         unsigned int set_vecs;
 334
 335         if (resv > minvec)
 336                 return 0;
 337
 338         if (affd->calc_sets) {
 339                 set_vecs = maxvec - resv;
 340         } else {
 341                 get_online_cpus();
 342                 set_vecs = cpumask_weight(cpu_possible_mask);
 343                 put_online_cpus();
 344         }
 345
 346         return resv + min(set_vecs, maxvec - resv);
 347 }