4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
19 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
21 * Copyright (c) 2012, 2015 Intel Corporation.
24 * This file is part of Lustre, http://www.lustre.org/
25 * Lustre is a trademark of Sun Microsystems, Inc.
27 * Author: liang@whamcloud.com
30 #define DEBUG_SUBSYSTEM S_LNET
32 #include <linux/cpu.h>
33 #include <linux/sched.h>
34 #include "../../../include/linux/libcfs/libcfs.h"
39 * modparam for setting number of partitions
41 * 0 : estimate best value based on cores or NUMA nodes
42 * 1 : disable multiple partitions
43 * >1 : specify number of partitions
/* read-only after module load (0444: visible in sysfs, not writable) */
45 static int cpu_npartitions;
46 module_param(cpu_npartitions, int, 0444);
47 MODULE_PARM_DESC(cpu_npartitions, "# of CPU partitions");
50 * modparam for setting CPU partitions patterns:
52 * i.e: "0[0,1,2,3] 1[4,5,6,7]", number before bracket is CPU partition ID,
53 * number in bracket is processor ID (core or HT)
55 * i.e: "N 0[0,1] 1[2,3]" the first character 'N' means numbers in bracket
56 * are NUMA node ID, number before bracket is CPU partition ID.
58 * i.e: "N", shortcut expression to create CPT from NUMA & CPU topology
60 * NB: If user specified cpu_pattern, cpu_npartitions will be ignored
/* empty string means "no pattern": fall back to cpu_npartitions */
62 static char *cpu_pattern = "";
63 module_param(cpu_pattern, charp, 0444);
64 MODULE_PARM_DESC(cpu_pattern, "CPU partitions pattern");
/*
 * Fields of struct cfs_cpt_data; the struct's opening line is outside
 * the visible chunk. One file-scope instance (cpt_data) is shared by
 * the whole module.
 */
67 /* serialize hotplug etc */
69 /* reserved for hotplug */
/* bumped on every CPU hotplug event; tables created before a bump are stale */
70 unsigned long cpt_version;
71 /* mutex to protect cpt_cpumask */
72 struct mutex cpt_mutex;
73 /* scratch buffer for set/unset_node */
74 cpumask_t *cpt_cpumask;
/* single module-wide instance; initialized in cfs_cpu_init() */
77 static struct cfs_cpt_data cpt_data;
/*
 * Copy the cpumask of NUMA node @node into caller-supplied @mask.
 * NOTE(review): return-type line and braces are outside the visible
 * lines of this chunk.
 */
80 cfs_node_to_cpumask(int node, cpumask_t *mask)
82 	const cpumask_t *tmp = cpumask_of_node(node);
85 	cpumask_copy(mask, tmp);
/*
 * Free a CPU partition table and every buffer hanging off it.
 * Safe on a partially constructed table: each member is checked for
 * NULL before being freed, so this doubles as the failure path of
 * cfs_cpt_table_alloc().
 */
91 cfs_cpt_table_free(struct cfs_cpt_table *cptab)
95 	if (cptab->ctb_cpu2cpt) {
96 		LIBCFS_FREE(cptab->ctb_cpu2cpt,
98 			    sizeof(cptab->ctb_cpu2cpt[0]));
/* per-partition masks must go before the ctb_parts array itself */
101 	for (i = 0; cptab->ctb_parts && i < cptab->ctb_nparts; i++) {
102 		struct cfs_cpu_partition *part = &cptab->ctb_parts[i];
104 		if (part->cpt_nodemask) {
105 			LIBCFS_FREE(part->cpt_nodemask,
106 				    sizeof(*part->cpt_nodemask));
109 		if (part->cpt_cpumask)
110 			LIBCFS_FREE(part->cpt_cpumask, cpumask_size());
113 	if (cptab->ctb_parts) {
114 		LIBCFS_FREE(cptab->ctb_parts,
115 			    cptab->ctb_nparts * sizeof(cptab->ctb_parts[0]));
118 	if (cptab->ctb_nodemask)
119 		LIBCFS_FREE(cptab->ctb_nodemask, sizeof(*cptab->ctb_nodemask));
120 	if (cptab->ctb_cpumask)
121 		LIBCFS_FREE(cptab->ctb_cpumask, cpumask_size());
/* finally release the table structure itself */
123 	LIBCFS_FREE(cptab, sizeof(*cptab));
125 EXPORT_SYMBOL(cfs_cpt_table_free);
/*
 * Allocate a CPU partition table with @ncpt empty partitions.
 * Returns the new table, or NULL on allocation failure (cleanup of
 * partial allocations is delegated to cfs_cpt_table_free()).
 */
127 struct cfs_cpt_table *
128 cfs_cpt_table_alloc(unsigned int ncpt)
130 	struct cfs_cpt_table *cptab;
133 	LIBCFS_ALLOC(cptab, sizeof(*cptab));
137 	cptab->ctb_nparts = ncpt;
139 	LIBCFS_ALLOC(cptab->ctb_cpumask, cpumask_size());
140 	LIBCFS_ALLOC(cptab->ctb_nodemask, sizeof(*cptab->ctb_nodemask));
142 	if (!cptab->ctb_cpumask || !cptab->ctb_nodemask)
/* cpu -> partition reverse map, sized for every possible CPU id */
145 	LIBCFS_ALLOC(cptab->ctb_cpu2cpt,
146 		     num_possible_cpus() * sizeof(cptab->ctb_cpu2cpt[0]));
147 	if (!cptab->ctb_cpu2cpt)
/* -1 in every byte yields -1 per int entry: "CPU not assigned yet" */
150 	memset(cptab->ctb_cpu2cpt, -1,
151 	       num_possible_cpus() * sizeof(cptab->ctb_cpu2cpt[0]));
153 	LIBCFS_ALLOC(cptab->ctb_parts, ncpt * sizeof(cptab->ctb_parts[0]));
154 	if (!cptab->ctb_parts)
157 	for (i = 0; i < ncpt; i++) {
158 		struct cfs_cpu_partition *part = &cptab->ctb_parts[i];
160 		LIBCFS_ALLOC(part->cpt_cpumask, cpumask_size());
161 		LIBCFS_ALLOC(part->cpt_nodemask, sizeof(*part->cpt_nodemask));
162 		if (!part->cpt_cpumask || !part->cpt_nodemask)
/* snapshot hotplug generation so later code can detect topology changes */
166 	spin_lock(&cpt_data.cpt_lock);
167 	/* Reserved for hotplug */
168 	cptab->ctb_version = cpt_data.cpt_version;
169 	spin_unlock(&cpt_data.cpt_lock);
/* failure path: free whatever was allocated so far */
174 	cfs_cpt_table_free(cptab);
177 EXPORT_SYMBOL(cfs_cpt_table_alloc);
/*
 * Render the partition layout into @buf ("<cpt>\t: <cpu> <cpu> ..."
 * per partition), bounded by @len. NOTE(review): the buffer-advance
 * and overflow-check lines between these snprintf calls are outside
 * the visible chunk.
 */
180 cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len)
187 	for (i = 0; i < cptab->ctb_nparts; i++) {
189 		rc = snprintf(tmp, len, "%d\t: ", i);
199 		for_each_cpu(j, cptab->ctb_parts[i].cpt_cpumask) {
200 			rc = snprintf(tmp, len, "%d ", j);
220 EXPORT_SYMBOL(cfs_cpt_table_print);
/* Return the number of partitions in @cptab. */
223 cfs_cpt_number(struct cfs_cpt_table *cptab)
225 	return cptab->ctb_nparts;
227 EXPORT_SYMBOL(cfs_cpt_number);
/*
 * Number of CPUs in partition @cpt, or in the whole table when
 * @cpt == CFS_CPT_ANY.
 */
230 cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt)
232 	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
234 	return cpt == CFS_CPT_ANY ?
235 	       cpumask_weight(cptab->ctb_cpumask) :
236 	       cpumask_weight(cptab->ctb_parts[cpt].cpt_cpumask);
238 EXPORT_SYMBOL(cfs_cpt_weight);
/*
 * Nonzero if partition @cpt (or any partition, for CFS_CPT_ANY) still
 * contains at least one online CPU. cpumask_any_and() returns
 * >= nr_cpu_ids when the intersection with cpu_online_mask is empty.
 */
241 cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt)
243 	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
245 	return cpt == CFS_CPT_ANY ?
246 	       cpumask_any_and(cptab->ctb_cpumask,
247 			       cpu_online_mask) < nr_cpu_ids :
248 	       cpumask_any_and(cptab->ctb_parts[cpt].cpt_cpumask,
249 			       cpu_online_mask) < nr_cpu_ids;
251 EXPORT_SYMBOL(cfs_cpt_online);
/*
 * Return the cpumask of partition @cpt, or of the whole table for
 * CFS_CPT_ANY. The table retains ownership of the mask.
 */
254 cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt)
256 	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
258 	return cpt == CFS_CPT_ANY ?
259 	       cptab->ctb_cpumask : cptab->ctb_parts[cpt].cpt_cpumask;
261 EXPORT_SYMBOL(cfs_cpt_cpumask);
/*
 * Return the NUMA nodemask of partition @cpt, or of the whole table
 * for CFS_CPT_ANY. The table retains ownership of the mask.
 */
264 cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt)
266 	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
268 	return cpt == CFS_CPT_ANY ?
269 	       cptab->ctb_nodemask : cptab->ctb_parts[cpt].cpt_nodemask;
271 EXPORT_SYMBOL(cfs_cpt_nodemask);
/*
 * Assign online CPU @cpu to partition @cpt: update the cpu->cpt
 * reverse map, both cpumasks (per-partition and table-wide), and the
 * corresponding nodemasks. Rejects offline/invalid CPUs and CPUs
 * already owned by a partition.
 */
274 cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
278 	LASSERT(cpt >= 0 && cpt < cptab->ctb_nparts);
280 	if (cpu < 0 || cpu >= nr_cpu_ids || !cpu_online(cpu)) {
281 		CDEBUG(D_INFO, "CPU %d is invalid or it's offline\n", cpu);
/* -1 means unassigned; a CPU may belong to at most one partition */
285 	if (cptab->ctb_cpu2cpt[cpu] != -1) {
286 		CDEBUG(D_INFO, "CPU %d is already in partition %d\n",
287 		       cpu, cptab->ctb_cpu2cpt[cpu]);
291 	cptab->ctb_cpu2cpt[cpu] = cpt;
/* reverse map said unassigned, so neither mask may have the bit yet */
293 	LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_cpumask));
294 	LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask));
296 	cpumask_set_cpu(cpu, cptab->ctb_cpumask);
297 	cpumask_set_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
299 	node = cpu_to_node(cpu);
301 	/* first CPU of @node in this CPT table */
302 	if (!node_isset(node, *cptab->ctb_nodemask))
303 		node_set(node, *cptab->ctb_nodemask);
305 	/* first CPU of @node in this partition */
306 	if (!node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask))
307 		node_set(node, *cptab->ctb_parts[cpt].cpt_nodemask);
311 EXPORT_SYMBOL(cfs_cpt_set_cpu);
/*
 * Remove @cpu from partition @cpt (or from whichever partition owns
 * it, when @cpt == CFS_CPT_ANY). Clears both cpumasks and the reverse
 * map, then drops the CPU's NUMA node from the partition/table
 * nodemasks if no other CPU of that node remains.
 */
314 cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
319 	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
321 	if (cpu < 0 || cpu >= nr_cpu_ids) {
322 		CDEBUG(D_INFO, "Invalid CPU id %d\n", cpu);
326 	if (cpt == CFS_CPT_ANY) {
327 		/* caller doesn't know the partition ID */
328 		cpt = cptab->ctb_cpu2cpt[cpu];
329 		if (cpt < 0) { /* not set in this CPT-table */
330 			CDEBUG(D_INFO, "Try to unset cpu %d which is not in CPT-table %p\n",
/* explicit @cpt must match the partition recorded in the reverse map */
335 	} else if (cpt != cptab->ctb_cpu2cpt[cpu]) {
337 		       "CPU %d is not in cpu-partition %d\n", cpu, cpt);
341 	LASSERT(cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask));
342 	LASSERT(cpumask_test_cpu(cpu, cptab->ctb_cpumask));
344 	cpumask_clear_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
345 	cpumask_clear_cpu(cpu, cptab->ctb_cpumask);
346 	cptab->ctb_cpu2cpt[cpu] = -1;
348 	node = cpu_to_node(cpu);
350 	LASSERT(node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask));
351 	LASSERT(node_isset(node, *cptab->ctb_nodemask));
353 	for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask) {
354 		/* this CPT has other CPU belonging to this node? */
355 		if (cpu_to_node(i) == node)
/* no remaining CPU of @node in this partition: drop the node bit */
360 	node_clear(node, *cptab->ctb_parts[cpt].cpt_nodemask);
362 	for_each_cpu(i, cptab->ctb_cpumask) {
363 		/* this CPT-table has other CPU belonging to this node? */
364 		if (cpu_to_node(i) == node)
369 	node_clear(node, *cptab->ctb_nodemask);
371 EXPORT_SYMBOL(cfs_cpt_unset_cpu);
/*
 * Add every CPU in @mask to partition @cpt via cfs_cpt_set_cpu().
 * Fails up front if @mask is empty or contains no online CPU.
 */
374 cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
378 	if (!cpumask_weight(mask) ||
379 	    cpumask_any_and(mask, cpu_online_mask) >= nr_cpu_ids) {
380 		CDEBUG(D_INFO, "No online CPU is found in the CPU mask for CPU partition %d\n",
385 	for_each_cpu(i, mask) {
386 		if (!cfs_cpt_set_cpu(cptab, cpt, i))
392 EXPORT_SYMBOL(cfs_cpt_set_cpumask);
/* Remove every CPU in @mask from partition @cpt. */
395 cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
399 	for_each_cpu(i, mask)
400 		cfs_cpt_unset_cpu(cptab, cpt, i);
402 EXPORT_SYMBOL(cfs_cpt_unset_cpumask);
/*
 * Add all CPUs of NUMA node @node to partition @cpt. Uses the shared
 * scratch cpumask in cpt_data, so cpt_mutex serializes callers.
 */
405 cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node)
410 	if (node < 0 || node >= MAX_NUMNODES) {
412 		       "Invalid NUMA id %d for CPU partition %d\n", node, cpt);
416 	mutex_lock(&cpt_data.cpt_mutex);
418 	mask = cpt_data.cpt_cpumask;
419 	cfs_node_to_cpumask(node, mask);
421 	rc = cfs_cpt_set_cpumask(cptab, cpt, mask);
423 	mutex_unlock(&cpt_data.cpt_mutex);
427 EXPORT_SYMBOL(cfs_cpt_set_node);
/*
 * Remove all CPUs of NUMA node @node from partition @cpt. Mirrors
 * cfs_cpt_set_node(): the shared scratch cpumask is protected by
 * cpt_mutex.
 */
430 cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node)
434 	if (node < 0 || node >= MAX_NUMNODES) {
436 		       "Invalid NUMA id %d for CPU partition %d\n", node, cpt);
440 	mutex_lock(&cpt_data.cpt_mutex);
442 	mask = cpt_data.cpt_cpumask;
443 	cfs_node_to_cpumask(node, mask);
445 	cfs_cpt_unset_cpumask(cptab, cpt, mask);
447 	mutex_unlock(&cpt_data.cpt_mutex);
449 EXPORT_SYMBOL(cfs_cpt_unset_node);
/* Add all CPUs of every node in @mask to partition @cpt. */
452 cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
456 	for_each_node_mask(i, *mask) {
457 		if (!cfs_cpt_set_node(cptab, cpt, i))
463 EXPORT_SYMBOL(cfs_cpt_set_nodemask);
/* Remove all CPUs of every node in @mask from partition @cpt. */
466 cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
470 	for_each_node_mask(i, *mask)
471 		cfs_cpt_unset_node(cptab, cpt, i);
473 EXPORT_SYMBOL(cfs_cpt_unset_nodemask);
/*
 * Empty partition @cpt, or every partition when @cpt == CFS_CPT_ANY,
 * by unsetting each member CPU in turn.
 */
476 cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt)
481 	if (cpt == CFS_CPT_ANY) {
482 		last = cptab->ctb_nparts - 1;
488 	for (; cpt <= last; cpt++) {
489 		for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask)
490 			cfs_cpt_unset_cpu(cptab, cpt, i);
493 EXPORT_SYMBOL(cfs_cpt_clear);
/*
 * Pick a HW NUMA node for partition @cpt, round-robin over the
 * partition's nodemask (or the table-wide mask for out-of-range @cpt)
 * via a per-partition rotor. NOTE(review): the rotor-to-node selection
 * inside the for_each_node_mask loop is outside the visible lines.
 */
496 cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt)
503 	/* convert CPU partition ID to HW node id */
505 	if (cpt < 0 || cpt >= cptab->ctb_nparts) {
506 		mask = cptab->ctb_nodemask;
507 		rotor = cptab->ctb_spread_rotor++;
509 		mask = cptab->ctb_parts[cpt].cpt_nodemask;
510 		rotor = cptab->ctb_parts[cpt].cpt_spread_rotor++;
513 	weight = nodes_weight(*mask);
518 	for_each_node_mask(node, *mask) {
526 EXPORT_SYMBOL(cfs_cpt_spread_node);
/*
 * Partition ID of the CPU this code is currently running on.
 * NOTE(review): the handling of @remap and the path between the lookup
 * and the modulo fallback are outside the visible lines.
 */
529 cfs_cpt_current(struct cfs_cpt_table *cptab, int remap)
531 	int cpu = smp_processor_id();
532 	int cpt = cptab->ctb_cpu2cpt[cpu];
538 	/* don't return negative value for safety of upper layer,
539 	 * instead we shadow the unknown cpu to a valid partition ID
/* unmapped CPU: derive a stable, in-range partition ID from the CPU # */
541 	cpt = cpu % cptab->ctb_nparts;
546 EXPORT_SYMBOL(cfs_cpt_current);
/*
 * Partition ID owning @cpu; -1 if the CPU is not in this table (the
 * reverse map is memset to -1 at allocation).
 */
549 cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu)
551 	LASSERT(cpu >= 0 && cpu < nr_cpu_ids);
553 	return cptab->ctb_cpu2cpt[cpu];
555 EXPORT_SYMBOL(cfs_cpt_of_cpu);
/*
 * Bind the current task's CPU and memory affinity to partition @cpt
 * (or to the whole table for CFS_CPT_ANY). Fails if the partition has
 * no online CPU left; skips the affinity call entirely when every
 * online CPU is already covered by the chosen mask.
 */
558 cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
561 	nodemask_t *nodemask;
565 	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
567 	if (cpt == CFS_CPT_ANY) {
568 		cpumask = cptab->ctb_cpumask;
569 		nodemask = cptab->ctb_nodemask;
571 		cpumask = cptab->ctb_parts[cpt].cpt_cpumask;
572 		nodemask = cptab->ctb_parts[cpt].cpt_nodemask;
575 	if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids) {
576 		CERROR("No online CPU found in CPU partition %d, did someone do CPU hotplug on system? You might need to reload Lustre modules to keep system working well.\n",
/* any online CPU missing from the mask means we must restrict affinity */
581 	for_each_online_cpu(i) {
582 		if (cpumask_test_cpu(i, cpumask))
585 		rc = set_cpus_allowed_ptr(current, cpumask);
586 		set_mems_allowed(*nodemask);
588 		schedule(); /* switch to allowed CPU */
593 	/* don't need to set affinity because all online CPUs are covered */
596 EXPORT_SYMBOL(cfs_cpt_bind);
599 * Choose max to \a number CPUs from \a node and set them in \a cpt.
600 * We always prefer to choose CPU in the same core/socket.
603 cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt,
604 		     cpumask_t *node, int number)
606 	cpumask_t *socket = NULL;
607 	cpumask_t *core = NULL;
/* trivial case: need at least as many CPUs as @node has, take them all */
613 	if (number >= cpumask_weight(node)) {
614 		while (!cpumask_empty(node)) {
615 			cpu = cpumask_first(node);
617 			rc = cfs_cpt_set_cpu(cptab, cpt, cpu);
/* consume @node as we go: chosen CPUs are removed from the input mask */
620 			cpumask_clear_cpu(cpu, node);
625 	/* allocate scratch buffer */
626 	LIBCFS_ALLOC(socket, cpumask_size());
627 	LIBCFS_ALLOC(core, cpumask_size());
628 	if (!socket || !core) {
633 	while (!cpumask_empty(node)) {
634 		cpu = cpumask_first(node);
636 		/* get cpumask for cores in the same socket */
637 		cpumask_copy(socket, topology_core_cpumask(cpu));
638 		cpumask_and(socket, socket, node);
640 		LASSERT(!cpumask_empty(socket));
642 		while (!cpumask_empty(socket)) {
645 			/* get cpumask for hts in the same core */
646 			cpumask_copy(core, topology_sibling_cpumask(cpu));
647 			cpumask_and(core, core, node);
649 			LASSERT(!cpumask_empty(core));
/* take whole cores at a time so HT siblings stay in one partition */
651 			for_each_cpu(i, core) {
652 				cpumask_clear_cpu(i, socket);
653 				cpumask_clear_cpu(i, node);
655 				rc = cfs_cpt_set_cpu(cptab, cpt, i);
664 			cpu = cpumask_first(socket);
670 		LIBCFS_FREE(socket, cpumask_size());
672 		LIBCFS_FREE(core, cpumask_size());
/* a partition is not worth having below this many CPUs */
676 #define CPT_WEIGHT_MIN  4u
/*
 * Estimate a reasonable default partition count from the number of
 * online CPUs and NUMA nodes (used when cpu_npartitions == 0).
 */
679 cfs_cpt_num_estimate(void)
681 	unsigned int nnode = num_online_nodes();
682 	unsigned int ncpu = num_online_cpus();
685 	if (ncpu <= CPT_WEIGHT_MIN) {
690 	/* generate reasonable number of CPU partitions based on total number
691 	 * of CPUs, Preferred N should be power2 and match this condition:
692 	 * 2 * (N - 1)^2 < NCPUS <= 2 * N^2
694 	for (ncpt = 2; ncpu > 2 * ncpt * ncpt; ncpt <<= 1)
697 	if (ncpt <= nnode) { /* fat numa system */
/* round down to a multiple relationship with the node count */
701 	} else { /* ncpt > nnode */
702 		while ((nnode << 1) <= ncpt)
709 #if (BITS_PER_LONG == 32)
710 	/* config many CPU partitions on 32-bit system could consume
713 	ncpt = min(2U, ncpt);
716 	ncpt--; /* worst case is 1 */
/*
 * Build a CPT table with @ncpt partitions (0 = use the estimate) by
 * spreading online CPUs node-by-node, preferring whole cores/sockets
 * per partition via cfs_cpt_choose_ncpus(). Requires the online CPU
 * count to be a multiple of ncpt. Returns NULL on failure.
 */
721 static struct cfs_cpt_table *
722 cfs_cpt_table_create(int ncpt)
724 	struct cfs_cpt_table *cptab = NULL;
725 	cpumask_t *mask = NULL;
731 	rc = cfs_cpt_num_estimate();
/* warn, but honor, a user-supplied ncpt far above the estimate */
735 	if (ncpt > num_online_cpus() || ncpt > 4 * rc) {
736 		CWARN("CPU partition number %d is larger than suggested value (%d), your system may have performance issue or run out of memory while under pressure\n",
740 	if (num_online_cpus() % ncpt) {
741 		CERROR("CPU number %d is not multiple of cpu_npartition %d, please try different cpu_npartitions value or set pattern string by cpu_pattern=STRING\n",
742 		       (int)num_online_cpus(), ncpt);
746 	cptab = cfs_cpt_table_alloc(ncpt);
748 		CERROR("Failed to allocate CPU map(%d)\n", ncpt);
/* target CPUs per partition; exact because of the modulo check above */
752 	num = num_online_cpus() / ncpt;
754 		CERROR("CPU changed while setting CPU partition\n");
758 	LIBCFS_ALLOC(mask, cpumask_size());
760 		CERROR("Failed to allocate scratch cpumask\n");
764 	for_each_online_node(i) {
765 		cfs_node_to_cpumask(i, mask);
767 		while (!cpumask_empty(mask)) {
768 			struct cfs_cpu_partition *part;
772 			 * Each emulated NUMA node has all allowed CPUs in
774 			 * End loop when all partitions have assigned CPUs.
779 			part = &cptab->ctb_parts[cpt];
/* top up the current partition to exactly @num CPUs */
781 			n = num - cpumask_weight(part->cpt_cpumask);
784 			rc = cfs_cpt_choose_ncpus(cptab, cpt, mask, n);
788 			LASSERT(num >= cpumask_weight(part->cpt_cpumask));
789 			if (num == cpumask_weight(part->cpt_cpumask))
/* sanity: last partition must be exactly full, else topology changed */
795 	    num != cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask)) {
796 		CERROR("Expect %d(%d) CPU partitions but got %d(%d), CPU hotplug/unplug while setting?\n",
797 		       cptab->ctb_nparts, num, cpt,
798 		       cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask));
802 	LIBCFS_FREE(mask, cpumask_size());
807 	CERROR("Failed to setup CPU-partition-table with %d CPU-partitions, online HW nodes: %d, HW cpus: %d.\n",
808 	       ncpt, num_online_nodes(), num_online_cpus());
811 	LIBCFS_FREE(mask, cpumask_size());
814 	cfs_cpt_table_free(cptab);
/*
 * Build a CPT table from the cpu_pattern module-parameter string.
 * Supported forms (see cpu_pattern's MODULE_PARM_DESC):
 *   "N"                       - one partition per online NUMA node
 *   "0[0,1] 1[2,3]"           - explicit CPU lists per partition
 *   "N 0[0,1] 1[2,3]"         - explicit NUMA-node lists per partition
 * Returns NULL on any parse error or invalid partition layout.
 */
819 static struct cfs_cpt_table *
820 cfs_cpt_table_create_pattern(char *pattern)
822 	struct cfs_cpt_table *cptab;
832 	str = cfs_trimwhite(pattern);
833 	if (*str == 'n' || *str == 'N') {
835 		if (*pattern != '\0') {
837 		} else { /* shortcut to create CPT from NUMA & CPU topology */
839 			ncpt = num_online_nodes();
843 	if (!ncpt) { /* scanning bracket which is mark of partition */
844 		for (str = pattern;; str++, ncpt++) {
845 			str = strchr(str, '[');
852 	    (node && ncpt > num_online_nodes()) ||
853 	    (!node && ncpt > num_online_cpus())) {
854 		CERROR("Invalid pattern %s, or too many partitions %d\n",
859 	cptab = cfs_cpt_table_alloc(ncpt);
861 		CERROR("Failed to allocate cpu partition table\n");
865 	if (node < 0) { /* shortcut to create CPT from NUMA & CPU topology */
868 		for_each_online_node(i) {
870 				CERROR("CPU changed while setting CPU partition table, %d/%d\n",
875 			rc = cfs_cpt_set_node(cptab, cpt++, i);
/* upper bound for IDs inside brackets: node IDs or CPU IDs */
882 	high = node ? MAX_NUMNODES - 1 : nr_cpu_ids - 1;
884 	for (str = cfs_trimwhite(pattern), c = 0;; c++) {
885 		struct cfs_range_expr *range;
886 		struct cfs_expr_list *el;
887 		char *bracket = strchr(str, '[');
892 			CERROR("Invalid pattern %s\n", str);
896 			CERROR("expect %d partitions but found %d\n",
/* each clause starts with a decimal partition ID before the bracket */
903 		if (sscanf(str, "%d%n", &cpt, &n) < 1) {
904 			CERROR("Invalid cpu pattern %s\n", str);
908 		if (cpt < 0 || cpt >= ncpt) {
909 			CERROR("Invalid partition id %d, total partitions %d\n",
/* a partition may appear in the pattern at most once */
914 		if (cfs_cpt_weight(cptab, cpt)) {
915 			CERROR("Partition %d has already been set.\n", cpt);
919 		str = cfs_trimwhite(str + n);
920 		if (str != bracket) {
921 			CERROR("Invalid pattern %s\n", str);
925 		bracket = strchr(str, ']');
927 			CERROR("missing right bracket for cpt %d, %s\n",
932 		if (cfs_expr_list_parse(str, (bracket - str) + 1,
934 			CERROR("Can't parse number range: %s\n", str);
938 		list_for_each_entry(range, &el->el_exprs, re_link) {
939 			for (i = range->re_lo; i <= range->re_hi; i++) {
940 				if ((i - range->re_lo) % range->re_stride)
943 				rc = node ? cfs_cpt_set_node(cptab, cpt, i) :
944 					    cfs_cpt_set_cpu(cptab, cpt, i);
946 					cfs_expr_list_free(el);
952 		cfs_expr_list_free(el);
954 		if (!cfs_cpt_online(cptab, cpt)) {
955 			CERROR("No online CPU is found on partition %d\n", cpt);
/* continue after the closing bracket of this clause */
959 		str = cfs_trimwhite(bracket + 1);
965 	cfs_cpt_table_free(cptab);
969 #ifdef CONFIG_HOTPLUG_CPU
/* dynamic hotplug state returned by cpuhp_setup_state_nocalls() */
970 static enum cpuhp_state lustre_cpu_online;
/*
 * Bump the global CPT generation counter so existing tables can detect
 * that CPU topology changed under them.
 */
972 static void cfs_cpu_incr_cpt_version(void)
974 	spin_lock(&cpt_data.cpt_lock);
975 	cpt_data.cpt_version++;
976 	spin_unlock(&cpt_data.cpt_lock);
/* CPU-hotplug online callback: just invalidate existing CPT tables. */
979 static int cfs_cpu_online(unsigned int cpu)
981 	cfs_cpu_incr_cpt_version();
/*
 * CPU-hotplug dead callback: invalidate CPT tables and warn loudly if
 * the departing CPU took the last online HT of its core with it, since
 * that can break existing affinity settings.
 */
985 static int cfs_cpu_dead(unsigned int cpu)
989 	cfs_cpu_incr_cpt_version();
991 	mutex_lock(&cpt_data.cpt_mutex);
992 	/* if all HTs in a core are offline, it may break affinity */
993 	cpumask_copy(cpt_data.cpt_cpumask, topology_sibling_cpumask(cpu));
994 	warn = cpumask_any_and(cpt_data.cpt_cpumask,
995 			       cpu_online_mask) >= nr_cpu_ids;
996 	mutex_unlock(&cpt_data.cpt_mutex);
997 	CDEBUG(warn ? D_WARNING : D_INFO,
998 	       "Lustre: can't support CPU plug-out well now, performance and stability could be impacted [CPU %u]\n",
/*
 * Module teardown (enclosing function header is outside this chunk):
 * free the global table, unregister hotplug callbacks, release the
 * scratch cpumask.
 */
1008 	cfs_cpt_table_free(cfs_cpt_table);
1010 #ifdef CONFIG_HOTPLUG_CPU
/* lustre_cpu_online > 0 only if the dynamic online state was registered */
1011 	if (lustre_cpu_online > 0)
1012 		cpuhp_remove_state_nocalls(lustre_cpu_online);
1013 	cpuhp_remove_state_nocalls(CPUHP_LUSTRE_CFS_DEAD);
1015 	if (cpt_data.cpt_cpumask)
1016 		LIBCFS_FREE(cpt_data.cpt_cpumask, cpumask_size());
/*
 * Module init (enclosing function header is outside this chunk; the
 * tail also runs past the end of the visible lines): set up cpt_data,
 * register hotplug callbacks, then build the global cfs_cpt_table from
 * cpu_pattern if given, otherwise from cpu_npartitions.
 */
1024 	LASSERT(!cfs_cpt_table);
1026 	memset(&cpt_data, 0, sizeof(cpt_data));
1028 	LIBCFS_ALLOC(cpt_data.cpt_cpumask, cpumask_size());
1029 	if (!cpt_data.cpt_cpumask) {
1030 		CERROR("Failed to allocate scratch buffer\n");
1034 	spin_lock_init(&cpt_data.cpt_lock);
1035 	mutex_init(&cpt_data.cpt_mutex);
1037 #ifdef CONFIG_HOTPLUG_CPU
1038 	ret = cpuhp_setup_state_nocalls(CPUHP_LUSTRE_CFS_DEAD,
1039 					"staging/lustre/cfe:dead", NULL,
1043 	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
1044 					"staging/lustre/cfe:online",
1045 					cfs_cpu_online, NULL);
/* remember the dynamic state number so cleanup can unregister it */
1048 	lustre_cpu_online = ret;
1053 		cfs_cpt_table = cfs_cpt_table_create_pattern(cpu_pattern);
1054 		if (!cfs_cpt_table) {
1055 			CERROR("Failed to create cptab from pattern %s\n",
1061 		cfs_cpt_table = cfs_cpt_table_create(cpu_npartitions);
1062 		if (!cfs_cpt_table) {
1063 			CERROR("Failed to create ptable with npartitions %d\n",
/* reject the table if a hotplug event raced with its construction */
1069 	spin_lock(&cpt_data.cpt_lock);
1070 	if (cfs_cpt_table->ctb_version != cpt_data.cpt_version) {
1071 		spin_unlock(&cpt_data.cpt_lock);
1072 		CERROR("CPU hotplug/unplug during setup\n");
1075 	spin_unlock(&cpt_data.cpt_lock);
1077 	LCONSOLE(0, "HW CPU cores: %d, npartitions: %d\n",
1078 		 num_online_cpus(), cfs_cpt_number(cfs_cpt_table));