2 Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
3 This file is part of GlusterFS.
5 This file is licensed to you under your choice of the GNU Lesser
6 General Public License, version 3 or any later version (LGPLv3 or
7 later), or the GNU General Public License, version 2 (GPLv2), in all
8 cases as published by the Free Software Foundation.
12 /* TODO: add NS locking */
13 #include "statedump.h"
14 #include "dht-common.h"
15 #include "dht-messages.h"
/* Textual max of two values.  NOTE(review): both arguments are evaluated
 * more than once -- do not pass expressions with side effects (e.g.
 * MAX(i++, j)). */
18 #define MAX(a, b) (((a) > (b))?(a):(b))
/* Translate the "rebal-throttle" option string (conf->dthrottle) into the
 * number of parallel rebalance migrator threads
 * (conf->defrag->recon_thread_count), under defrag->dfq_mutex:
 *
 *   "lazy"       -> 1 thread
 *   "normal"     -> throttle_count / 2
 *   "aggressive" -> assumed to use the full throttle_count; the RHS of
 *                   that assignment is not visible here -- TODO confirm
 *
 * throttle_count itself appears to be derived from
 * MAX(nprocessors_online - 4, 4); the assignment target on that line is
 * elided in this view -- TODO confirm.  Comparisons are case-insensitive
 * (strcasecmp), matching how the option value is validated elsewhere. */
21 #define GF_DECIDE_DEFRAG_THROTTLE_COUNT(throttle_count, conf) { \
23 pthread_mutex_lock (&conf->defrag->dfq_mutex); \
25 if (!strcasecmp (conf->dthrottle, "lazy")) \
26 conf->defrag->recon_thread_count = 1; \
29 MAX ((sysconf(_SC_NPROCESSORS_ONLN) - 4), 4); \
31 if (!strcasecmp (conf->dthrottle, "normal")) \
32 conf->defrag->recon_thread_count = \
33 (throttle_count / 2); \
35 if (!strcasecmp (conf->dthrottle, "aggressive")) \
36 conf->defrag->recon_thread_count = \
39 pthread_mutex_unlock (&conf->defrag->dfq_mutex); \
43 - use volumename in xattr instead of "dht"
45 - handle all cases in self heal layout reconstruction
46 - complete linkfile selfheal
48 struct volume_options options[];
50 extern dht_methods_t dht_methods;
/* Emit one dht layout into the process statedump, prefixing every key
 * with 'prefix'.  Dumps the layout-wide fields (cnt, preset, gen, inode
 * type) and then, for directories, the per-subvolume hash-range list. */
53 dht_layout_dump (dht_layout_t *layout, const char *prefix)
56 char key[GF_DUMP_MAX_BUF_LEN];
64 gf_proc_dump_build_key(key, prefix, "cnt");
65 gf_proc_dump_write(key, "%d", layout->cnt);
66 gf_proc_dump_build_key(key, prefix, "preset");
67 gf_proc_dump_write(key, "%d", layout->preset);
68 gf_proc_dump_build_key(key, prefix, "gen");
69 gf_proc_dump_write(key, "%d", layout->gen);
/* Inode type is only meaningful once the layout has been resolved. */
70 if (layout->type != IA_INVAL) {
71 gf_proc_dump_build_key(key, prefix, "inode type");
72 gf_proc_dump_write(key, "%d", layout->type);
/* Only directories carry a per-subvolume range list; skip the member
 * dump for everything else. */
75 if (!IA_ISDIR (layout->type))
78 for (i = 0; i < layout->cnt; i++) {
79 gf_proc_dump_build_key(key, prefix,"list[%d].err", i);
80 gf_proc_dump_write(key, "%d", layout->list[i].err);
81 gf_proc_dump_build_key(key, prefix,"list[%d].start", i);
82 gf_proc_dump_write(key, "%u", layout->list[i].start);
83 gf_proc_dump_build_key(key, prefix,"list[%d].stop", i);
84 gf_proc_dump_write(key, "%u", layout->list[i].stop);
/* The xlator pointer can be unset for failed/incomplete members. */
85 if (layout->list[i].xlator) {
86 gf_proc_dump_build_key(key, prefix,
87 "list[%d].xlator.type", i);
88 gf_proc_dump_write(key, "%s",
89 layout->list[i].xlator->type);
90 gf_proc_dump_build_key(key, prefix,
91 "list[%d].xlator.name", i);
92 gf_proc_dump_write(key, "%s",
93 layout->list[i].xlator->name);
/* Statedump callback for the DHT xlator's private data: dumps the
 * subvolume list, cached file/dir layouts, per-subvolume status, the
 * tunable option values, and the latest statvfs (du) statistics.
 * Takes conf->subvolume_lock with TRY_LOCK, so the dump is skipped
 * rather than blocking if the lock is contended (error path elided in
 * this view -- TODO confirm). */
103 dht_priv_dump (xlator_t *this)
105 char key_prefix[GF_DUMP_MAX_BUF_LEN];
106 char key[GF_DUMP_MAX_BUF_LEN];
108 dht_conf_t *conf = NULL;
114 conf = this->private;
118 ret = TRY_LOCK(&conf->subvolume_lock);
123 gf_proc_dump_add_section("xlator.cluster.dht.%s.priv", this->name);
124 gf_proc_dump_build_key(key_prefix,"xlator.cluster.dht","%s.priv",
126 gf_proc_dump_write("subvol_cnt","%d", conf->subvolume_cnt);
/* Per-subvolume: name, cached layouts and status flag. */
127 for (i = 0; i < conf->subvolume_cnt; i++) {
128 snprintf (key, sizeof (key), "subvolumes[%d]", i);
129 gf_proc_dump_write(key, "%s.%s", conf->subvolumes[i]->type,
130 conf->subvolumes[i]->name);
131 if (conf->file_layouts && conf->file_layouts[i]){
132 snprintf (key, sizeof (key), "file_layouts[%d]", i);
133 dht_layout_dump(conf->file_layouts[i], key);
135 if (conf->dir_layouts && conf->dir_layouts[i]) {
136 snprintf (key, sizeof (key), "dir_layouts[%d]", i);
137 dht_layout_dump(conf->dir_layouts[i], key);
139 if (conf->subvolume_status) {
141 snprintf (key, sizeof (key), "subvolume_status[%d]", i);
142 gf_proc_dump_write(key, "%d",
143 (int)conf->subvolume_status[i]);
/* Current option/tunable values. */
148 gf_proc_dump_write("search_unhashed", "%d", conf->search_unhashed);
149 gf_proc_dump_write("gen", "%d", conf->gen);
150 gf_proc_dump_write("min_free_disk", "%lf", conf->min_free_disk);
151 gf_proc_dump_write("min_free_inodes", "%lf", conf->min_free_inodes);
152 gf_proc_dump_write("disk_unit", "%c", conf->disk_unit);
153 gf_proc_dump_write("refresh_interval", "%d", conf->refresh_interval);
154 gf_proc_dump_write("unhashed_sticky_bit", "%d", conf->unhashed_sticky_bit);
155 gf_proc_dump_write("use-readdirp", "%d", conf->use_readdirp);
/* Disk-usage statistics, only for subvolumes currently marked up. */
157 if (conf->du_stats && conf->subvolume_status) {
158 for (i = 0; i < conf->subvolume_cnt; i++) {
159 if (!conf->subvolume_status[i])
162 snprintf (key, sizeof (key), "subvolumes[%d]", i);
163 gf_proc_dump_write (key, "%s",
164 conf->subvolumes[i]->name);
166 snprintf (key, sizeof (key),
167 "du_stats[%d].avail_percent", i);
168 gf_proc_dump_write (key, "%lf",
169 conf->du_stats[i].avail_percent);
171 snprintf (key, sizeof (key), "du_stats[%d].avail_space",
173 gf_proc_dump_write (key, "%lu",
174 conf->du_stats[i].avail_space);
176 snprintf (key, sizeof (key),
177 "du_stats[%d].avail_inodes", i);
178 gf_proc_dump_write (key, "%lf",
179 conf->du_stats[i].avail_inodes);
181 snprintf (key, sizeof (key), "du_stats[%d].log", i);
182 gf_proc_dump_write (key, "%lu",
183 conf->du_stats[i].log);
/* Timestamp of the last statvfs refresh, if one has happened. */
187 if (conf->last_stat_fetch.tv_sec)
188 gf_proc_dump_write("last_stat_fetch", "%s",
189 ctime(&conf->last_stat_fetch.tv_sec));
191 UNLOCK(&conf->subvolume_lock);
/* Statedump callback for per-inode DHT context: fetch the layout cached
 * in the inode ctx and dump it; silently do nothing if none is set. */
198 dht_inodectx_dump (xlator_t *this, inode_t *inode)
201 dht_layout_t *layout = NULL;
208 ret = dht_inode_ctx_layout_get (inode, this, &layout);
/* No layout cached for this inode -> nothing to dump. */
210 if ((ret != 0) || !layout)
213 gf_proc_dump_add_section("xlator.cluster.dht.%s.inode", this->name);
214 dht_layout_dump(layout, "layout");
/* Xlator teardown: detach and free the DHT private configuration --
 * cached file layouts, the leaf-to-subvol dict, the subvolume arrays and
 * the lock mem-pool.  (Additional frees may exist in lines elided from
 * this view -- TODO confirm conf itself is freed at the end.) */
221 dht_fini (xlator_t *this)
224 dht_conf_t *conf = NULL;
226 GF_VALIDATE_OR_GOTO ("dht", this, out);
/* Detach first so no other path can see a half-freed conf. */
228 conf = this->private;
229 this->private = NULL;
231 if (conf->file_layouts) {
232 for (i = 0; i < conf->subvolume_cnt; i++) {
233 GF_FREE (conf->file_layouts[i]);
235 GF_FREE (conf->file_layouts);
238 dict_destroy(conf->leaf_to_subvol);
240 GF_FREE (conf->subvolumes);
242 GF_FREE (conf->subvolume_status);
245 mem_pool_destroy (conf->lock_pool);
/* Register this xlator's memory-accounting types (gf_dht_mt_*) with the
 * core; logs an error if registration fails. */
254 mem_acct_init (xlator_t *this)
258 GF_VALIDATE_OR_GOTO ("dht", this, out);
260 ret = xlator_mem_acct_init (this, gf_dht_mt_end + 1);
263 gf_msg (this->name, GF_LOG_ERROR, 0,
265 "Memory accounting init failed");
/* Parse the comma-separated "decommissioned-bricks" option value and
 * mark each named subvolume as decommissioned in
 * conf->decommissioned_bricks[], bumping decommission_subvols_cnt.
 * A name that matches no subvolume is treated as an error ("wrong node
 * given"; the error path itself is elided in this view).  On any match
 * having been recorded, decommission_in_progress is set. */
274 dht_parse_decommissioned_bricks (xlator_t *this, dht_conf_t *conf,
280 char *dup_brick = NULL;
283 if (!conf || !bricks)
/* strtok_r mutates its input, so work on a duplicate of the option
 * string. */
286 dup_brick = gf_strdup (bricks);
287 node = strtok_r (dup_brick, ",", &tmpstr);
289 for (i = 0; i < conf->subvolume_cnt; i++) {
290 if (!strcmp (conf->subvolumes[i]->name, node)) {
291 conf->decommissioned_bricks[i] =
293 conf->decommission_subvols_cnt++;
294 gf_msg (this->name, GF_LOG_INFO, 0,
295 DHT_MSG_SUBVOL_DECOMMISSION_INFO,
296 "decommissioning subvolume %s",
297 conf->subvolumes[i]->name);
/* Loop ran to completion without a match: unknown brick name. */
301 if (i == conf->subvolume_cnt) {
302 /* Wrong node given. */
305 node = strtok_r (NULL, ",", &tmpstr);
309 conf->decommission_in_progress = 1;
/* Undo dht_parse_decommissioned_bricks: clear every decommissioned-brick
 * slot and decrement the decommissioned-subvolume count accordingly. */
317 dht_decommissioned_remove (xlator_t *this, dht_conf_t *conf)
325 for (i = 0; i < conf->subvolume_cnt; i++) {
326 if (conf->decommissioned_bricks[i]) {
327 conf->decommissioned_bricks[i] = NULL;
328 conf->decommission_subvols_cnt--;
/* Compile the regex option 'name' from 'odict' into *re and record in
 * *re_valid whether a usable compiled regex exists.
 *
 * Special cases visible here:
 *  - option absent: only "rsync-hash-regex" gets a built-in default
 *    ("^\.(.+)\.[^.]+$", i.e. rsync temp-file names); other names are
 *    left invalid.
 *  - value "none": explicitly disables the regex (handling elided).
 * regcomp failure logs a warning and presumably leaves *re_valid false
 * -- TODO confirm against the elided lines. */
338 dht_init_regex (xlator_t *this, dict_t *odict, char *name,
339 regex_t *re, gf_boolean_t *re_valid)
343 if (dict_get_str (odict, name, &temp_str) != 0) {
344 if (strcmp(name,"rsync-hash-regex")) {
347 temp_str = "^\\.(.+)\\.[^.]+$";
352 *re_valid = _gf_false;
355 if (!strcmp(temp_str,"none")) {
359 if (regcomp(re,temp_str,REG_EXTENDED) == 0) {
360 gf_msg_debug (this->name, 0,
361 "using regex %s = %s", name, temp_str);
362 *re_valid = _gf_true;
365 gf_msg (this->name, GF_LOG_WARNING, 0,
367 "compiling regex %s failed", temp_str);
/* Build conf->leaf_to_subvol: a dict mapping reachable leaf xlators for
 * this graph, populated by glusterfs_reachable_leaves().  Returns via
 * 'ret' (error paths elided in this view). */
372 dht_set_subvol_range(xlator_t *this)
375 dht_conf_t *conf = NULL;
377 conf = this->private;
382 conf->leaf_to_subvol = dict_new();
383 if (!conf->leaf_to_subvol)
386 ret = glusterfs_reachable_leaves(this, conf->leaf_to_subvol);
/* Runtime option-change handler (xlator reconfigure entry point).
 * Re-reads each reconfigurable DHT option from 'options' and updates
 * conf in place; invalid values are logged and the previous value kept
 * where visible below. */
393 dht_reconfigure (xlator_t *this, dict_t *options)
395 dht_conf_t *conf = NULL;
396 char *temp_str = NULL;
397 gf_boolean_t search_unhashed;
399 int throttle_count = 0;
401 GF_VALIDATE_OR_GOTO ("dht", this, out);
402 GF_VALIDATE_OR_GOTO ("dht", options, out);
404 conf = this->private;
/* "lookup-unhashed" accepts "auto" or any boolean spelling; a bad
 * boolean is logged and the existing setting retained. */
408 if (dict_get_str (options, "lookup-unhashed", &temp_str) == 0) {
409 /* If option is not "auto", other options _should_ be boolean*/
410 if (strcasecmp (temp_str, "auto")) {
411 if (!gf_string2boolean (temp_str, &search_unhashed)) {
412 gf_msg_debug(this->name, 0, "Reconfigure: "
413 "lookup-unhashed reconfigured(%s)",
415 conf->search_unhashed = search_unhashed;
417 gf_msg(this->name, GF_LOG_ERROR, 0,
418 DHT_MSG_INVALID_OPTION,
419 "Invalid option: Reconfigure: "
420 "lookup-unhashed should be boolean,"
421 " not (%s), defaulting to (%d)",
422 temp_str, conf->search_unhashed);
427 gf_msg_debug(this->name, 0, "Reconfigure:"
428 " lookup-unhashed reconfigured auto ");
429 conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
433 GF_OPTION_RECONF ("lookup-optimize", conf->lookup_optimize, options,
436 GF_OPTION_RECONF ("min-free-disk", conf->min_free_disk, options,
437 percent_or_size, out);
438 /* option can be any one of percent or bytes */
/* Values below 100 are interpreted as a percentage ('p'); the
 * byte-unit branch is elided in this view. */
440 if (conf->min_free_disk < 100.0)
441 conf->disk_unit = 'p';
443 GF_OPTION_RECONF ("min-free-inodes", conf->min_free_inodes, options,
446 GF_OPTION_RECONF ("directory-layout-spread", conf->dir_spread_cnt,
447 options, uint32, out);
449 GF_OPTION_RECONF ("readdir-optimize", conf->readdir_optimize, options,
451 GF_OPTION_RECONF ("randomize-hash-range-by-gfid",
452 conf->randomize_by_gfid,
455 GF_OPTION_RECONF ("rebal-throttle", conf->dthrottle, options,
/* Recompute the migrator thread count from the (possibly changed)
 * throttle setting. */
459 GF_DECIDE_DEFRAG_THROTTLE_COUNT (throttle_count, conf);
460 gf_msg ("DHT", GF_LOG_INFO, 0,
461 DHT_MSG_REBAL_THROTTLE_INFO,
462 "conf->dthrottle: %s, "
463 "conf->defrag->recon_thread_count: %d",
464 conf->dthrottle, conf->defrag->recon_thread_count);
468 GF_OPTION_RECONF ("rebalance-stats", conf->defrag->stats,
/* Presence of "decommissioned-bricks" re-parses the list; absence
 * clears any previous decommission marks. */
472 if (dict_get_str (options, "decommissioned-bricks", &temp_str) == 0) {
473 ret = dht_parse_decommissioned_bricks (this, conf, temp_str);
477 ret = dht_decommissioned_remove (this, conf);
482 dht_init_regex (this, options, "rsync-hash-regex",
483 &conf->rsync_regex, &conf->rsync_regex_valid);
484 dht_init_regex (this, options, "extra-hash-regex",
485 &conf->extra_regex, &conf->extra_regex_valid);
487 GF_OPTION_RECONF ("weighted-rebalance", conf->do_weighting, options,
490 GF_OPTION_RECONF ("use-readdirp", conf->use_readdirp, options,
/* Parse the "rebalance-filter" option value 'data' -- a comma-separated
 * list of "pattern:<optional-size>" entries (e.g. "*avi, *pdf:10MB,
 * *:1TB") -- into a linked list of gf_defrag_pattern_list_t prepended
 * onto defrag->defrag_pattern.  NOTE(review): 'data' is mutated by
 * strtok_r; each entry is duplicated before the second-level split. */
498 gf_defrag_pattern_list_fill (xlator_t *this, gf_defrag_info_t *defrag, char *data)
501 char *tmp_str = NULL;
502 char *tmp_str1 = NULL;
503 char *dup_str = NULL;
505 char *pattern_str = NULL;
506 char *pattern = NULL;
507 gf_defrag_pattern_list_t *temp_list = NULL;
508 gf_defrag_pattern_list_t *pattern_list = NULL;
510 if (!this || !defrag || !data)
513 /* Get the pattern for pattern list. "pattern:<optional-size>"
514 * eg: *avi, *pdf:10MB, *:1TB
516 pattern_str = strtok_r (data, ",", &tmp_str);
517 while (pattern_str) {
518 dup_str = gf_strdup (pattern_str);
519 pattern_list = GF_CALLOC (1, sizeof (gf_defrag_pattern_list_t),
/* Split "pattern[:size]"; 'num' is NULL when no size was given. */
524 pattern = strtok_r (dup_str, ":", &tmp_str1);
525 num = strtok_r (NULL, ":", &tmp_str1);
529 if (gf_string2bytesize_uint64(pattern, &pattern_list->size)
533 } else if (gf_string2bytesize_uint64 (num, &pattern_list->size) != 0) {
534 gf_msg (this->name, GF_LOG_ERROR, 0,
535 DHT_MSG_INVALID_OPTION,
536 "Invalid option. Defrag pattern:"
537 " Invalid number format \"%s\"", num);
/* After the ':' split, strlen(dup_str) equals the length of the
 * first token, so this copies exactly the pattern portion. */
540 memcpy (pattern_list->path_pattern, pattern, strlen (dup_str));
/* Prepend the new entry to the existing pattern list. */
542 if (!defrag->defrag_pattern)
545 temp_list = defrag->defrag_pattern;
547 pattern_list->next = temp_list;
549 defrag->defrag_pattern = pattern_list;
555 pattern_str = strtok_r (NULL, ",", &tmp_str);
/* Error path: free the partially-built node (earlier cleanup lines
 * are elided in this view). */
561 GF_FREE (pattern_list);
/* Xlator init entry point for DHT: validate the graph (children/parents),
 * allocate and populate dht_conf_t from this->options, set up the
 * rebalance (defrag) context when running as the rebalance process,
 * compile regex options, initialise subvolume/layout state and locks,
 * create mem-pools, and finally publish conf via this->private.  The
 * trailing lines are the 'err' cleanup path that unwinds partial
 * allocations. */
568 dht_init (xlator_t *this)
570 dht_conf_t *conf = NULL;
571 char *temp_str = NULL;
574 gf_defrag_info_t *defrag = NULL;
576 char *node_uuid = NULL;
577 int throttle_count = 0;
578 uint32_t commit_hash = 0;
580 GF_VALIDATE_OR_GOTO ("dht", this, err);
/* DHT is pointless without subvolumes to distribute across. */
582 if (!this->children) {
583 gf_msg (this->name, GF_LOG_CRITICAL, 0,
584 DHT_MSG_INVALID_CONFIGURATION,
585 "Distribute needs more than one subvolume");
589 if (!this->parents) {
590 gf_msg (this->name, GF_LOG_WARNING, 0,
591 DHT_MSG_INVALID_CONFIGURATION,
592 "dangling volume. check volfile");
595 conf = GF_CALLOC (1, sizeof (*conf), gf_dht_mt_dht_conf_t);
600 /* We get the commit-hash to set only for rebalance process */
601 if (dict_get_uint32 (this->options,
602 "commit-hash", &commit_hash) == 0) {
603 gf_msg (this->name, GF_LOG_INFO, 0,
604 DHT_MSG_COMMIT_HASH_INFO, "%s using commit hash %u",
605 __func__, commit_hash);
606 conf->vol_commit_hash = commit_hash;
607 conf->vch_forced = _gf_true;
/* "rebalance-cmd" is only present for the rebalance daemon; the
 * defrag context below is built in that case (the guarding branch
 * is elided in this view -- TODO confirm). */
610 ret = dict_get_int32 (this->options, "rebalance-cmd", &cmd);
613 defrag = GF_CALLOC (1, sizeof (gf_defrag_info_t),
616 GF_VALIDATE_OR_GOTO (this->name, defrag, err);
618 LOCK_INIT (&defrag->lock);
620 defrag->is_exiting = 0;
622 conf->defrag = defrag;
/* Rebalance needs this node's glusterd uuid to decide file ownership. */
624 ret = dict_get_str (this->options, "node-uuid", &node_uuid);
626 gf_msg (this->name, GF_LOG_ERROR, 0,
627 DHT_MSG_INVALID_CONFIGURATION,
628 "Invalid volume configuration: "
629 "node-uuid not specified");
633 if (gf_uuid_parse (node_uuid, defrag->node_uuid)) {
634 gf_msg (this->name, GF_LOG_ERROR, 0,
635 DHT_MSG_INVALID_OPTION, "Invalid option:"
636 " Cannot parse glusterd node uuid");
/* Zero the defrag bookkeeping and set up its synchronisation
 * primitives (queue mutex plus migration/crawler/wakeup condvars). */
642 defrag->stats = _gf_false;
644 defrag->queue = NULL;
646 defrag->crawl_done = 0;
648 defrag->global_error = 0;
650 defrag->q_entry_count = 0;
652 defrag->wakeup_crawler = 0;
654 synclock_init (&defrag->link_lock, SYNC_LOCK_DEFAULT);
655 pthread_mutex_init (&defrag->dfq_mutex, 0);
656 pthread_cond_init (&defrag->parallel_migration_cond, 0);
657 pthread_cond_init (&defrag->rebalance_crawler_alarm, 0);
658 pthread_cond_init (&defrag->df_wakeup_thread, 0);
660 defrag->global_error = 0;
/* Option parsing: same "auto"/boolean handling as dht_reconfigure,
 * defaulting lookup-unhashed to ON. */
664 conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_ON;
665 if (dict_get_str (this->options, "lookup-unhashed", &temp_str) == 0) {
666 /* If option is not "auto", other options _should_ be boolean */
667 if (strcasecmp (temp_str, "auto")) {
668 ret = gf_string2boolean (temp_str,
669 &conf->search_unhashed);
674 conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
677 GF_OPTION_INIT ("lookup-optimize", conf->lookup_optimize, bool, err);
679 GF_OPTION_INIT ("unhashed-sticky-bit", conf->unhashed_sticky_bit, bool,
682 GF_OPTION_INIT ("use-readdirp", conf->use_readdirp, bool, err);
684 GF_OPTION_INIT ("min-free-disk", conf->min_free_disk, percent_or_size,
687 GF_OPTION_INIT ("min-free-inodes", conf->min_free_inodes, percent,
/* Directory layout spread defaults to all subvolumes. */
690 conf->dir_spread_cnt = conf->subvolume_cnt;
691 GF_OPTION_INIT ("directory-layout-spread", conf->dir_spread_cnt,
694 GF_OPTION_INIT ("assert-no-child-down", conf->assert_no_child_down,
697 GF_OPTION_INIT ("readdir-optimize", conf->readdir_optimize, bool, err);
700 GF_OPTION_INIT ("rebalance-stats", defrag->stats, bool, err);
701 if (dict_get_str (this->options, "rebalance-filter", &temp_str)
703 if (gf_defrag_pattern_list_fill (this, defrag, temp_str)
705 gf_msg (this->name, GF_LOG_ERROR, 0,
706 DHT_MSG_INVALID_OPTION,
708 " Cannot parse rebalance-filter (%s)",
716 /* option can be any one of percent or bytes */
718 if (conf->min_free_disk < 100)
719 conf->disk_unit = 'p';
721 ret = dht_init_subvolumes (this, conf);
727 ret = dht_init_local_subvolumes (this, conf);
729 gf_msg (this->name, GF_LOG_ERROR, 0,
730 DHT_MSG_INIT_LOCAL_SUBVOL_FAILED,
731 "dht_init_local_subvolumes failed");
736 if (dict_get_str (this->options, "decommissioned-bricks", &temp_str) == 0) {
737 ret = dht_parse_decommissioned_bricks (this, conf, temp_str);
742 dht_init_regex (this, this->options, "rsync-hash-regex",
743 &conf->rsync_regex, &conf->rsync_regex_valid);
744 dht_init_regex (this, this->options, "extra-hash-regex",
745 &conf->extra_regex, &conf->extra_regex_valid);
747 ret = dht_layouts_init (this, conf);
752 LOCK_INIT (&conf->subvolume_lock);
753 LOCK_INIT (&conf->layout_lock);
757 this->local_pool = mem_pool_new (dht_local_t, 512);
758 if (!this->local_pool) {
759 gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
761 " DHT initialisation failed. "
762 "failed to create local_t's memory pool");
766 GF_OPTION_INIT ("randomize-hash-range-by-gfid",
767 conf->randomize_by_gfid, bool, err);
770 GF_OPTION_INIT ("rebal-throttle",
771 conf->dthrottle, str, err);
773 GF_DECIDE_DEFRAG_THROTTLE_COUNT(throttle_count, conf);
775 gf_msg_debug ("DHT", 0, "conf->dthrottle: %s, "
776 "conf->defrag->recon_thread_count: %d",
778 conf->defrag->recon_thread_count);
/* Derive the link/commithash/wildcard xattr names from the (possibly
 * customised) base xattr name. */
781 GF_OPTION_INIT ("xattr-name", conf->xattr_name, str, err);
782 gf_asprintf (&conf->link_xattr_name, "%s."DHT_LINKFILE_STR,
784 gf_asprintf (&conf->commithash_xattr_name, "%s."DHT_COMMITHASH_STR,
786 gf_asprintf (&conf->wild_xattr_name, "%s*", conf->xattr_name);
787 if (!conf->link_xattr_name || !conf->wild_xattr_name) {
791 GF_OPTION_INIT ("weighted-rebalance", conf->do_weighting, bool, err);
793 conf->lock_pool = mem_pool_new (dht_lock_t, 512);
794 if (!conf->lock_pool) {
795 gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_INIT_FAILED,
796 "failed to create lock mem_pool, failing "
/* Success path: publish conf and pick the method table. */
801 this->private = conf;
803 if (dht_set_subvol_range(this))
806 conf->methods = &dht_methods;
/* err: unwind everything allocated above. */
812 if (conf->file_layouts) {
813 for (i = 0; i < conf->subvolume_cnt; i++) {
814 GF_FREE (conf->file_layouts[i]);
816 GF_FREE (conf->file_layouts);
819 GF_FREE (conf->subvolumes);
821 GF_FREE (conf->subvolume_status);
823 GF_FREE (conf->du_stats);
825 GF_FREE (conf->defrag);
827 GF_FREE (conf->xattr_name);
828 GF_FREE (conf->link_xattr_name);
829 GF_FREE (conf->wild_xattr_name);
832 mem_pool_destroy (conf->lock_pool);
841 struct volume_options options[] = {
842 { .key = {"lookup-unhashed"},
843 .value = {"auto", "yes", "no", "enable", "disable", "1", "0",
845 .type = GF_OPTION_TYPE_STR,
846 .default_value = "on",
847 .description = "This option if set to ON, does a lookup through "
848 "all the sub-volumes, in case a lookup didn't return any result "
849 "from the hash subvolume. If set to OFF, it does not do a lookup "
850 "on the remaining subvolumes."
852 { .key = {"lookup-optimize"},
853 .type = GF_OPTION_TYPE_BOOL,
854 .default_value = "off",
855 .description = "This option if set to ON enables the optimization "
856 "of -ve lookups, by not doing a lookup on non-hashed subvolumes for "
857 "files, in case the hashed subvolume does not return any result. "
858 "This option disregards the lookup-unhashed setting, when enabled."
860 { .key = {"min-free-disk"},
861 .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
862 .default_value = "10%",
863 .description = "Percentage/Size of disk space, after which the "
864 "process starts balancing out the cluster, and logs will appear "
867 { .key = {"min-free-inodes"},
868 .type = GF_OPTION_TYPE_PERCENT,
869 .default_value = "5%",
870 .description = "after system has only N% of inodes, warnings "
871 "starts to appear in log files",
873 { .key = {"unhashed-sticky-bit"},
874 .type = GF_OPTION_TYPE_BOOL,
875 .default_value = "off",
877 { .key = {"use-readdirp"},
878 .type = GF_OPTION_TYPE_BOOL,
879 .default_value = "on",
880 .description = "This option if set to ON, forces the use of "
881 "readdirp, and hence also displays the stats of the files."
883 { .key = {"assert-no-child-down"},
884 .type = GF_OPTION_TYPE_BOOL,
885 .default_value = "off",
886 .description = "This option if set to ON, in the event of "
887 "CHILD_DOWN, will call exit."
889 { .key = {"directory-layout-spread"},
890 .type = GF_OPTION_TYPE_INT,
892 .validate = GF_OPT_VALIDATE_MIN,
893 .description = "Specifies the directory layout spread. Takes number "
894 "of subvolumes as default value."
896 { .key = {"decommissioned-bricks"},
897 .type = GF_OPTION_TYPE_ANY,
898 .description = "This option if set to ON, decommissions "
899 "the brick, so that no new data is allowed to be created "
902 { .key = {"rebalance-cmd"},
903 .type = GF_OPTION_TYPE_INT,
905 { .key = {"commit-hash"},
906 .type = GF_OPTION_TYPE_INT,
908 { .key = {"node-uuid"},
909 .type = GF_OPTION_TYPE_STR,
911 { .key = {"rebalance-stats"},
912 .type = GF_OPTION_TYPE_BOOL,
913 .default_value = "off",
914 .description = "This option if set to ON displays and logs the "
915 " time taken for migration of each file, during the rebalance "
916 "process. If set to OFF, the rebalance logs will only display the "
917 "time spent in each directory."
919 { .key = {"readdir-optimize"},
920 .type = GF_OPTION_TYPE_BOOL,
921 .default_value = "off",
922 .description = "This option if set to ON enables the optimization "
923 "that allows DHT to requests non-first subvolumes to filter out "
926 { .key = {"rsync-hash-regex"},
927 .type = GF_OPTION_TYPE_STR,
928 /* Setting a default here doesn't work. See dht_init_regex. */
929 .description = "Regular expression for stripping temporary-file "
930 "suffix and prefix used by rsync, to prevent relocation when the "
933 { .key = {"extra-hash-regex"},
934 .type = GF_OPTION_TYPE_STR,
935 /* Setting a default here doesn't work. See dht_init_regex. */
936 .description = "Regular expression for stripping temporary-file "
937 "suffix and prefix used by an application, to prevent relocation when "
938 "the file is renamed."
940 { .key = {"rebalance-filter"},
941 .type = GF_OPTION_TYPE_STR,
944 { .key = {"xattr-name"},
945 .type = GF_OPTION_TYPE_STR,
946 .default_value = "trusted.glusterfs.dht",
947 .description = "Base for extended attributes used by this "
948 "translator instance, to avoid conflicts with others above or "
952 { .key = {"weighted-rebalance"},
953 .type = GF_OPTION_TYPE_BOOL,
954 .default_value = "on",
955 .description = "When enabled, files will be allocated to bricks "
956 "with a probability proportional to their size. Otherwise, all "
957 "bricks will have the same probability (legacy behavior)."
961 { .key = {"local-volume-name"},
962 .type = GF_OPTION_TYPE_XLATOR
966 { .key = {"tier-promote-frequency"},
967 .type = GF_OPTION_TYPE_INT,
968 .default_value = "120",
969 .description = "Frequency to promote files to fast tier"
972 { .key = {"tier-demote-frequency"},
973 .type = GF_OPTION_TYPE_INT,
974 .default_value = "120",
975 .description = "Frequency to demote files to slow tier"
978 { .key = {"write-freq-threshold"},
979 .type = GF_OPTION_TYPE_INT,
980 .default_value = "0",
983 { .key = {"read-freq-threshold"},
984 .type = GF_OPTION_TYPE_INT,
985 .default_value = "0",
987 { .key = {"watermark-hi"},
988 .type = GF_OPTION_TYPE_PERCENT,
989 .default_value = "90",
991 { .key = {"watermark-low"},
992 .type = GF_OPTION_TYPE_PERCENT,
993 .default_value = "75",
995 { .key = {"tier-mode"},
996 .type = GF_OPTION_TYPE_STR,
997 .default_value = "test",
999 { .key = {"tier-max-mb"},
1000 .type = GF_OPTION_TYPE_INT,
1001 .default_value = "1000",
1003 { .key = {"tier-max-files"},
1004 .type = GF_OPTION_TYPE_INT,
1005 .default_value = "5000",
1008 { .key = {"pattern.switch.case"},
1009 .type = GF_OPTION_TYPE_ANY
1012 { .key = {"randomize-hash-range-by-gfid"},
1013 .type = GF_OPTION_TYPE_BOOL,
1014 .default_value = "off",
1015 .description = "Use gfid of directory to determine the subvolume "
1016 "from which hash ranges are allocated starting with 0. "
1017 "Note that we still use a directory/file's name to determine the "
1018 "subvolume to which it hashes"
1021 { .key = {"rebal-throttle"},
1022 .type = GF_OPTION_TYPE_STR,
1023 .default_value = "normal",
1024 .description = " Sets the maximum number of parallel file migrations "
1025 "allowed on a node during the rebalance operation. The"
1026 " default value is normal and allows a max of "
1027 "[($(processing units) - 4) / 2), 2] files to be "
1028 "migrated at a time. Lazy will allow only one file to "
1029 "be migrated at a time and aggressive will allow "
1030 "max of [($(processing units) - 4) / 2), 4]"