2 test program to heavily stress a TSM/HSM system
4 Andrew Tridgell January 2008
8 gcc -Wall -g -DWITH_GPFS=1 -o tsm_torture{,.c} -lgpfs_gpl -lrt
10 If you want to use the -L or -S switches then you must symlink tsm_torture to smbd as
11 otherwise it won't have permission to set share modes or leases
13 ln -s tsm_torture smbd
17 ./smbd /gpfs/data/tsmtest
19 where /gpfs/data/tsmtest is the directory to test on
23 #define _XOPEN_SOURCE 500
46 /* The signal we'll use to signify aio done. */
48 #define RT_SIGNAL_AIO (SIGRTMIN+3)
59 const char *migrate_cmd;
62 bool skip_file_creation;
63 bool exit_child_on_error;
67 .use_sharemode = false,
72 .migrate_cmd = "dsmmigrate",
74 .skip_file_creation = false,
75 .exit_child_on_error = false,
76 .die_on_error = false,
79 static pid_t parent_pid;
81 enum offline_op {OP_LOADFILE, OP_SAVEFILE, OP_MIGRATE, OP_GETOFFLINE, OP_ENDOFLIST};
85 unsigned offline_count;
86 unsigned online_count;
87 unsigned migrate_fail_count;
88 unsigned io_fail_count;
89 unsigned migrate_ok_count;
92 struct timeval tv_start;
93 double latencies[OP_ENDOFLIST];
94 double worst_latencies[OP_ENDOFLIST];
99 static struct timeval tv_start;
100 static struct child *children;
102 static unsigned char *buf;
104 /* return a pointer to a /dev/zero shared memory segment of size "size"
105 which will persist across fork() but will disappear when all processes
108 The memory is zeroed automatically
110 This relies on /dev/zero being shared mmap capable, which it is
111 only under some OSes (Linux 2.1 _not_ included)
113 void *shm_setup(int size)
118 fd = open("/dev/zero", O_RDWR);
122 ret = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
124 /* note that we don't need to keep the file open */
126 if (ret == (void *)-1) return NULL;
131 static struct timeval timeval_current(void)
134 gettimeofday(&tv, NULL);
138 static double timeval_elapsed(struct timeval *tv)
140 struct timeval tv2 = timeval_current();
141 return (tv2.tv_sec - tv->tv_sec) +
142 (tv2.tv_usec - tv->tv_usec)*1.0e-6;
146 file name given a number
148 static char *filename(int i)
151 asprintf(&s, "%s/file%u.dat", options.dir, (unsigned)i);
155 static void sigio_handler(int sig)
157 printf("Got SIGIO\n");
160 static volatile bool signal_received;
162 static void signal_handler(int sig)
164 signal_received = true;
167 /* simulate pread using aio */
168 static ssize_t pread_aio(int fd, void *buf, size_t count, off_t offset)
173 memset(&acb, 0, sizeof(acb));
177 acb.aio_nbytes = count;
178 acb.aio_offset = offset;
179 acb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
180 acb.aio_sigevent.sigev_signo = RT_SIGNAL_AIO;
181 acb.aio_sigevent.sigev_value.sival_int = 1;
183 signal(RT_SIGNAL_AIO, signal_handler);
186 if (options.io_uid) {
187 if (seteuid(options.io_uid) != 0) {
188 printf("Failed to become uid %u\n", options.io_uid);
192 if (aio_read(&acb) != 0) {
195 if (options.io_uid) {
196 if (seteuid(0) != 0) {
197 printf("Failed to become root\n");
202 while (signal_received == 0) {
206 ret = aio_error(&acb);
208 printf("aio operation failed - %s\n", strerror(ret));
212 return aio_return(&acb);
216 /* simulate pwrite using aio */
217 static ssize_t pwrite_aio(int fd, void *buf, size_t count, off_t offset)
222 memset(&acb, 0, sizeof(acb));
226 acb.aio_nbytes = count;
227 acb.aio_offset = offset;
228 acb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
229 acb.aio_sigevent.sigev_signo = RT_SIGNAL_AIO;
230 acb.aio_sigevent.sigev_value.sival_int = 1;
232 signal(RT_SIGNAL_AIO, signal_handler);
235 if (options.io_uid) {
236 if (seteuid(options.io_uid) != 0) {
237 printf("Failed to become uid %u\n", options.io_uid);
241 if (aio_write(&acb) != 0) {
244 if (options.io_uid) {
245 if (seteuid(0) != 0) {
246 printf("Failed to become root\n");
251 while (signal_received == 0) {
255 ret = aio_error(&acb);
257 printf("aio operation failed - %s\n", strerror(ret));
261 return aio_return(&acb);
267 static void child_loadfile(struct child *child, const char *fname, unsigned fnumber)
272 signal(SIGIO, sigio_handler);
274 fd = open(fname, O_RDONLY);
277 if (options.exit_child_on_error) {
280 child->io_fail_count++;
285 if (options.use_lease && gpfs_set_lease(fd, GPFS_LEASE_READ) != 0) {
286 printf("gpfs_set_lease on '%s' - %s\n", fname, strerror(errno));
290 if (options.use_sharemode && gpfs_set_share(fd, 1, 2) != 0) {
291 printf("gpfs_set_share on '%s' - %s\n", fname, strerror(errno));
297 if (options.use_aio) {
298 ret = pread_aio(fd, buf, options.fsize, 0);
300 ret = pread(fd, buf, options.fsize, 0);
302 if (ret != options.fsize) {
303 printf("pread failed on '%s' - %s\n", fname, strerror(errno));
304 child->io_fail_count++;
309 for (i=0;i<options.fsize;i++) {
310 if (buf[i] != 1+(fnumber % 255)) {
311 printf("Bad data %u - expected %u for '%s'\n",
312 buf[i], 1+(fnumber%255), fname);
313 if (options.exit_child_on_error) {
316 child->io_fail_count++;
328 static void child_savefile(struct child *child, const char *fname, unsigned fnumber)
333 signal(SIGIO, sigio_handler);
335 fd = open(fname, O_WRONLY);
342 if (options.use_lease && gpfs_set_lease(fd, GPFS_LEASE_WRITE) != 0) {
343 printf("gpfs_set_lease on '%s' - %s\n", fname, strerror(errno));
347 if (options.use_sharemode && gpfs_set_share(fd, 1, 2) != 0) {
348 printf("gpfs_set_share on '%s' - %s\n", fname, strerror(errno));
354 memset(buf, 1+(fnumber%255), options.fsize);
356 if (options.use_aio) {
357 ret = pwrite_aio(fd, buf, options.fsize, 0);
359 ret = pwrite(fd, buf, options.fsize, 0);
361 if (ret != options.fsize) {
362 printf("pwrite failed on '%s' - %s\n", fname, strerror(errno));
363 child->io_fail_count++;
373 get file offline status
375 static void child_getoffline(struct child *child, const char *fname)
378 if (stat(fname, &st) != 0) {
379 printf("Failed to stat '%s' - %s\n", fname, strerror(errno));
380 if (options.exit_child_on_error) {
383 child->io_fail_count++;
386 if (st.st_size != options.fsize) {
387 printf("Wrong file size for '%s' - %u\n", fname, (unsigned)st.st_size);
388 if (options.exit_child_on_error) {
391 child->io_fail_count++;
394 if (st.st_blocks == 0) {
395 child->offline_count++;
396 if (strcmp(options.migrate_cmd, "/bin/true") == 0) {
397 printf("File '%s' is offline with no migration command\n", fname);
400 child->online_count++;
408 static void child_migrate(struct child *child, const char *fname)
415 if (stat(fname, &st) != 0) {
416 printf("Failed to stat '%s' - %s\n", fname, strerror(errno));
417 if (options.exit_child_on_error) {
420 child->io_fail_count++;
423 if (st.st_size != options.fsize) {
424 printf("Wrong file size for '%s' - %u\n", fname, (unsigned)st.st_size);
425 if (options.exit_child_on_error) {
428 child->io_fail_count++;
431 if (st.st_blocks == 0) {
432 /* already offline */
436 /* make the file a bit older so migration works */
438 t.modtime = time(NULL) - 60*60;
441 asprintf(&cmd, "%s %s > /dev/null 2>&1", options.migrate_cmd, fname);
444 ret = WEXITSTATUS(ret);
447 children->migrate_fail_count++;
449 children->migrate_ok_count++;
458 static void run_child(struct child *child)
460 srandom(time(NULL) ^ getpid());
464 unsigned fnumber = random() % options.nfiles;
465 char *fname = filename(fnumber);
467 if (kill(parent_pid, 0) != 0) {
468 /* parent has exited */
472 child->tv_start = timeval_current();
474 child->op = random() % OP_ENDOFLIST;
477 child_loadfile(child, fname, fnumber);
480 child_savefile(child, fname, fnumber);
483 child_migrate(child, fname);
486 child_getoffline(child, fname);
492 latency = timeval_elapsed(&child->tv_start);
493 if (latency > child->latencies[child->op]) {
494 child->latencies[child->op] = latency;
496 if (latency > child->worst_latencies[child->op]) {
497 child->worst_latencies[child->op] = latency;
505 static void sig_alarm(int sig)
508 unsigned total=0, total_offline=0, total_online=0,
509 total_migrate_failures=0, total_migrate_ok=0,
511 double latencies[OP_ENDOFLIST];
512 double worst_latencies[OP_ENDOFLIST];
514 if (timeval_elapsed(&tv_start) >= options.timelimit) {
515 printf("timelimit reached - killing children\n");
516 for (i=0;i<options.nprocesses;i++) {
517 kill(children[i].pid, SIGTERM);
521 for (op=0;op<OP_ENDOFLIST;op++) {
523 worst_latencies[op] = 0;
526 for (i=0;i<options.nprocesses;i++) {
527 if (kill(children[i].pid, 0) != 0) {
530 total += children[i].count - children[i].lastcount;
531 children[i].lastcount = children[i].count;
532 total_online += children[i].online_count;
533 total_offline += children[i].offline_count;
534 total_migrate_failures += children[i].migrate_fail_count;
535 total_io_failures += children[i].io_fail_count;
536 total_migrate_ok += children[i].migrate_ok_count;
537 for (op=0;op<OP_ENDOFLIST;op++) {
538 if (children[i].latencies[op] > latencies[op]) {
539 latencies[op] = children[i].latencies[op];
541 children[i].latencies[op] = 0;
543 if (timeval_elapsed(&children[i].tv_start) > latencies[children[i].op]) {
545 lat = timeval_elapsed(&children[i].tv_start);
546 latencies[children[i].op] = lat;
547 if (lat > worst_latencies[children[i].op]) {
548 worst_latencies[children[i].op] = lat;
551 for (op=0;op<OP_ENDOFLIST;op++) {
552 double lat = children[i].worst_latencies[op];
553 if (lat > worst_latencies[op]) {
554 worst_latencies[op] = lat;
559 printf("ops/s=%4u offline=%u/%u failures: mig=%u io=%u latencies: mig=%4.1f/%4.1f stat=%4.1f/%4.1f write=%4.1f/%4.1f read=%4.1f/%4.1f\n",
560 total, total_offline, total_online+total_offline,
561 total_migrate_failures,
563 latencies[OP_MIGRATE], worst_latencies[OP_MIGRATE],
564 latencies[OP_GETOFFLINE], worst_latencies[OP_GETOFFLINE],
565 latencies[OP_SAVEFILE], worst_latencies[OP_SAVEFILE],
566 latencies[OP_LOADFILE], worst_latencies[OP_LOADFILE]);
568 signal(SIGALRM, sig_alarm);
572 static void usage(void)
574 printf("Usage: (note, must run as 'smbd' to use leases or share modes)\n");
575 printf("ln -sf tsm_torture smbd\n");
576 printf("./smbd [options] <directory>\n");
577 printf("Options:\n");
578 printf(" -N <nprocs> number of child processes\n");
579 printf(" -F <nfiles> number of files\n");
580 printf(" -t <time> runtime (seconds)\n");
581 printf(" -s <fsize> file size (bytes)\n");
582 printf(" -M <migrate> set file migrate command\n");
583 printf(" -U <uid> do IO as the specified uid\n");
584 printf(" -L use gpfs leases\n");
585 printf(" -S use gpfs sharemodes\n");
586 printf(" -A use Posix async IO\n");
587 printf(" -C skip file creation\n");
588 printf(" -E exit child on IO error\n");
589 printf(" -D die on error\n");
593 int main(int argc, char * const argv[])
596 const char *progname = argv[0];
599 /* parse command-line options */
600 while ((opt = getopt(argc, argv, "LSN:F:t:s:M:U:AhCED")) != -1) {
603 options.use_lease = true;
606 options.use_sharemode = true;
609 options.use_aio = true;
612 options.skip_file_creation = true;
615 options.nprocesses = atoi(optarg);
618 options.nfiles = atoi(optarg);
621 options.migrate_cmd = strdup(optarg);
624 options.fsize = atoi(optarg);
627 options.io_uid = atoi(optarg);
630 options.timelimit = atoi(optarg);
633 options.exit_child_on_error = true;
636 options.die_on_error = true;
637 options.exit_child_on_error = true;
645 if ((options.use_lease || options.use_sharemode) && strstr(progname, "smbd") == NULL) {
646 printf("ERROR: you must invoke as smbd to use leases or share modes - use a symlink\n");
659 options.dir = argv[0];
661 if (stat(options.dir, &st) != 0 || !S_ISDIR(st.st_mode)) {
662 printf("'%s' must exist and be a directory\n", options.dir);
666 children = shm_setup(sizeof(*children) * options.nprocesses);
669 buf = malloc(options.fsize);
671 if (!options.skip_file_creation) {
672 printf("Creating %u files of size %u in '%s'\n",
673 options.nfiles, options.fsize, options.dir);
675 for (i=0;i<options.nfiles;i++) {
677 char *fname = filename(i);
678 fd = open(fname, O_CREAT|O_RDWR, 0600);
683 ftruncate(fd, options.fsize);
684 memset(buf, 1+(i%255), options.fsize);
685 if (write(fd, buf, options.fsize) != options.fsize) {
686 printf("Failed to write '%s'\n", fname);
695 parent_pid = getpid();
697 printf("Starting %u child processes for %u seconds\n",
698 options.nprocesses, options.timelimit);
699 printf("Results shown as: offline=numoffline/total latencies: current/worst\n");
701 for (i=0;i<options.nprocesses;i++) {
704 children[i].pid = getpid();
705 children[i].child_num = i;
706 run_child(&children[i]);
708 children[i].pid = pid;
712 /* show status once a second */
713 signal(SIGALRM, sig_alarm);
714 tv_start = timeval_current();
717 /* wait for the children to finish */
718 for (i=0;i<options.nprocesses;i++) {
720 while (waitpid(-1, &status, 0) != 0 && errno != ECHILD) ;
721 if (WEXITSTATUS(status) != 0 &&
722 options.die_on_error) {