fixes for Power64
[tridge/junkcode.git] / tsm_torture.c
1 /*
2   test program to heavily stress a TSM/HSM system
3
4   Andrew Tridgell January 2008
5
6   compile with:
7
8      gcc -Wall -g -DWITH_GPFS=1 -o tsm_torture{,.c} -lgpfs_gpl -lrt
9
10   If you want to use the -L or -S switches then you must symlink tsm_torture to smbd as 
11   otherwise it won't have permission to set share modes or leases
12
13      ln -s tsm_torture smbd
14
15   and run like this:
16
17     ./smbd /gpfs/data/tsmtest
18
19   where /gpfs/data/tsmtest is the directory to test on
20
21  */
22
23 #define _XOPEN_SOURCE 500
24 #define _GNU_SOURCE 
25
26 #include <stdio.h>
27 #include <unistd.h>
28 #include <stdlib.h>
29 #include <errno.h>
30 #include <sys/stat.h>
31 #include <fcntl.h>
32 #include <string.h>
33 #include <getopt.h>
34 #include <signal.h>
35 #include <utime.h>
36 #include <stdbool.h>
37 #include <sys/mman.h>
38 #include <sys/time.h>
39 #include <sys/wait.h>
40 #include <time.h>
41 #include <aio.h>
42 #if WITH_GPFS
43 #include "gpfs_gpl.h"
44 #endif
45
46 /* The signal we'll use to signify aio done. */
47 #ifndef RT_SIGNAL_AIO
48 #define RT_SIGNAL_AIO (SIGRTMIN+3)
49 #endif
50
/*
  Run-time configuration. The defaults below can be overridden from the
  command line (see the getopt loop in main()); members without an
  explicit default start out as zero/NULL.
 */
static struct {
        bool use_lease;            /* -L: take a gpfs lease on each opened file */
        bool use_sharemode;        /* -S: set a gpfs share mode on each opened file */
        unsigned nprocesses;       /* -N: number of child processes */
        unsigned nfiles;           /* -F: number of test files */
        unsigned timelimit;        /* -t: run time in seconds */
        unsigned fsize;            /* -s: size of each test file in bytes */
        const char *dir;           /* directory holding the test files */
        const char *migrate_cmd;   /* -M: command run to migrate a file */
        bool use_aio;              /* -A: do reads/writes through Posix aio */
        uid_t io_uid;              /* -U: uid to switch to while submitting aio */
        bool skip_file_creation;   /* -C: do not create the files first */
        bool exit_child_on_error;  /* -E: a child exits when it sees an IO error */
        bool die_on_error;         /* -D: implies -E, and the parent exits too */
} options = {
        .use_lease           = false,
        .use_sharemode       = false,
        .nprocesses          = 10,
        .nfiles              = 10,
        .timelimit           = 30,
        .fsize               = 8192,
        .migrate_cmd         = "dsmmigrate",
        .use_aio             = false,
        .skip_file_creation  = false,
        .exit_child_on_error = false,
        .die_on_error        = false,
};
78
79 static pid_t parent_pid;
80
/*
  The operations a child chooses between. OP_ENDOFLIST is not an
  operation: it is the number of real operations and is used to size
  the per-operation latency arrays.
 */
enum offline_op {
        OP_LOADFILE,
        OP_SAVEFILE,
        OP_MIGRATE,
        OP_GETOFFLINE,
        OP_ENDOFLIST
};
82
83 struct child {
84         unsigned child_num;
85         unsigned offline_count;
86         unsigned online_count;
87         unsigned migrate_fail_count;
88         unsigned io_fail_count;
89         unsigned migrate_ok_count;
90         unsigned count;
91         unsigned lastcount;
92         struct timeval tv_start;
93         double latencies[OP_ENDOFLIST];
94         double worst_latencies[OP_ENDOFLIST];
95         pid_t pid;
96         enum offline_op op;
97 };
98
99 static struct timeval tv_start;
100 static struct child *children;
101
102 static unsigned char *buf;
103
/*
   Map "size" bytes of shared memory backed by /dev/zero.

   The mapping is shared, so it persists across fork(), and it goes away
   once every process using it has exited. The memory is zeroed
   automatically.

   This relies on /dev/zero being shared mmap capable, which it is
   only under some OSes (Linux 2.1 _not_ included)

   Returns the mapping, or NULL if /dev/zero could not be opened or the
   mmap failed.
 */
void *shm_setup(int size)
{
        void *region;
        int zero_fd = open("/dev/zero", O_RDWR);

        if (zero_fd == -1) {
                return NULL;
        }

        region = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, zero_fd, 0);

        /* the mapping stays valid once the descriptor is closed */
        close(zero_fd);

        return (region == MAP_FAILED) ? NULL : region;
}
129
130
/* return the current time of day as reported by gettimeofday() */
static struct timeval timeval_current(void)
{
        struct timeval now;

        gettimeofday(&now, NULL);

        return now;
}
137
138 static double timeval_elapsed(struct timeval *tv)
139 {
140         struct timeval tv2 = timeval_current();
141         return (tv2.tv_sec - tv->tv_sec) + 
142                (tv2.tv_usec - tv->tv_usec)*1.0e-6;
143 }
144
145 /*
146   file name given a number
147  */
148 static char *filename(int i)
149 {
150         char *s = NULL;
151         asprintf(&s, "%s/file%u.dat", options.dir, (unsigned)i);
152         return s;
153 }
154
/*
  SIGIO handler: report that the signal arrived.

  The message is emitted with write() rather than printf() because
  stdio is not async-signal-safe. errno is preserved so the interrupted
  code does not see it change.
 */
static void sigio_handler(int sig)
{
        static const char msg[] = "Got SIGIO\n";
        int saved_errno = errno;

        (void)sig;

        if (write(STDOUT_FILENO, msg, sizeof(msg) - 1) == -1) {
                /* nothing sensible can be done about it from a handler */
        }

        errno = saved_errno;
}
159
/*
  Set by signal_handler() when the aio completion signal arrives.
  volatile sig_atomic_t is the type the C standard guarantees can be
  written safely from a signal handler.
 */
static volatile sig_atomic_t signal_received;

/* aio completion handler: just note that the signal was delivered */
static void signal_handler(int sig)
{
        (void)sig;
        signal_received = 1;
}
166
167 /* simulate pread using aio */
168 static ssize_t pread_aio(int fd, void *buf, size_t count, off_t offset)
169 {
170         struct aiocb acb;
171         int ret;
172
173         memset(&acb, 0, sizeof(acb));
174
175         acb.aio_fildes = fd;
176         acb.aio_buf = buf;
177         acb.aio_nbytes = count;
178         acb.aio_offset = offset;
179         acb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
180         acb.aio_sigevent.sigev_signo  = RT_SIGNAL_AIO;
181         acb.aio_sigevent.sigev_value.sival_int = 1;
182
183         signal(RT_SIGNAL_AIO, signal_handler);
184         signal_received = 0;
185
186         if (options.io_uid) {
187                 if (seteuid(options.io_uid) != 0) {
188                         printf("Failed to become uid %u\n", options.io_uid);
189                         exit(1);
190                 }
191         }
192         if (aio_read(&acb) != 0) {
193                 return -1;
194         }
195         if (options.io_uid) {
196                 if (seteuid(0) != 0) {
197                         printf("Failed to become root\n");
198                         exit(1);
199                 }
200         }
201
202         while (signal_received == 0) {
203                 usleep(500);
204         }
205
206         ret = aio_error(&acb);
207         if (ret != 0) {
208                 printf("aio operation failed - %s\n", strerror(ret));
209                 return -1;
210         }
211
212         return aio_return(&acb);        
213 }
214
215
216 /* simulate pwrite using aio */
217 static ssize_t pwrite_aio(int fd, void *buf, size_t count, off_t offset)
218 {
219         struct aiocb acb;
220         int ret;
221
222         memset(&acb, 0, sizeof(acb));
223
224         acb.aio_fildes = fd;
225         acb.aio_buf = buf;
226         acb.aio_nbytes = count;
227         acb.aio_offset = offset;
228         acb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
229         acb.aio_sigevent.sigev_signo  = RT_SIGNAL_AIO;
230         acb.aio_sigevent.sigev_value.sival_int = 1;
231
232         signal(RT_SIGNAL_AIO, signal_handler);
233         signal_received = 0;
234
235         if (options.io_uid) {
236                 if (seteuid(options.io_uid) != 0) {
237                         printf("Failed to become uid %u\n", options.io_uid);
238                         exit(1);
239                 }
240         }
241         if (aio_write(&acb) != 0) {
242                 return -1;
243         }
244         if (options.io_uid) {
245                 if (seteuid(0) != 0) {
246                         printf("Failed to become root\n");
247                         exit(1);
248                 }
249         }
250
251         while (signal_received == 0) {
252                 usleep(500);
253         }
254
255         ret = aio_error(&acb);
256         if (ret != 0) {
257                 printf("aio operation failed - %s\n", strerror(ret));
258                 return -1;
259         }
260
261         return aio_return(&acb);        
262 }
263
264 /* 
265    load a file 
266  */
267 static void child_loadfile(struct child *child, const char *fname, unsigned fnumber)
268 {
269         int fd, i;
270         ssize_t ret;
271
272         signal(SIGIO, sigio_handler);
273
274         fd = open(fname, O_RDONLY);
275         if (fd == -1) {
276                 perror(fname);
277                 if (options.exit_child_on_error) {
278                         exit(1);
279                 }
280                 child->io_fail_count++;
281                 return;
282         }
283
284 #if WITH_GPFS
285         if (options.use_lease && gpfs_set_lease(fd, GPFS_LEASE_READ) != 0) {
286                 printf("gpfs_set_lease on '%s' - %s\n", fname, strerror(errno));
287                 close(fd);
288                 return;
289         }
290         if (options.use_sharemode && gpfs_set_share(fd, 1, 2) != 0) {
291                 printf("gpfs_set_share on '%s' - %s\n", fname, strerror(errno));
292                 close(fd);
293                 return;
294         }
295 #endif
296
297         if (options.use_aio) {
298                 ret = pread_aio(fd, buf, options.fsize, 0);
299         } else {
300                 ret = pread(fd, buf, options.fsize, 0);
301         }
302         if (ret != options.fsize) {
303                 printf("pread failed on '%s' - %s\n", fname, strerror(errno));
304                 child->io_fail_count++;
305                 close(fd);
306                 return;
307         }
308
309         for (i=0;i<options.fsize;i++) {
310                 if (buf[i] != 1+(fnumber % 255)) {
311                         printf("Bad data %u - expected %u for '%s'\n",
312                                buf[i], 1+(fnumber%255), fname);
313                         if (options.exit_child_on_error) {
314                                 exit(1);
315                         }
316                         child->io_fail_count++;
317                         break;
318                 }
319         }
320
321         close(fd);
322 }
323
324
325 /* 
326    save a file 
327  */
328 static void child_savefile(struct child *child, const char *fname, unsigned fnumber)
329 {
330         int fd;
331         int ret;
332
333         signal(SIGIO, sigio_handler);
334
335         fd = open(fname, O_WRONLY);
336         if (fd == -1) {
337                 perror(fname);
338                 exit(1);
339         }
340
341 #if WITH_GPFS
342         if (options.use_lease && gpfs_set_lease(fd, GPFS_LEASE_WRITE) != 0) {
343                 printf("gpfs_set_lease on '%s' - %s\n", fname, strerror(errno));
344                 close(fd);
345                 return;
346         }
347         if (options.use_sharemode && gpfs_set_share(fd, 1, 2) != 0) {
348                 printf("gpfs_set_share on '%s' - %s\n", fname, strerror(errno));
349                 close(fd);
350                 return;
351         }
352 #endif
353
354         memset(buf, 1+(fnumber%255), options.fsize);
355
356         if (options.use_aio) {
357                 ret = pwrite_aio(fd, buf, options.fsize, 0);
358         } else {
359                 ret = pwrite(fd, buf, options.fsize, 0);
360         }
361         if (ret != options.fsize) {
362                 printf("pwrite failed on '%s' - %s\n", fname, strerror(errno));
363                 child->io_fail_count++;
364                 close(fd);
365                 return;
366         }
367
368         fsync(fd);
369         close(fd);
370 }
371
372 /* 
373    get file offline status
374  */
375 static void child_getoffline(struct child *child, const char *fname)
376 {
377         struct stat st;
378         if (stat(fname, &st) != 0) {
379                 printf("Failed to stat '%s' - %s\n", fname, strerror(errno));
380                 if (options.exit_child_on_error) {
381                         exit(1);
382                 }
383                 child->io_fail_count++;
384                 return;
385         }
386         if (st.st_size != options.fsize) {
387                 printf("Wrong file size for '%s' - %u\n", fname, (unsigned)st.st_size);
388                 if (options.exit_child_on_error) {
389                         exit(1);
390                 }
391                 child->io_fail_count++;
392                 return;
393         }
394         if (st.st_blocks == 0) {
395                 child->offline_count++;
396                 if (strcmp(options.migrate_cmd, "/bin/true") == 0) {
397                         printf("File '%s' is offline with no migration command\n", fname);
398                 }
399         } else {
400                 child->online_count++;
401         }
402 }
403
404
405 /* 
406    set a file offline
407  */
408 static void child_migrate(struct child *child, const char *fname)
409 {
410         char *cmd = NULL;
411         int ret;
412         struct utimbuf t;
413         struct stat st;
414
415         if (stat(fname, &st) != 0) {
416                 printf("Failed to stat '%s' - %s\n", fname, strerror(errno));
417                 if (options.exit_child_on_error) {
418                         exit(1);
419                 }
420                 child->io_fail_count++;
421                 return;
422         }
423         if (st.st_size != options.fsize) {
424                 printf("Wrong file size for '%s' - %u\n", fname, (unsigned)st.st_size);
425                 if (options.exit_child_on_error) {
426                         exit(1);
427                 }
428                 child->io_fail_count++;
429                 return;
430         }
431         if (st.st_blocks == 0) {
432                 /* already offline */
433                 return;
434         }
435
436         /* make the file a bit older so migation works */
437         t.actime = 0;
438         t.modtime = time(NULL) - 60*60;
439         utime(fname, &t);
440
441         asprintf(&cmd, "%s %s > /dev/null 2>&1", options.migrate_cmd, fname);
442         ret = system(cmd);
443         if (ret != -1) {
444                 ret = WEXITSTATUS(ret);
445         }
446         if (ret != 0) {
447                 children->migrate_fail_count++;
448         } else {
449                 children->migrate_ok_count++;
450         }
451         free(cmd);
452 }
453
454
455 /*
456   main child loop
457  */
458 static void run_child(struct child *child)
459 {
460         srandom(time(NULL) ^ getpid());
461
462         while (1) {
463                 double latency;
464                 unsigned fnumber = random() % options.nfiles;
465                 char *fname = filename(fnumber);
466
467                 if (kill(parent_pid, 0) != 0) {
468                         /* parent has exited */
469                         exit(0);
470                 }
471
472                 child->tv_start = timeval_current();
473
474                 child->op = random() % OP_ENDOFLIST;
475                 switch (child->op) {
476                 case OP_LOADFILE:
477                         child_loadfile(child, fname, fnumber);
478                         break;
479                 case OP_SAVEFILE:
480                         child_savefile(child, fname, fnumber);
481                         break;
482                 case OP_MIGRATE:
483                         child_migrate(child, fname);
484                         break;
485                 case OP_GETOFFLINE:
486                         child_getoffline(child, fname);
487                         break;
488                 case OP_ENDOFLIST:
489                         break;
490                 }
491
492                 latency = timeval_elapsed(&child->tv_start);
493                 if (latency > child->latencies[child->op]) {
494                         child->latencies[child->op] = latency;
495                 }
496                 if (latency > child->worst_latencies[child->op]) {
497                         child->worst_latencies[child->op] = latency;
498                 }
499                 child->count++;
500
501                 free(fname);
502         }
503 }
504
505 static void sig_alarm(int sig)
506 {
507         int i, op;
508         unsigned total=0, total_offline=0, total_online=0, 
509                 total_migrate_failures=0, total_migrate_ok=0,
510                 total_io_failures=0;
511         double latencies[OP_ENDOFLIST];
512         double worst_latencies[OP_ENDOFLIST];
513         
514         if (timeval_elapsed(&tv_start) >= options.timelimit) {
515                 printf("timelimit reached - killing children\n");
516                 for (i=0;i<options.nprocesses;i++) {
517                         kill(children[i].pid, SIGTERM);
518                 }
519         }
520
521         for (op=0;op<OP_ENDOFLIST;op++) {
522                 latencies[op] = 0;
523                 worst_latencies[op] = 0;
524         }
525
526         for (i=0;i<options.nprocesses;i++) {
527                 if (kill(children[i].pid, 0) != 0) {
528                         continue;
529                 }
530                 total += children[i].count - children[i].lastcount;
531                 children[i].lastcount = children[i].count;              
532                 total_online += children[i].online_count;
533                 total_offline += children[i].offline_count;
534                 total_migrate_failures += children[i].migrate_fail_count;
535                 total_io_failures += children[i].io_fail_count;
536                 total_migrate_ok += children[i].migrate_ok_count;
537                 for (op=0;op<OP_ENDOFLIST;op++) {
538                         if (children[i].latencies[op] > latencies[op]) {
539                                 latencies[op] = children[i].latencies[op];
540                         }
541                         children[i].latencies[op] = 0;
542                 }
543                 if (timeval_elapsed(&children[i].tv_start) > latencies[children[i].op]) {
544                         double lat;
545                         lat = timeval_elapsed(&children[i].tv_start);
546                         latencies[children[i].op] = lat;
547                         if (lat > worst_latencies[children[i].op]) {
548                                 worst_latencies[children[i].op] = lat;
549                         }
550                 }
551                 for (op=0;op<OP_ENDOFLIST;op++) {
552                         double lat = children[i].worst_latencies[op];
553                         if (lat > worst_latencies[op]) {
554                                 worst_latencies[op] = lat;
555                         }
556                 }
557         }
558
559         printf("ops/s=%4u offline=%u/%u  failures: mig=%u io=%u  latencies: mig=%4.1f/%4.1f stat=%4.1f/%4.1f write=%4.1f/%4.1f read=%4.1f/%4.1f\n",
560                total, total_offline, total_online+total_offline, 
561                total_migrate_failures,
562                total_io_failures,
563                latencies[OP_MIGRATE], worst_latencies[OP_MIGRATE],
564                latencies[OP_GETOFFLINE], worst_latencies[OP_GETOFFLINE],
565                latencies[OP_SAVEFILE], worst_latencies[OP_SAVEFILE],
566                latencies[OP_LOADFILE], worst_latencies[OP_LOADFILE]);
567         fflush(stdout);
568         signal(SIGALRM, sig_alarm);
569         alarm(1);
570 }
571
/* print the command line help on stdout and exit with status 0 */
static void usage(void)
{
        fputs("Usage: (note, must run as 'smbd' to use leases or share modes)\n"
              "ln -sf tsm_torture smbd\n"
              "./smbd [options] <directory>\n"
              "Options:\n"
              "  -N <nprocs>  number of child processes\n"
              "  -F <nfiles>  number of files\n"
              "  -t <time>    runtime (seconds)\n"
              "  -s <fsize>   file size (bytes)\n"
              "  -M <migrate> set file migrate command\n"
              "  -U <uid>     do IO as the specified uid\n"
              "  -L           use gpfs leases\n"
              "  -S           use gpfs sharemodes\n"
              "  -A           use Posix async IO\n"
              "  -C           skip file creation\n"
              "  -E           exit child on IO error\n"
              "  -D           die on error\n",
              stdout);
        exit(0);
}
592
593 int main(int argc, char * const argv[])
594 {
595         int opt, i;
596         const char *progname = argv[0];
597         struct stat st;
598
599         /* parse command-line options */
600         while ((opt = getopt(argc, argv, "LSN:F:t:s:M:U:AhCED")) != -1) {
601                 switch (opt){
602                 case 'L':
603                         options.use_lease = true;
604                         break;
605                 case 'S':
606                         options.use_sharemode = true;
607                         break;
608                 case 'A':
609                         options.use_aio = true;
610                         break;
611                 case 'C':
612                         options.skip_file_creation = true;
613                         break;
614                 case 'N':
615                         options.nprocesses = atoi(optarg);
616                         break;
617                 case 'F':
618                         options.nfiles = atoi(optarg);
619                         break;
620                 case 'M':
621                         options.migrate_cmd = strdup(optarg);
622                         break;
623                 case 's':
624                         options.fsize = atoi(optarg);
625                         break;
626                 case 'U':
627                         options.io_uid = atoi(optarg);
628                         break;
629                 case 't':
630                         options.timelimit = atoi(optarg);
631                         break;
632                 case 'E':
633                         options.exit_child_on_error = true;
634                         break;
635                 case 'D':
636                         options.die_on_error = true;
637                         options.exit_child_on_error = true;
638                         break;
639                 default:
640                         usage();
641                         break;
642                 }
643         }
644
645         if ((options.use_lease || options.use_sharemode) && strstr(progname, "smbd") == NULL) {
646                 printf("ERROR: you must invoke as smbd to use leases or share modes - use a symlink\n");
647                 exit(1);
648         }
649
650         setlinebuf(stdout);     
651
652         argv += optind;
653         argc -= optind;
654
655         if (argc == 0) {
656                 usage();
657         }
658
659         options.dir = argv[0];
660
661         if (stat(options.dir, &st) != 0 || !S_ISDIR(st.st_mode)) {
662                 printf("'%s' must exist and be a directory\n", options.dir);
663                 exit(1);
664         }
665
666         children = shm_setup(sizeof(*children) * options.nprocesses);
667
668
669         buf = malloc(options.fsize);
670
671         if (!options.skip_file_creation) {
672                 printf("Creating %u files of size %u in '%s'\n", 
673                        options.nfiles, options.fsize, options.dir);
674
675                 for (i=0;i<options.nfiles;i++) {
676                         int fd;
677                         char *fname = filename(i);
678                         fd = open(fname, O_CREAT|O_RDWR, 0600);
679                         if (fd == -1) {
680                                 perror(fname);
681                                 exit(1);
682                         }
683                         ftruncate(fd, options.fsize);
684                         memset(buf, 1+(i%255), options.fsize);
685                         if (write(fd, buf, options.fsize) != options.fsize) {
686                                 printf("Failed to write '%s'\n", fname);
687                                 exit(1);
688                         }
689                         fsync(fd);
690                         close(fd);
691                         free(fname);
692                 }
693         }
694
695         parent_pid = getpid();
696
697         printf("Starting %u child processes for %u seconds\n", 
698                options.nprocesses, options.timelimit);
699         printf("Results shown as: offline=numoffline/total latencies: current/worst\n");
700
701         for (i=0;i<options.nprocesses;i++) {
702                 pid_t pid = fork();
703                 if (pid == 0) {
704                         children[i].pid = getpid();
705                         children[i].child_num = i;
706                         run_child(&children[i]);
707                 } else {
708                         children[i].pid = pid;
709                 }
710         }
711
712         /* show status once a second */
713         signal(SIGALRM, sig_alarm);
714         tv_start = timeval_current();
715         alarm(1);
716
717         /* wait for the children to finish */
718         for (i=0;i<options.nprocesses;i++) {
719                 int status;
720                 while (waitpid(-1, &status, 0) != 0 && errno != ECHILD) ;
721                 if (WEXITSTATUS(status) != 0 &&
722                     options.die_on_error) {
723                         exit(1);
724                 }
725         }       
726
727         return 0;
728 }