added latency tool
[tridge/junkcode.git] / tsm_torture.c
1 /*
2   test program to heavily stress a TSM/HSM system
3
4   Andrew Tridgell January 2008
5
6   compile with:
7
8      gcc -Wall -g -DWITH_GPFS=1 -o tsm_torture{,.c} -lgpfs_gpl -lrt
9
10   If you want to use the -L or -S switches then you must symlink tsm_torture to smbd as 
11   otherwise it won't have permission to set share modes or leases
12
13      ln -s tsm_torture smbd
14
15   and run like this:
16
17     ./smbd /gpfs/data/tsmtest
18
19   where /gpfs/data/tsmtest is the directory to test on
20
21  */
22
23 #define _XOPEN_SOURCE 500
24 #define _GNU_SOURCE 
25
26 #include <stdio.h>
27 #include <unistd.h>
28 #include <stdlib.h>
29 #include <errno.h>
30 #include <sys/stat.h>
31 #include <fcntl.h>
32 #include <string.h>
33 #include <getopt.h>
34 #include <signal.h>
35 #include <utime.h>
36 #include <stdbool.h>
37 #include <sys/mman.h>
38 #include <sys/time.h>
39 #include <sys/wait.h>
40 #include <time.h>
41 #include <aio.h>
42 #if WITH_GPFS
43 #include "gpfs_gpl.h"
44 #endif
45
46 /* The signal we'll use to signify aio done. */
47 #ifndef RT_SIGNAL_AIO
48 #define RT_SIGNAL_AIO (SIGRTMIN+3)
49 #endif
50
/*
  run-time configuration

  the defaults below are overridden by the command line switches
  parsed in main()
 */
static struct {
        bool use_lease;            /* -L: call gpfs_set_lease() on each opened file */
        bool use_sharemode;        /* -S: call gpfs_set_share() on each opened file */
        unsigned nprocesses;       /* -N: number of child processes to fork */
        unsigned nfiles;           /* -F: number of test files */
        unsigned timelimit;        /* -t: seconds before the children are killed */
        unsigned fsize;            /* -s: size of each test file in bytes */
        const char *dir;           /* directory holding the test files */
        const char *migrate_cmd;   /* -M: command run to push a file offline */
        bool use_aio;              /* -A: do file IO through Posix aio */
        uid_t io_uid;              /* -U: uid to submit aio as (0 means don't switch) */
        bool skip_file_creation;   /* -C: assume the test files already exist */
} options = {
        .use_lease          = false,
        .use_sharemode      = false,
        .nprocesses         = 10,
        .nfiles             = 10,
        .timelimit          = 30,
        .fsize              = 8192,
        .dir                = NULL,
        .migrate_cmd        = "dsmmigrate",
        .use_aio            = false,
        .io_uid             = 0,
        .skip_file_creation = false,
};
74
/* pid of the controlling process - the children probe it with kill(pid, 0)
   so that they can exit once the parent has gone away */
static pid_t parent_pid;

/*
  the operations a child picks from at random

  OP_ENDOFLIST is not a real operation, it is the number of operations
  and is used to size the per-operation latency arrays
 */
enum offline_op {
        OP_LOADFILE,
        OP_SAVEFILE,
        OP_MIGRATE,
        OP_GETOFFLINE,
        OP_ENDOFLIST
};

/*
  per-child state, written by the child and read by the parent in
  sig_alarm() for the once a second status line
 */
struct child {
        unsigned offline_count;        /* stat calls that found the file offline */
        unsigned online_count;         /* stat calls that found the file online */
        unsigned migrate_fail_count;   /* migrate commands that failed */
        unsigned io_fail_count;        /* failed reads and writes */
        unsigned migrate_ok_count;     /* migrate commands that succeeded */
        unsigned count;                /* total operations completed */
        unsigned lastcount;            /* value of count at the previous status line */
        struct timeval tv_start;       /* when the current operation was started */
        double latencies[OP_ENDOFLIST];        /* slowest op of each type since the last status line */
        double worst_latencies[OP_ENDOFLIST];  /* slowest op of each type since startup */
        pid_t pid;                     /* pid of this child */
        enum offline_op op;            /* the operation currently being run */
};

/* time at which the parent started the status display */
static struct timeval tv_start;

/* array of options.nprocesses records, set up with shm_setup() in main() */
static struct child *children;

/* IO buffer of options.fsize bytes, allocated in main() */
static unsigned char *buf;
98
/* map "size" bytes of /dev/zero as a shared memory segment and return
   a pointer to it, or NULL if the open or the mmap fails

   The segment persists across fork() but disappears when all processes
   have exited. The memory is zeroed automatically.

   This relies on /dev/zero being shared mmap capable, which it is
   only under some OSes (Linux 2.1 _not_ included)
 */
void *shm_setup(int size)
{
        void *region;
        int zero_fd = open("/dev/zero", O_RDWR);

        if (zero_fd == -1) {
                return NULL;
        }

        region = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, zero_fd, 0);

        /* the mapping stays usable after the descriptor is closed */
        close(zero_fd);

        return (region == MAP_FAILED) ? NULL : region;
}
124
125
/*
  return the current wall clock time as reported by gettimeofday()
 */
static struct timeval timeval_current(void)
{
        struct timeval now;

        gettimeofday(&now, NULL);

        return now;
}
132
133 static double timeval_elapsed(struct timeval *tv)
134 {
135         struct timeval tv2 = timeval_current();
136         return (tv2.tv_sec - tv->tv_sec) + 
137                (tv2.tv_usec - tv->tv_usec)*1.0e-6;
138 }
139
140 /*
141   file name given a number
142  */
143 static char *filename(int i)
144 {
145         char *s = NULL;
146         asprintf(&s, "%s/file%u.dat", options.dir, (unsigned)i);
147         return s;
148 }
149
/*
  SIGIO handler installed by child_loadfile() and child_savefile();
  it only reports that the signal arrived

  stdio is not async-signal-safe, so the message goes straight to the
  stdout descriptor with write(). It is therefore not ordered with
  respect to any output still sitting in the stdio buffer. errno is
  preserved so the interrupted code doesn't see it change.
 */
static void sigio_handler(int sig)
{
        static const char msg[] = "\nGot SIGIO\n";
        int saved_errno = errno;
        ssize_t written;

        (void)sig;

        /* nothing useful can be done if the report itself fails */
        written = write(STDOUT_FILENO, msg, sizeof(msg) - 1);
        (void)written;

        errno = saved_errno;
}
154
/* set by signal_handler() and polled by the aio wrappers.
   volatile sig_atomic_t is the type C guarantees can be written from a
   signal handler and read from the interrupted code */
static volatile sig_atomic_t signal_received;

/*
  handler for the aio completion signal - just records that it arrived
 */
static void signal_handler(int sig)
{
        (void)sig;
        signal_received = 1;
}
161
162 /* simulate pread using aio */
163 static ssize_t pread_aio(int fd, void *buf, size_t count, off_t offset)
164 {
165         struct aiocb acb;
166         int ret;
167
168         memset(&acb, 0, sizeof(acb));
169
170         acb.aio_fildes = fd;
171         acb.aio_buf = buf;
172         acb.aio_nbytes = count;
173         acb.aio_offset = offset;
174         acb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
175         acb.aio_sigevent.sigev_signo  = RT_SIGNAL_AIO;
176         acb.aio_sigevent.sigev_value.sival_int = 1;
177
178         signal(RT_SIGNAL_AIO, signal_handler);
179         signal_received = 0;
180
181         if (options.io_uid) {
182                 if (seteuid(options.io_uid) != 0) {
183                         printf("\nFailed to become uid %u\n", options.io_uid);
184                         exit(1);
185                 }
186         }
187         if (aio_read(&acb) != 0) {
188                 return -1;
189         }
190         if (options.io_uid) {
191                 if (seteuid(0) != 0) {
192                         printf("\nFailed to become root\n");
193                         exit(1);
194                 }
195         }
196
197         while (signal_received == 0) {
198                 usleep(500);
199         }
200
201         ret = aio_error(&acb);
202         if (ret != 0) {
203                 printf("\naio operation failed - %s\n", strerror(ret));
204                 return -1;
205         }
206
207         return aio_return(&acb);        
208 }
209
210
211 /* simulate pwrite using aio */
212 static ssize_t pwrite_aio(int fd, void *buf, size_t count, off_t offset)
213 {
214         struct aiocb acb;
215         int ret;
216
217         memset(&acb, 0, sizeof(acb));
218
219         acb.aio_fildes = fd;
220         acb.aio_buf = buf;
221         acb.aio_nbytes = count;
222         acb.aio_offset = offset;
223         acb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
224         acb.aio_sigevent.sigev_signo  = RT_SIGNAL_AIO;
225         acb.aio_sigevent.sigev_value.sival_int = 1;
226
227         signal(RT_SIGNAL_AIO, signal_handler);
228         signal_received = 0;
229
230         if (options.io_uid) {
231                 if (seteuid(options.io_uid) != 0) {
232                         printf("\nFailed to become uid %u\n", options.io_uid);
233                         exit(1);
234                 }
235         }
236         if (aio_write(&acb) != 0) {
237                 return -1;
238         }
239         if (options.io_uid) {
240                 if (seteuid(0) != 0) {
241                         printf("\nFailed to become root\n");
242                         exit(1);
243                 }
244         }
245
246         while (signal_received == 0) {
247                 usleep(500);
248         }
249
250         ret = aio_error(&acb);
251         if (ret != 0) {
252                 printf("\naio operation failed - %s\n", strerror(ret));
253                 return -1;
254         }
255
256         return aio_return(&acb);        
257 }
258
259 /* 
260    load a file 
261  */
262 static void child_loadfile(struct child *child, const char *fname, unsigned fnumber)
263 {
264         int fd, i;
265         ssize_t ret;
266
267         signal(SIGIO, sigio_handler);
268
269         fd = open(fname, O_RDONLY);
270         if (fd == -1) {
271                 perror(fname);
272                 exit(1);
273         }
274
275 #if WITH_GPFS
276         if (options.use_lease && gpfs_set_lease(fd, GPFS_LEASE_READ) != 0) {
277                 printf("\ngpfs_set_lease on '%s' - %s\n", fname, strerror(errno));
278                 close(fd);
279                 return;
280         }
281         if (options.use_sharemode && gpfs_set_share(fd, 1, 2) != 0) {
282                 printf("\ngpfs_set_share on '%s' - %s\n", fname, strerror(errno));
283                 close(fd);
284                 return;
285         }
286 #endif
287
288         if (options.use_aio) {
289                 ret = pread_aio(fd, buf, options.fsize, 0);
290         } else {
291                 ret = pread(fd, buf, options.fsize, 0);
292         }
293         if (ret != options.fsize) {
294                 if (child->io_fail_count == 0) {
295                         printf("\npread failed on '%s' - %s\n", fname, strerror(errno));
296                 }
297                 child->io_fail_count++;
298                 close(fd);
299                 return;
300         }
301
302         for (i=0;i<options.fsize;i++) {
303                 if (buf[i] != fnumber % 256) {
304                         printf("\nBad data %u - expected %u for '%s'\n",
305                                buf[i], fnumber%256, fname);
306                         exit(1);
307                 }
308         }
309
310         close(fd);
311 }
312
313
314 /* 
315    save a file 
316  */
317 static void child_savefile(struct child *child, const char *fname, unsigned fnumber)
318 {
319         int fd;
320         int ret;
321
322         signal(SIGIO, sigio_handler);
323
324         fd = open(fname, O_WRONLY);
325         if (fd == -1) {
326                 perror(fname);
327                 exit(1);
328         }
329
330 #if WITH_GPFS
331         if (options.use_lease && gpfs_set_lease(fd, GPFS_LEASE_WRITE) != 0) {
332                 printf("\ngpfs_set_lease on '%s' - %s\n", fname, strerror(errno));
333                 close(fd);
334                 return;
335         }
336         if (options.use_sharemode && gpfs_set_share(fd, 1, 2) != 0) {
337                 printf("\ngpfs_set_share on '%s' - %s\n", fname, strerror(errno));
338                 close(fd);
339                 return;
340         }
341 #endif
342
343         memset(buf, fnumber%256, options.fsize);
344
345         if (options.use_aio) {
346                 ret = pwrite_aio(fd, buf, options.fsize, 0);
347         } else {
348                 ret = pwrite(fd, buf, options.fsize, 0);
349         }
350         if (ret != options.fsize) {
351                 if (child->io_fail_count == 0) {
352                         printf("\npwrite failed on '%s' - %s\n", fname, strerror(errno));
353                 }
354                 child->io_fail_count++;
355                 close(fd);
356                 return;
357         }
358
359         fsync(fd);
360         close(fd);
361 }
362
363 /* 
364    get file offline status
365  */
366 static void child_getoffline(struct child *child, const char *fname)
367 {
368         struct stat st;
369         if (stat(fname, &st) != 0) {
370                 printf("\nFailed to stat '%s' - %s\n", fname, strerror(errno));
371                 exit(1);
372         }
373         if (st.st_size != options.fsize) {
374                 printf("\nWrong file size for '%s' - %u\n", fname, (unsigned)st.st_size);
375                 exit(1);
376         }
377         if (st.st_blocks == 0) {
378                 child->offline_count++;
379                 if (strcmp(options.migrate_cmd, "/bin/true") == 0) {
380                         printf("\nFile '%s' is offline with no migration command\n", fname);
381                 }
382         } else {
383                 child->online_count++;
384         }
385 }
386
387
388 /* 
389    set a file offline
390  */
391 static void child_migrate(struct child *child, const char *fname)
392 {
393         char *cmd = NULL;
394         int ret;
395         struct utimbuf t;
396         struct stat st;
397
398         if (stat(fname, &st) != 0) {
399                 printf("\nFailed to stat '%s' - %s\n", fname, strerror(errno));
400                 exit(1);
401         }
402         if (st.st_size != options.fsize) {
403                 printf("\nWrong file size for '%s' - %u\n", fname, (unsigned)st.st_size);
404                 exit(1);
405         }
406         if (st.st_blocks == 0) {
407                 /* already offline */
408                 return;
409         }
410
411         /* make the file a bit older so migation works */
412         t.actime = 0;
413         t.modtime = time(NULL) - 60*60;
414         utime(fname, &t);
415
416         asprintf(&cmd, "%s %s > /dev/null 2>&1", options.migrate_cmd, fname);
417         ret = system(cmd);
418         if (ret != -1) {
419                 ret = WEXITSTATUS(ret);
420         }
421         if (ret != 0) {
422                 children->migrate_fail_count++;
423         } else {
424                 children->migrate_ok_count++;
425         }
426         free(cmd);
427 }
428
429 /*
430   main child loop
431  */
432 static void run_child(struct child *child)
433 {
434         srandom(time(NULL) ^ getpid());
435
436         while (1) {
437                 double latency;
438                 unsigned fnumber = random() % options.nfiles;
439                 char *fname = filename(fnumber);
440
441                 if (kill(parent_pid, 0) != 0) {
442                         /* parent has exited */
443                         exit(0);
444                 }
445
446                 child->tv_start = timeval_current();
447
448                 child->op = random() % OP_ENDOFLIST;
449                 switch (child->op) {
450                 case OP_LOADFILE:
451                         child_loadfile(child, fname, fnumber);
452                         break;
453                 case OP_SAVEFILE:
454                         child_savefile(child, fname, fnumber);
455                         break;
456                 case OP_MIGRATE:
457                         child_migrate(child, fname);
458                         break;
459                 case OP_GETOFFLINE:
460                         child_getoffline(child, fname);
461                         break;
462                 case OP_ENDOFLIST:
463                         break;
464                 }
465
466                 latency = timeval_elapsed(&child->tv_start);
467                 if (latency > child->latencies[child->op]) {
468                         child->latencies[child->op] = latency;
469                 }
470                 if (latency > child->worst_latencies[child->op]) {
471                         child->worst_latencies[child->op] = latency;
472                 }
473                 child->count++;
474
475                 free(fname);
476         }
477 }
478
479 static void sig_alarm(int sig)
480 {
481         int i, op;
482         unsigned total=0, total_offline=0, total_online=0, 
483                 total_migrate_failures=0, total_migrate_ok=0,
484                 total_io_failures=0;
485         double latencies[OP_ENDOFLIST];
486         double worst_latencies[OP_ENDOFLIST];
487         
488         if (timeval_elapsed(&tv_start) >= options.timelimit) {
489                 printf("\ntimelimit reached - killing children\n");
490                 for (i=0;i<options.nprocesses;i++) {
491                         kill(children[i].pid, SIGTERM);
492                 }
493         }
494
495         for (op=0;op<OP_ENDOFLIST;op++) {
496                 latencies[op] = 0;
497                 worst_latencies[op] = 0;
498         }
499
500         for (i=0;i<options.nprocesses;i++) {
501                 total += children[i].count - children[i].lastcount;
502                 children[i].lastcount = children[i].count;              
503                 total_online += children[i].online_count;
504                 total_offline += children[i].offline_count;
505                 total_migrate_failures += children[i].migrate_fail_count;
506                 total_io_failures += children[i].io_fail_count;
507                 total_migrate_ok += children[i].migrate_ok_count;
508                 for (op=0;op<OP_ENDOFLIST;op++) {
509                         if (children[i].latencies[op] > latencies[op]) {
510                                 latencies[op] = children[i].latencies[op];
511                         }
512                         children[i].latencies[op] = 0;
513                 }
514                 if (timeval_elapsed(&children[i].tv_start) > latencies[children[i].op]) {
515                         double lat;
516                         lat = timeval_elapsed(&children[i].tv_start);
517                         latencies[children[i].op] = lat;
518                         if (lat > worst_latencies[children[i].op]) {
519                                 worst_latencies[children[i].op] = lat;
520                         }
521                 }
522                 for (op=0;op<OP_ENDOFLIST;op++) {
523                         double lat = children[i].worst_latencies[op];
524                         if (lat > worst_latencies[op]) {
525                                 worst_latencies[op] = lat;
526                         }
527                 }
528         }
529
530         printf("ops/s=%4u offline=%u/%u  failures: mig=%u io=%u  latencies: mig=%.1f/%.1f stat=%.1f/%.1f write=%.1f/%.1f read=%.1f/%.1f                \r",
531                total, total_offline, total_online+total_offline, 
532                total_migrate_failures,
533                total_io_failures,
534                latencies[OP_MIGRATE], worst_latencies[OP_MIGRATE],
535                latencies[OP_GETOFFLINE], worst_latencies[OP_GETOFFLINE],
536                latencies[OP_SAVEFILE], worst_latencies[OP_SAVEFILE],
537                latencies[OP_LOADFILE], worst_latencies[OP_LOADFILE]);
538         fflush(stdout);
539         signal(SIGALRM, sig_alarm);
540         alarm(1);
541 }
542
/*
  print the command line help and exit with status 0
 */
static void usage(void)
{
        static const char *const help_lines[] = {
                "Usage: (note, must run as 'smbd' to use leases or share modes)\n",
                "ln -sf tsm_torture smbd\n",
                "./smbd [options] <directory>\n",
                "Options:\n",
                "  -N <nprocs>  number of child processes\n",
                "  -F <nfiles>  number of files\n",
                "  -t <time>    runtime (seconds)\n",
                "  -s <fsize>   file size (bytes)\n",
                "  -M <migrate> set file migrate command\n",
                "  -U <uid>     do IO as the specified uid\n",
                "  -L           use gpfs leases\n",
                "  -S           use gpfs sharemodes\n",
                "  -A           use Posix async IO\n",
                "  -C           skip file creation\n",
        };
        size_t line;

        for (line = 0; line < sizeof(help_lines) / sizeof(help_lines[0]); line++) {
                fputs(help_lines[line], stdout);
        }
        exit(0);
}
561
562 int main(int argc, char * const argv[])
563 {
564         int opt, i;
565         const char *progname = argv[0];
566         struct stat st;
567
568         /* parse command-line options */
569         while ((opt = getopt(argc, argv, "LSN:F:t:s:M:U:AhC")) != -1) {
570                 switch (opt){
571                 case 'L':
572                         options.use_lease = true;
573                         break;
574                 case 'S':
575                         options.use_sharemode = true;
576                         break;
577                 case 'A':
578                         options.use_aio = true;
579                         break;
580                 case 'C':
581                         options.skip_file_creation = true;
582                         break;
583                 case 'N':
584                         options.nprocesses = atoi(optarg);
585                         break;
586                 case 'F':
587                         options.nfiles = atoi(optarg);
588                         break;
589                 case 'M':
590                         options.migrate_cmd = strdup(optarg);
591                         break;
592                 case 's':
593                         options.fsize = atoi(optarg);
594                         break;
595                 case 'U':
596                         options.io_uid = atoi(optarg);
597                         break;
598                 case 't':
599                         options.timelimit = atoi(optarg);
600                         break;
601                 default:
602                         usage();
603                         break;
604                 }
605         }
606
607         if ((options.use_lease || options.use_sharemode) && strstr(progname, "smbd") == NULL) {
608                 printf("ERROR: you must invoke as smbd to use leases or share modes - use a symlink\n");
609                 exit(1);
610         }
611         
612
613         argv += optind;
614         argc -= optind;
615
616         if (argc == 0) {
617                 usage();
618         }
619
620         options.dir = argv[0];
621
622         if (stat(options.dir, &st) != 0 || !S_ISDIR(st.st_mode)) {
623                 printf("'%s' must exist and be a directory\n", options.dir);
624                 exit(1);
625         }
626
627         children = shm_setup(sizeof(*children) * options.nprocesses);
628
629
630         buf = malloc(options.fsize);
631
632         if (!options.skip_file_creation) {
633                 printf("Creating %u files of size %u in '%s'\n", 
634                        options.nfiles, options.fsize, options.dir);
635
636                 for (i=0;i<options.nfiles;i++) {
637                         int fd;
638                         char *fname = filename(i);
639                         fd = open(fname, O_CREAT|O_RDWR, 0600);
640                         if (fd == -1) {
641                                 perror(fname);
642                                 exit(1);
643                         }
644                         ftruncate(fd, options.fsize);
645                         memset(buf, i%256, options.fsize);
646                         if (write(fd, buf, options.fsize) != options.fsize) {
647                                 printf("Failed to write '%s'\n", fname);
648                                 exit(1);
649                         }
650                         fsync(fd);
651                         close(fd);
652                         free(fname);
653                 }
654         }
655
656         parent_pid = getpid();
657
658         printf("Starting %u child processes for %u seconds\n", 
659                options.nprocesses, options.timelimit);
660         printf("Results shown as: offline=numoffline/total latencies: current/worst\n");
661
662         for (i=0;i<options.nprocesses;i++) {
663                 pid_t pid = fork();
664                 if (pid == 0) {
665                         children[i].pid = getpid();
666                         run_child(&children[i]);
667                 } else {
668                         children[i].pid = pid;
669                 }
670         }
671
672         /* show status once a second */
673         signal(SIGALRM, sig_alarm);
674         tv_start = timeval_current();
675         alarm(1);
676
677         /* wait for the children to finish */
678         for (i=0;i<options.nprocesses;i++) {
679                 while (waitpid(children[i].pid, 0, 0) != 0 && errno != ECHILD) ;
680         }       
681
682         return 0;
683 }