add a mechanism to use random LBA in scsi read/write
[tridge/dbench.git] / dbench.c
1 /* 
2    Copyright (C) by Andrew Tridgell <tridge@samba.org> 1999-2007
3    Copyright (C) 2001 by Martin Pool <mbp@samba.org>
4    
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 3 of the License, or
8    (at your option) any later version.
9    
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14    
15    You should have received a copy of the GNU General Public License
16    along with this program; if not, see <http://www.gnu.org/licenses/>.
17 */
18
19 /* TODO: We could try allowing for different flavours of synchronous
20    operation: data sync and so on.  Linux apparently doesn't make any
21    distinction, however, and for practical purposes it probably
22    doesn't matter.  On NFSv4 it might be interesting, since the client
23    can choose what kind it wants for each OPEN operation. */
24
25 #include "dbench.h"
26 #include "popt.h"
27 #include <sys/sem.h>
28
29 struct options options = {
30         .backend             = "fileio",
31         .timelimit           = 600,
32         .loadfile            = DATADIR "/client.txt",
33         .directory           = ".",
34         .tcp_options         = TCP_OPTIONS,
35         .nprocs              = 10,
36         .sync_open           = 0,
37         .sync_dirs           = 0,
38         .do_fsync            = 0,
39         .fsync_frequency     = 0,
40         .warmup              = -1,
41         .targetrate          = 0.0,
42         .ea_enable           = 0,
43         .clients_per_process = 1,
44         .server              = "localhost",
45         .export              = "/tmp",
46         .protocol            = "tcp",
47         .run_once            = 0,
48         .allow_scsi_writes   = 0,
49         .trunc_io            = 0,
50 };
51
/* Run-wide state shared by the functions in this file. */

/* start of the current measurement window; sig_alarm() resets it when
   the warmup period ends */
static struct timeval tv_start;

/* time at which sig_alarm() saw the time limit expire */
static struct timeval tv_end;

/* id of the start-up barrier semaphore, -1 while none has been created */
static int barrier = -1;

/* most recent MB/sec figure computed by sig_alarm() in the execute phase */
static double throughput;

/* operation table of the backend selected in main() */
struct nb_operations *nb_ops;
57
58 static FILE *open_loadfile(void)
59 {
60         FILE            *f;
61
62         if ((f = fopen(options.loadfile, "rt")) != NULL)
63                 return f;
64
65         fprintf(stderr,
66                 "dbench: error opening '%s': %s\n", options.loadfile,
67                 strerror(errno));
68
69         return NULL;
70 }
71
72
/* per-client state array, allocated with shm_setup() in create_procs();
   NULL until then */
static struct child_struct *children;
74
75 static void sem_cleanup() {
76         if (!(barrier==-1)) 
77                 semctl(barrier,0,IPC_RMID);
78 }
79
80 static void sig_alarm(int sig)
81 {
82         double total_bytes = 0;
83         int total_lines = 0;
84         int i;
85         int nclients = options.nprocs * options.clients_per_process;
86         int in_warmup = 0;
87         double t;
88         static int in_cleanup;
89         double latency;
90         struct timeval tnow;
91         int num_active = 0;
92         int num_finished = 0;
93         (void)sig;
94
95         tnow = timeval_current();
96
97         for (i=0;i<nclients;i++) {
98                 total_bytes += children[i].bytes - children[i].bytes_done_warmup;
99                 if (children[i].bytes == 0) {
100                         in_warmup = 1;
101                 } else {
102                         num_active++;
103                 }
104                 total_lines += children[i].line;
105                 if (children[i].cleanup_finished) {
106                         num_finished++;
107                 }
108         }
109
110         t = timeval_elapsed(&tv_start);
111
112         if (!in_warmup && options.warmup>0 && t > options.warmup) {
113                 tv_start = tnow;
114                 options.warmup = 0;
115                 for (i=0;i<nclients;i++) {
116                         children[i].bytes_done_warmup = children[i].bytes;
117                         children[i].worst_latency = 0;
118                         memset(&children[i].ops, 0, sizeof(children[i].ops));
119                 }
120                 goto next;
121         }
122         if (t < options.warmup) {
123                 in_warmup = 1;
124         } else if (!in_warmup && !in_cleanup && t > options.timelimit) {
125                 for (i=0;i<nclients;i++) {
126                         children[i].done = 1;
127                 }
128                 tv_end = tnow;
129                 in_cleanup = 1;
130         }
131         if (t < 1) {
132                 goto next;
133         }
134
135         latency = 0;
136         if (!in_cleanup) {
137                 for (i=0;i<nclients;i++) {
138                         latency = MAX(children[i].max_latency, latency);
139                         latency = MAX(latency, timeval_elapsed2(&children[i].lasttime, &tnow));
140                         children[i].max_latency = 0;
141                         if (latency > children[i].worst_latency) {
142                                 children[i].worst_latency = latency;
143                         }
144                 }
145         }
146
147         if (in_warmup) {
148                 printf("%4d  %8d  %7.2f MB/sec  warmup %3.0f sec  latency %.03f ms\n", 
149                        num_active, total_lines/nclients, 
150                        1.0e-6 * total_bytes / t, t, latency*1000);
151         } else if (in_cleanup) {
152                 printf("%4d  cleanup %3.0f sec\n", nclients - num_finished, t);
153         } else {
154                 printf("%4d  %8d  %7.2f MB/sec  execute %3.0f sec  latency %.03f ms\n", 
155                        nclients, total_lines/nclients, 
156                        1.0e-6 * total_bytes / t, t, latency*1000);
157                 throughput = 1.0e-6 * total_bytes / t;
158         }
159
160         fflush(stdout);
161 next:
162         signal(SIGALRM, sig_alarm);
163         alarm(PRINT_FREQ);
164 }
165
166
167 static void show_one_latency(struct op *ops, struct op *ops_all)
168 {
169         int i;
170         printf(" Operation                Count    AvgLat    MaxLat\n");
171         printf(" --------------------------------------------------\n");
172         for (i=0;nb_ops->ops[i].name;i++) {
173                 struct op *op1, *op_all;
174                 op1    = &ops[i];
175                 op_all = &ops_all[i];
176                 if (op_all->count == 0) continue;
177                 printf(" %-22s %7u %9.03f %9.03f\n",
178                        nb_ops->ops[i].name, op1->count, 
179                        1000*op1->total_time/op1->count,
180                        op1->max_latency*1000);
181         }
182         printf("\n");
183 }
184
185 static void report_latencies(void)
186 {
187         struct op sum[MAX_OPS];
188         int i, j;
189         struct op *op1, *op2;
190         struct child_struct *child;
191
192         memset(sum, 0, sizeof(sum));
193         for (i=0;nb_ops->ops[i].name;i++) {
194                 op1 = &sum[i];
195                 for (j=0;j<options.nprocs * options.clients_per_process;j++) {
196                         child = &children[j];
197                         op2 = &child->ops[i];
198                         op1->count += op2->count;
199                         op1->total_time += op2->total_time;
200                         op1->max_latency = MAX(op1->max_latency, op2->max_latency);
201                 }
202         }
203         show_one_latency(sum, sum);
204
205         if (!options.per_client_results) {
206                 return;
207         }
208
209         printf("Per client results:\n");
210         for (i=0;i<options.nprocs * options.clients_per_process;i++) {
211                 child = &children[i];
212                 printf("Client %u did %u lines and %.0f bytes\n", 
213                        i, child->line, child->bytes - child->bytes_done_warmup);
214                 show_one_latency(child->ops, sum);              
215         }
216 }
217
218 /* this creates the specified number of child processes and runs fn()
219    in all of them */
220 static void create_procs(int nprocs, void (*fn)(struct child_struct *, const char *))
221 {
222         int nclients = nprocs * options.clients_per_process;
223         int i, status;
224         int synccount;
225         struct timeval tv;
226         FILE *load;
227         struct sembuf sbuf;
228         double t;
229
230         load = open_loadfile();
231         if (load == NULL) {
232                 exit(1);
233         }
234
235         if (nprocs < 1) {
236                 fprintf(stderr,
237                         "create %d procs?  you must be kidding.\n",
238                         nprocs);
239                 return;
240         }
241
242         children = shm_setup(sizeof(struct child_struct)*nclients);
243         if (!children) {
244                 printf("Failed to setup shared memory\n");
245                 return;
246         }
247
248         memset(children, 0, sizeof(*children)*nclients);
249
250         for (i=0;i<nclients;i++) {
251                 children[i].id = i;
252                 children[i].cleanup = 0;
253                 children[i].directory = options.directory;
254                 children[i].starttime = timeval_current();
255                 children[i].lasttime = timeval_current();
256         }
257
258         if (atexit(sem_cleanup) != 0) {
259                 printf("can't register cleanup function on exit\n");
260                 exit(1);
261         }
262         sbuf.sem_num =  0;
263         if ( !(barrier = semget(IPC_PRIVATE,1,IPC_CREAT | S_IRUSR | S_IWUSR)) ) {
264                 printf("failed to create barrier semaphore \n");
265         }
266         sbuf.sem_flg =  SEM_UNDO;
267         sbuf.sem_op  =  1;
268         if (semop(barrier, &sbuf, 1) == -1) {
269                 printf("failed to initialize the barrier semaphore\n");
270                 exit(1);
271         }
272         sbuf.sem_flg =  0;
273
274         for (i=0;i<nprocs;i++) {
275                 if (fork() == 0) {
276                         int j;
277
278                         setlinebuf(stdout);
279                         srandom(getpid() ^ time(NULL));
280
281                         for (j=0;j<options.clients_per_process;j++) {
282                                 nb_ops->setup(&children[i*options.clients_per_process + j]);
283                         }
284
285                         sbuf.sem_op = 0;
286                         if (semop(barrier, &sbuf, 1) == -1) {
287                                 printf("failed to use the barrier semaphore in child %d\n",getpid());
288                                 exit(1);
289                         }
290
291                         semctl(barrier,0,IPC_RMID);
292
293                         fn(&children[i*options.clients_per_process], options.loadfile);
294                         _exit(0);
295                 }
296         }
297
298         synccount = 0;
299         tv = timeval_current();
300         do {
301                 synccount = semctl(barrier,0,GETZCNT);
302                 t = timeval_elapsed(&tv);
303                 printf("%d of %d processes prepared for launch %3.0f sec\n", synccount, nprocs, t);
304                 if (synccount == nprocs) break;
305                 usleep(100*1000);
306         } while (timeval_elapsed(&tv) < 30);
307
308         if (synccount != nprocs) {
309                 printf("FAILED TO START %d CLIENTS (started %d)\n", nprocs, synccount);
310                 return;
311         }
312
313         printf("releasing clients\n");
314         tv_start = timeval_current();
315         sbuf.sem_op  =  -1;
316         if (semop(barrier, &sbuf, 1) == -1) {
317                 printf("failed to release barrier\n");
318                 exit(1);
319         }
320
321         semctl(barrier,0,IPC_RMID);
322
323         signal(SIGALRM, sig_alarm);
324         alarm(PRINT_FREQ);
325
326         for (i=0;i<nprocs;) {
327                 if (waitpid(0, &status, 0) == -1) continue;
328                 if (WEXITSTATUS(status) != 0) {
329                         printf("Child failed with status %d\n",
330                                WEXITSTATUS(status));
331                         exit(1);
332                 }
333                 i++;
334         }
335
336         alarm(0);
337         sig_alarm(SIGALRM);
338
339         printf("\n");
340
341         report_latencies();
342 }
343
344
345
346 static void process_opts(int argc, const char **argv)
347 {
348         const char **extra_argv;
349         int extra_argc = 0;
350         struct poptOption popt_options[] = {
351                 POPT_AUTOHELP
352                 { "backend", 'B', POPT_ARG_STRING, &options.backend, 0, 
353                   "dbench backend (fileio, sockio, nfs)", "string" },
354                 { "timelimit", 't', POPT_ARG_INT, &options.timelimit, 0, 
355                   "timelimit", "integer" },
356                 { "loadfile",  'c', POPT_ARG_STRING, &options.loadfile, 0, 
357                   "loadfile", "filename" },
358                 { "directory", 'D', POPT_ARG_STRING, &options.directory, 0, 
359                   "working directory", NULL },
360                 { "tcp-options", 'T', POPT_ARG_STRING, &options.tcp_options, 0, 
361                   "TCP socket options", NULL },
362                 { "target-rate", 'R', POPT_ARG_DOUBLE, &options.targetrate, 0, 
363                   "target throughput (MB/sec)", NULL },
364                 { "sync", 's', POPT_ARG_NONE, &options.sync_open, 0, 
365                   "use O_SYNC", NULL },
366                 { "sync-dir", 'S', POPT_ARG_NONE, &options.sync_dirs, 0, 
367                   "sync directory changes", NULL },
368                 { "fsync", 'F', POPT_ARG_NONE, &options.do_fsync, 0, 
369                   "fsync on write", NULL },
370                 { "xattr", 'x', POPT_ARG_NONE, &options.ea_enable, 0, 
371                   "use xattrs", NULL },
372                 { "no-resolve", 0, POPT_ARG_NONE, &options.no_resolve, 0, 
373                   "disable name resolution simulation", NULL },
374                 { "clients-per-process", 0, POPT_ARG_INT, &options.clients_per_process, 0, 
375                   "number of clients per process", NULL },
376                 { "trunc-io", 0, POPT_ARG_INT, &options.trunc_io, 0, 
377                   "truncate all io to this size", NULL },
378                 { "one-byte-write-fix", 0, POPT_ARG_NONE, &options.one_byte_write_fix, 0, 
379                   "try to fix 1 byte writes", NULL },
380                 { "stat-check", 0, POPT_ARG_NONE, &options.stat_check, 0, 
381                   "check for pointless calls with stat", NULL },
382                 { "fake-io", 0, POPT_ARG_NONE, &options.fake_io, 0, 
383                   "fake up read/write calls", NULL },
384                 { "skip-cleanup", 0, POPT_ARG_NONE, &options.skip_cleanup, 0, 
385                   "skip cleanup operations", NULL },
386                 { "per-client-results", 0, POPT_ARG_NONE, &options.per_client_results, 0, 
387                   "show results per client", NULL },
388                 { "server",  0, POPT_ARG_STRING, &options.server, 0, 
389                   "server", NULL },
390                 { "export",  0, POPT_ARG_STRING, &options.export, 0, 
391                   "export", NULL },
392                 { "protocol",  0, POPT_ARG_STRING, &options.protocol, 0, 
393                   "protocol", NULL },
394                 { "run-once", 0, POPT_ARG_NONE, &options.run_once, 0,
395                   "Stop once reaching the end of the loadfile", NULL},
396                 { "scsi",  0, POPT_ARG_STRING, &options.scsi_dev, 0, 
397                   "scsi device", NULL },
398                 { "allow-scsi-writes", 0, POPT_ARG_NONE, &options.allow_scsi_writes, 0,
399                   "Allow SCSI write command to the device", NULL},
400                 { "warmup", 0, POPT_ARG_INT, &options.warmup, 0, 
401                   "How meny seconds of warmup to run", NULL },
402                 POPT_TABLEEND
403         };
404         poptContext pc;
405         int opt;
406
407         pc = poptGetContext(argv[0], argc, argv, popt_options, POPT_CONTEXT_KEEP_FIRST);
408
409         while ((opt = poptGetNextOpt(pc)) != -1) {
410                 if (strcmp(poptBadOption(pc, 0), "-h") == 0) {
411                         poptPrintHelp(pc, stdout, 0);
412                         exit(1);
413                 }
414                 fprintf(stderr, "Invalid option %s: %s\n", 
415                         poptBadOption(pc, 0), poptStrerror(opt));
416                 exit(1);
417         }
418
419         /* setup the remaining options for the main program to use */
420         extra_argv = poptGetArgs(pc);
421         if (extra_argv) {
422                 extra_argv++;
423                 while (extra_argv[extra_argc]) extra_argc++;
424         }
425
426         if (extra_argc < 1) {
427                 printf("You need to specify NPROCS\n");
428                 poptPrintHelp(pc, stdout, 0);
429                 exit(1);
430         }
431
432 #ifndef HAVE_EA_SUPPORT
433         if (options.ea_enable) {
434                 printf("EA suppport not compiled in\n");
435                 exit(1);
436         }
437 #endif
438         
439         options.nprocs = atoi(extra_argv[0]);
440
441         if (extra_argc >= 2) {
442                 options.server = extra_argv[1];
443         }
444 }
445
446
447
448  int main(int argc, const char *argv[])
449 {
450         double total_bytes = 0;
451         double t, latency=0;
452         int i;
453
454         setlinebuf(stdout);
455
456         printf("dbench version %s - Copyright Andrew Tridgell 1999-2004\n\n", VERSION);
457
458         if (strstr(argv[0], "dbench")) {
459                 options.backend = "fileio";
460         } else if (strstr(argv[0], "tbench")) {
461                 options.backend = "sockio";
462         } else if (strstr(argv[0], "nfsbench")) {
463                 options.backend = "nfs";
464         } else if (strstr(argv[0], "scsibench")) {
465                 options.backend = "scsi";
466         }
467
468         process_opts(argc, argv);
469
470         if (strcmp(options.backend, "fileio") == 0) {
471                 extern struct nb_operations fileio_ops;
472                 nb_ops = &fileio_ops;
473         } else if (strcmp(options.backend, "sockio") == 0) {
474                 extern struct nb_operations sockio_ops;
475                 nb_ops = &sockio_ops;
476         } else if (strcmp(options.backend, "nfs") == 0) {
477                 extern struct nb_operations nfs_ops;
478                 nb_ops = &nfs_ops;
479 #ifdef HAVE_LINUX_SCSI_SG
480         } else if (strcmp(options.backend, "scsi") == 0) {
481                 extern struct nb_operations scsi_ops;
482                 nb_ops = &scsi_ops;
483 #endif /* HAVE_LINUX_SCSI_SG */
484         } else {
485                 printf("Unknown backend '%s'\n", options.backend);
486                 exit(1);
487         }
488
489         if (options.warmup == -1) {
490                 options.warmup = options.timelimit / 5;
491         }
492
493         printf("Running for %d seconds with load '%s' and minimum warmup %d secs\n", 
494                options.timelimit, options.loadfile, options.warmup);
495
496         create_procs(options.nprocs, child_run);
497
498         for (i=0;i<options.nprocs*options.clients_per_process;i++) {
499                 total_bytes += children[i].bytes - children[i].bytes_done_warmup;
500                 latency = MAX(latency, children[i].worst_latency);
501         }
502
503         t = timeval_elapsed2(&tv_start, &tv_end);
504
505         printf("Throughput %g MB/sec%s%s  %d clients  %d procs  max_latency=%.03f ms\n", 
506                throughput,
507                options.sync_open ? " (sync open)" : "",
508                options.sync_dirs ? " (sync dirs)" : "", 
509                options.nprocs*options.clients_per_process,
510                options.nprocs, latency*1000);
511         return 0;
512 }