1 /* Tune the Karatsuba parameters
3 * Tom St Denis, tstdenis82@gmail.com
5 #include "../tommath.h"
6 #include "../tommath_private.h"
12 Please take in mind that both multiplicands are of the same size. The balancing
13 mechanism in mp_balance works well but has some overhead itself. You can test
14 the behaviour of it with the option "-o" followed by a (small) positive number 'x'
15 to generate ratios of the form 1:x.
18 static uint64_t s_timer_function(void);
19 static void s_timer_start(void);
20 static uint64_t s_timer_stop(void);
21 static uint64_t s_time_mul(int size);
22 static uint64_t s_time_sqr(int size);
23 static void s_usage(char *s);
25 static uint64_t s_timer_function(void)
27 #if _POSIX_C_SOURCE >= 199309L
28 #define LTM_BILLION 1000000000
31 /* TODO: Sets errno in case of error. Use? */
32 clock_gettime(CLOCK_MONOTONIC, &ts);
33 return (((uint64_t)ts.tv_sec) * LTM_BILLION + (uint64_t)ts.tv_nsec);
37 if (t < (clock_t)(0)) {
44 /* generic ISO C timer */
45 static uint64_t s_timer_tmp;
46 static void s_timer_start(void)
48 s_timer_tmp = s_timer_function();
50 static uint64_t s_timer_stop(void)
52 return s_timer_function() - s_timer_tmp;
56 static int s_check_result;
57 static int s_number_of_test_loops;
58 static int s_stabilization_extra;
59 static int s_offset = 1;
61 #define s_mp_mul(a, b, c) s_mp_mul_digs(a, b, c, (a)->used + (b)->used + 1)
62 static uint64_t s_time_mul(int size)
69 if ((e = mp_init_multi(&a, &b, &c, &d, NULL)) != MP_OKAY) {
74 if ((e = mp_rand(&a, size * s_offset)) != MP_OKAY) {
78 if ((e = mp_rand(&b, size)) != MP_OKAY) {
84 for (x = 0; x < s_number_of_test_loops; x++) {
85 if ((e = mp_mul(&a,&b,&c)) != MP_OKAY) {
89 if (s_check_result == 1) {
90 if ((e = s_mp_mul(&a,&b,&d)) != MP_OKAY) {
94 if (mp_cmp(&c, &d) != MP_EQ) {
95 /* Time of 0 cannot happen (famous last words?) */
104 mp_clear_multi(&a, &b, &c, &d, NULL);
108 static uint64_t s_time_sqr(int size)
115 if ((e = mp_init_multi(&a, &b, &c, NULL)) != MP_OKAY) {
120 if ((e = mp_rand(&a, size)) != MP_OKAY) {
126 for (x = 0; x < s_number_of_test_loops; x++) {
127 if ((e = mp_sqr(&a,&b)) != MP_OKAY) {
131 if (s_check_result == 1) {
132 if ((e = s_mp_sqr(&a,&c)) != MP_OKAY) {
136 if (mp_cmp(&c, &b) != MP_EQ) {
145 mp_clear_multi(&a, &b, &c, NULL);
155 int upper_limit_print;
159 static void s_run(const char *name, uint64_t (*op)(int), int *cutoff)
163 if ((args.verbose == 1) || (args.testmode == 1)) {
164 printf("# %s.\n", name);
166 for (x = 8; x < args.upper_limit_print; x += args.increment_print) {
169 if ((t1 == 0uLL) || (t1 == UINT64_MAX)) {
170 fprintf(stderr,"%s failed at x = INT_MAX (%s)\n", name,
171 (t1 == 0uLL)?"wrong result":"internal error");
176 if ((t2 == 0uLL) || (t2 == UINT64_MAX)) {
177 fprintf(stderr,"%s failed (%s)\n", name,
178 (t2 == 0uLL)?"wrong result":"internal error");
181 if (args.verbose == 1) {
182 printf("%d: %9"PRIu64" %9"PRIu64", %9"PRIi64"\n", x, t1, t2, (int64_t)t2 - (int64_t)t1);
185 if (count == s_stabilization_extra) {
188 } else if (count < s_stabilization_extra) {
191 } else if (count > 0) {
195 *cutoff = x - s_stabilization_extra * args.increment_print;
198 static long s_strtol(const char *str, char **endptr, const char *err)
204 val = strtol(str, &_endptr, base);
205 if ((val > INT_MAX || val < 0) || (errno != 0)) {
206 fprintf(stderr, "Value %s not usable\n", str);
209 if (_endptr == str) {
210 fprintf(stderr, "%s\n", err);
213 if (endptr) *endptr = _endptr;
217 static int s_exit_code = EXIT_FAILURE;
218 static void s_usage(char *s)
220 fprintf(stderr,"Usage: %s [TvcpGbtrSLFfMmosh]\n",s);
221 fprintf(stderr," -T testmode, for use with testme.sh\n");
222 fprintf(stderr," -v verbose, print all timings\n");
223 fprintf(stderr," -c check results\n");
224 fprintf(stderr," -p print benchmark of final cutoffs in files \"multiplying\"\n");
225 fprintf(stderr," and \"squaring\"\n");
226 fprintf(stderr," -G [string] suffix for the filenames listed above\n");
227 fprintf(stderr," Implies '-p'\n");
228 fprintf(stderr," -b print benchmark of bncore.c\n");
229 fprintf(stderr," -t prints space (0x20) separated results\n");
230 fprintf(stderr," -r [64] number of rounds\n");
231 fprintf(stderr," -S [0xdeadbeef] seed for PRNG\n");
232 fprintf(stderr," -L [3] number of negative values accumulated until the result is accepted\n");
233 fprintf(stderr," -M [3000] upper limit of T-C tests/prints\n");
234 fprintf(stderr," -m [1] increment of T-C tests/prints\n");
235 fprintf(stderr," -o [1] multiplier for the second multiplicand\n");
236 fprintf(stderr," (Not for computing the cut-offs!)\n");
237 fprintf(stderr," -s 'preset' use values in 'preset' for printing.\n");
238 fprintf(stderr," 'preset' is a comma separated string with cut-offs for\n");
239 fprintf(stderr," ksm, kss, tc3m, tc3s in that order\n");
240 fprintf(stderr," ksm = karatsuba multiplication\n");
241 fprintf(stderr," kss = karatsuba squaring\n");
242 fprintf(stderr," tc3m = Toom-Cook 3-way multiplication\n");
243 fprintf(stderr," tc3s = Toom-Cook 3-way squaring\n");
244 fprintf(stderr," Implies '-p'\n");
245 fprintf(stderr," -h this message\n");
250 int KARATSUBA_MUL, KARATSUBA_SQR;
251 int TOOM_MUL, TOOM_SQR;
254 const struct cutoffs max_cutoffs =
255 { INT_MAX, INT_MAX, INT_MAX, INT_MAX };
257 static void set_cutoffs(const struct cutoffs *c)
259 KARATSUBA_MUL_CUTOFF = c->KARATSUBA_MUL;
260 KARATSUBA_SQR_CUTOFF = c->KARATSUBA_SQR;
261 TOOM_MUL_CUTOFF = c->TOOM_MUL;
262 TOOM_SQR_CUTOFF = c->TOOM_SQR;
265 static void get_cutoffs(struct cutoffs *c)
267 c->KARATSUBA_MUL = KARATSUBA_MUL_CUTOFF;
268 c->KARATSUBA_SQR = KARATSUBA_SQR_CUTOFF;
269 c->TOOM_MUL = TOOM_MUL_CUTOFF;
270 c->TOOM_SQR = TOOM_SQR_CUTOFF;
274 int main(int argc, char **argv)
284 uint64_t seed = 0xdeadbeef;
287 struct cutoffs orig, updated;
289 FILE *squaring, *multiplying;
290 char mullog[256] = "multiplying";
291 char sqrlog[256] = "squaring";
292 s_number_of_test_loops = 64;
293 s_stabilization_extra = 3;
295 MP_ZERO_BUFFER(&args, sizeof(args));
303 args.upper_limit_print = 3000;
304 args.increment_print = 1;
306 /* Very simple option parser, please treat it nicely. */
308 for (opt = 1; (opt < argc) && (argv[opt][0] == '-'); opt++) {
309 switch (argv[opt][1]) {
313 args.upper_limit_print = 1000;
314 args.increment_print = 11;
315 s_number_of_test_loops = 1;
316 s_stabilization_extra = 1;
334 /* manual strcat() */
335 for (i = 0; i < 255; i++) {
336 if (mullog[i] == '\0') {
340 for (j = 0; i < 255; j++, i++) {
341 mullog[i] = argv[opt][j];
342 if (argv[opt][j] == '\0') {
346 for (i = 0; i < 255; i++) {
347 if (sqrlog[i] == '\0') {
351 for (j = 0; i < 255; j++, i++) {
352 sqrlog[i] = argv[opt][j];
353 if (argv[opt][j] == '\0') {
371 seed = (uint64_t)s_strtol(argv[opt], NULL, "No seed given?\n");
378 s_stabilization_extra = (int)s_strtol(argv[opt], NULL, "No value for option \"-L\"given");
385 s_offset = (int)s_strtol(argv[opt], NULL, "No value for the offset given");
392 s_number_of_test_loops = (int)s_strtol(argv[opt], NULL, "No value for the number of rounds given");
400 args.upper_limit_print = (int)s_strtol(argv[opt], NULL, "No value for the upper limit of T-C tests given");
407 args.increment_print = (int)s_strtol(argv[opt], NULL, "No value for the increment for the T-C tests given");
417 KARATSUBA_MUL_CUTOFF = (int)s_strtol(str, &endptr, "[1/4] No value for KARATSUBA_MUL_CUTOFF given");
419 KARATSUBA_SQR_CUTOFF = (int)s_strtol(str, &endptr, "[2/4] No value for KARATSUBA_SQR_CUTOFF given");
421 TOOM_MUL_CUTOFF = (int)s_strtol(str, &endptr, "[3/4] No value for TOOM_MUL_CUTOFF given");
423 TOOM_SQR_CUTOFF = (int)s_strtol(str, &endptr, "[4/4] No value for TOOM_SQR_CUTOFF given");
426 s_exit_code = EXIT_SUCCESS;
435 mp_rand uses the cryptographically secure
436 source of the OS by default. That is too expensive, too slow and
437 most important for a benchmark: it is not repeatable.
439 s_mp_rand_jenkins_init(seed);
440 mp_rand_source(s_mp_rand_jenkins);
444 updated = max_cutoffs;
445 if ((args.bncore == 0) && (printpreset == 0)) {
448 int *cutoff, *update;
451 #define T_MUL_SQR(n, o, f) { #n, &o##_CUTOFF, &(updated.o), MP_HAS(S_MP_##o) ? f : NULL }
453 The influence of the Comba multiplication cannot be
454 eradicated programmatically. It depends on the size
455 of the macro MP_WPARRAY in tommath.h which needs to
456 be changed manually (to 0 (zero)).
458 T_MUL_SQR("Karatsuba multiplication", KARATSUBA_MUL, s_time_mul),
459 T_MUL_SQR("Karatsuba squaring", KARATSUBA_SQR, s_time_sqr),
460 T_MUL_SQR("Toom-Cook 3-way multiplying", TOOM_MUL, s_time_mul),
461 T_MUL_SQR("Toom-Cook 3-way squaring", TOOM_SQR, s_time_sqr),
464 /* Turn all limits from bncore.c to the max */
465 set_cutoffs(&max_cutoffs);
466 for (n = 0; n < sizeof(test)/sizeof(test[0]); ++n) {
468 s_run(test[n].name, test[n].fn, test[n].cutoff);
469 *test[n].update = *test[n].cutoff;
470 *test[n].cutoff = INT_MAX;
474 if (args.terse == 1) {
475 printf("%d %d %d %d\n",
476 updated.KARATSUBA_MUL,
477 updated.KARATSUBA_SQR,
481 printf("KARATSUBA_MUL_CUTOFF = %d\n", updated.KARATSUBA_MUL);
482 printf("KARATSUBA_SQR_CUTOFF = %d\n", updated.KARATSUBA_SQR);
483 printf("TOOM_MUL_CUTOFF = %d\n", updated.TOOM_MUL);
484 printf("TOOM_SQR_CUTOFF = %d\n", updated.TOOM_SQR);
487 if (args.print == 1) {
488 printf("Printing data for graphing to \"%s\" and \"%s\"\n",mullog, sqrlog);
490 multiplying = fopen(mullog, "w+");
491 if (multiplying == NULL) {
492 fprintf(stderr, "Opening file \"%s\" failed\n", mullog);
496 squaring = fopen(sqrlog, "w+");
497 if (squaring == NULL) {
498 fprintf(stderr, "Opening file \"%s\" failed\n",sqrlog);
502 for (x = 8; x < args.upper_limit_print; x += args.increment_print) {
503 set_cutoffs(&max_cutoffs);
507 fprintf(multiplying, "%d: %9"PRIu64" %9"PRIu64", %9"PRIi64"\n", x, t1, t2, (int64_t)t2 - (int64_t)t1);
509 if (args.verbose == 1) {
510 printf("MUL %d: %9"PRIu64" %9"PRIu64", %9"PRIi64"\n", x, t1, t2, (int64_t)t2 - (int64_t)t1);
513 set_cutoffs(&max_cutoffs);
517 fprintf(squaring,"%d: %9"PRIu64" %9"PRIu64", %9"PRIi64"\n", x, t1, t2, (int64_t)t2 - (int64_t)t1);
519 if (args.verbose == 1) {
520 printf("SQR %d: %9"PRIu64" %9"PRIu64", %9"PRIi64"\n", x, t1, t2, (int64_t)t2 - (int64_t)t1);
524 printf("Finished. Data for graphing in \"%s\" and \"%s\"\n",mullog, sqrlog);
525 if (args.verbose == 1) {
527 if (args.terse == 1) {
528 printf("%d %d %d %d\n",
529 KARATSUBA_MUL_CUTOFF,
530 KARATSUBA_SQR_CUTOFF,
534 printf("KARATSUBA_MUL_CUTOFF = %d\n", KARATSUBA_MUL_CUTOFF);
535 printf("KARATSUBA_SQR_CUTOFF = %d\n", KARATSUBA_SQR_CUTOFF);
536 printf("TOOM_MUL_CUTOFF = %d\n", TOOM_MUL_CUTOFF);
537 printf("TOOM_SQR_CUTOFF = %d\n", TOOM_SQR_CUTOFF);