source4/heimdal/lib/hcrypto/imath/imath.c

   1 /*
   2   Name:     imath.c
   3   Purpose:  Arbitrary precision integer arithmetic routines.
   4   Author:   M. J. Fromberger <http://spinning-yarns.org/michael/>
   5   Info:     $Id: imath.c 826 2009-02-11 16:21:04Z sting $
   6
   7   Copyright (C) 2002-2007 Michael J. Fromberger, All Rights Reserved.
   8
   9   Permission is hereby granted, free of charge, to any person
  10   obtaining a copy of this software and associated documentation files
  11   (the "Software"), to deal in the Software without restriction,
  12   including without limitation the rights to use, copy, modify, merge,
  13   publish, distribute, sublicense, and/or sell copies of the Software,
  14   and to permit persons to whom the Software is furnished to do so,
  15   subject to the following conditions:
  16
  17   The above copyright notice and this permission notice shall be
  18   included in all copies or substantial portions of the Software.
  19
  20   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  21   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  22   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  23   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  24   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  25   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  26   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  27   SOFTWARE.
  28  */
  29
  30 #include "imath.h"
  31
  32 #if DEBUG
  33 #include <stdio.h>
  34 #endif
  35
  36 #include <stdlib.h>
  37 #include <string.h>
  38 #include <ctype.h>
  39
  40 #include <assert.h>
  41
  42 #if DEBUG
  43 #define STATIC /* public */
  44 #else
  45 #define STATIC static
  46 #endif
  47
  48 /* {{{ Constants */
  49
  50 const mp_result MP_OK     = 0;  /* no error, all is well  */
  51 const mp_result MP_FALSE  = 0;  /* boolean false          */
  52 const mp_result MP_TRUE   = -1; /* boolean true           */
  53 const mp_result MP_MEMORY = -2; /* out of memory          */
  54 const mp_result MP_RANGE  = -3; /* argument out of range  */
  55 const mp_result MP_UNDEF  = -4; /* result undefined       */
  56 const mp_result MP_TRUNC  = -5; /* output truncated       */
  57 const mp_result MP_BADARG = -6; /* invalid null argument  */
  58 const mp_result MP_MINERR = -6;
  59
  60 const mp_sign   MP_NEG  = 1;    /* value is strictly negative */
  61 const mp_sign   MP_ZPOS = 0;    /* value is non-negative      */
  62
  63 STATIC const char *s_unknown_err = "unknown result code";
  64 STATIC const char *s_error_msg[] = {
  65   "error code 0",
  66   "boolean true",
  67   "out of memory",
  68   "argument out of range",
  69   "result undefined",
  70   "output truncated",
  71   "invalid argument",
  72   NULL
  73 };
  74
  75 /* }}} */
  76
  77 /* Argument checking macros
  78    Use CHECK() where a return value is required; NRCHECK() elsewhere */
  79 #define CHECK(TEST)   assert(TEST)
  80 #define NRCHECK(TEST) assert(TEST)
  81
  82 /* {{{ Logarithm table for computing output sizes */
  83
  84 /* The ith entry of this table gives the value of log_i(2).
  85
  86    An integer value n requires ceil(log_i(n)) digits to be represented
  87    in base i.  Since it is easy to compute lg(n), by counting bits, we
  88    can compute log_i(n) = lg(n) * log_i(2).
  89
  90    The use of this table eliminates a dependency upon linkage against
  91    the standard math libraries.
  92  */
  93 STATIC const double s_log2[] = {
  94    0.000000000, 0.000000000, 1.000000000, 0.630929754,  /*  0  1  2  3 */
  95    0.500000000, 0.430676558, 0.386852807, 0.356207187,  /*  4  5  6  7 */
  96    0.333333333, 0.315464877, 0.301029996, 0.289064826,  /*  8  9 10 11 */
  97    0.278942946, 0.270238154, 0.262649535, 0.255958025,  /* 12 13 14 15 */
  98    0.250000000, 0.244650542, 0.239812467, 0.235408913,  /* 16 17 18 19 */
  99    0.231378213, 0.227670249, 0.224243824, 0.221064729,  /* 20 21 22 23 */
 100    0.218104292, 0.215338279, 0.212746054, 0.210309918,  /* 24 25 26 27 */
 101    0.208014598, 0.205846832, 0.203795047, 0.201849087,  /* 28 29 30 31 */
 102    0.200000000, 0.198239863, 0.196561632, 0.194959022,  /* 32 33 34 35 */
 103    0.193426404,                                         /* 36          */
 104 };
 105
 106 /* }}} */
 107 /* {{{ Various macros */
 108
 109 /* Return the number of digits needed to represent a static value */
 110 #define MP_VALUE_DIGITS(V) \
 111 ((sizeof(V)+(sizeof(mp_digit)-1))/sizeof(mp_digit))
 112
 113 /* Round precision P to nearest word boundary */
 114 #define ROUND_PREC(P) ((mp_size)(2*(((P)+1)/2)))
 115
 116 /* Set array P of S digits to zero */
 117 #define ZERO(P, S) \
 118 do{mp_size i__=(S)*sizeof(mp_digit);mp_digit *p__=(P);memset(p__,0,i__);}while(0)
 119
 120 /* Copy S digits from array P to array Q */
 121 #define COPY(P, Q, S) \
 122 do{mp_size i__=(S)*sizeof(mp_digit);mp_digit *p__=(P),*q__=(Q);\
 123 memcpy(q__,p__,i__);}while(0)
 124
 125 /* Reverse N elements of type T in array A */
 126 #define REV(T, A, N) \
 127 do{T *u_=(A),*v_=u_+(N)-1;while(u_<v_){T xch=*u_;*u_++=*v_;*v_--=xch;}}while(0)
 128
 129 #define CLAMP(Z) \
 130 do{mp_int z_=(Z);mp_size uz_=MP_USED(z_);mp_digit *dz_=MP_DIGITS(z_)+uz_-1;\
 131 while(uz_ > 1 && (*dz_-- == 0)) --uz_;MP_USED(z_)=uz_;}while(0)
 132
 133 /* Select min/max.  Do not provide expressions for which multiple
 134    evaluation would be problematic, e.g. x++ */
 135 #define MIN(A, B) ((B)<(A)?(B):(A))
 136 #define MAX(A, B) ((B)>(A)?(B):(A))
 137
 138 /* Exchange lvalues A and B of type T, e.g.
 139    SWAP(int, x, y) where x and y are variables of type int. */
 140 #define SWAP(T, A, B) do{T t_=(A);A=(B);B=t_;}while(0)
 141
 142 /* Used to set up and access simple temp stacks within functions. */
 143 #define TEMP(K) (temp + (K))
 144 #define SETUP(E, C) \
 145 do{if((res = (E)) != MP_OK) goto CLEANUP; ++(C);}while(0)
 146
 147 /* Compare value to zero. */
 148 #define CMPZ(Z) \
 149 (((Z)->used==1&&(Z)->digits[0]==0)?0:((Z)->sign==MP_NEG)?-1:1)
 150
 151 /* Multiply X by Y into Z, ignoring signs.  Requires that Z have
 152    enough storage preallocated to hold the result. */
 153 #define UMUL(X, Y, Z) \
 154 do{mp_size ua_=MP_USED(X),ub_=MP_USED(Y);mp_size o_=ua_+ub_;\
 155 ZERO(MP_DIGITS(Z),o_);\
 156 (void) s_kmul(MP_DIGITS(X),MP_DIGITS(Y),MP_DIGITS(Z),ua_,ub_);\
 157 MP_USED(Z)=o_;CLAMP(Z);}while(0)
 158
 159 /* Square X into Z.  Requires that Z have enough storage to hold the
 160    result. */
 161 #define USQR(X, Z) \
 162 do{mp_size ua_=MP_USED(X),o_=ua_+ua_;ZERO(MP_DIGITS(Z),o_);\
 163 (void) s_ksqr(MP_DIGITS(X),MP_DIGITS(Z),ua_);MP_USED(Z)=o_;CLAMP(Z);}while(0)
 164
 165 #define UPPER_HALF(W)           ((mp_word)((W) >> MP_DIGIT_BIT))
 166 #define LOWER_HALF(W)           ((mp_digit)(W))
 167 #define HIGH_BIT_SET(W)         ((W) >> (MP_WORD_BIT - 1))
 168 #define ADD_WILL_OVERFLOW(W, V) ((MP_WORD_MAX - (V)) < (W))
 169
 170 /* }}} */
 171 /* {{{ Default configuration settings */
 172
 173 /* Default number of digits allocated to a new mp_int */
 174 #if IMATH_TEST
 175 mp_size default_precision = MP_DEFAULT_PREC;
 176 #else
 177 STATIC const mp_size default_precision = MP_DEFAULT_PREC;
 178 #endif
 179
 180 /* Minimum number of digits to invoke recursive multiply */
 181 #if IMATH_TEST
 182 mp_size multiply_threshold = MP_MULT_THRESH;
 183 #else
 184 STATIC const mp_size multiply_threshold = MP_MULT_THRESH;
 185 #endif
 186
 187 /* }}} */
 188
 189 /* Allocate a buffer of (at least) num digits, or return
 190    NULL if that couldn't be done.  */
 191 STATIC mp_digit *s_alloc(mp_size num);
 192
 193 /* Release a buffer of digits allocated by s_alloc(). */
 194 STATIC void s_free(void *ptr);
 195
 196 /* Insure that z has at least min digits allocated, resizing if
 197    necessary.  Returns true if successful, false if out of memory. */
 198 STATIC int  s_pad(mp_int z, mp_size min);
 199
 200 /* Fill in a "fake" mp_int on the stack with a given value */
 201 STATIC void      s_fake(mp_int z, mp_small value, mp_digit vbuf[]);
 202
 203 /* Compare two runs of digits of given length, returns <0, 0, >0 */
 204 STATIC int       s_cdig(mp_digit *da, mp_digit *db, mp_size len);
 205
 206 /* Pack the unsigned digits of v into array t */
 207 STATIC int       s_vpack(mp_small v, mp_digit t[]);
 208
 209 /* Compare magnitudes of a and b, returns <0, 0, >0 */
 210 STATIC int       s_ucmp(mp_int a, mp_int b);
 211
 212 /* Compare magnitudes of a and v, returns <0, 0, >0 */
 213 STATIC int       s_vcmp(mp_int a, mp_small v);
 214
 215 /* Unsigned magnitude addition; assumes dc is big enough.
 216    Carry out is returned (no memory allocated). */
 217 STATIC mp_digit  s_uadd(mp_digit *da, mp_digit *db, mp_digit *dc,
 218                         mp_size size_a, mp_size size_b);
 219
 220 /* Unsigned magnitude subtraction.  Assumes dc is big enough. */
 221 STATIC void      s_usub(mp_digit *da, mp_digit *db, mp_digit *dc,
 222                         mp_size size_a, mp_size size_b);
 223
 224 /* Unsigned recursive multiplication.  Assumes dc is big enough. */
 225 STATIC int       s_kmul(mp_digit *da, mp_digit *db, mp_digit *dc,
 226                         mp_size size_a, mp_size size_b);
 227
 228 /* Unsigned magnitude multiplication.  Assumes dc is big enough. */
 229 STATIC void      s_umul(mp_digit *da, mp_digit *db, mp_digit *dc,
 230                         mp_size size_a, mp_size size_b);
 231
 232 /* Unsigned recursive squaring.  Assumes dc is big enough. */
 233 STATIC int       s_ksqr(mp_digit *da, mp_digit *dc, mp_size size_a);
 234
 235 /* Unsigned magnitude squaring.  Assumes dc is big enough. */
 236 STATIC void      s_usqr(mp_digit *da, mp_digit *dc, mp_size size_a);
 237
 238 /* Single digit addition.  Assumes a is big enough. */
 239 STATIC void      s_dadd(mp_int a, mp_digit b);
 240
 241 /* Single digit multiplication.  Assumes a is big enough. */
 242 STATIC void      s_dmul(mp_int a, mp_digit b);
 243
 244 /* Single digit multiplication on buffers; assumes dc is big enough. */
 245 STATIC void      s_dbmul(mp_digit *da, mp_digit b, mp_digit *dc,
 246                          mp_size size_a);
 247
 248 /* Single digit division.  Replaces a with the quotient,
 249    returns the remainder.  */
 250 STATIC mp_digit  s_ddiv(mp_int a, mp_digit b);
 251
 252 /* Quick division by a power of 2, replaces z (no allocation) */
 253 STATIC void      s_qdiv(mp_int z, mp_size p2);
 254
 255 /* Quick remainder by a power of 2, replaces z (no allocation) */
 256 STATIC void      s_qmod(mp_int z, mp_size p2);
 257
 258 /* Quick multiplication by a power of 2, replaces z.
 259    Allocates if necessary; returns false in case this fails. */
 260 STATIC int       s_qmul(mp_int z, mp_size p2);
 261
 262 /* Quick subtraction from a power of 2, replaces z.
 263    Allocates if necessary; returns false in case this fails. */
 264 STATIC int       s_qsub(mp_int z, mp_size p2);
 265
 266 /* Return maximum k such that 2^k divides z. */
 267 STATIC int       s_dp2k(mp_int z);
 268
 269 /* Return k >= 0 such that z = 2^k, or -1 if there is no such k. */
 270 STATIC int       s_isp2(mp_int z);
 271
 272 /* Set z to 2^k.  May allocate; returns false in case this fails. */
 273 STATIC int       s_2expt(mp_int z, mp_small k);
 274
 275 /* Normalize a and b for division, returns normalization constant */
 276 STATIC int       s_norm(mp_int a, mp_int b);
 277
 278 /* Compute constant mu for Barrett reduction, given modulus m, result
 279    replaces z, m is untouched. */
 280 STATIC mp_result s_brmu(mp_int z, mp_int m);
 281
 282 /* Reduce a modulo m, using Barrett's algorithm. */
 283 STATIC int       s_reduce(mp_int x, mp_int m, mp_int mu, mp_int q1, mp_int q2);
 284
 285 /* Modular exponentiation, using Barrett reduction */
 286 STATIC mp_result s_embar(mp_int a, mp_int b, mp_int m, mp_int mu, mp_int c);
 287
 288 /* Unsigned magnitude division.  Assumes |a| > |b|.  Allocates
 289    temporaries; overwrites a with quotient, b with remainder. */
 290 STATIC mp_result s_udiv(mp_int a, mp_int b);
 291
 292 /* Compute the number of digits in radix r required to represent the
 293    given value.  Does not account for sign flags, terminators, etc. */
 294 STATIC int       s_outlen(mp_int z, mp_size r);
 295
 296 /* Guess how many digits of precision will be needed to represent a
 297    radix r value of the specified number of digits.  Returns a value
 298    guaranteed to be no smaller than the actual number required. */
 299 STATIC mp_size   s_inlen(int len, mp_size r);
 300
 301 /* Convert a character to a digit value in radix r, or
 302    -1 if out of range */
 303 STATIC int       s_ch2val(char c, int r);
 304
 305 /* Convert a digit value to a character */
 306 STATIC char      s_val2ch(int v, int caps);
 307
 308 /* Take 2's complement of a buffer in place */
 309 STATIC void      s_2comp(unsigned char *buf, int len);
 310
 311 /* Convert a value to binary, ignoring sign.  On input, *limpos is the
 312    bound on how many bytes should be written to buf; on output, *limpos
 313    is set to the number of bytes actually written. */
 314 STATIC mp_result s_tobin(mp_int z, unsigned char *buf, int *limpos, int pad);
 315
 316 #if DEBUG
 317 /* Dump a representation of the mp_int to standard output */
 318 void      s_print(char *tag, mp_int z);
 319 void      s_print_buf(char *tag, mp_digit *buf, mp_size num);
 320 #endif
 321
 322 /* {{{ mp_int_init(z) */
 323
 324 mp_result mp_int_init(mp_int z)
 325 {
 326   if(z == NULL)
 327     return MP_BADARG;
 328
 329   z->single = 0;
 330   z->digits = &(z->single);
 331   z->alloc  = 1;
 332   z->used   = 1;
 333   z->sign   = MP_ZPOS;
 334
 335   return MP_OK;
 336 }
 337
 338 /* }}} */
 339
 340 /* {{{ mp_int_alloc() */
 341
 342 mp_int    mp_int_alloc(void)
 343 {
 344   mp_int out = malloc(sizeof(mpz_t));
 345
 346   if(out != NULL)
 347     mp_int_init(out);
 348
 349   return out;
 350 }
 351
 352 /* }}} */
 353
 354 /* {{{ mp_int_init_size(z, prec) */
 355
 356 mp_result mp_int_init_size(mp_int z, mp_size prec)
 357 {
 358   CHECK(z != NULL);
 359
 360   if(prec == 0)
 361     prec = default_precision;
 362   else if(prec == 1)
 363     return mp_int_init(z);
 364   else
 365     prec = (mp_size) ROUND_PREC(prec);
 366
 367   if((MP_DIGITS(z) = s_alloc(prec)) == NULL)
 368     return MP_MEMORY;
 369
 370   z->digits[0] = 0;
 371   MP_USED(z) = 1;
 372   MP_ALLOC(z) = prec;
 373   MP_SIGN(z) = MP_ZPOS;
 374
 375   return MP_OK;
 376 }
 377
 378 /* }}} */
 379
 380 /* {{{ mp_int_init_copy(z, old) */
 381
 382 mp_result mp_int_init_copy(mp_int z, mp_int old)
 383 {
 384   mp_result  res;
 385   mp_size    uold;
 386
 387   CHECK(z != NULL && old != NULL);
 388
 389   uold = MP_USED(old);
 390   if(uold == 1) {
 391     mp_int_init(z);
 392   }
 393   else {
 394     mp_size target = MAX(uold, default_precision);
 395
 396     if((res = mp_int_init_size(z, target)) != MP_OK)
 397       return res;
 398   }
 399
 400   MP_USED(z) = uold;
 401   MP_SIGN(z) = MP_SIGN(old);
 402   COPY(MP_DIGITS(old), MP_DIGITS(z), uold);
 403
 404   return MP_OK;
 405 }
 406
 407 /* }}} */
 408
 409 /* {{{ mp_int_init_value(z, value) */
 410
 411 mp_result mp_int_init_value(mp_int z, mp_small value)
 412 {
 413   mpz_t     vtmp;
 414   mp_digit  vbuf[MP_VALUE_DIGITS(value)];
 415
 416   s_fake(&vtmp, value, vbuf);
 417   return mp_int_init_copy(z, &vtmp);
 418 }
 419
 420 /* }}} */
 421
 422 /* {{{ mp_int_set_value(z, value) */
 423
 424 mp_result  mp_int_set_value(mp_int z, mp_small value)
 425 {
 426   mpz_t    vtmp;
 427   mp_digit vbuf[MP_VALUE_DIGITS(value)];
 428
 429   s_fake(&vtmp, value, vbuf);
 430   return mp_int_copy(&vtmp, z);
 431 }
 432
 433 /* }}} */
 434
 435 /* {{{ mp_int_clear(z) */
 436
 437 void      mp_int_clear(mp_int z)
 438 {
 439   if(z == NULL)
 440     return;
 441
 442   if(MP_DIGITS(z) != NULL) {
 443     if((void *) MP_DIGITS(z) != (void *) z)
 444       s_free(MP_DIGITS(z));
 445
 446     MP_DIGITS(z) = NULL;
 447   }
 448 }
 449
 450 /* }}} */
 451
 452 /* {{{ mp_int_free(z) */
 453
 454 void      mp_int_free(mp_int z)
 455 {
 456   NRCHECK(z != NULL);
 457
 458   mp_int_clear(z);
 459   free(z); /* note: NOT s_free() */
 460 }
 461
 462 /* }}} */
 463
 464 /* {{{ mp_int_copy(a, c) */
 465
 466 mp_result mp_int_copy(mp_int a, mp_int c)
 467 {
 468   CHECK(a != NULL && c != NULL);
 469
 470   if(a != c) {
 471     mp_size   ua = MP_USED(a);
 472     mp_digit *da, *dc;
 473
 474     if(!s_pad(c, ua))
 475       return MP_MEMORY;
 476
 477     da = MP_DIGITS(a); dc = MP_DIGITS(c);
 478     COPY(da, dc, ua);
 479
 480     MP_USED(c) = ua;
 481     MP_SIGN(c) = MP_SIGN(a);
 482   }
 483
 484   return MP_OK;
 485 }
 486
 487 /* }}} */
 488
 489 /* {{{ mp_int_swap(a, c) */
 490
 491 void      mp_int_swap(mp_int a, mp_int c)
 492 {
 493   if(a != c) {
 494     mpz_t tmp = *a;
 495
 496     *a = *c;
 497     *c = tmp;
 498   }
 499 }
 500
 501 /* }}} */
 502
 503 /* {{{ mp_int_zero(z) */
 504
 505 void      mp_int_zero(mp_int z)
 506 {
 507   NRCHECK(z != NULL);
 508
 509   z->digits[0] = 0;
 510   MP_USED(z) = 1;
 511   MP_SIGN(z) = MP_ZPOS;
 512 }
 513
 514 /* }}} */
 515
 516 /* {{{ mp_int_abs(a, c) */
 517
 518 mp_result mp_int_abs(mp_int a, mp_int c)
 519 {
 520   mp_result res;
 521
 522   CHECK(a != NULL && c != NULL);
 523
 524   if((res = mp_int_copy(a, c)) != MP_OK)
 525     return res;
 526
 527   MP_SIGN(c) = MP_ZPOS;
 528   return MP_OK;
 529 }
 530
 531 /* }}} */
 532
 533 /* {{{ mp_int_neg(a, c) */
 534
 535 mp_result mp_int_neg(mp_int a, mp_int c)
 536 {
 537   mp_result res;
 538
 539   CHECK(a != NULL && c != NULL);
 540
 541   if((res = mp_int_copy(a, c)) != MP_OK)
 542     return res;
 543
 544   if(CMPZ(c) != 0)
 545     MP_SIGN(c) = 1 - MP_SIGN(a);
 546
 547   return MP_OK;
 548 }
 549
 550 /* }}} */
 551
 552 /* {{{ mp_int_add(a, b, c) */
 553
 554 mp_result mp_int_add(mp_int a, mp_int b, mp_int c)
 555 {
 556   mp_size  ua, ub, max;
 557
 558   CHECK(a != NULL && b != NULL && c != NULL);
 559
 560   ua = MP_USED(a); ub = MP_USED(b);
 561   max = MAX(ua, ub);
 562
 563   if(MP_SIGN(a) == MP_SIGN(b)) {
 564     /* Same sign -- add magnitudes, preserve sign of addends */
 565     mp_digit carry;
 566     mp_size uc;
 567
 568     if(!s_pad(c, max))
 569       return MP_MEMORY;
 570
 571     carry = s_uadd(MP_DIGITS(a), MP_DIGITS(b), MP_DIGITS(c), ua, ub);
 572     uc = max;
 573
 574     if(carry) {
 575       if(!s_pad(c, max + 1))
 576         return MP_MEMORY;
 577
 578       c->digits[max] = carry;
 579       ++uc;
 580     }
 581
 582     MP_USED(c) = uc;
 583     MP_SIGN(c) = MP_SIGN(a);
 584
 585   }
 586   else {
 587     /* Different signs -- subtract magnitudes, preserve sign of greater */
 588     mp_int  x, y;
 589     int     cmp = s_ucmp(a, b); /* magnitude comparision, sign ignored */
 590
 591     /* Set x to max(a, b), y to min(a, b) to simplify later code.
 592        A special case yields zero for equal magnitudes.
 593     */
 594     if(cmp == 0) {
 595       mp_int_zero(c);
 596       return MP_OK;
 597     }
 598     else if(cmp < 0) {
 599       x = b; y = a;
 600     }
 601     else {
 602       x = a; y = b;
 603     }
 604
 605     if(!s_pad(c, MP_USED(x)))
 606       return MP_MEMORY;
 607
 608     /* Subtract smaller from larger */
 609     s_usub(MP_DIGITS(x), MP_DIGITS(y), MP_DIGITS(c), MP_USED(x), MP_USED(y));
 610     MP_USED(c) = MP_USED(x);
 611     CLAMP(c);
 612
 613     /* Give result the sign of the larger */
 614     MP_SIGN(c) = MP_SIGN(x);
 615   }
 616
 617   return MP_OK;
 618 }
 619
 620 /* }}} */
 621
 622 /* {{{ mp_int_add_value(a, value, c) */
 623
 624 mp_result mp_int_add_value(mp_int a, mp_small value, mp_int c)
 625 {
 626   mpz_t     vtmp;
 627   mp_digit  vbuf[MP_VALUE_DIGITS(value)];
 628
 629   s_fake(&vtmp, value, vbuf);
 630
 631   return mp_int_add(a, &vtmp, c);
 632 }
 633
 634 /* }}} */
 635
 636 /* {{{ mp_int_sub(a, b, c) */
 637
 638 mp_result mp_int_sub(mp_int a, mp_int b, mp_int c)
 639 {
 640   mp_size  ua, ub, max;
 641
 642   CHECK(a != NULL && b != NULL && c != NULL);
 643
 644   ua = MP_USED(a); ub = MP_USED(b);
 645   max = MAX(ua, ub);
 646
 647   if(MP_SIGN(a) != MP_SIGN(b)) {
 648     /* Different signs -- add magnitudes and keep sign of a */
 649     mp_digit carry;
 650     mp_size uc;
 651
 652     if(!s_pad(c, max))
 653       return MP_MEMORY;
 654
 655     carry = s_uadd(MP_DIGITS(a), MP_DIGITS(b), MP_DIGITS(c), ua, ub);
 656     uc = max;
 657
 658     if(carry) {
 659       if(!s_pad(c, max + 1))
 660         return MP_MEMORY;
 661
 662       c->digits[max] = carry;
 663       ++uc;
 664     }
 665
 666     MP_USED(c) = uc;
 667     MP_SIGN(c) = MP_SIGN(a);
 668
 669   }
 670   else {
 671     /* Same signs -- subtract magnitudes */
 672     mp_int  x, y;
 673     mp_sign osign;
 674     int     cmp = s_ucmp(a, b);
 675
 676     if(!s_pad(c, max))
 677       return MP_MEMORY;
 678
 679     if(cmp >= 0) {
 680       x = a; y = b; osign = MP_ZPOS;
 681     }
 682     else {
 683       x = b; y = a; osign = MP_NEG;
 684     }
 685
 686     if(MP_SIGN(a) == MP_NEG && cmp != 0)
 687       osign = 1 - osign;
 688
 689     s_usub(MP_DIGITS(x), MP_DIGITS(y), MP_DIGITS(c), MP_USED(x), MP_USED(y));
 690     MP_USED(c) = MP_USED(x);
 691     CLAMP(c);
 692
 693     MP_SIGN(c) = osign;
 694   }
 695
 696   return MP_OK;
 697 }
 698
 699 /* }}} */
 700
 701 /* {{{ mp_int_sub_value(a, value, c) */
 702
 703 mp_result mp_int_sub_value(mp_int a, mp_small value, mp_int c)
 704 {
 705   mpz_t     vtmp;
 706   mp_digit  vbuf[MP_VALUE_DIGITS(value)];
 707
 708   s_fake(&vtmp, value, vbuf);
 709
 710   return mp_int_sub(a, &vtmp, c);
 711 }
 712
 713 /* }}} */
 714
 715 /* {{{ mp_int_mul(a, b, c) */
 716
 717 mp_result mp_int_mul(mp_int a, mp_int b, mp_int c)
 718 {
 719   mp_digit *out;
 720   mp_size   osize, ua, ub, p = 0;
 721   mp_sign   osign;
 722
 723   CHECK(a != NULL && b != NULL && c != NULL);
 724
 725   /* If either input is zero, we can shortcut multiplication */
 726   if(mp_int_compare_zero(a) == 0 || mp_int_compare_zero(b) == 0) {
 727     mp_int_zero(c);
 728     return MP_OK;
 729   }
 730
 731   /* Output is positive if inputs have same sign, otherwise negative */
 732   osign = (MP_SIGN(a) == MP_SIGN(b)) ? MP_ZPOS : MP_NEG;
 733
 734   /* If the output is not identical to any of the inputs, we'll write
 735      the results directly; otherwise, allocate a temporary space. */
 736   ua = MP_USED(a); ub = MP_USED(b);
 737   osize = MAX(ua, ub);
 738   osize = 4 * ((osize + 1) / 2);
 739
 740   if(c == a || c == b) {
 741     p = ROUND_PREC(osize);
 742     p = MAX(p, default_precision);
 743
 744     if((out = s_alloc(p)) == NULL)
 745       return MP_MEMORY;
 746   }
 747   else {
 748     if(!s_pad(c, osize))
 749       return MP_MEMORY;
 750
 751     out = MP_DIGITS(c);
 752   }
 753   ZERO(out, osize);
 754
 755   if(!s_kmul(MP_DIGITS(a), MP_DIGITS(b), out, ua, ub))
 756     return MP_MEMORY;
 757
 758   /* If we allocated a new buffer, get rid of whatever memory c was
 759      already using, and fix up its fields to reflect that.
 760    */
 761   if(out != MP_DIGITS(c)) {
 762     if((void *) MP_DIGITS(c) != (void *) c)
 763       s_free(MP_DIGITS(c));
 764     MP_DIGITS(c) = out;
 765     MP_ALLOC(c) = p;
 766   }
 767
 768   MP_USED(c) = osize; /* might not be true, but we'll fix it ... */
 769   CLAMP(c);           /* ... right here */
 770   MP_SIGN(c) = osign;
 771
 772   return MP_OK;
 773 }
 774
 775 /* }}} */
 776
 777 /* {{{ mp_int_mul_value(a, value, c) */
 778
 779 mp_result mp_int_mul_value(mp_int a, mp_small value, mp_int c)
 780 {
 781   mpz_t     vtmp;
 782   mp_digit  vbuf[MP_VALUE_DIGITS(value)];
 783
 784   s_fake(&vtmp, value, vbuf);
 785
 786   return mp_int_mul(a, &vtmp, c);
 787 }
 788
 789 /* }}} */
 790
 791 /* {{{ mp_int_mul_pow2(a, p2, c) */
 792
 793 mp_result mp_int_mul_pow2(mp_int a, mp_small p2, mp_int c)
 794 {
 795   mp_result res;
 796   CHECK(a != NULL && c != NULL && p2 >= 0);
 797
 798   if((res = mp_int_copy(a, c)) != MP_OK)
 799     return res;
 800
 801   if(s_qmul(c, (mp_size) p2))
 802     return MP_OK;
 803   else
 804     return MP_MEMORY;
 805 }
 806
 807 /* }}} */
 808
 809 /* {{{ mp_int_sqr(a, c) */
 810
 811 mp_result mp_int_sqr(mp_int a, mp_int c)
 812 {
 813   mp_digit *out;
 814   mp_size   osize, p = 0;
 815
 816   CHECK(a != NULL && c != NULL);
 817
 818   /* Get a temporary buffer big enough to hold the result */
 819   osize = (mp_size) 4 * ((MP_USED(a) + 1) / 2);
 820   if(a == c) {
 821     p = ROUND_PREC(osize);
 822     p = MAX(p, default_precision);
 823
 824     if((out = s_alloc(p)) == NULL)
 825       return MP_MEMORY;
 826   }
 827   else {
 828     if(!s_pad(c, osize))
 829       return MP_MEMORY;
 830
 831     out = MP_DIGITS(c);
 832   }
 833   ZERO(out, osize);
 834
 835   s_ksqr(MP_DIGITS(a), out, MP_USED(a));
 836
 837   /* Get rid of whatever memory c was already using, and fix up its
 838      fields to reflect the new digit array it's using
 839    */
 840   if(out != MP_DIGITS(c)) {
 841     if((void *) MP_DIGITS(c) != (void *) c)
 842       s_free(MP_DIGITS(c));
 843     MP_DIGITS(c) = out;
 844     MP_ALLOC(c) = p;
 845   }
 846
 847   MP_USED(c) = osize; /* might not be true, but we'll fix it ... */
 848   CLAMP(c);           /* ... right here */
 849   MP_SIGN(c) = MP_ZPOS;
 850
 851   return MP_OK;
 852 }
 853
 854 /* }}} */
 855
 856 /* {{{ mp_int_div(a, b, q, r) */
 857
 858 mp_result mp_int_div(mp_int a, mp_int b, mp_int q, mp_int r)
 859 {
 860   int       cmp, last = 0, lg;
 861   mp_result res = MP_OK;
 862   mpz_t     temp[2];
 863   mp_int    qout, rout;
 864   mp_sign   sa = MP_SIGN(a), sb = MP_SIGN(b);
 865
 866   CHECK(a != NULL && b != NULL && q != r);
 867
 868   if(CMPZ(b) == 0)
 869     return MP_UNDEF;
 870   else if((cmp = s_ucmp(a, b)) < 0) {
 871     /* If |a| < |b|, no division is required:
 872        q = 0, r = a
 873      */
 874     if(r && (res = mp_int_copy(a, r)) != MP_OK)
 875       return res;
 876
 877     if(q)
 878       mp_int_zero(q);
 879
 880     return MP_OK;
 881   }
 882   else if(cmp == 0) {
 883     /* If |a| = |b|, no division is required:
 884        q = 1 or -1, r = 0
 885      */
 886     if(r)
 887       mp_int_zero(r);
 888
 889     if(q) {
 890       mp_int_zero(q);
 891       q->digits[0] = 1;
 892
 893       if(sa != sb)
 894         MP_SIGN(q) = MP_NEG;
 895     }
 896
 897     return MP_OK;
 898   }
 899
 900   /* When |a| > |b|, real division is required.  We need someplace to
 901      store quotient and remainder, but q and r are allowed to be NULL
 902      or to overlap with the inputs.
 903    */
 904   if((lg = s_isp2(b)) < 0) {
 905     if(q && b != q) {
 906       if((res = mp_int_copy(a, q)) != MP_OK)
 907         goto CLEANUP;
 908       else
 909         qout = q;
 910     }
 911     else {
 912       qout = TEMP(last);
 913       SETUP(mp_int_init_copy(TEMP(last), a), last);
 914     }
 915
 916     if(r && a != r) {
 917       if((res = mp_int_copy(b, r)) != MP_OK)
 918         goto CLEANUP;
 919       else
 920         rout = r;
 921     }
 922     else {
 923       rout = TEMP(last);
 924       SETUP(mp_int_init_copy(TEMP(last), b), last);
 925     }
 926
 927     if((res = s_udiv(qout, rout)) != MP_OK) goto CLEANUP;
 928   }
 929   else {
 930     if(q && (res = mp_int_copy(a, q)) != MP_OK) goto CLEANUP;
 931     if(r && (res = mp_int_copy(a, r)) != MP_OK) goto CLEANUP;
 932
 933     if(q) s_qdiv(q, (mp_size) lg); qout = q;
 934     if(r) s_qmod(r, (mp_size) lg); rout = r;
 935   }
 936
 937   /* Recompute signs for output */
 938   if(rout) {
 939     MP_SIGN(rout) = sa;
 940     if(CMPZ(rout) == 0)
 941       MP_SIGN(rout) = MP_ZPOS;
 942   }
 943   if(qout) {
 944     MP_SIGN(qout) = (sa == sb) ? MP_ZPOS : MP_NEG;
 945     if(CMPZ(qout) == 0)
 946       MP_SIGN(qout) = MP_ZPOS;
 947   }
 948
 949   if(q && (res = mp_int_copy(qout, q)) != MP_OK) goto CLEANUP;
 950   if(r && (res = mp_int_copy(rout, r)) != MP_OK) goto CLEANUP;
 951
 952  CLEANUP:
 953   while(--last >= 0)
 954     mp_int_clear(TEMP(last));
 955
 956   return res;
 957 }
 958
 959 /* }}} */
 960
 961 /* {{{ mp_int_mod(a, m, c) */
 962
 963 mp_result mp_int_mod(mp_int a, mp_int m, mp_int c)
 964 {
 965   mp_result res;
 966   mpz_t     tmp;
 967   mp_int    out;
 968
 969   if(m == c) {
 970     mp_int_init(&tmp);
 971     out = &tmp;
 972   }
 973   else {
 974     out = c;
 975   }
 976
 977   if((res = mp_int_div(a, m, NULL, out)) != MP_OK)
 978     goto CLEANUP;
 979
 980   if(CMPZ(out) < 0)
 981     res = mp_int_add(out, m, c);
 982   else
 983     res = mp_int_copy(out, c);
 984
 985  CLEANUP:
 986   if(out != c)
 987     mp_int_clear(&tmp);
 988
 989   return res;
 990 }
 991
 992 /* }}} */
 993
 994 /* {{{ mp_int_div_value(a, value, q, r) */
 995
 996 mp_result mp_int_div_value(mp_int a, mp_small value, mp_int q, mp_small *r)
 997 {
 998   mpz_t     vtmp, rtmp;
 999   mp_digit  vbuf[MP_VALUE_DIGITS(value)];
1000   mp_result res;
1001
1002   mp_int_init(&rtmp);
1003   s_fake(&vtmp, value, vbuf);
1004
1005   if((res = mp_int_div(a, &vtmp, q, &rtmp)) != MP_OK)
1006     goto CLEANUP;
1007
1008   if(r)
1009     (void) mp_int_to_int(&rtmp, r); /* can't fail */
1010
1011  CLEANUP:
1012   mp_int_clear(&rtmp);
1013   return res;
1014 }
1015
1016 /* }}} */
1017
1018 /* {{{ mp_int_div_pow2(a, p2, q, r) */
1019
1020 mp_result mp_int_div_pow2(mp_int a, mp_small p2, mp_int q, mp_int r)
1021 {
1022   mp_result res = MP_OK;
1023
1024   CHECK(a != NULL && p2 >= 0 && q != r);
1025
1026   if(q != NULL && (res = mp_int_copy(a, q)) == MP_OK)
1027     s_qdiv(q, (mp_size) p2);
1028
1029   if(res == MP_OK && r != NULL && (res = mp_int_copy(a, r)) == MP_OK)
1030     s_qmod(r, (mp_size) p2);
1031
1032   return res;
1033 }
1034
1035 /* }}} */
1036
1037 /* {{{ mp_int_expt(a, b, c) */
1038
1039 mp_result mp_int_expt(mp_int a, mp_small b, mp_int c)
1040 {
1041   mpz_t     t;
1042   mp_result res;
1043   unsigned int v = abs(b);
1044
1045   CHECK(b >= 0 && c != NULL);
1046
1047   if((res = mp_int_init_copy(&t, a)) != MP_OK)
1048     return res;
1049
1050   (void) mp_int_set_value(c, 1);
1051   while(v != 0) {
1052     if(v & 1) {
1053       if((res = mp_int_mul(c, &t, c)) != MP_OK)
1054         goto CLEANUP;
1055     }
1056
1057     v >>= 1;
1058     if(v == 0) break;
1059
1060     if((res = mp_int_sqr(&t, &t)) != MP_OK)
1061       goto CLEANUP;
1062   }
1063
1064  CLEANUP:
1065   mp_int_clear(&t);
1066   return res;
1067 }
1068
1069 /* }}} */
1070
1071 /* {{{ mp_int_expt_value(a, b, c) */
1072
1073 mp_result mp_int_expt_value(mp_small a, mp_small b, mp_int c)
1074 {
1075   mpz_t     t;
1076   mp_result res;
1077   unsigned int v = abs(b);
1078
1079   CHECK(b >= 0 && c != NULL);
1080
1081   if((res = mp_int_init_value(&t, a)) != MP_OK)
1082     return res;
1083
1084   (void) mp_int_set_value(c, 1);
1085   while(v != 0) {
1086     if(v & 1) {
1087       if((res = mp_int_mul(c, &t, c)) != MP_OK)
1088         goto CLEANUP;
1089     }
1090
1091     v >>= 1;
1092     if(v == 0) break;
1093
1094     if((res = mp_int_sqr(&t, &t)) != MP_OK)
1095       goto CLEANUP;
1096   }
1097
1098  CLEANUP:
1099   mp_int_clear(&t);
1100   return res;
1101 }
1102
1103 /* }}} */
1104
1105 /* {{{ mp_int_compare(a, b) */
1106
1107 int       mp_int_compare(mp_int a, mp_int b)
1108 {
1109   mp_sign sa;
1110
1111   CHECK(a != NULL && b != NULL);
1112
1113   sa = MP_SIGN(a);
1114   if(sa == MP_SIGN(b)) {
1115     int cmp = s_ucmp(a, b);
1116
1117     /* If they're both zero or positive, the normal comparison
1118        applies; if both negative, the sense is reversed. */
1119     if(sa == MP_ZPOS)
1120       return cmp;
1121     else
1122       return -cmp;
1123
1124   }
1125   else {
1126     if(sa == MP_ZPOS)
1127       return 1;
1128     else
1129       return -1;
1130   }
1131 }
1132
1133 /* }}} */
1134
1135 /* {{{ mp_int_compare_unsigned(a, b) */
1136
1137 int       mp_int_compare_unsigned(mp_int a, mp_int b)
1138 {
1139   NRCHECK(a != NULL && b != NULL);
1140
1141   return s_ucmp(a, b);
1142 }
1143
1144 /* }}} */
1145
1146 /* {{{ mp_int_compare_zero(z) */
1147
1148 int       mp_int_compare_zero(mp_int z)
1149 {
1150   NRCHECK(z != NULL);
1151
1152   if(MP_USED(z) == 1 && z->digits[0] == 0)
1153     return 0;
1154   else if(MP_SIGN(z) == MP_ZPOS)
1155     return 1;
1156   else
1157     return -1;
1158 }
1159
1160 /* }}} */
1161
1162 /* {{{ mp_int_compare_value(z, value) */
1163
1164 int       mp_int_compare_value(mp_int z, mp_small value)
1165 {
1166   mp_sign vsign = (value < 0) ? MP_NEG : MP_ZPOS;
1167   int     cmp;
1168
1169   CHECK(z != NULL);
1170
1171   if(vsign == MP_SIGN(z)) {
1172     cmp = s_vcmp(z, value);
1173
1174     if(vsign == MP_ZPOS)
1175       return cmp;
1176     else
1177       return -cmp;
1178   }
1179   else {
1180     if(value < 0)
1181       return 1;
1182     else
1183       return -1;
1184   }
1185 }
1186
1187 /* }}} */
1188
1189 /* {{{ mp_int_exptmod(a, b, m, c) */
1190
1191 mp_result mp_int_exptmod(mp_int a, mp_int b, mp_int m, mp_int c)
1192 {
1193   mp_result res;
1194   mp_size   um;
1195   mpz_t     temp[3];
1196   mp_int    s;
1197   int       last = 0;
1198
1199   CHECK(a != NULL && b != NULL && c != NULL && m != NULL);
1200
1201   /* Zero moduli and negative exponents are not considered. */
1202   if(CMPZ(m) == 0)
1203     return MP_UNDEF;
1204   if(CMPZ(b) < 0)
1205     return MP_RANGE;
1206
1207   um = MP_USED(m);
1208   SETUP(mp_int_init_size(TEMP(0), 2 * um), last);
1209   SETUP(mp_int_init_size(TEMP(1), 2 * um), last);
1210
1211   if(c == b || c == m) {
1212     SETUP(mp_int_init_size(TEMP(2), 2 * um), last);
1213     s = TEMP(2);
1214   }
1215   else {
1216     s = c;
1217   }
1218
1219   if((res = mp_int_mod(a, m, TEMP(0))) != MP_OK) goto CLEANUP;
1220
1221   if((res = s_brmu(TEMP(1), m)) != MP_OK) goto CLEANUP;
1222
1223   if((res = s_embar(TEMP(0), b, m, TEMP(1), s)) != MP_OK)
1224     goto CLEANUP;
1225
1226   res = mp_int_copy(s, c);
1227
1228  CLEANUP:
1229   while(--last >= 0)
1230     mp_int_clear(TEMP(last));
1231
1232   return res;
1233 }
1234
1235 /* }}} */
1236
1237 /* {{{ mp_int_exptmod_evalue(a, value, m, c) */
1238
1239 mp_result mp_int_exptmod_evalue(mp_int a, mp_small value, mp_int m, mp_int c)
1240 {
1241   mpz_t    vtmp;
1242   mp_digit vbuf[MP_VALUE_DIGITS(value)];
1243
1244   s_fake(&vtmp, value, vbuf);
1245
1246   return mp_int_exptmod(a, &vtmp, m, c);
1247 }
1248
1249 /* }}} */
1250
1251 /* {{{ mp_int_exptmod_bvalue(v, b, m, c) */
1252
1253 mp_result mp_int_exptmod_bvalue(mp_small value, mp_int b,
1254                                 mp_int m, mp_int c)
1255 {
1256   mpz_t    vtmp;
1257   mp_digit vbuf[MP_VALUE_DIGITS(value)];
1258
1259   s_fake(&vtmp, value, vbuf);
1260
1261   return mp_int_exptmod(&vtmp, b, m, c);
1262 }
1263
1264 /* }}} */
1265
1266 /* {{{ mp_int_exptmod_known(a, b, m, mu, c) */
1267
1268 mp_result mp_int_exptmod_known(mp_int a, mp_int b, mp_int m, mp_int mu, mp_int c)
1269 {
1270   mp_result res;
1271   mp_size   um;
1272   mpz_t     temp[2];
1273   mp_int    s;
1274   int       last = 0;
1275
1276   CHECK(a && b && m && c);
1277
1278   /* Zero moduli and negative exponents are not considered. */
1279   if(CMPZ(m) == 0)
1280     return MP_UNDEF;
1281   if(CMPZ(b) < 0)
1282     return MP_RANGE;
1283
1284   um = MP_USED(m);
1285   SETUP(mp_int_init_size(TEMP(0), 2 * um), last);
1286
1287   if(c == b || c == m) {
1288     SETUP(mp_int_init_size(TEMP(1), 2 * um), last);
1289     s = TEMP(1);
1290   }
1291   else {
1292     s = c;
1293   }
1294
1295   if((res = mp_int_mod(a, m, TEMP(0))) != MP_OK) goto CLEANUP;
1296
1297   if((res = s_embar(TEMP(0), b, m, mu, s)) != MP_OK)
1298     goto CLEANUP;
1299
1300   res = mp_int_copy(s, c);
1301
1302  CLEANUP:
1303   while(--last >= 0)
1304     mp_int_clear(TEMP(last));
1305
1306   return res;
1307 }
1308
1309 /* }}} */
1310
1311 /* {{{ mp_int_redux_const(m, c) */
1312
1313 mp_result mp_int_redux_const(mp_int m, mp_int c)
1314 {
1315   CHECK(m != NULL && c != NULL && m != c);
1316
1317   return s_brmu(c, m);
1318 }
1319
1320 /* }}} */
1321
1322 /* {{{ mp_int_invmod(a, m, c) */
1323
1324 mp_result mp_int_invmod(mp_int a, mp_int m, mp_int c)
1325 {
1326   mp_result res;
1327   mp_sign   sa;
1328   int       last = 0;
1329   mpz_t     temp[2];
1330
1331   CHECK(a != NULL && m != NULL && c != NULL);
1332
1333   if(CMPZ(a) == 0 || CMPZ(m) <= 0)
1334     return MP_RANGE;
1335
1336   sa = MP_SIGN(a); /* need this for the result later */
1337
1338   for(last = 0; last < 2; ++last)
1339     mp_int_init(TEMP(last));
1340
1341   if((res = mp_int_egcd(a, m, TEMP(0), TEMP(1), NULL)) != MP_OK)
1342     goto CLEANUP;
1343
1344   if(mp_int_compare_value(TEMP(0), 1) != 0) {
1345     res = MP_UNDEF;
1346     goto CLEANUP;
1347   }
1348
1349   /* It is first necessary to constrain the value to the proper range */
1350   if((res = mp_int_mod(TEMP(1), m, TEMP(1))) != MP_OK)
1351     goto CLEANUP;
1352
1353   /* Now, if 'a' was originally negative, the value we have is
1354      actually the magnitude of the negative representative; to get the
1355      positive value we have to subtract from the modulus.  Otherwise,
1356      the value is okay as it stands.
1357    */
1358   if(sa == MP_NEG)
1359     res = mp_int_sub(m, TEMP(1), c);
1360   else
1361     res = mp_int_copy(TEMP(1), c);
1362
1363  CLEANUP:
1364   while(--last >= 0)
1365     mp_int_clear(TEMP(last));
1366
1367   return res;
1368 }
1369
1370 /* }}} */
1371
1372 /* {{{ mp_int_gcd(a, b, c) */
1373
1374 /* Binary GCD algorithm due to Josef Stein, 1961 */
1375 mp_result mp_int_gcd(mp_int a, mp_int b, mp_int c)
1376 {
1377   int       ca, cb, k = 0;
1378   mpz_t     u, v, t;
1379   mp_result res;
1380
1381   CHECK(a != NULL && b != NULL && c != NULL);
1382
1383   ca = CMPZ(a);
1384   cb = CMPZ(b);
1385   if(ca == 0 && cb == 0)
1386     return MP_UNDEF;
1387   else if(ca == 0)
1388     return mp_int_abs(b, c);
1389   else if(cb == 0)
1390     return mp_int_abs(a, c);
1391
1392   mp_int_init(&t);
1393   if((res = mp_int_init_copy(&u, a)) != MP_OK)
1394     goto U;
1395   if((res = mp_int_init_copy(&v, b)) != MP_OK)
1396     goto V;
1397
1398   MP_SIGN(&u) = MP_ZPOS; MP_SIGN(&v) = MP_ZPOS;
1399
1400   { /* Divide out common factors of 2 from u and v */
1401     int div2_u = s_dp2k(&u), div2_v = s_dp2k(&v);
1402
1403     k = MIN(div2_u, div2_v);
1404     s_qdiv(&u, (mp_size) k);
1405     s_qdiv(&v, (mp_size) k);
1406   }
1407
1408   if(mp_int_is_odd(&u)) {
1409     if((res = mp_int_neg(&v, &t)) != MP_OK)
1410       goto CLEANUP;
1411   }
1412   else {
1413     if((res = mp_int_copy(&u, &t)) != MP_OK)
1414       goto CLEANUP;
1415   }
1416
1417   for(;;) {
1418     s_qdiv(&t, s_dp2k(&t));
1419
1420     if(CMPZ(&t) > 0) {
1421       if((res = mp_int_copy(&t, &u)) != MP_OK)
1422         goto CLEANUP;
1423     }
1424     else {
1425       if((res = mp_int_neg(&t, &v)) != MP_OK)
1426         goto CLEANUP;
1427     }
1428
1429     if((res = mp_int_sub(&u, &v, &t)) != MP_OK)
1430       goto CLEANUP;
1431
1432     if(CMPZ(&t) == 0)
1433       break;
1434   }
1435
1436   if((res = mp_int_abs(&u, c)) != MP_OK)
1437     goto CLEANUP;
1438   if(!s_qmul(c, (mp_size) k))
1439     res = MP_MEMORY;
1440
1441  CLEANUP:
1442   mp_int_clear(&v);
1443  V: mp_int_clear(&u);
1444  U: mp_int_clear(&t);
1445
1446   return res;
1447 }
1448
1449 /* }}} */
1450
1451 /* {{{ mp_int_egcd(a, b, c, x, y) */
1452
1453 /* This is the binary GCD algorithm again, but this time we keep track
1454    of the elementary matrix operations as we go, so we can get values
1455    x and y satisfying c = ax + by.
1456  */
1457 mp_result mp_int_egcd(mp_int a, mp_int b, mp_int c,
1458                       mp_int x, mp_int y)
1459 {
1460   int       k, last = 0, ca, cb;
1461   mpz_t     temp[8];
1462   mp_result res;
1463
1464   CHECK(a != NULL && b != NULL && c != NULL &&
1465         (x != NULL || y != NULL));
1466
1467   ca = CMPZ(a);
1468   cb = CMPZ(b);
1469   if(ca == 0 && cb == 0)
1470     return MP_UNDEF;
1471   else if(ca == 0) {
1472     if((res = mp_int_abs(b, c)) != MP_OK) return res;
1473     mp_int_zero(x); (void) mp_int_set_value(y, 1); return MP_OK;
1474   }
1475   else if(cb == 0) {
1476     if((res = mp_int_abs(a, c)) != MP_OK) return res;
1477     (void) mp_int_set_value(x, 1); mp_int_zero(y); return MP_OK;
1478   }
1479
1480   /* Initialize temporaries:
1481      A:0, B:1, C:2, D:3, u:4, v:5, ou:6, ov:7 */
1482   for(last = 0; last < 4; ++last)
1483     mp_int_init(TEMP(last));
1484   TEMP(0)->digits[0] = 1;
1485   TEMP(3)->digits[0] = 1;
1486
1487   SETUP(mp_int_init_copy(TEMP(4), a), last);
1488   SETUP(mp_int_init_copy(TEMP(5), b), last);
1489
1490   /* We will work with absolute values here */
1491   MP_SIGN(TEMP(4)) = MP_ZPOS;
1492   MP_SIGN(TEMP(5)) = MP_ZPOS;
1493
1494   { /* Divide out common factors of 2 from u and v */
1495     int  div2_u = s_dp2k(TEMP(4)), div2_v = s_dp2k(TEMP(5));
1496
1497     k = MIN(div2_u, div2_v);
1498     s_qdiv(TEMP(4), k);
1499     s_qdiv(TEMP(5), k);
1500   }
1501
1502   SETUP(mp_int_init_copy(TEMP(6), TEMP(4)), last);
1503   SETUP(mp_int_init_copy(TEMP(7), TEMP(5)), last);
1504
1505   for(;;) {
1506     while(mp_int_is_even(TEMP(4))) {
1507       s_qdiv(TEMP(4), 1);
1508
1509       if(mp_int_is_odd(TEMP(0)) || mp_int_is_odd(TEMP(1))) {
1510         if((res = mp_int_add(TEMP(0), TEMP(7), TEMP(0))) != MP_OK)
1511           goto CLEANUP;
1512         if((res = mp_int_sub(TEMP(1), TEMP(6), TEMP(1))) != MP_OK)
1513           goto CLEANUP;
1514       }
1515
1516       s_qdiv(TEMP(0), 1);
1517       s_qdiv(TEMP(1), 1);
1518     }
1519
1520     while(mp_int_is_even(TEMP(5))) {
1521       s_qdiv(TEMP(5), 1);
1522
1523       if(mp_int_is_odd(TEMP(2)) || mp_int_is_odd(TEMP(3))) {
1524         if((res = mp_int_add(TEMP(2), TEMP(7), TEMP(2))) != MP_OK)
1525           goto CLEANUP;
1526         if((res = mp_int_sub(TEMP(3), TEMP(6), TEMP(3))) != MP_OK)
1527           goto CLEANUP;
1528       }
1529
1530       s_qdiv(TEMP(2), 1);
1531       s_qdiv(TEMP(3), 1);
1532     }
1533
1534     if(mp_int_compare(TEMP(4), TEMP(5)) >= 0) {
1535       if((res = mp_int_sub(TEMP(4), TEMP(5), TEMP(4))) != MP_OK) goto CLEANUP;
1536       if((res = mp_int_sub(TEMP(0), TEMP(2), TEMP(0))) != MP_OK) goto CLEANUP;
1537       if((res = mp_int_sub(TEMP(1), TEMP(3), TEMP(1))) != MP_OK) goto CLEANUP;
1538     }
1539     else {
1540       if((res = mp_int_sub(TEMP(5), TEMP(4), TEMP(5))) != MP_OK) goto CLEANUP;
1541       if((res = mp_int_sub(TEMP(2), TEMP(0), TEMP(2))) != MP_OK) goto CLEANUP;
1542       if((res = mp_int_sub(TEMP(3), TEMP(1), TEMP(3))) != MP_OK) goto CLEANUP;
1543     }
1544
1545     if(CMPZ(TEMP(4)) == 0) {
1546       if(x && (res = mp_int_copy(TEMP(2), x)) != MP_OK) goto CLEANUP;
1547       if(y && (res = mp_int_copy(TEMP(3), y)) != MP_OK) goto CLEANUP;
1548       if(c) {
1549         if(!s_qmul(TEMP(5), k)) {
1550           res = MP_MEMORY;
1551           goto CLEANUP;
1552         }
1553
1554         res = mp_int_copy(TEMP(5), c);
1555       }
1556
1557       break;
1558     }
1559   }
1560
1561  CLEANUP:
1562   while(--last >= 0)
1563     mp_int_clear(TEMP(last));
1564
1565   return res;
1566 }
1567
1568 /* }}} */
1569
1570 /* {{{ mp_int_lcm(a, b, c) */
1571
1572 mp_result mp_int_lcm(mp_int a, mp_int b, mp_int c)
1573 {
1574   mpz_t     lcm;
1575   mp_result res;
1576
1577   CHECK(a != NULL && b != NULL && c != NULL);
1578
1579   /* Since a * b = gcd(a, b) * lcm(a, b), we can compute
1580      lcm(a, b) = (a / gcd(a, b)) * b.
1581
1582      This formulation insures everything works even if the input
1583      variables share space.
1584    */
1585   if((res = mp_int_init(&lcm)) != MP_OK)
1586     return res;
1587   if((res = mp_int_gcd(a, b, &lcm)) != MP_OK)
1588     goto CLEANUP;
1589   if((res = mp_int_div(a, &lcm, &lcm, NULL)) != MP_OK)
1590     goto CLEANUP;
1591   if((res = mp_int_mul(&lcm, b, &lcm)) != MP_OK)
1592     goto CLEANUP;
1593
1594   res = mp_int_copy(&lcm, c);
1595
1596   CLEANUP:
1597     mp_int_clear(&lcm);
1598
1599   return res;
1600 }
1601
1602 /* }}} */
1603
1604 /* {{{ mp_int_divisible_value(a, v) */
1605
1606 int       mp_int_divisible_value(mp_int a, mp_small v)
1607 {
1608   mp_small rem = 0;
1609
1610   if(mp_int_div_value(a, v, NULL, &rem) != MP_OK)
1611     return 0;
1612
1613   return rem == 0;
1614 }
1615
1616 /* }}} */
1617
1618 /* {{{ mp_int_is_pow2(z) */
1619
1620 int       mp_int_is_pow2(mp_int z)
1621 {
1622   CHECK(z != NULL);
1623
1624   return s_isp2(z);
1625 }
1626
1627 /* }}} */
1628
1629 /* {{{ mp_int_root(a, b, c) */
1630
1631 /* Implementation of Newton's root finding method, based loosely on a
1632    patch contributed by Hal Finkel <half@halssoftware.com>
1633    modified by M. J. Fromberger.
1634  */
1635 mp_result mp_int_root(mp_int a, mp_small b, mp_int c)
1636 {
1637   mp_result  res = MP_OK;
1638   mpz_t      temp[5];
1639   int        last = 0;
1640   int        flips = 0;
1641
1642   CHECK(a != NULL && c != NULL && b > 0);
1643
1644   if(b == 1) {
1645     return mp_int_copy(a, c);
1646   }
1647   if(MP_SIGN(a) == MP_NEG) {
1648     if(b % 2 == 0)
1649       return MP_UNDEF; /* root does not exist for negative a with even b */
1650     else
1651       flips = 1;
1652   }
1653
1654   SETUP(mp_int_init_copy(TEMP(last), a), last);
1655   SETUP(mp_int_init_copy(TEMP(last), a), last);
1656   SETUP(mp_int_init(TEMP(last)), last);
1657   SETUP(mp_int_init(TEMP(last)), last);
1658   SETUP(mp_int_init(TEMP(last)), last);
1659
1660   (void) mp_int_abs(TEMP(0), TEMP(0));
1661   (void) mp_int_abs(TEMP(1), TEMP(1));
1662
1663   for(;;) {
1664     if((res = mp_int_expt(TEMP(1), b, TEMP(2))) != MP_OK)
1665       goto CLEANUP;
1666
1667     if(mp_int_compare_unsigned(TEMP(2), TEMP(0)) <= 0)
1668       break;
1669
1670     if((res = mp_int_sub(TEMP(2), TEMP(0), TEMP(2))) != MP_OK)
1671       goto CLEANUP;
1672     if((res = mp_int_expt(TEMP(1), b - 1, TEMP(3))) != MP_OK)
1673       goto CLEANUP;
1674     if((res = mp_int_mul_value(TEMP(3), b, TEMP(3))) != MP_OK)
1675       goto CLEANUP;
1676     if((res = mp_int_div(TEMP(2), TEMP(3), TEMP(4), NULL)) != MP_OK)
1677       goto CLEANUP;
1678     if((res = mp_int_sub(TEMP(1), TEMP(4), TEMP(4))) != MP_OK)
1679       goto CLEANUP;
1680
1681     if(mp_int_compare_unsigned(TEMP(1), TEMP(4)) == 0) {
1682       if((res = mp_int_sub_value(TEMP(4), 1, TEMP(4))) != MP_OK)
1683         goto CLEANUP;
1684     }
1685     if((res = mp_int_copy(TEMP(4), TEMP(1))) != MP_OK)
1686       goto CLEANUP;
1687   }
1688
1689   if((res = mp_int_copy(TEMP(1), c)) != MP_OK)
1690     goto CLEANUP;
1691
1692   /* If the original value of a was negative, flip the output sign. */
1693   if(flips)
1694     (void) mp_int_neg(c, c); /* cannot fail */
1695
1696  CLEANUP:
1697   while(--last >= 0)
1698     mp_int_clear(TEMP(last));
1699
1700   return res;
1701 }
1702
1703 /* }}} */
1704
1705 /* {{{ mp_int_to_int(z, out) */
1706
1707 mp_result mp_int_to_int(mp_int z, mp_small *out)
1708 {
1709   mp_usmall uv = 0;
1710   mp_size   uz;
1711   mp_digit *dz;
1712   mp_sign   sz;
1713
1714   CHECK(z != NULL);
1715
1716   /* Make sure the value is representable as an int */
1717   sz = MP_SIGN(z);
1718   if((sz == MP_ZPOS && mp_int_compare_value(z, MP_SMALL_MAX) > 0) ||
1719      mp_int_compare_value(z, MP_SMALL_MIN) < 0)
1720     return MP_RANGE;
1721
1722   uz = MP_USED(z);
1723   dz = MP_DIGITS(z) + uz - 1;
1724
1725   while(uz > 0) {
1726     uv <<= MP_DIGIT_BIT/2;
1727     uv = (uv << (MP_DIGIT_BIT/2)) | *dz--;
1728     --uz;
1729   }
1730
1731   if(out)
1732     *out = (sz == MP_NEG) ? -(mp_small)uv : (mp_small)uv;
1733
1734   return MP_OK;
1735 }
1736
1737 /* }}} */
1738
1739 /* {{{ mp_int_to_uint(z, *out) */
1740
1741 mp_result mp_int_to_uint(mp_int z, mp_usmall *out)
1742 {
1743   mp_usmall uv = 0;
1744   mp_size   uz;
1745   mp_digit *dz;
1746   mp_sign   sz;
1747
1748   CHECK(z != NULL);
1749
1750   /* Make sure the value is representable as an int */
1751   sz = MP_SIGN(z);
1752   if(!(sz == MP_ZPOS && mp_int_compare_value(z, UINT_MAX) <= 0))
1753     return MP_RANGE;
1754
1755   uz = MP_USED(z);
1756   dz = MP_DIGITS(z) + uz - 1;
1757
1758   while(uz > 0) {
1759     uv <<= MP_DIGIT_BIT/2;
1760     uv = (uv << (MP_DIGIT_BIT/2)) | *dz--;
1761     --uz;
1762   }
1763
1764   if(out)
1765     *out = uv;
1766
1767   return MP_OK;
1768 }
1769
1770 /* }}} */
1771
1772 /* {{{ mp_int_to_string(z, radix, str, limit) */
1773
1774 mp_result mp_int_to_string(mp_int z, mp_size radix,
1775                            char *str, int limit)
1776 {
1777   mp_result res;
1778   int       cmp = 0;
1779
1780   CHECK(z != NULL && str != NULL && limit >= 2);
1781
1782   if(radix < MP_MIN_RADIX || radix > MP_MAX_RADIX)
1783     return MP_RANGE;
1784
1785   if(CMPZ(z) == 0) {
1786     *str++ = s_val2ch(0, 1);
1787   }
1788   else {
1789     mpz_t tmp;
1790     char  *h, *t;
1791
1792     if((res = mp_int_init_copy(&tmp, z)) != MP_OK)
1793       return res;
1794
1795     if(MP_SIGN(z) == MP_NEG) {
1796       *str++ = '-';
1797       --limit;
1798     }
1799     h = str;
1800
1801     /* Generate digits in reverse order until finished or limit reached */
1802     for(/* */; limit > 0; --limit) {
1803       mp_digit d;
1804
1805       if((cmp = CMPZ(&tmp)) == 0)
1806         break;
1807
1808       d = s_ddiv(&tmp, (mp_digit)radix);
1809       *str++ = s_val2ch(d, 1);
1810     }
1811     t = str - 1;
1812
1813     /* Put digits back in correct output order */
1814     while(h < t) {
1815       char tc = *h;
1816       *h++ = *t;
1817       *t-- = tc;
1818     }
1819
1820     mp_int_clear(&tmp);
1821   }
1822
1823   *str = '\0';
1824   if(cmp == 0)
1825     return MP_OK;
1826   else
1827     return MP_TRUNC;
1828 }
1829
1830 /* }}} */
1831
1832 /* {{{ mp_int_string_len(z, radix) */
1833
1834 mp_result mp_int_string_len(mp_int z, mp_size radix)
1835 {
1836   int  len;
1837
1838   CHECK(z != NULL);
1839
1840   if(radix < MP_MIN_RADIX || radix > MP_MAX_RADIX)
1841     return MP_RANGE;
1842
1843   len = s_outlen(z, radix) + 1; /* for terminator */
1844
1845   /* Allow for sign marker on negatives */
1846   if(MP_SIGN(z) == MP_NEG)
1847     len += 1;
1848
1849   return len;
1850 }
1851
1852 /* }}} */
1853
1854 /* {{{ mp_int_read_string(z, radix, *str) */
1855
1856 /* Read zero-terminated string into z */
1857 mp_result mp_int_read_string(mp_int z, mp_size radix, const char *str)
1858 {
1859   return mp_int_read_cstring(z, radix, str, NULL);
1860
1861 }
1862
1863 /* }}} */
1864
1865 /* {{{ mp_int_read_cstring(z, radix, *str, **end) */
1866
1867 mp_result mp_int_read_cstring(mp_int z, mp_size radix, const char *str, char **end)
1868 {
1869   int       ch;
1870
1871   CHECK(z != NULL && str != NULL);
1872
1873   if(radix < MP_MIN_RADIX || radix > MP_MAX_RADIX)
1874     return MP_RANGE;
1875
1876   /* Skip leading whitespace */
1877   while(isspace((int)*str))
1878     ++str;
1879
1880   /* Handle leading sign tag (+/-, positive default) */
1881   switch(*str) {
1882   case '-':
1883     MP_SIGN(z) = MP_NEG;
1884     ++str;
1885     break;
1886   case '+':
1887     ++str; /* fallthrough */
1888   default:
1889     MP_SIGN(z) = MP_ZPOS;
1890     break;
1891   }
1892
1893   /* Skip leading zeroes */
1894   while((ch = s_ch2val(*str, radix)) == 0)
1895     ++str;
1896
1897   /* Make sure there is enough space for the value */
1898   if(!s_pad(z, s_inlen(strlen(str), radix)))
1899     return MP_MEMORY;
1900
1901   MP_USED(z) = 1; z->digits[0] = 0;
1902
1903   while(*str != '\0' && ((ch = s_ch2val(*str, radix)) >= 0)) {
1904     s_dmul(z, (mp_digit)radix);
1905     s_dadd(z, (mp_digit)ch);
1906     ++str;
1907   }
1908
1909   CLAMP(z);
1910
1911   /* Override sign for zero, even if negative specified. */
1912   if(CMPZ(z) == 0)
1913     MP_SIGN(z) = MP_ZPOS;
1914
1915   if(end != NULL)
1916     *end = (char *)str;
1917
1918   /* Return a truncation error if the string has unprocessed
1919      characters remaining, so the caller can tell if the whole string
1920      was done */
1921   if(*str != '\0')
1922     return MP_TRUNC;
1923   else
1924     return MP_OK;
1925 }
1926
1927 /* }}} */
1928
1929 /* {{{ mp_int_count_bits(z) */
1930
1931 mp_result mp_int_count_bits(mp_int z)
1932 {
1933   mp_size  nbits = 0, uz;
1934   mp_digit d;
1935
1936   CHECK(z != NULL);
1937
1938   uz = MP_USED(z);
1939   if(uz == 1 && z->digits[0] == 0)
1940     return 1;
1941
1942   --uz;
1943   nbits = uz * MP_DIGIT_BIT;
1944   d = z->digits[uz];
1945
1946   while(d != 0) {
1947     d >>= 1;
1948     ++nbits;
1949   }
1950
1951   return nbits;
1952 }
1953
1954 /* }}} */
1955
1956 /* {{{ mp_int_to_binary(z, buf, limit) */
1957
1958 mp_result mp_int_to_binary(mp_int z, unsigned char *buf, int limit)
1959 {
1960   static const int PAD_FOR_2C = 1;
1961
1962   mp_result res;
1963   int       limpos = limit;
1964
1965   CHECK(z != NULL && buf != NULL);
1966
1967   res = s_tobin(z, buf, &limpos, PAD_FOR_2C);
1968
1969   if(MP_SIGN(z) == MP_NEG)
1970     s_2comp(buf, limpos);
1971
1972   return res;
1973 }
1974
1975 /* }}} */
1976
1977 /* {{{ mp_int_read_binary(z, buf, len) */
1978
1979 mp_result mp_int_read_binary(mp_int z, unsigned char *buf, int len)
1980 {
1981   mp_size need, i;
1982   unsigned char *tmp;
1983   mp_digit *dz;
1984
1985   CHECK(z != NULL && buf != NULL && len > 0);
1986
1987   /* Figure out how many digits are needed to represent this value */
1988   need = ((len * CHAR_BIT) + (MP_DIGIT_BIT - 1)) / MP_DIGIT_BIT;
1989   if(!s_pad(z, need))
1990     return MP_MEMORY;
1991
1992   mp_int_zero(z);
1993
1994   /* If the high-order bit is set, take the 2's complement before
1995      reading the value (it will be restored afterward) */
1996   if(buf[0] >> (CHAR_BIT - 1)) {
1997     MP_SIGN(z) = MP_NEG;
1998     s_2comp(buf, len);
1999   }
2000
2001   dz = MP_DIGITS(z);
2002   for(tmp = buf, i = len; i > 0; --i, ++tmp) {
2003     s_qmul(z, (mp_size) CHAR_BIT);
2004     *dz |= *tmp;
2005   }
2006
2007   /* Restore 2's complement if we took it before */
2008   if(MP_SIGN(z) == MP_NEG)
2009     s_2comp(buf, len);
2010
2011   return MP_OK;
2012 }
2013
2014 /* }}} */
2015
2016 /* {{{ mp_int_binary_len(z) */
2017
2018 mp_result mp_int_binary_len(mp_int z)
2019 {
2020   mp_result  res = mp_int_count_bits(z);
2021   int        bytes;
2022
2023   if(res <= 0)
2024     return res;
2025
2026   bytes = (res + (CHAR_BIT - 1)) / CHAR_BIT;
2027
2028   /* If the highest-order bit falls exactly on a byte boundary, we
2029      need to pad with an extra byte so that the sign will be read
2030      correctly when reading it back in. */
2031   if(bytes * CHAR_BIT == res)
2032     ++bytes;
2033
2034   return bytes;
2035 }
2036
2037 /* }}} */
2038
2039 /* {{{ mp_int_to_unsigned(z, buf, limit) */
2040
2041 mp_result mp_int_to_unsigned(mp_int z, unsigned char *buf, int limit)
2042 {
2043   static const int NO_PADDING = 0;
2044
2045   CHECK(z != NULL && buf != NULL);
2046
2047   return s_tobin(z, buf, &limit, NO_PADDING);
2048 }
2049
2050 /* }}} */
2051
2052 /* {{{ mp_int_read_unsigned(z, buf, len) */
2053
2054 mp_result mp_int_read_unsigned(mp_int z, unsigned char *buf, int len)
2055 {
2056   mp_size need, i;
2057   unsigned char *tmp;
2058   mp_digit *dz;
2059
2060   CHECK(z != NULL && buf != NULL && len > 0);
2061
2062   /* Figure out how many digits are needed to represent this value */
2063   need = ((len * CHAR_BIT) + (MP_DIGIT_BIT - 1)) / MP_DIGIT_BIT;
2064   if(!s_pad(z, need))
2065     return MP_MEMORY;
2066
2067   mp_int_zero(z);
2068
2069   dz = MP_DIGITS(z);
2070   for(tmp = buf, i = len; i > 0; --i, ++tmp) {
2071     (void) s_qmul(z, CHAR_BIT);
2072     *dz |= *tmp;
2073   }
2074
2075   return MP_OK;
2076 }
2077
2078 /* }}} */
2079
2080 /* {{{ mp_int_unsigned_len(z) */
2081
2082 mp_result mp_int_unsigned_len(mp_int z)
2083 {
2084   mp_result  res = mp_int_count_bits(z);
2085   int        bytes;
2086
2087   if(res <= 0)
2088     return res;
2089
2090   bytes = (res + (CHAR_BIT - 1)) / CHAR_BIT;
2091
2092   return bytes;
2093 }
2094
2095 /* }}} */
2096
2097 /* {{{ mp_error_string(res) */
2098
2099 const char *mp_error_string(mp_result res)
2100 {
2101   int ix;
2102   if(res > 0)
2103     return s_unknown_err;
2104
2105   res = -res;
2106   for(ix = 0; ix < res && s_error_msg[ix] != NULL; ++ix)
2107     ;
2108
2109   if(s_error_msg[ix] != NULL)
2110     return s_error_msg[ix];
2111   else
2112     return s_unknown_err;
2113 }
2114
2115 /* }}} */
2116
2117 /*------------------------------------------------------------------------*/
2118 /* Private functions for internal use.  These make assumptions.           */
2119
2120 /* {{{ s_alloc(num) */
2121
2122 STATIC mp_digit *s_alloc(mp_size num)
2123 {
2124   mp_digit *out = malloc(num * sizeof(mp_digit));
2125
2126   assert(out != NULL); /* for debugging */
2127 #if DEBUG > 1
2128   {
2129     mp_digit v = (mp_digit) 0xdeadbeef;
2130     int      ix;
2131
2132     for(ix = 0; ix < num; ++ix)
2133       out[ix] = v;
2134   }
2135 #endif
2136
2137   return out;
2138 }
2139
2140 /* }}} */
2141
2142 /* {{{ s_realloc(old, osize, nsize) */
2143
2144 STATIC mp_digit *s_realloc(mp_digit *old, mp_size osize, mp_size nsize)
2145 {
2146 #if DEBUG > 1
2147   mp_digit *new = s_alloc(nsize);
2148   int       ix;
2149
2150   for(ix = 0; ix < nsize; ++ix)
2151     new[ix] = (mp_digit) 0xdeadbeef;
2152
2153   memcpy(new, old, osize * sizeof(mp_digit));
2154 #else
2155   mp_digit *new = realloc(old, nsize * sizeof(mp_digit));
2156
2157   assert(new != NULL); /* for debugging */
2158 #endif
2159   return new;
2160 }
2161
2162 /* }}} */
2163
2164 /* {{{ s_free(ptr) */
2165
2166 STATIC void s_free(void *ptr)
2167 {
2168   free(ptr);
2169 }
2170
2171 /* }}} */
2172
2173 /* {{{ s_pad(z, min) */
2174
2175 STATIC int      s_pad(mp_int z, mp_size min)
2176 {
2177   if(MP_ALLOC(z) < min) {
2178     mp_size nsize = ROUND_PREC(min);
2179     mp_digit *tmp;
2180
2181     if((void *)z->digits == (void *)z) {
2182       if((tmp = s_alloc(nsize)) == NULL)
2183         return 0;
2184
2185       COPY(MP_DIGITS(z), tmp, MP_USED(z));
2186     }
2187     else if((tmp = s_realloc(MP_DIGITS(z), MP_ALLOC(z), nsize)) == NULL)
2188       return 0;
2189
2190     MP_DIGITS(z) = tmp;
2191     MP_ALLOC(z) = nsize;
2192   }
2193
2194   return 1;
2195 }
2196
2197 /* }}} */
2198
2199 /* {{{ s_fake(z, value, vbuf) */
2200
2201 STATIC void      s_fake(mp_int z, mp_small value, mp_digit vbuf[])
2202 {
2203   mp_size uv = (mp_size) s_vpack(value, vbuf);
2204
2205   z->used = uv;
2206   z->alloc = MP_VALUE_DIGITS(value);
2207   z->sign = (value < 0) ? MP_NEG : MP_ZPOS;
2208   z->digits = vbuf;
2209 }
2210
2211 /* }}} */
2212
2213 /* {{{ s_cdig(da, db, len) */
2214
2215 STATIC int      s_cdig(mp_digit *da, mp_digit *db, mp_size len)
2216 {
2217   mp_digit *dat = da + len - 1, *dbt = db + len - 1;
2218
2219   for(/* */; len != 0; --len, --dat, --dbt) {
2220     if(*dat > *dbt)
2221       return 1;
2222     else if(*dat < *dbt)
2223       return -1;
2224   }
2225
2226   return 0;
2227 }
2228
2229 /* }}} */
2230
2231 /* {{{ s_vpack(v, t[]) */
2232
2233 STATIC int       s_vpack(mp_small v, mp_digit t[])
2234 {
2235   mp_usmall    uv = (mp_usmall) ((v < 0) ? -v : v);
2236   int          ndig = 0;
2237
2238   if(uv == 0)
2239     t[ndig++] = 0;
2240   else {
2241     while(uv != 0) {
2242       t[ndig++] = (mp_digit) uv;
2243       uv >>= MP_DIGIT_BIT/2;
2244       uv >>= MP_DIGIT_BIT/2;
2245     }
2246   }
2247
2248   return ndig;
2249 }
2250
2251 /* }}} */
2252
2253 /* {{{ s_ucmp(a, b) */
2254
2255 STATIC int      s_ucmp(mp_int a, mp_int b)
2256 {
2257   mp_size  ua = MP_USED(a), ub = MP_USED(b);
2258
2259   if(ua > ub)
2260     return 1;
2261   else if(ub > ua)
2262     return -1;
2263   else
2264     return s_cdig(MP_DIGITS(a), MP_DIGITS(b), ua);
2265 }
2266
2267 /* }}} */
2268
2269 /* {{{ s_vcmp(a, v) */
2270
2271 STATIC int      s_vcmp(mp_int a, mp_small v)
2272 {
2273   mp_digit     vdig[MP_VALUE_DIGITS(v)];
2274   int          ndig = 0;
2275   mp_size      ua = MP_USED(a);
2276
2277   ndig = s_vpack(v, vdig);
2278
2279   if(ua > ndig)
2280     return 1;
2281   else if(ua < ndig)
2282     return -1;
2283   else
2284     return s_cdig(MP_DIGITS(a), vdig, ndig);
2285 }
2286
2287 /* }}} */
2288
2289 /* {{{ s_uadd(da, db, dc, size_a, size_b) */
2290
2291 STATIC mp_digit s_uadd(mp_digit *da, mp_digit *db, mp_digit *dc,
2292                        mp_size size_a, mp_size size_b)
2293 {
2294   mp_size pos;
2295   mp_word w = 0;
2296
2297   /* Insure that da is the longer of the two to simplify later code */
2298   if(size_b > size_a) {
2299     SWAP(mp_digit *, da, db);
2300     SWAP(mp_size, size_a, size_b);
2301   }
2302
2303   /* Add corresponding digits until the shorter number runs out */
2304   for(pos = 0; pos < size_b; ++pos, ++da, ++db, ++dc) {
2305     w = w + (mp_word) *da + (mp_word) *db;
2306     *dc = LOWER_HALF(w);
2307     w = UPPER_HALF(w);
2308   }
2309
2310   /* Propagate carries as far as necessary */
2311   for(/* */; pos < size_a; ++pos, ++da, ++dc) {
2312     w = w + *da;
2313
2314     *dc = LOWER_HALF(w);
2315     w = UPPER_HALF(w);
2316   }
2317
2318   /* Return carry out */
2319   return (mp_digit)w;
2320 }
2321
2322 /* }}} */
2323
2324 /* {{{ s_usub(da, db, dc, size_a, size_b) */
2325
2326 STATIC void     s_usub(mp_digit *da, mp_digit *db, mp_digit *dc,
2327                        mp_size size_a, mp_size size_b)
2328 {
2329   mp_size pos;
2330   mp_word w = 0;
2331
2332   /* We assume that |a| >= |b| so this should definitely hold */
2333   assert(size_a >= size_b);
2334
2335   /* Subtract corresponding digits and propagate borrow */
2336   for(pos = 0; pos < size_b; ++pos, ++da, ++db, ++dc) {
2337     w = ((mp_word)MP_DIGIT_MAX + 1 +  /* MP_RADIX */
2338          (mp_word)*da) - w - (mp_word)*db;
2339
2340     *dc = LOWER_HALF(w);
2341     w = (UPPER_HALF(w) == 0);
2342   }
2343
2344   /* Finish the subtraction for remaining upper digits of da */
2345   for(/* */; pos < size_a; ++pos, ++da, ++dc) {
2346     w = ((mp_word)MP_DIGIT_MAX + 1 +  /* MP_RADIX */
2347          (mp_word)*da) - w;
2348
2349     *dc = LOWER_HALF(w);
2350     w = (UPPER_HALF(w) == 0);
2351   }
2352
2353   /* If there is a borrow out at the end, it violates the precondition */
2354   assert(w == 0);
2355 }
2356
2357 /* }}} */
2358
2359 /* {{{ s_kmul(da, db, dc, size_a, size_b) */
2360
2361 STATIC int       s_kmul(mp_digit *da, mp_digit *db, mp_digit *dc,
2362                         mp_size size_a, mp_size size_b)
2363 {
2364   mp_size  bot_size;
2365
2366   /* Make sure b is the smaller of the two input values */
2367   if(size_b > size_a) {
2368     SWAP(mp_digit *, da, db);
2369     SWAP(mp_size, size_a, size_b);
2370   }
2371
2372   /* Insure that the bottom is the larger half in an odd-length split;
2373      the code below relies on this being true.
2374    */
2375   bot_size = (size_a + 1) / 2;
2376
2377   /* If the values are big enough to bother with recursion, use the
2378      Karatsuba algorithm to compute the product; otherwise use the
2379      normal multiplication algorithm
2380    */
2381   if(multiply_threshold &&
2382      size_a >= multiply_threshold &&
2383      size_b > bot_size) {
2384
2385     mp_digit *t1, *t2, *t3, carry;
2386
2387     mp_digit *a_top = da + bot_size;
2388     mp_digit *b_top = db + bot_size;
2389
2390     mp_size  at_size = size_a - bot_size;
2391     mp_size  bt_size = size_b - bot_size;
2392     mp_size  buf_size = 2 * bot_size;
2393
2394     /* Do a single allocation for all three temporary buffers needed;
2395        each buffer must be big enough to hold the product of two
2396        bottom halves, and one buffer needs space for the completed
2397        product; twice the space is plenty.
2398      */
2399     if((t1 = s_alloc(4 * buf_size)) == NULL) return 0;
2400     t2 = t1 + buf_size;
2401     t3 = t2 + buf_size;
2402     ZERO(t1, 4 * buf_size);
2403
2404     /* t1 and t2 are initially used as temporaries to compute the inner product
2405        (a1 + a0)(b1 + b0) = a1b1 + a1b0 + a0b1 + a0b0
2406      */
2407     carry = s_uadd(da, a_top, t1, bot_size, at_size);      /* t1 = a1 + a0 */
2408     t1[bot_size] = carry;
2409
2410     carry = s_uadd(db, b_top, t2, bot_size, bt_size);      /* t2 = b1 + b0 */
2411     t2[bot_size] = carry;
2412
2413     (void) s_kmul(t1, t2, t3, bot_size + 1, bot_size + 1); /* t3 = t1 * t2 */
2414
2415     /* Now we'll get t1 = a0b0 and t2 = a1b1, and subtract them out so that
2416        we're left with only the pieces we want:  t3 = a1b0 + a0b1
2417      */
2418     ZERO(t1, buf_size);
2419     ZERO(t2, buf_size);
2420     (void) s_kmul(da, db, t1, bot_size, bot_size);     /* t1 = a0 * b0 */
2421     (void) s_kmul(a_top, b_top, t2, at_size, bt_size); /* t2 = a1 * b1 */
2422
2423     /* Subtract out t1 and t2 to get the inner product */
2424     s_usub(t3, t1, t3, buf_size + 2, buf_size);
2425     s_usub(t3, t2, t3, buf_size + 2, buf_size);
2426
2427     /* Assemble the output value */
2428     COPY(t1, dc, buf_size);
2429     carry = s_uadd(t3, dc + bot_size, dc + bot_size,
2430                    buf_size + 1, buf_size);
2431     assert(carry == 0);
2432
2433     carry = s_uadd(t2, dc + 2*bot_size, dc + 2*bot_size,
2434                    buf_size, buf_size);
2435     assert(carry == 0);
2436
2437     s_free(t1); /* note t2 and t3 are just internal pointers to t1 */
2438   }
2439   else {
2440     s_umul(da, db, dc, size_a, size_b);
2441   }
2442
2443   return 1;
2444 }
2445
2446 /* }}} */
2447
2448 /* {{{ s_umul(da, db, dc, size_a, size_b) */
2449
2450 STATIC void     s_umul(mp_digit *da, mp_digit *db, mp_digit *dc,
2451                        mp_size size_a, mp_size size_b)
2452 {
2453   mp_size   a, b;
2454   mp_word   w;
2455
2456   for(a = 0; a < size_a; ++a, ++dc, ++da) {
2457     mp_digit *dct = dc;
2458     mp_digit *dbt = db;
2459
2460     if(*da == 0)
2461       continue;
2462
2463     w = 0;
2464     for(b = 0; b < size_b; ++b, ++dbt, ++dct) {
2465       w = (mp_word)*da * (mp_word)*dbt + w + (mp_word)*dct;
2466
2467       *dct = LOWER_HALF(w);
2468       w = UPPER_HALF(w);
2469     }
2470
2471     *dct = (mp_digit)w;
2472   }
2473 }
2474
2475 /* }}} */
2476
2477 /* {{{ s_ksqr(da, dc, size_a) */
2478
2479 STATIC int       s_ksqr(mp_digit *da, mp_digit *dc, mp_size size_a)
2480 {
2481   if(multiply_threshold && size_a > multiply_threshold) {
2482     mp_size    bot_size = (size_a + 1) / 2;
2483     mp_digit  *a_top = da + bot_size;
2484     mp_digit  *t1, *t2, *t3, carry;
2485     mp_size    at_size = size_a - bot_size;
2486     mp_size    buf_size = 2 * bot_size;
2487
2488     if((t1 = s_alloc(4 * buf_size)) == NULL) return 0;
2489     t2 = t1 + buf_size;
2490     t3 = t2 + buf_size;
2491     ZERO(t1, 4 * buf_size);
2492
2493     (void) s_ksqr(da, t1, bot_size);    /* t1 = a0 ^ 2 */
2494     (void) s_ksqr(a_top, t2, at_size);  /* t2 = a1 ^ 2 */
2495
2496     (void) s_kmul(da, a_top, t3, bot_size, at_size);  /* t3 = a0 * a1 */
2497
2498     /* Quick multiply t3 by 2, shifting left (can't overflow) */
2499     {
2500       int     i, top = bot_size + at_size;
2501       mp_word w, save = 0;
2502
2503       for(i = 0; i < top; ++i) {
2504         w = t3[i];
2505         w = (w << 1) | save;
2506         t3[i] = LOWER_HALF(w);
2507         save = UPPER_HALF(w);
2508       }
2509       t3[i] = LOWER_HALF(save);
2510     }
2511
2512     /* Assemble the output value */
2513     COPY(t1, dc, 2 * bot_size);
2514     carry = s_uadd(t3, dc + bot_size, dc + bot_size,
2515                    buf_size + 1, buf_size);
2516     assert(carry == 0);
2517
2518     carry = s_uadd(t2, dc + 2*bot_size, dc + 2*bot_size,
2519                    buf_size, buf_size);
2520     assert(carry == 0);
2521
2522     s_free(t1); /* note that t2 and t2 are internal pointers only */
2523
2524   }
2525   else {
2526     s_usqr(da, dc, size_a);
2527   }
2528
2529   return 1;
2530 }
2531
2532 /* }}} */
2533
2534 /* {{{ s_usqr(da, dc, size_a) */
2535
2536 STATIC void      s_usqr(mp_digit *da, mp_digit *dc, mp_size size_a)
2537 {
2538   mp_size  i, j;
2539   mp_word  w;
2540
2541   for(i = 0; i < size_a; ++i, dc += 2, ++da) {
2542     mp_digit  *dct = dc, *dat = da;
2543
2544     if(*da == 0)
2545       continue;
2546
2547     /* Take care of the first digit, no rollover */
2548     w = (mp_word)*dat * (mp_word)*dat + (mp_word)*dct;
2549     *dct = LOWER_HALF(w);
2550     w = UPPER_HALF(w);
2551     ++dat; ++dct;
2552
2553     for(j = i + 1; j < size_a; ++j, ++dat, ++dct) {
2554       mp_word  t = (mp_word)*da * (mp_word)*dat;
2555       mp_word  u = w + (mp_word)*dct, ov = 0;
2556
2557       /* Check if doubling t will overflow a word */
2558       if(HIGH_BIT_SET(t))
2559         ov = 1;
2560
2561       w = t + t;
2562
2563       /* Check if adding u to w will overflow a word */
2564       if(ADD_WILL_OVERFLOW(w, u))
2565         ov = 1;
2566
2567       w += u;
2568
2569       *dct = LOWER_HALF(w);
2570       w = UPPER_HALF(w);
2571       if(ov) {
2572         w += MP_DIGIT_MAX; /* MP_RADIX */
2573         ++w;
2574       }
2575     }
2576
2577     w = w + *dct;
2578     *dct = (mp_digit)w;
2579     while((w = UPPER_HALF(w)) != 0) {
2580       ++dct; w = w + *dct;
2581       *dct = LOWER_HALF(w);
2582     }
2583
2584     assert(w == 0);
2585   }
2586 }
2587
2588 /* }}} */
2589
2590 /* {{{ s_dadd(a, b) */
2591
2592 STATIC void      s_dadd(mp_int a, mp_digit b)
2593 {
2594   mp_word   w = 0;
2595   mp_digit *da = MP_DIGITS(a);
2596   mp_size   ua = MP_USED(a);
2597
2598   w = (mp_word)*da + b;
2599   *da++ = LOWER_HALF(w);
2600   w = UPPER_HALF(w);
2601
2602   for(ua -= 1; ua > 0; --ua, ++da) {
2603     w = (mp_word)*da + w;
2604
2605     *da = LOWER_HALF(w);
2606     w = UPPER_HALF(w);
2607   }
2608
2609   if(w) {
2610     *da = (mp_digit)w;
2611     MP_USED(a) += 1;
2612   }
2613 }
2614
2615 /* }}} */
2616
2617 /* {{{ s_dmul(a, b) */
2618
2619 STATIC void      s_dmul(mp_int a, mp_digit b)
2620 {
2621   mp_word   w = 0;
2622   mp_digit *da = MP_DIGITS(a);
2623   mp_size   ua = MP_USED(a);
2624
2625   while(ua > 0) {
2626     w = (mp_word)*da * b + w;
2627     *da++ = LOWER_HALF(w);
2628     w = UPPER_HALF(w);
2629     --ua;
2630   }
2631
2632   if(w) {
2633     *da = (mp_digit)w;
2634     MP_USED(a) += 1;
2635   }
2636 }
2637
2638 /* }}} */
2639
2640 /* {{{ s_dbmul(da, b, dc, size_a) */
2641
2642 STATIC void      s_dbmul(mp_digit *da, mp_digit b, mp_digit *dc, mp_size size_a)
2643 {
2644   mp_word  w = 0;
2645
2646   while(size_a > 0) {
2647     w = (mp_word)*da++ * (mp_word)b + w;
2648
2649     *dc++ = LOWER_HALF(w);
2650     w = UPPER_HALF(w);
2651     --size_a;
2652   }
2653
2654   if(w)
2655     *dc = LOWER_HALF(w);
2656 }
2657
2658 /* }}} */
2659
2660 /* {{{ s_ddiv(da, d, dc, size_a) */
2661
2662 STATIC mp_digit  s_ddiv(mp_int a, mp_digit b)
2663 {
2664   mp_word   w = 0, qdigit;
2665   mp_size   ua = MP_USED(a);
2666   mp_digit *da = MP_DIGITS(a) + ua - 1;
2667
2668   for(/* */; ua > 0; --ua, --da) {
2669     w = (w << MP_DIGIT_BIT) | *da;
2670
2671     if(w >= b) {
2672       qdigit = w / b;
2673       w = w % b;
2674     }
2675     else {
2676       qdigit = 0;
2677     }
2678
2679     *da = (mp_digit)qdigit;
2680   }
2681
2682   CLAMP(a);
2683   return (mp_digit)w;
2684 }
2685
2686 /* }}} */
2687
2688 /* {{{ s_qdiv(z, p2) */
2689
2690 STATIC void     s_qdiv(mp_int z, mp_size p2)
2691 {
2692   mp_size ndig = p2 / MP_DIGIT_BIT, nbits = p2 % MP_DIGIT_BIT;
2693   mp_size uz = MP_USED(z);
2694
2695   if(ndig) {
2696     mp_size  mark;
2697     mp_digit *to, *from;
2698
2699     if(ndig >= uz) {
2700       mp_int_zero(z);
2701       return;
2702     }
2703
2704     to = MP_DIGITS(z); from = to + ndig;
2705
2706     for(mark = ndig; mark < uz; ++mark)
2707       *to++ = *from++;
2708
2709     MP_USED(z) = uz - ndig;
2710   }
2711
2712   if(nbits) {
2713     mp_digit d = 0, *dz, save;
2714     mp_size  up = MP_DIGIT_BIT - nbits;
2715
2716     uz = MP_USED(z);
2717     dz = MP_DIGITS(z) + uz - 1;
2718
2719     for(/* */; uz > 0; --uz, --dz) {
2720       save = *dz;
2721
2722       *dz = (*dz >> nbits) | (d << up);
2723       d = save;
2724     }
2725
2726     CLAMP(z);
2727   }
2728
2729   if(MP_USED(z) == 1 && z->digits[0] == 0)
2730     MP_SIGN(z) = MP_ZPOS;
2731 }
2732
2733 /* }}} */
2734
2735 /* {{{ s_qmod(z, p2) */
2736
2737 STATIC void     s_qmod(mp_int z, mp_size p2)
2738 {
2739   mp_size   start = p2 / MP_DIGIT_BIT + 1, rest = p2 % MP_DIGIT_BIT;
2740   mp_size   uz = MP_USED(z);
2741   mp_digit  mask = (1 << rest) - 1;
2742
2743   if(start <= uz) {
2744     MP_USED(z) = start;
2745     z->digits[start - 1] &= mask;
2746     CLAMP(z);
2747   }
2748 }
2749
2750 /* }}} */
2751
2752 /* {{{ s_qmul(z, p2) */
2753
2754 STATIC int      s_qmul(mp_int z, mp_size p2)
2755 {
2756   mp_size   uz, need, rest, extra, i;
2757   mp_digit *from, *to, d;
2758
2759   if(p2 == 0)
2760     return 1;
2761
2762   uz = MP_USED(z);
2763   need = p2 / MP_DIGIT_BIT; rest = p2 % MP_DIGIT_BIT;
2764
2765   /* Figure out if we need an extra digit at the top end; this occurs
2766      if the topmost `rest' bits of the high-order digit of z are not
2767      zero, meaning they will be shifted off the end if not preserved */
2768   extra = 0;
2769   if(rest != 0) {
2770     mp_digit *dz = MP_DIGITS(z) + uz - 1;
2771
2772     if((*dz >> (MP_DIGIT_BIT - rest)) != 0)
2773       extra = 1;
2774   }
2775
2776   if(!s_pad(z, uz + need + extra))
2777     return 0;
2778
2779   /* If we need to shift by whole digits, do that in one pass, then
2780      to back and shift by partial digits.
2781    */
2782   if(need > 0) {
2783     from = MP_DIGITS(z) + uz - 1;
2784     to = from + need;
2785
2786     for(i = 0; i < uz; ++i)
2787       *to-- = *from--;
2788
2789     ZERO(MP_DIGITS(z), need);
2790     uz += need;
2791   }
2792
2793   if(rest) {
2794     d = 0;
2795     for(i = need, from = MP_DIGITS(z) + need; i < uz; ++i, ++from) {
2796       mp_digit save = *from;
2797
2798       *from = (*from << rest) | (d >> (MP_DIGIT_BIT - rest));
2799       d = save;
2800     }
2801
2802     d >>= (MP_DIGIT_BIT - rest);
2803     if(d != 0) {
2804       *from = d;
2805       uz += extra;
2806     }
2807   }
2808
2809   MP_USED(z) = uz;
2810   CLAMP(z);
2811
2812   return 1;
2813 }
2814
2815 /* }}} */
2816
2817 /* {{{ s_qsub(z, p2) */
2818
2819 /* Compute z = 2^p2 - |z|; requires that 2^p2 >= |z|
2820    The sign of the result is always zero/positive.
2821  */
2822 STATIC int       s_qsub(mp_int z, mp_size p2)
2823 {
2824   mp_digit hi = (1 << (p2 % MP_DIGIT_BIT)), *zp;
2825   mp_size  tdig = (p2 / MP_DIGIT_BIT), pos;
2826   mp_word  w = 0;
2827
2828   if(!s_pad(z, tdig + 1))
2829     return 0;
2830
2831   for(pos = 0, zp = MP_DIGITS(z); pos < tdig; ++pos, ++zp) {
2832     w = ((mp_word) MP_DIGIT_MAX + 1) - w - (mp_word)*zp;
2833
2834     *zp = LOWER_HALF(w);
2835     w = UPPER_HALF(w) ? 0 : 1;
2836   }
2837
2838   w = ((mp_word) MP_DIGIT_MAX + 1 + hi) - w - (mp_word)*zp;
2839   *zp = LOWER_HALF(w);
2840
2841   assert(UPPER_HALF(w) != 0); /* no borrow out should be possible */
2842
2843   MP_SIGN(z) = MP_ZPOS;
2844   CLAMP(z);
2845
2846   return 1;
2847 }
2848
2849 /* }}} */
2850
2851 /* {{{ s_dp2k(z) */
2852
2853 STATIC int      s_dp2k(mp_int z)
2854 {
2855   int       k = 0;
2856   mp_digit *dp = MP_DIGITS(z), d;
2857
2858   if(MP_USED(z) == 1 && *dp == 0)
2859     return 1;
2860
2861   while(*dp == 0) {
2862     k += MP_DIGIT_BIT;
2863     ++dp;
2864   }
2865
2866   d = *dp;
2867   while((d & 1) == 0) {
2868     d >>= 1;
2869     ++k;
2870   }
2871
2872   return k;
2873 }
2874
2875 /* }}} */
2876
2877 /* {{{ s_isp2(z) */
2878
2879 STATIC int       s_isp2(mp_int z)
2880 {
2881   mp_size uz = MP_USED(z), k = 0;
2882   mp_digit *dz = MP_DIGITS(z), d;
2883
2884   while(uz > 1) {
2885     if(*dz++ != 0)
2886       return -1;
2887     k += MP_DIGIT_BIT;
2888     --uz;
2889   }
2890
2891   d = *dz;
2892   while(d > 1) {
2893     if(d & 1)
2894       return -1;
2895     ++k; d >>= 1;
2896   }
2897
2898   return (int) k;
2899 }
2900
2901 /* }}} */
2902
2903 /* {{{ s_2expt(z, k) */
2904
2905 STATIC int       s_2expt(mp_int z, mp_small k)
2906 {
2907   mp_size  ndig, rest;
2908   mp_digit *dz;
2909
2910   ndig = (k + MP_DIGIT_BIT) / MP_DIGIT_BIT;
2911   rest = k % MP_DIGIT_BIT;
2912
2913   if(!s_pad(z, ndig))
2914     return 0;
2915
2916   dz = MP_DIGITS(z);
2917   ZERO(dz, ndig);
2918   *(dz + ndig - 1) = (1 << rest);
2919   MP_USED(z) = ndig;
2920
2921   return 1;
2922 }
2923
2924 /* }}} */
2925
2926 /* {{{ s_norm(a, b) */
2927
2928 STATIC int      s_norm(mp_int a, mp_int b)
2929 {
2930   mp_digit d = b->digits[MP_USED(b) - 1];
2931   int      k = 0;
2932
2933   while(d < (mp_digit) (1 << (MP_DIGIT_BIT - 1))) { /* d < (MP_RADIX / 2) */
2934     d <<= 1;
2935     ++k;
2936   }
2937
2938   /* These multiplications can't fail */
2939   if(k != 0) {
2940     (void) s_qmul(a, (mp_size) k);
2941     (void) s_qmul(b, (mp_size) k);
2942   }
2943
2944   return k;
2945 }
2946
2947 /* }}} */
2948
2949 /* {{{ s_brmu(z, m) */
2950
2951 STATIC mp_result s_brmu(mp_int z, mp_int m)
2952 {
2953   mp_size um = MP_USED(m) * 2;
2954
2955   if(!s_pad(z, um))
2956     return MP_MEMORY;
2957
2958   s_2expt(z, MP_DIGIT_BIT * um);
2959   return mp_int_div(z, m, z, NULL);
2960 }
2961
2962 /* }}} */
2963
2964 /* {{{ s_reduce(x, m, mu, q1, q2) */
2965
2966 STATIC int       s_reduce(mp_int x, mp_int m, mp_int mu, mp_int q1, mp_int q2)
2967 {
2968   mp_size   um = MP_USED(m), umb_p1, umb_m1;
2969
2970   umb_p1 = (um + 1) * MP_DIGIT_BIT;
2971   umb_m1 = (um - 1) * MP_DIGIT_BIT;
2972
2973   if(mp_int_copy(x, q1) != MP_OK)
2974     return 0;
2975
2976   /* Compute q2 = floor((floor(x / b^(k-1)) * mu) / b^(k+1)) */
2977   s_qdiv(q1, umb_m1);
2978   UMUL(q1, mu, q2);
2979   s_qdiv(q2, umb_p1);
2980
2981   /* Set x = x mod b^(k+1) */
2982   s_qmod(x, umb_p1);
2983
2984   /* Now, q is a guess for the quotient a / m.
2985      Compute x - q * m mod b^(k+1), replacing x.  This may be off
2986      by a factor of 2m, but no more than that.
2987    */
2988   UMUL(q2, m, q1);
2989   s_qmod(q1, umb_p1);
2990   (void) mp_int_sub(x, q1, x); /* can't fail */
2991
2992   /* The result may be < 0; if it is, add b^(k+1) to pin it in the
2993      proper range. */
2994   if((CMPZ(x) < 0) && !s_qsub(x, umb_p1))
2995     return 0;
2996
2997   /* If x > m, we need to back it off until it is in range.
2998      This will be required at most twice.  */
2999   if(mp_int_compare(x, m) >= 0) {
3000     (void) mp_int_sub(x, m, x);
3001     if(mp_int_compare(x, m) >= 0)
3002       (void) mp_int_sub(x, m, x);
3003   }
3004
3005   /* At this point, x has been properly reduced. */
3006   return 1;
3007 }
3008
3009 /* }}} */
3010
3011 /* {{{ s_embar(a, b, m, mu, c) */
3012
3013 /* Perform modular exponentiation using Barrett's method, where mu is
3014    the reduction constant for m.  Assumes a < m, b > 0. */
3015 STATIC mp_result s_embar(mp_int a, mp_int b, mp_int m, mp_int mu, mp_int c)
3016 {
3017   mp_digit  *db, *dbt, umu, d;
3018   mpz_t     temp[3];
3019   mp_result res = 0;
3020   int       last = 0;
3021
3022   umu = MP_USED(mu); db = MP_DIGITS(b); dbt = db + MP_USED(b) - 1;
3023
3024   while(last < 3) {
3025     SETUP(mp_int_init_size(TEMP(last), 4 * umu), last);
3026     ZERO(MP_DIGITS(TEMP(last - 1)), MP_ALLOC(TEMP(last - 1)));
3027   }
3028
3029   (void) mp_int_set_value(c, 1);
3030
3031   /* Take care of low-order digits */
3032   while(db < dbt) {
3033     int      i;
3034
3035     for(d = *db, i = MP_DIGIT_BIT; i > 0; --i, d >>= 1) {
3036       if(d & 1) {
3037         /* The use of a second temporary avoids allocation */
3038         UMUL(c, a, TEMP(0));
3039         if(!s_reduce(TEMP(0), m, mu, TEMP(1), TEMP(2))) {
3040           res = MP_MEMORY; goto CLEANUP;
3041         }
3042         mp_int_copy(TEMP(0), c);
3043       }
3044
3045
3046       USQR(a, TEMP(0));
3047       assert(MP_SIGN(TEMP(0)) == MP_ZPOS);
3048       if(!s_reduce(TEMP(0), m, mu, TEMP(1), TEMP(2))) {
3049         res = MP_MEMORY; goto CLEANUP;
3050       }
3051       assert(MP_SIGN(TEMP(0)) == MP_ZPOS);
3052       mp_int_copy(TEMP(0), a);
3053
3054
3055     }
3056
3057     ++db;
3058   }
3059
3060   /* Take care of highest-order digit */
3061   d = *dbt;
3062   for(;;) {
3063     if(d & 1) {
3064       UMUL(c, a, TEMP(0));
3065       if(!s_reduce(TEMP(0), m, mu, TEMP(1), TEMP(2))) {
3066         res = MP_MEMORY; goto CLEANUP;
3067       }
3068       mp_int_copy(TEMP(0), c);
3069     }
3070
3071     d >>= 1;
3072     if(!d) break;
3073
3074     USQR(a, TEMP(0));
3075     if(!s_reduce(TEMP(0), m, mu, TEMP(1), TEMP(2))) {
3076       res = MP_MEMORY; goto CLEANUP;
3077     }
3078     (void) mp_int_copy(TEMP(0), a);
3079   }
3080
3081  CLEANUP:
3082   while(--last >= 0)
3083     mp_int_clear(TEMP(last));
3084
3085   return res;
3086 }
3087
3088 /* }}} */
3089
3090 /* {{{ s_udiv(a, b) */
3091
3092 /* Precondition:  a >= b and b > 0
3093    Postcondition: a' = a / b, b' = a % b
3094  */
3095 STATIC mp_result s_udiv(mp_int a, mp_int b)
3096 {
3097   mpz_t     q, r, t;
3098   mp_size   ua, ub, qpos = 0;
3099   mp_digit *da, btop;
3100   mp_result res = MP_OK;
3101   int       k, skip = 0;
3102
3103   /* Force signs to positive */
3104   MP_SIGN(a) = MP_ZPOS;
3105   MP_SIGN(b) = MP_ZPOS;
3106
3107   /* Normalize, per Knuth */
3108   k = s_norm(a, b);
3109
3110   ua = MP_USED(a); ub = MP_USED(b); btop = b->digits[ub - 1];
3111   if((res = mp_int_init_size(&q, ua)) != MP_OK) return res;
3112   if((res = mp_int_init_size(&t, ua + 1)) != MP_OK) goto CLEANUP;
3113
3114   da = MP_DIGITS(a);
3115   r.digits = da + ua - 1;  /* The contents of r are shared with a */
3116   r.used   = 1;
3117   r.sign   = MP_ZPOS;
3118   r.alloc  = MP_ALLOC(a);
3119   ZERO(t.digits, t.alloc);
3120
3121   /* Solve for quotient digits, store in q.digits in reverse order */
3122   while(r.digits >= da) {
3123     assert(qpos <= q.alloc);
3124
3125     if(s_ucmp(b, &r) > 0) {
3126       r.digits -= 1;
3127       r.used += 1;
3128
3129       if(++skip > 1 && qpos > 0)
3130         q.digits[qpos++] = 0;
3131
3132       CLAMP(&r);
3133     }
3134     else {
3135       mp_word  pfx = r.digits[r.used - 1];
3136       mp_word  qdigit;
3137
3138       if(r.used > 1 && pfx <= btop) {
3139         pfx <<= MP_DIGIT_BIT / 2;
3140         pfx <<= MP_DIGIT_BIT / 2;
3141         pfx |= r.digits[r.used - 2];
3142       }
3143
3144       qdigit = pfx / btop;
3145       if(qdigit > MP_DIGIT_MAX) {
3146         qdigit = MP_DIGIT_MAX;
3147       }
3148
3149       s_dbmul(MP_DIGITS(b), (mp_digit) qdigit, t.digits, ub);
3150       t.used = ub + 1; CLAMP(&t);
3151       while(s_ucmp(&t, &r) > 0) {
3152         --qdigit;
3153         (void) mp_int_sub(&t, b, &t); /* cannot fail */
3154       }
3155
3156       s_usub(r.digits, t.digits, r.digits, r.used, t.used);
3157       CLAMP(&r);
3158
3159       q.digits[qpos++] = (mp_digit) qdigit;
3160       ZERO(t.digits, t.used);
3161       skip = 0;
3162     }
3163   }
3164
3165   /* Put quotient digits in the correct order, and discard extra zeroes */
3166   q.used = qpos;
3167   REV(mp_digit, q.digits, qpos);
3168   CLAMP(&q);
3169
3170   /* Denormalize the remainder */
3171   CLAMP(a);
3172   if(k != 0)
3173     s_qdiv(a, k);
3174
3175   mp_int_copy(a, b);  /* ok:  0 <= r < b */
3176   mp_int_copy(&q, a); /* ok:  q <= a     */
3177
3178   mp_int_clear(&t);
3179  CLEANUP:
3180   mp_int_clear(&q);
3181   return res;
3182 }
3183
3184 /* }}} */
3185
3186 /* {{{ s_outlen(z, r) */
3187
3188 STATIC int       s_outlen(mp_int z, mp_size r)
3189 {
3190   mp_result  bits;
3191   double     raw;
3192
3193   assert(r >= MP_MIN_RADIX && r <= MP_MAX_RADIX);
3194
3195   bits = mp_int_count_bits(z);
3196   raw = (double)bits * s_log2[r];
3197
3198   return (int)(raw + 0.999999);
3199 }
3200
3201 /* }}} */
3202
3203 /* {{{ s_inlen(len, r) */
3204
3205 STATIC mp_size   s_inlen(int len, mp_size r)
3206 {
3207   double  raw = (double)len / s_log2[r];
3208   mp_size bits = (mp_size)(raw + 0.5);
3209
3210   return (mp_size)((bits + (MP_DIGIT_BIT - 1)) / MP_DIGIT_BIT);
3211 }
3212
3213 /* }}} */
3214
3215 /* {{{ s_ch2val(c, r) */
3216
3217 STATIC int       s_ch2val(char c, int r)
3218 {
3219   int out;
3220
3221   if(isdigit((unsigned char) c))
3222     out = c - '0';
3223   else if(r > 10 && isalpha((unsigned char) c))
3224     out = toupper(c) - 'A' + 10;
3225   else
3226     return -1;
3227
3228   return (out >= r) ? -1 : out;
3229 }
3230
3231 /* }}} */
3232
3233 /* {{{ s_val2ch(v, caps) */
3234
3235 STATIC char      s_val2ch(int v, int caps)
3236 {
3237   assert(v >= 0);
3238
3239   if(v < 10)
3240     return v + '0';
3241   else {
3242     char out = (v - 10) + 'a';
3243
3244     if(caps)
3245       return toupper(out);
3246     else
3247       return out;
3248   }
3249 }
3250
3251 /* }}} */
3252
3253 /* {{{ s_2comp(buf, len) */
3254
3255 STATIC void      s_2comp(unsigned char *buf, int len)
3256 {
3257   int i;
3258   unsigned short s = 1;
3259
3260   for(i = len - 1; i >= 0; --i) {
3261     unsigned char c = ~buf[i];
3262
3263     s = c + s;
3264     c = s & UCHAR_MAX;
3265     s >>= CHAR_BIT;
3266
3267     buf[i] = c;
3268   }
3269
3270   /* last carry out is ignored */
3271 }
3272
3273 /* }}} */
3274
3275 /* {{{ s_tobin(z, buf, *limpos) */
3276
3277 STATIC mp_result s_tobin(mp_int z, unsigned char *buf, int *limpos, int pad)
3278 {
3279   mp_size uz;
3280   mp_digit *dz;
3281   int pos = 0, limit = *limpos;
3282
3283   uz = MP_USED(z); dz = MP_DIGITS(z);
3284   while(uz > 0 && pos < limit) {
3285     mp_digit d = *dz++;
3286     int i;
3287
3288     for(i = sizeof(mp_digit); i > 0 && pos < limit; --i) {
3289       buf[pos++] = (unsigned char)d;
3290       d >>= CHAR_BIT;
3291
3292       /* Don't write leading zeroes */
3293       if(d == 0 && uz == 1)
3294         i = 0; /* exit loop without signaling truncation */
3295     }
3296
3297     /* Detect truncation (loop exited with pos >= limit) */
3298     if(i > 0) break;
3299
3300     --uz;
3301   }
3302
3303   if(pad != 0 && (buf[pos - 1] >> (CHAR_BIT - 1))) {
3304     if(pos < limit)
3305       buf[pos++] = 0;
3306     else
3307       uz = 1;
3308   }
3309
3310   /* Digits are in reverse order, fix that */
3311   REV(unsigned char, buf, pos);
3312
3313   /* Return the number of bytes actually written */
3314   *limpos = pos;
3315
3316   return (uz == 0) ? MP_OK : MP_TRUNC;
3317 }
3318
3319 /* }}} */
3320
3321 /* {{{ s_print(tag, z) */
3322
3323 #if DEBUG
3324 void      s_print(char *tag, mp_int z)
3325 {
3326   int  i;
3327
3328   fprintf(stderr, "%s: %c ", tag,
3329           (MP_SIGN(z) == MP_NEG) ? '-' : '+');
3330
3331   for(i = MP_USED(z) - 1; i >= 0; --i)
3332     fprintf(stderr, "%0*X", (int)(MP_DIGIT_BIT / 4), z->digits[i]);
3333
3334   fputc('\n', stderr);
3335
3336 }
3337
3338 void      s_print_buf(char *tag, mp_digit *buf, mp_size num)
3339 {
3340   int  i;
3341
3342   fprintf(stderr, "%s: ", tag);
3343
3344   for(i = num - 1; i >= 0; --i)
3345     fprintf(stderr, "%0*X", (int)(MP_DIGIT_BIT / 4), buf[i]);
3346
3347   fputc('\n', stderr);
3348 }
3349 #endif
3350
3351 /* }}} */
3352
3353 /* HERE THERE BE DRAGONS */