source4/heimdal/lib/hcrypto/libtommath/bn_fast_s_mp_sqr.c

   1 #include <tommath.h>
   2 #ifdef BN_FAST_S_MP_SQR_C
   3 /* LibTomMath, multiple-precision integer library -- Tom St Denis
   4  *
   5  * LibTomMath is a library that provides multiple-precision
   6  * integer arithmetic as well as number theoretic functionality.
   7  *
   8  * The library was designed directly after the MPI library by
   9  * Michael Fromberger but has been written from scratch with
  10  * additional optimizations in place.
  11  *
  12  * The library is free for all purposes without any express
  13  * guarantee it works.
  14  *
  15  * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
  16  */
  17
  18 /* the jist of squaring...
  19  * you do like mult except the offset of the tmpx [one that
  20  * starts closer to zero] can't equal the offset of tmpy.
  21  * So basically you set up iy like before then you min it with
  22  * (ty-tx) so that it never happens.  You double all those
  23  * you add in the inner loop
  24
  25 After that loop you do the squares and add them in.
  26 */
  27
  28 int fast_s_mp_sqr (mp_int * a, mp_int * b)
  29 {
  30   int       olduse, res, pa, ix, iz;
  31   mp_digit   W[MP_WARRAY], *tmpx;
  32   mp_word   W1;
  33
  34   /* grow the destination as required */
  35   pa = a->used + a->used;
  36   if (b->alloc < pa) {
  37     if ((res = mp_grow (b, pa)) != MP_OKAY) {
  38       return res;
  39     }
  40   }
  41
  42   /* number of output digits to produce */
  43   W1 = 0;
  44   for (ix = 0; ix < pa; ix++) {
  45       int      tx, ty, iy;
  46       mp_word  _W;
  47       mp_digit *tmpy;
  48
  49       /* clear counter */
  50       _W = 0;
  51
  52       /* get offsets into the two bignums */
  53       ty = MIN(a->used-1, ix);
  54       tx = ix - ty;
  55
  56       /* setup temp aliases */
  57       tmpx = a->dp + tx;
  58       tmpy = a->dp + ty;
  59
  60       /* this is the number of times the loop will iterrate, essentially
  61          while (tx++ < a->used && ty-- >= 0) { ... }
  62        */
  63       iy = MIN(a->used-tx, ty+1);
  64
  65       /* now for squaring tx can never equal ty
  66        * we halve the distance since they approach at a rate of 2x
  67        * and we have to round because odd cases need to be executed
  68        */
  69       iy = MIN(iy, (ty-tx+1)>>1);
  70
  71       /* execute loop */
  72       for (iz = 0; iz < iy; iz++) {
  73          _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
  74       }
  75
  76       /* double the inner product and add carry */
  77       _W = _W + _W + W1;
  78
  79       /* even columns have the square term in them */
  80       if ((ix&1) == 0) {
  81          _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
  82       }
  83
  84       /* store it */
  85       W[ix] = (mp_digit)(_W & MP_MASK);
  86
  87       /* make next carry */
  88       W1 = _W >> ((mp_word)DIGIT_BIT);
  89   }
  90
  91   /* setup dest */
  92   olduse  = b->used;
  93   b->used = a->used+a->used;
  94
  95   {
  96     mp_digit *tmpb;
  97     tmpb = b->dp;
  98     for (ix = 0; ix < pa; ix++) {
  99       *tmpb++ = W[ix] & MP_MASK;
 100     }
 101
 102     /* clear unused digits [that existed in the old copy of c] */
 103     for (; ix < olduse; ix++) {
 104       *tmpb++ = 0;
 105     }
 106   }
 107   mp_clamp (b);
 108   return MP_OKAY;
 109 }
 110 #endif
 111
 112 /* $Source: /cvs/libtom/libtommath/bn_fast_s_mp_sqr.c,v $ */
 113 /* $Revision: 1.4 $ */
 114 /* $Date: 2006/12/28 01:25:13 $ */