__div64_const32(): abstract out the actual 128-bit cross product code
author Nicolas Pitre <nicolas.pitre@linaro.org>
Fri, 30 Oct 2015 21:54:56 +0000 (17:54 -0400)
committer Nicolas Pitre <nicolas.pitre@linaro.org>
Mon, 16 Nov 2015 19:42:11 +0000 (14:42 -0500)
The default C implementation for the 128-bit cross product is abstracted
into the __arch_xprod_64() macro, which can be overridden to let
architectures provide their own assembly-optimized implementation.
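
For illustration only (not part of this patch): with the
#ifndef __arch_xprod_64 guard added below, an architecture override is
selected simply by defining the macro before the generic code is seen.
The stand-alone sketch below simulates that mechanism, using
unsigned __int128 (a GCC/Clang extension) in place of hand-written
assembly; the placement and names are hypothetical:

  #include <stdint.h>
  #include <stdbool.h>
  #include <stdio.h>

  /*
   * Stand-in for an architecture's asm/div64.h: defining the macro name
   * makes the #ifndef __arch_xprod_64 fallback in asm-generic/div64.h
   * compile out.  unsigned __int128 plays the role of assembly here.
   */
  #define __arch_xprod_64 __arch_xprod_64
  static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
  {
          /* ((bias ? m : 0) + m * n) >> 64, i.e. the 128-bit product scaled down */
          return (uint64_t)(((bias ? m : 0) + (unsigned __int128)m * n) >> 64);
  }

  int main(void)
  {
          uint64_t r = __arch_xprod_64(1ULL << 63, 4, false);

          printf("%llu\n", (unsigned long long)r); /* prints 2: (2^63 * 4) >> 64 */
          return 0;
  }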

There are many advantages to an assembly version of this operation.
Carry bit handling becomes trivial, and 32-bit shifts may be achieved
simply by inverting register pairs on some architectures.  This has the
potential to be significantly faster while using far fewer instructions.
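
The required semantic, retval = ((bias ? m : 0) + m * n) >> 64, can be
sanity-checked against a 128-bit reference with a small host-side test
(not part of this patch; it assumes unsigned __int128 support, and
xprod_64_ref()/check() are illustrative names only):

  #include <stdint.h>
  #include <stdbool.h>
  #include <stdio.h>
  #include <assert.h>

  /*
   * Portable mirror of the cross-product computation:
   * ((bias ? m : 0) + m * n) >> 64 using only 64-bit arithmetic, with
   * explicit carry handling on the partial-product additions.
   */
  static uint64_t xprod_64_ref(uint64_t m, uint64_t n, bool bias)
  {
          uint32_t m_lo = m, m_hi = m >> 32;
          uint32_t n_lo = n, n_hi = n >> 32;
          uint64_t lo = (uint64_t)m_lo * n_lo;
          uint64_t res, tmp, carry;

          /* bits 32..64 of (bias + m_lo * n_lo) */
          res = (bias ? m : 0) + lo;
          carry = (res < lo) ? (1ULL << 32) : 0;
          res = (res >> 32) + carry;

          /* add both middle partial products, tracking the carry */
          res += (uint64_t)m_lo * n_hi;   /* cannot overflow: res < 2^33 here */
          tmp = res + (uint64_t)m_hi * n_lo;
          carry = (tmp < res) ? (1ULL << 32) : 0;
          res = (tmp >> 32) + carry;

          /* the high partial product lands entirely in the result */
          return res + (uint64_t)m_hi * n_hi;
  }

  static void check(uint64_t m, uint64_t n)
  {
          for (int bias = 0; bias < 2; bias++) {
                  unsigned __int128 ref = (bias ? m : 0) + (unsigned __int128)m * n;

                  assert(xprod_64_ref(m, n, bias) == (uint64_t)(ref >> 64));
          }
  }

  int main(void)
  {
          uint64_t vals[] = { 0, 1, 0xffffffff, 0x100000000ULL,
                              0x123456789abcdef0ULL, UINT64_MAX };
          uint64_t x = 0x9e3779b97f4a7c15ULL;

          for (unsigned i = 0; i < sizeof(vals) / sizeof(vals[0]); i++)
                  for (unsigned j = 0; j < sizeof(vals) / sizeof(vals[0]); j++)
                          check(vals[i], vals[j]);

          for (int i = 0; i < 1000000; i++) {
                  uint64_t m = x;

                  x ^= x << 13; x ^= x >> 7; x ^= x << 17;  /* xorshift64 step */
                  check(m, x);
          }
          printf("all cross-product checks passed\n");
          return 0;
  }

Any architecture-provided __arch_xprod_64() only has to preserve that
equality; the explicit carry handling above is precisely what an
assembly version can fold into carry-propagating add instructions.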

Signed-off-by: Nicolas Pitre <nico@linaro.org>
include/asm-generic/div64.h

index 5a1bf1aff50261773f1172e8fe27b0e7a533d2b2..408856a9aba14d72b62e4edde8e83d72e23e8eaf 100644 (file)
@@ -63,7 +63,7 @@
         * do the trick here).                                          \
         */                                                             \
        uint64_t ___res, ___x, ___t, ___m, ___n = (n);                  \
-       uint32_t ___p, ___bias, ___m_lo, ___m_hi, ___n_lo, ___n_hi;     \
+       uint32_t ___p, ___bias;                                         \
                                                                        \
        /* determine MSB of b */                                        \
        ___p = 1 << ilog2(___b);                                        \
         * 2) whether or not there might be an overflow in the cross    \
         *    product determined by (___m & ((1 << 63) | (1 << 31))).   \
         *                                                              \
-        * Select the best way to do (m_bias + m * n) / (p << 64).      \
+        * Select the best way to do (m_bias + m * n) / (1 << 64).      \
         * From now on there will be actual runtime code generated.     \
         */                                                             \
-                                                                       \
-       ___m_lo = ___m;                                                 \
-       ___m_hi = ___m >> 32;                                           \
-       ___n_lo = ___n;                                                 \
-       ___n_hi = ___n >> 32;                                           \
-                                                                       \
-       if (!___bias) {                                                 \
-               ___res = ((uint64_t)___m_lo * ___n_lo) >> 32;           \
-       } else if (!(___m & ((1ULL << 63) | (1ULL << 31)))) {           \
-               ___res = (___m + (uint64_t)___m_lo * ___n_lo) >> 32;    \
-       } else {                                                        \
-               ___res = ___m + (uint64_t)___m_lo * ___n_lo;            \
-               ___t = (___res < ___m) ? (1ULL << 32) : 0;              \
-               ___res = (___res >> 32) + ___t;                         \
-       }                                                               \
-                                                                       \
-       if (!(___m & ((1ULL << 63) | (1ULL << 31)))) {                  \
-               ___res += (uint64_t)___m_lo * ___n_hi;                  \
-               ___res += (uint64_t)___m_hi * ___n_lo;                  \
-               ___res >>= 32;                                          \
-       } else {                                                        \
-               ___t = ___res += (uint64_t)___m_lo * ___n_hi;           \
-               ___res += (uint64_t)___m_hi * ___n_lo;                  \
-               ___t = (___res < ___t) ? (1ULL << 32) : 0;              \
-               ___res = (___res >> 32) + ___t;                         \
-       }                                                               \
-                                                                       \
-       ___res += (uint64_t)___m_hi * ___n_hi;                          \
+       ___res = __arch_xprod_64(___m, ___n, ___bias);                  \
                                                                        \
        ___res /= ___p;                                                 \
 })
 
+#ifndef __arch_xprod_64
+/*
+ * Default C implementation for __arch_xprod_64()
+ *
+ * Prototype: uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
+ * Semantic:  retval = ((bias ? m : 0) + m * n) >> 64
+ *
+ * The product is a 128-bit value, scaled down to 64 bits.
+ * Assuming constant propagation to optimize away unused conditional code.
+ * Architectures may provide their own optimized assembly implementation.
+ */
+static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
+{
+       uint32_t m_lo = m;
+       uint32_t m_hi = m >> 32;
+       uint32_t n_lo = n;
+       uint32_t n_hi = n >> 32;
+       uint64_t res, tmp;
+
+       if (!bias) {
+               res = ((uint64_t)m_lo * n_lo) >> 32;
+       } else if (!(m & ((1ULL << 63) | (1ULL << 31)))) {
+               /* there can't be any overflow here */
+               res = (m + (uint64_t)m_lo * n_lo) >> 32;
+       } else {
+               res = m + (uint64_t)m_lo * n_lo;
+               tmp = (res < m) ? (1ULL << 32) : 0;
+               res = (res >> 32) + tmp;
+       }
+
+       if (!(m & ((1ULL << 63) | (1ULL << 31)))) {
+               /* there can't be any overflow here */
+               res += (uint64_t)m_lo * n_hi;
+               res += (uint64_t)m_hi * n_lo;
+               res >>= 32;
+       } else {
+               tmp = res += (uint64_t)m_lo * n_hi;
+               res += (uint64_t)m_hi * n_lo;
+               tmp = (res < tmp) ? (1ULL << 32) : 0;
+               res = (res >> 32) + tmp;
+       }
+
+       res += (uint64_t)m_hi * n_hi;
+
+       return res;
+}
+#endif
+
 extern uint32_t __div64_32(uint64_t *dividend, uint32_t divisor);
 
 /* The unnecessary pointer compare is there