c語言中x*x和pow(x,2)哪個計算更快一點？

01-13

x*x*...x 和 pow(x, n)呢？

https://godbolt.org/g/1PZ8GV

如果x*x更慢，那你就可以去打死你用的編譯器的實現者了。

編程三大錯覺之首：我比編譯器聰明

應評論區要求補上第二、第三條，出處是多年前在某個 BBS 上看到的帖子：

我超越了標準庫

我能管理好內存


/* The GNU C Library is free software; you can redistribute it and/or

   modify it under the terms of the GNU Lesser General Public

   License as published by the Free Software Foundation; either

   version 2.1 of the License, or (at your option) any later version.
   The GNU C Library is distributed in the hope that it will be useful,

   but WITHOUT ANY WARRANTY; without even the implied warranty of

   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

   Lesser General Public License for more details.
   You should have received a copy of the GNU Lesser General Public

   License along with the GNU C Library; if not, see

   <http://www.gnu.org/licenses/>.  */
#include <complex.h>

#include <math.h>
/* Power function built from an exponential and a logarithm:
   the result is __cexp (c * __clog (x)), i.e. x^c = e^(c * log x).
   CFLOAT, M_DECL_FUNC and M_SUF are defined outside this excerpt
   (presumably they select the concrete complex floating type and the
   matching function-name suffix -- confirm against the including
   template).  */
CFLOAT
M_DECL_FUNC (__cpow) (CFLOAT x, CFLOAT c)
{
  /* Logarithm of the base, kept in a named temporary for readability.  */
  CFLOAT log_of_base = M_SUF (__clog) (x);

  /* Scale by the exponent and exponentiate.  */
  return M_SUF (__cexp) (c * log_of_base);
}
/* Alias declaration tying __cpow to cpow (the macro is defined outside
   this excerpt; presumably it exports the public symbol -- confirm).  */
declare_mgen_alias (__cpow, cpow)

/* Emit the compatibility declaration only when M_LIBM_NEED_COMPAT (cpow)
   is true.  Each preprocessor directive has to occupy its own line;
   folding "#if ... #endif" onto one line is not valid C.  */
#if M_LIBM_NEED_COMPAT (cpow)
declare_mgen_libm_compat (__cpow, cpow)
#endif

上面是 glibc（GNU C 標準庫）的 math 庫中，複數冪函數 cpow( ) 的實現（注意這是複數版本，不是實數的 pow( )）。

在數學上就是 $x^{c} = e^{c \cdot \log x}$

比x*x複雜，但通用性強。

再看一下另一個系統的實現代碼：

/* An ultimate power routine.  Given two IEEE double machine numbers y, x it
   computes the correctly rounded (to nearest) value of X^y.

   Parameters: X is the base, Y is the exponent.
   Returns: X^Y as a double; the special cases (NaN, zero, infinities,
   negative base, huge exponent) are dispatched by the explicit branches
   below.

   The locals U and V hold X and Y so that their 32-bit halves can be
   inspected through .i[HIGH_HALF] / .i[LOW_HALF] (mynumber, int4, SECTION,
   HIGH_HALF, LOW_HALF, CN, huge, tiny, INF, nINF, nZERO, log1, __exp1,
   power1 and checkint are all defined outside this excerpt).  */
double
SECTION
__ieee754_pow (double x, double y)
{
  double z, a, aa, error, t, a1, a2, y1, y2;
  mynumber u, v;
  int k;
  int4 qx, qy;
  v.x = y;
  u.x = x;
  /* Fast paths, tried only when the low word of y is zero.  */
  if (v.i[LOW_HALF] == 0)
    {                           /* of y */
      qx = u.i[HIGH_HALF] & 0x7fffffff;
      /* Is x a NaN?  */
      if ((((qx == 0x7ff00000) && (u.i[LOW_HALF] != 0)) || (qx > 0x7ff00000))
          && (y != 0 || issignaling (x)))
        return x + x;
      if (y == 1.0)
        return x;
      if (y == 2.0)
        return x * x;
      if (y == -1.0)
        return 1.0 / x;
      if (y == 0)
        return 1.0;
    }
  /* else */
  if (((u.i[HIGH_HALF] > 0 && u.i[HIGH_HALF] < 0x7ff00000) ||   /* x>0 and not x->0 */
       (u.i[HIGH_HALF] == 0 && u.i[LOW_HALF] != 0)) &&
      /*   2^-1023< x<= 2^-1023 * 0x1.0000ffffffff */
      (v.i[HIGH_HALF] & 0x7fffffff) < 0x4ff00000)
    {                           /* if y<-1 or y>1   */
      double retval;

      {
        /* The inner scope bounds the rounding-mode change made by
           SET_RESTORE_ROUND (presumably restored on scope exit --
           confirm against the macro definition).  */
        SET_RESTORE_ROUND (FE_TONEAREST);

        /* Avoid internal underflow for tiny y.  The exact value of y does
           not matter if |y| <= 2**-64.  */
        if (fabs (y) < 0x1p-64)
          y = y < 0 ? -0x1p-64 : 0x1p-64;
        z = log1 (x, &aa, &error);      /* x^y  =e^(y log (X)) */
        /* Split y and z into high/low parts (y1 + y2, a1 + a2) and form
           the product y * log (x) as the pair a1 + a2.  */
        t = y * CN;
        y1 = t - (t - y);
        y2 = y - y1;
        t = z * CN;
        a1 = t - (t - z);
        a2 = (z - a1) + aa;
        a = y1 * a1;
        aa = y2 * a1 + y * a2;
        a1 = a + aa;
        a2 = (a - a1) + aa;
        error = error * fabs (y);
        t = __exp1 (a1, a2, 1.9e16 * error);    /* return -10 or 0 if wasn't computed exactly */
        /* Fall back to power1 when __exp1 did not deliver a positive
           result.  */
        retval = (t > 0) ? t : power1 (x, y);
      }

      if (isinf (retval))
        retval = huge * huge;
      else if (retval == 0)
        retval = tiny * tiny;
      else
        math_check_force_underflow_nonneg (retval);
      return retval;
    }

  if (x == 0)
    {
      if (((v.i[HIGH_HALF] & 0x7fffffff) == 0x7ff00000 && v.i[LOW_HALF] != 0)
          || (v.i[HIGH_HALF] & 0x7fffffff) > 0x7ff00000)        /* NaN */
        return y + y;
      if (fabs (y) > 1.0e20)
        return (y > 0) ? 0 : 1.0 / 0.0;
      /* NOTE(review): from the branches here and below, checkint appears
         to return 0 for a non-integer y, 1 for an even integer and -1 for
         an odd integer -- confirm against its definition.  */
      k = checkint (y);
      if (k == -1)
        return y < 0 ? 1.0 / x : x;
      else
        return y < 0 ? 1.0 / 0.0 : 0.0; /* return 0 */
    }

  qx = u.i[HIGH_HALF] & 0x7fffffff;     /*   no sign   */
  qy = v.i[HIGH_HALF] & 0x7fffffff;     /*   no sign   */

  if (qx >= 0x7ff00000 && (qx > 0x7ff00000 || u.i[LOW_HALF] != 0))      /* NaN */
    return x + y;
  if (qy >= 0x7ff00000 && (qy > 0x7ff00000 || v.i[LOW_HALF] != 0))      /* NaN */
    return x == 1.0 && !issignaling (y) ? 1.0 : y + y;

  /* if x<0 */
  if (u.i[HIGH_HALF] < 0)
    {
      k = checkint (y);
      if (k == 0)
        {
          if (qy == 0x7ff00000)
            {
              if (x == -1.0)
                return 1.0;
              else if (x > -1.0)
                return v.i[HIGH_HALF] < 0 ? INF.x : 0.0;
              else
                return v.i[HIGH_HALF] < 0 ? 0.0 : INF.x;
            }
          else if (qx == 0x7ff00000)
            return y < 0 ? 0.0 : INF.x;
          return (x - x) / (x - x);     /* y not integer and x<0 */
        }
      else if (qx == 0x7ff00000)
        {
          if (k < 0)
            return y < 0 ? nZERO.x : nINF.x;
          else
            return y < 0 ? 0.0 : INF.x;
        }
      /* if y even or odd */
      if (k == 1)
        return __ieee754_pow (-x, y);
      else
        {
          double retval;
          {
            SET_RESTORE_ROUND (FE_TONEAREST);
            retval = -__ieee754_pow (-x, y);
          }
          if (isinf (retval))
            retval = -huge * huge;
          else if (retval == 0)
            retval = -tiny * tiny;
          return retval;
        }
    }
  /* x>0 */

  /* NOTE(review): the NaN cases returned above and every path of the x<0
     branch returns, so qx == 0x7ff00000 here looks like x == +Inf rather
     than what the inherited comment below says -- confirm.  */
  if (qx == 0x7ff00000)         /* x= 2^-0x3ff */
    return y > 0 ? x : 0;

  /* Very large finite |y|: force overflow/underflow via huge/tiny.  */
  if (qy > 0x45f00000 && qy < 0x7ff00000)
    {
      if (x == 1.0)
        return 1.0;
      if (y > 0)
        return (x > 1.0) ? huge * huge : tiny * tiny;
      if (y < 0)
        return (x < 1.0) ? huge * huge : tiny * tiny;
    }

  if (x == 1.0)
    return 1.0;
  if (y > 0)
    return (x > 1.0) ? INF.x : 0;
  if (y < 0)
    return (x < 1.0) ? INF.x : 0;
  return 0;                     /* unreachable, to make the compiler happy */
}

看到

if (y == 2.0) return x * x;

了吧

裸代碼的話，pow(x,2)多一次函數調用，入棧出棧會帶來額外的性能開銷。但是開了優化之後，編譯器基本上會把這點差別給抹掉，所以你不用考慮這個。

編譯器：來來來，筆給你，你來優化。

算連乘，乘的越多越好優化。

例如，算x^10，優化演算法可以先算

x^2

然後再算

x^4

然後再算x^8

最後算x^8*x^2

一共幾次乘法？

4次

直接乘就是9次啦

如果是整型的話，肯定是*快，其它的就看你開不開優化了。

不邀自答

一個函數再怎麼的也要return對吧！

僅針對x*x 和pow（x，2）

這些基本數據類型的計算（對彙編最後都是加，是的連除法最後也要轉成加）如果沒有邏輯轉換的話（對彙編就是jump），彙編幾乎不可能不同的（現代彙編已經優化的幾乎到了極致）。

反對 @我是水軍的回答

怎麼想都是x*x快啊

@我是水軍認為 pow是定製操作可以在特定情況下比 x*x 更快

我很好奇怎麼定製pow能比x*x更優越

於是他貼出來下圖

黑人問號.jpg

imul ?? 這就是定製優化

x是1.5怎麼辦 @我是水軍認為不考慮不存在

pow2就是定製就是快

x是int 這麼說 @我是水軍認為不考慮不存在

pow2就是定製就是快

pow2連基本功能都沒完成這能叫做定製