avx512erintrin.h

/* Copyright (C) 2013-2022 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef _AVX512ERINTRIN_H_INCLUDED
#define _AVX512ERINTRIN_H_INCLUDED

#ifndef __AVX512ER__
#pragma GCC push_options
#pragma GCC target("avx512er")
#define __DISABLE_AVX512ER__
#endif /* __AVX512ER__ */
/* Internal data types for implementing the intrinsics.  */
typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));

typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
#ifdef __OPTIMIZE__
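/* exp2a23 (vexp2pd/vexp2ps): approximate 2^x for each element, with a
   maximum relative error of 2^-23 (hence "a23").  In the unmasked forms
   below, __W is deliberately left uninitialized: with an all-ones mask
   every element is written, so its value is never observed.  */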
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_exp2a23_round_pd (__m512d __A, int __R)
{
  __m512d __W;
  return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
                                               (__v8df) __W,
                                               (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_exp2a23_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
                                               (__v8df) __W,
                                               (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_exp2a23_round_pd (__mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
                                               (__v8df) _mm512_setzero_pd (),
                                               (__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_exp2a23_round_ps (__m512 __A, int __R)
{
  __m512 __W;
  return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
                                              (__v16sf) __W,
                                              (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_exp2a23_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
                                              (__v16sf) __W,
                                              (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_exp2a23_round_ps (__mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
                                              (__v16sf) _mm512_setzero_ps (),
                                              (__mmask16) __U, __R);
}
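/* rcp28 (vrcp28pd/vrcp28ps): approximate the reciprocal 1/x for each
   element, with a maximum relative error of 2^-28.  */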
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rcp28_round_pd (__m512d __A, int __R)
{
  __m512d __W;
  return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
                                                (__v8df) __W,
                                                (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
                                                (__v8df) __W,
                                                (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp28_round_pd (__mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
                                                (__v8df) _mm512_setzero_pd (),
                                                (__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rcp28_round_ps (__m512 __A, int __R)
{
  __m512 __W;
  return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
                                               (__v16sf) __W,
                                               (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
                                               (__v16sf) __W,
                                               (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp28_round_ps (__mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
                                               (__v16sf) _mm512_setzero_ps (),
                                               (__mmask16) __U, __R);
}
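/* Scalar forms operate on the low element only.  Note the argument order
   of the builtin: the approximation is computed on the low element of
   __B, while the upper element(s) of the result are copied from __A.  */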
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp28_round_sd (__m128d __A, __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rcp28sd_round ((__v2df) __B,
                                                 (__v2df) __A,
                                                 __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rcp28_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
                         __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rcp28sd_mask_round ((__v2df) __B,
                                                      (__v2df) __A,
                                                      (__v2df) __W,
                                                      __U,
                                                      __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rcp28_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rcp28sd_mask_round ((__v2df) __B,
                                                      (__v2df) __A,
                                                      (__v2df)
                                                      _mm_setzero_pd (),
                                                      __U,
                                                      __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rcp28ss_round ((__v4sf) __B,
                                                (__v4sf) __A,
                                                __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rcp28_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
                         __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rcp28ss_mask_round ((__v4sf) __B,
                                                     (__v4sf) __A,
                                                     (__v4sf) __W,
                                                     __U,
                                                     __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rcp28_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rcp28ss_mask_round ((__v4sf) __B,
                                                     (__v4sf) __A,
                                                     (__v4sf)
                                                     _mm_setzero_ps (),
                                                     __U,
                                                     __R);
}
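/* rsqrt28 (vrsqrt28pd/vrsqrt28ps): approximate the reciprocal square
   root 1/sqrt(x) for each element, with a maximum relative error of
   2^-28.  */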
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt28_round_pd (__m512d __A, int __R)
{
  __m512d __W;
  return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
                                                  (__v8df) __W,
                                                  (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rsqrt28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
                                                  (__v8df) __W,
                                                  (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt28_round_pd (__mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
                                                  (__v8df) _mm512_setzero_pd (),
                                                  (__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt28_round_ps (__m512 __A, int __R)
{
  __m512 __W;
  return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
                                                 (__v16sf) __W,
                                                 (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rsqrt28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
                                                 (__v16sf) __W,
                                                 (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt28_round_ps (__mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
                                                 (__v16sf) _mm512_setzero_ps (),
                                                 (__mmask16) __U, __R);
}
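/* Scalar rsqrt28 forms, with the same convention as the scalar rcp28
   forms above: low element from __B, upper element(s) from __A.  */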
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt28_round_sd (__m128d __A, __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rsqrt28sd_round ((__v2df) __B,
                                                   (__v2df) __A,
                                                   __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rsqrt28_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
                           __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rsqrt28sd_mask_round ((__v2df) __B,
                                                        (__v2df) __A,
                                                        (__v2df) __W,
                                                        __U,
                                                        __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rsqrt28_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rsqrt28sd_mask_round ((__v2df) __B,
                                                        (__v2df) __A,
                                                        (__v2df)
                                                        _mm_setzero_pd (),
                                                        __U,
                                                        __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rsqrt28ss_round ((__v4sf) __B,
                                                  (__v4sf) __A,
                                                  __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rsqrt28_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
                           __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rsqrt28ss_mask_round ((__v4sf) __B,
                                                       (__v4sf) __A,
                                                       (__v4sf) __W,
                                                       __U,
                                                       __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rsqrt28_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rsqrt28ss_mask_round ((__v4sf) __B,
                                                       (__v4sf) __A,
                                                       (__v4sf)
                                                       _mm_setzero_ps (),
                                                       __U,
                                                       __R);
}
#else
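/* Without optimization, arguments to the inline functions above would not
   be folded to the compile-time constants the builtins require for the
   rounding operand, so macro forms are used instead.  */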
#define _mm512_exp2a23_round_pd(A, C) \
  __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)

#define _mm512_mask_exp2a23_round_pd(W, U, A, C) \
  __builtin_ia32_exp2pd_mask(A, W, U, C)

#define _mm512_maskz_exp2a23_round_pd(U, A, C) \
  __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_exp2a23_round_ps(A, C) \
  __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)

#define _mm512_mask_exp2a23_round_ps(W, U, A, C) \
  __builtin_ia32_exp2ps_mask(A, W, U, C)

#define _mm512_maskz_exp2a23_round_ps(U, A, C) \
  __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm512_rcp28_round_pd(A, C) \
  __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)

#define _mm512_mask_rcp28_round_pd(W, U, A, C) \
  __builtin_ia32_rcp28pd_mask(A, W, U, C)

#define _mm512_maskz_rcp28_round_pd(U, A, C) \
  __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_rcp28_round_ps(A, C) \
  __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)

#define _mm512_mask_rcp28_round_ps(W, U, A, C) \
  __builtin_ia32_rcp28ps_mask(A, W, U, C)

#define _mm512_maskz_rcp28_round_ps(U, A, C) \
  __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm512_rsqrt28_round_pd(A, C) \
  __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)

#define _mm512_mask_rsqrt28_round_pd(W, U, A, C) \
  __builtin_ia32_rsqrt28pd_mask(A, W, U, C)

#define _mm512_maskz_rsqrt28_round_pd(U, A, C) \
  __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_rsqrt28_round_ps(A, C) \
  __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)

#define _mm512_mask_rsqrt28_round_ps(W, U, A, C) \
  __builtin_ia32_rsqrt28ps_mask(A, W, U, C)

#define _mm512_maskz_rsqrt28_round_ps(U, A, C) \
  __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm_rcp28_round_sd(A, B, R) \
  __builtin_ia32_rcp28sd_round(A, B, R)

#define _mm_mask_rcp28_round_sd(W, U, A, B, R) \
  __builtin_ia32_rcp28sd_mask_round ((A), (B), (W), (U), (R))

#define _mm_maskz_rcp28_round_sd(U, A, B, R) \
  __builtin_ia32_rcp28sd_mask_round ((A), (B), (__v2df) _mm_setzero_pd (), \
                                     (U), (R))

#define _mm_rcp28_round_ss(A, B, R) \
  __builtin_ia32_rcp28ss_round(A, B, R)

#define _mm_mask_rcp28_round_ss(W, U, A, B, R) \
  __builtin_ia32_rcp28ss_mask_round ((A), (B), (W), (U), (R))

#define _mm_maskz_rcp28_round_ss(U, A, B, R) \
  __builtin_ia32_rcp28ss_mask_round ((A), (B), (__v4sf) _mm_setzero_ps (), \
                                     (U), (R))

#define _mm_rsqrt28_round_sd(A, B, R) \
  __builtin_ia32_rsqrt28sd_round(A, B, R)

#define _mm_mask_rsqrt28_round_sd(W, U, A, B, R) \
  __builtin_ia32_rsqrt28sd_mask_round ((A), (B), (W), (U), (R))

#define _mm_maskz_rsqrt28_round_sd(U, A, B, R) \
  __builtin_ia32_rsqrt28sd_mask_round ((A), (B), (__v2df) _mm_setzero_pd (), \
                                       (U), (R))

#define _mm_rsqrt28_round_ss(A, B, R) \
  __builtin_ia32_rsqrt28ss_round(A, B, R)

#define _mm_mask_rsqrt28_round_ss(W, U, A, B, R) \
  __builtin_ia32_rsqrt28ss_mask_round ((A), (B), (W), (U), (R))

#define _mm_maskz_rsqrt28_round_ss(U, A, B, R) \
  __builtin_ia32_rsqrt28ss_mask_round ((A), (B), (__v4sf) _mm_setzero_ps (), \
                                       (U), (R))

#endif
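/* Convenience forms without an explicit rounding argument; they use the
   current rounding direction (_MM_FROUND_CUR_DIRECTION).  */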
#define _mm_mask_rcp28_sd(W, U, A, B) \
  _mm_mask_rcp28_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_sd(U, A, B) \
  _mm_maskz_rcp28_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rcp28_ss(W, U, A, B) \
  _mm_mask_rcp28_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_ss(U, A, B) \
  _mm_maskz_rcp28_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_sd(W, U, A, B) \
  _mm_mask_rsqrt28_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_sd(U, A, B) \
  _mm_maskz_rsqrt28_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_ss(W, U, A, B) \
  _mm_mask_rsqrt28_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_ss(U, A, B) \
  _mm_maskz_rsqrt28_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm512_exp2a23_pd(A) \
  _mm512_exp2a23_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_pd(W, U, A) \
  _mm512_mask_exp2a23_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_pd(U, A) \
  _mm512_maskz_exp2a23_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_exp2a23_ps(A) \
  _mm512_exp2a23_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_ps(W, U, A) \
  _mm512_mask_exp2a23_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_ps(U, A) \
  _mm512_maskz_exp2a23_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_pd(A) \
  _mm512_rcp28_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_pd(W, U, A) \
  _mm512_mask_rcp28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_pd(U, A) \
  _mm512_maskz_rcp28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_ps(A) \
  _mm512_rcp28_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_ps(W, U, A) \
  _mm512_mask_rcp28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_ps(U, A) \
  _mm512_maskz_rcp28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_pd(A) \
  _mm512_rsqrt28_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_pd(W, U, A) \
  _mm512_mask_rsqrt28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_pd(U, A) \
  _mm512_maskz_rsqrt28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_ps(A) \
  _mm512_rsqrt28_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_ps(W, U, A) \
  _mm512_mask_rsqrt28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_ps(U, A) \
  _mm512_maskz_rsqrt28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rcp28_sd(A, B) \
  __builtin_ia32_rcp28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rcp28_ss(A, B) \
  __builtin_ia32_rcp28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_sd(A, B) \
  __builtin_ia32_rsqrt28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_ss(A, B) \
  __builtin_ia32_rsqrt28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
#ifdef __DISABLE_AVX512ER__
#undef __DISABLE_AVX512ER__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512ER__ */

#endif /* _AVX512ERINTRIN_H_INCLUDED */
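A minimal usage sketch, not part of the header itself: it assumes a CPU with AVX-512ER (e.g. a Xeon Phi x200) and compilation with something like gcc -O2 -mavx512f -mavx512er. It exercises the unmasked and masked rsqrt28 forms defined above; the input values are arbitrary illustrative data.

#include <immintrin.h>
#include <stdio.h>

int
main (void)
{
  /* Eight doubles, all 4.0; vrsqrt28pd gives ~1/sqrt(4.0) = ~0.5 per
     lane, with a maximum relative error of 2^-28.  */
  __m512d x = _mm512_set1_pd (4.0);
  __m512d r = _mm512_rsqrt28_pd (x);

  /* Masked form: lanes whose mask bit is clear keep the value from w.
     With mask 0x0F, the low four lanes are ~0.5, the rest stay -1.0.  */
  __m512d w = _mm512_set1_pd (-1.0);
  __m512d m = _mm512_mask_rsqrt28_pd (w, (__mmask8) 0x0F, x);

  double out[8];
  _mm512_storeu_pd (out, r);
  for (int i = 0; i < 8; i++)
    printf ("rsqrt28: %f\n", out[i]);

  _mm512_storeu_pd (out, m);
  for (int i = 0; i < 8; i++)
    printf ("masked:  %f\n", out[i]);

  return 0;
}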