avx512vlintrin.h 420 KB


  1. /* Copyright (C) 2014-2022 Free Software Foundation, Inc.
  2. This file is part of GCC.
  3. GCC is free software; you can redistribute it and/or modify
  4. it under the terms of the GNU General Public License as published by
  5. the Free Software Foundation; either version 3, or (at your option)
  6. any later version.
  7. GCC is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU General Public License for more details.
  11. Under Section 7 of GPL version 3, you are granted additional
  12. permissions described in the GCC Runtime Library Exception, version
  13. 3.1, as published by the Free Software Foundation.
  14. You should have received a copy of the GNU General Public License and
  15. a copy of the GCC Runtime Library Exception along with this program;
  16. see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
  17. <http://www.gnu.org/licenses/>. */
  18. #ifndef _IMMINTRIN_H_INCLUDED
  19. #error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
  20. #endif
  21. #ifndef _AVX512VLINTRIN_H_INCLUDED
  22. #define _AVX512VLINTRIN_H_INCLUDED
  23. #ifndef __AVX512VL__
  24. #pragma GCC push_options
  25. #pragma GCC target("avx512vl")
  26. #define __DISABLE_AVX512VL__
  27. #endif /* __AVX512VL__ */
  28. /* Internal data types for implementing the intrinsics. */
  /* 32-bit unsigned integer used as a mask type.  */
  typedef unsigned int __mmask32;

  /* Vector typedefs that may alias any other type and carry an alignment
     requirement of only one byte: 16- and 32-byte vectors of int, then
     16- and 32-byte vectors of long long.  */
  typedef int __v4si_u
    __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
  typedef int __v8si_u
    __attribute__ ((__vector_size__ (32), __may_alias__, __aligned__ (1)));
  typedef long long __v2di_u
    __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
  typedef long long __v4di_u
    __attribute__ ((__vector_size__ (32), __may_alias__, __aligned__ (1)));
  38. extern __inline __m256d
  39. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  40. _mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
  41. {
  42. return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
  43. (__v4df) __W,
  44. (__mmask8) __U);
  45. }
  46. extern __inline __m256d
  47. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  48. _mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
  49. {
  50. return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
  51. (__v4df)
  52. _mm256_setzero_pd (),
  53. (__mmask8) __U);
  54. }
  55. extern __inline __m128d
  56. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  57. _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
  58. {
  59. return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
  60. (__v2df) __W,
  61. (__mmask8) __U);
  62. }
  63. extern __inline __m128d
  64. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  65. _mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
  66. {
  67. return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
  68. (__v2df)
  69. _mm_setzero_pd (),
  70. (__mmask8) __U);
  71. }
  72. extern __inline __m256d
  73. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  74. _mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
  75. {
  76. return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
  77. (__v4df) __W,
  78. (__mmask8) __U);
  79. }
  80. extern __inline __m256d
  81. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  82. _mm256_maskz_load_pd (__mmask8 __U, void const *__P)
  83. {
  84. return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
  85. (__v4df)
  86. _mm256_setzero_pd (),
  87. (__mmask8) __U);
  88. }
  89. extern __inline __m128d
  90. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  91. _mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
  92. {
  93. return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
  94. (__v2df) __W,
  95. (__mmask8) __U);
  96. }
  97. extern __inline __m128d
  98. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  99. _mm_maskz_load_pd (__mmask8 __U, void const *__P)
  100. {
  101. return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
  102. (__v2df)
  103. _mm_setzero_pd (),
  104. (__mmask8) __U);
  105. }
  106. extern __inline void
  107. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  108. _mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
  109. {
  110. __builtin_ia32_storeapd256_mask ((__v4df *) __P,
  111. (__v4df) __A,
  112. (__mmask8) __U);
  113. }
  114. extern __inline void
  115. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  116. _mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
  117. {
  118. __builtin_ia32_storeapd128_mask ((__v2df *) __P,
  119. (__v2df) __A,
  120. (__mmask8) __U);
  121. }
  122. extern __inline __m256
  123. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  124. _mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
  125. {
  126. return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
  127. (__v8sf) __W,
  128. (__mmask8) __U);
  129. }
  130. extern __inline __m256
  131. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  132. _mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
  133. {
  134. return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
  135. (__v8sf)
  136. _mm256_setzero_ps (),
  137. (__mmask8) __U);
  138. }
  139. extern __inline __m128
  140. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  141. _mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
  142. {
  143. return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
  144. (__v4sf) __W,
  145. (__mmask8) __U);
  146. }
  147. extern __inline __m128
  148. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  149. _mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
  150. {
  151. return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
  152. (__v4sf)
  153. _mm_setzero_ps (),
  154. (__mmask8) __U);
  155. }
  156. extern __inline __m256
  157. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  158. _mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
  159. {
  160. return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
  161. (__v8sf) __W,
  162. (__mmask8) __U);
  163. }
  164. extern __inline __m256
  165. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  166. _mm256_maskz_load_ps (__mmask8 __U, void const *__P)
  167. {
  168. return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
  169. (__v8sf)
  170. _mm256_setzero_ps (),
  171. (__mmask8) __U);
  172. }
  173. extern __inline __m128
  174. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  175. _mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
  176. {
  177. return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
  178. (__v4sf) __W,
  179. (__mmask8) __U);
  180. }
  181. extern __inline __m128
  182. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  183. _mm_maskz_load_ps (__mmask8 __U, void const *__P)
  184. {
  185. return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
  186. (__v4sf)
  187. _mm_setzero_ps (),
  188. (__mmask8) __U);
  189. }
  190. extern __inline void
  191. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  192. _mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
  193. {
  194. __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
  195. (__v8sf) __A,
  196. (__mmask8) __U);
  197. }
  198. extern __inline void
  199. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  200. _mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
  201. {
  202. __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
  203. (__v4sf) __A,
  204. (__mmask8) __U);
  205. }
  206. extern __inline __m256i
  207. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  208. _mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
  209. {
  210. return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
  211. (__v4di) __W,
  212. (__mmask8) __U);
  213. }
  214. extern __inline __m256i
  215. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  216. _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
  217. {
  218. return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
  219. (__v4di)
  220. _mm256_setzero_si256 (),
  221. (__mmask8) __U);
  222. }
  223. extern __inline __m128i
  224. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  225. _mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
  226. {
  227. return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
  228. (__v2di) __W,
  229. (__mmask8) __U);
  230. }
  231. extern __inline __m128i
  232. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  233. _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
  234. {
  235. return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
  236. (__v2di)
  237. _mm_setzero_si128 (),
  238. (__mmask8) __U);
  239. }
  240. extern __inline __m256i
  241. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  242. _mm256_load_epi64 (void const *__P)
  243. {
  244. return (__m256i) (*(__v4di *) __P);
  245. }
  246. extern __inline __m256i
  247. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  248. _mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
  249. {
  250. return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
  251. (__v4di) __W,
  252. (__mmask8)
  253. __U);
  254. }
  255. extern __inline __m256i
  256. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  257. _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
  258. {
  259. return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
  260. (__v4di)
  261. _mm256_setzero_si256 (),
  262. (__mmask8)
  263. __U);
  264. }
  265. extern __inline __m128i
  266. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  267. _mm_load_epi64 (void const *__P)
  268. {
  269. return (__m128i) (*(__v2di *) __P);
  270. }
  271. extern __inline __m128i
  272. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  273. _mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
  274. {
  275. return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
  276. (__v2di) __W,
  277. (__mmask8)
  278. __U);
  279. }
  280. extern __inline __m128i
  281. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  282. _mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
  283. {
  284. return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
  285. (__v2di)
  286. _mm_setzero_si128 (),
  287. (__mmask8)
  288. __U);
  289. }
  290. extern __inline void
  291. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  292. _mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
  293. {
  294. __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
  295. (__v4di) __A,
  296. (__mmask8) __U);
  297. }
  298. extern __inline void
  299. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  300. _mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
  301. {
  302. __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
  303. (__v2di) __A,
  304. (__mmask8) __U);
  305. }
  306. extern __inline __m256i
  307. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  308. _mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
  309. {
  310. return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
  311. (__v8si) __W,
  312. (__mmask8) __U);
  313. }
  314. extern __inline __m256i
  315. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  316. _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
  317. {
  318. return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
  319. (__v8si)
  320. _mm256_setzero_si256 (),
  321. (__mmask8) __U);
  322. }
  323. extern __inline __m128i
  324. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  325. _mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
  326. {
  327. return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
  328. (__v4si) __W,
  329. (__mmask8) __U);
  330. }
  331. extern __inline __m128i
  332. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  333. _mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
  334. {
  335. return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
  336. (__v4si)
  337. _mm_setzero_si128 (),
  338. (__mmask8) __U);
  339. }
  340. extern __inline __m256i
  341. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  342. _mm256_load_epi32 (void const *__P)
  343. {
  344. return (__m256i) (*(__v8si *) __P);
  345. }
  346. extern __inline __m256i
  347. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  348. _mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
  349. {
  350. return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
  351. (__v8si) __W,
  352. (__mmask8)
  353. __U);
  354. }
  355. extern __inline __m256i
  356. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  357. _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
  358. {
  359. return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
  360. (__v8si)
  361. _mm256_setzero_si256 (),
  362. (__mmask8)
  363. __U);
  364. }
  365. extern __inline __m128i
  366. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  367. _mm_load_epi32 (void const *__P)
  368. {
  369. return (__m128i) (*(__v4si *) __P);
  370. }
  371. extern __inline __m128i
  372. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  373. _mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
  374. {
  375. return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
  376. (__v4si) __W,
  377. (__mmask8)
  378. __U);
  379. }
  380. extern __inline __m128i
  381. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  382. _mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
  383. {
  384. return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
  385. (__v4si)
  386. _mm_setzero_si128 (),
  387. (__mmask8)
  388. __U);
  389. }
  390. extern __inline void
  391. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  392. _mm256_store_epi32 (void *__P, __m256i __A)
  393. {
  394. *(__v8si *) __P = (__v8si) __A;
  395. }
  396. extern __inline void
  397. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  398. _mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
  399. {
  400. __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
  401. (__v8si) __A,
  402. (__mmask8) __U);
  403. }
  404. extern __inline void
  405. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  406. _mm_store_epi32 (void *__P, __m128i __A)
  407. {
  408. *(__v4si *) __P = (__v4si) __A;
  409. }
  410. extern __inline void
  411. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  412. _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
  413. {
  414. __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
  415. (__v4si) __A,
  416. (__mmask8) __U);
  417. }
  418. extern __inline __m128d
  419. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  420. _mm_mask_add_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
  421. {
  422. return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
  423. (__v2df) __B,
  424. (__v2df) __W,
  425. (__mmask8) __U);
  426. }
  427. extern __inline __m128d
  428. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  429. _mm_maskz_add_pd (__mmask8 __U, __m128d __A, __m128d __B)
  430. {
  431. return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
  432. (__v2df) __B,
  433. (__v2df)
  434. _mm_setzero_pd (),
  435. (__mmask8) __U);
  436. }
  437. extern __inline __m256d
  438. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  439. _mm256_mask_add_pd (__m256d __W, __mmask8 __U, __m256d __A,
  440. __m256d __B)
  441. {
  442. return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
  443. (__v4df) __B,
  444. (__v4df) __W,
  445. (__mmask8) __U);
  446. }
  447. extern __inline __m256d
  448. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  449. _mm256_maskz_add_pd (__mmask8 __U, __m256d __A, __m256d __B)
  450. {
  451. return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
  452. (__v4df) __B,
  453. (__v4df)
  454. _mm256_setzero_pd (),
  455. (__mmask8) __U);
  456. }
  457. extern __inline __m128
  458. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  459. _mm_mask_add_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  460. {
  461. return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
  462. (__v4sf) __B,
  463. (__v4sf) __W,
  464. (__mmask8) __U);
  465. }
  466. extern __inline __m128
  467. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  468. _mm_maskz_add_ps (__mmask8 __U, __m128 __A, __m128 __B)
  469. {
  470. return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
  471. (__v4sf) __B,
  472. (__v4sf)
  473. _mm_setzero_ps (),
  474. (__mmask8) __U);
  475. }
  476. extern __inline __m256
  477. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  478. _mm256_mask_add_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
  479. {
  480. return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
  481. (__v8sf) __B,
  482. (__v8sf) __W,
  483. (__mmask8) __U);
  484. }
  485. extern __inline __m256
  486. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  487. _mm256_maskz_add_ps (__mmask8 __U, __m256 __A, __m256 __B)
  488. {
  489. return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
  490. (__v8sf) __B,
  491. (__v8sf)
  492. _mm256_setzero_ps (),
  493. (__mmask8) __U);
  494. }
  495. extern __inline __m128d
  496. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  497. _mm_mask_sub_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
  498. {
  499. return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
  500. (__v2df) __B,
  501. (__v2df) __W,
  502. (__mmask8) __U);
  503. }
  504. extern __inline __m128d
  505. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  506. _mm_maskz_sub_pd (__mmask8 __U, __m128d __A, __m128d __B)
  507. {
  508. return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
  509. (__v2df) __B,
  510. (__v2df)
  511. _mm_setzero_pd (),
  512. (__mmask8) __U);
  513. }
  514. extern __inline __m256d
  515. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  516. _mm256_mask_sub_pd (__m256d __W, __mmask8 __U, __m256d __A,
  517. __m256d __B)
  518. {
  519. return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
  520. (__v4df) __B,
  521. (__v4df) __W,
  522. (__mmask8) __U);
  523. }
  524. extern __inline __m256d
  525. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  526. _mm256_maskz_sub_pd (__mmask8 __U, __m256d __A, __m256d __B)
  527. {
  528. return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
  529. (__v4df) __B,
  530. (__v4df)
  531. _mm256_setzero_pd (),
  532. (__mmask8) __U);
  533. }
  534. extern __inline __m128
  535. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  536. _mm_mask_sub_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  537. {
  538. return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
  539. (__v4sf) __B,
  540. (__v4sf) __W,
  541. (__mmask8) __U);
  542. }
  543. extern __inline __m128
  544. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  545. _mm_maskz_sub_ps (__mmask8 __U, __m128 __A, __m128 __B)
  546. {
  547. return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
  548. (__v4sf) __B,
  549. (__v4sf)
  550. _mm_setzero_ps (),
  551. (__mmask8) __U);
  552. }
  553. extern __inline __m256
  554. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  555. _mm256_mask_sub_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
  556. {
  557. return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
  558. (__v8sf) __B,
  559. (__v8sf) __W,
  560. (__mmask8) __U);
  561. }
  562. extern __inline __m256
  563. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  564. _mm256_maskz_sub_ps (__mmask8 __U, __m256 __A, __m256 __B)
  565. {
  566. return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
  567. (__v8sf) __B,
  568. (__v8sf)
  569. _mm256_setzero_ps (),
  570. (__mmask8) __U);
  571. }
  572. extern __inline void
  573. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  574. _mm256_store_epi64 (void *__P, __m256i __A)
  575. {
  576. *(__m256i *) __P = __A;
  577. }
  578. extern __inline void
  579. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  580. _mm_store_epi64 (void *__P, __m128i __A)
  581. {
  582. *(__m128i *) __P = __A;
  583. }
  584. extern __inline __m256d
  585. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  586. _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
  587. {
  588. return (__m256d) __builtin_ia32_loadupd256_mask ((const double *) __P,
  589. (__v4df) __W,
  590. (__mmask8) __U);
  591. }
  592. extern __inline __m256d
  593. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  594. _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
  595. {
  596. return (__m256d) __builtin_ia32_loadupd256_mask ((const double *) __P,
  597. (__v4df)
  598. _mm256_setzero_pd (),
  599. (__mmask8) __U);
  600. }
  601. extern __inline __m128d
  602. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  603. _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
  604. {
  605. return (__m128d) __builtin_ia32_loadupd128_mask ((const double *) __P,
  606. (__v2df) __W,
  607. (__mmask8) __U);
  608. }
  609. extern __inline __m128d
  610. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  611. _mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
  612. {
  613. return (__m128d) __builtin_ia32_loadupd128_mask ((const double *) __P,
  614. (__v2df)
  615. _mm_setzero_pd (),
  616. (__mmask8) __U);
  617. }
  618. extern __inline void
  619. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  620. _mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
  621. {
  622. __builtin_ia32_storeupd256_mask ((double *) __P,
  623. (__v4df) __A,
  624. (__mmask8) __U);
  625. }
  626. extern __inline void
  627. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  628. _mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
  629. {
  630. __builtin_ia32_storeupd128_mask ((double *) __P,
  631. (__v2df) __A,
  632. (__mmask8) __U);
  633. }
  634. extern __inline __m256
  635. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  636. _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
  637. {
  638. return (__m256) __builtin_ia32_loadups256_mask ((const float *) __P,
  639. (__v8sf) __W,
  640. (__mmask8) __U);
  641. }
  642. extern __inline __m256
  643. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  644. _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
  645. {
  646. return (__m256) __builtin_ia32_loadups256_mask ((const float *) __P,
  647. (__v8sf)
  648. _mm256_setzero_ps (),
  649. (__mmask8) __U);
  650. }
  651. extern __inline __m128
  652. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  653. _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
  654. {
  655. return (__m128) __builtin_ia32_loadups128_mask ((const float *) __P,
  656. (__v4sf) __W,
  657. (__mmask8) __U);
  658. }
  659. extern __inline __m128
  660. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  661. _mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
  662. {
  663. return (__m128) __builtin_ia32_loadups128_mask ((const float *) __P,
  664. (__v4sf)
  665. _mm_setzero_ps (),
  666. (__mmask8) __U);
  667. }
  668. extern __inline void
  669. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  670. _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
  671. {
  672. __builtin_ia32_storeups256_mask ((float *) __P,
  673. (__v8sf) __A,
  674. (__mmask8) __U);
  675. }
  676. extern __inline void
  677. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  678. _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
  679. {
  680. __builtin_ia32_storeups128_mask ((float *) __P,
  681. (__v4sf) __A,
  682. (__mmask8) __U);
  683. }
  684. extern __inline __m256i
  685. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  686. _mm256_loadu_epi64 (void const *__P)
  687. {
  688. return (__m256i) (*(__v4di_u *) __P);
  689. }
  690. extern __inline __m256i
  691. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  692. _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
  693. {
  694. return (__m256i) __builtin_ia32_loaddqudi256_mask ((const long long *) __P,
  695. (__v4di) __W,
  696. (__mmask8) __U);
  697. }
  698. extern __inline __m256i
  699. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  700. _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
  701. {
  702. return (__m256i) __builtin_ia32_loaddqudi256_mask ((const long long *) __P,
  703. (__v4di)
  704. _mm256_setzero_si256 (),
  705. (__mmask8) __U);
  706. }
  707. extern __inline __m128i
  708. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  709. _mm_loadu_epi64 (void const *__P)
  710. {
  711. return (__m128i) (*(__v2di_u *) __P);
  712. }
  713. extern __inline __m128i
  714. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  715. _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
  716. {
  717. return (__m128i) __builtin_ia32_loaddqudi128_mask ((const long long *) __P,
  718. (__v2di) __W,
  719. (__mmask8) __U);
  720. }
  721. extern __inline __m128i
  722. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  723. _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
  724. {
  725. return (__m128i) __builtin_ia32_loaddqudi128_mask ((const long long *) __P,
  726. (__v2di)
  727. _mm_setzero_si128 (),
  728. (__mmask8) __U);
  729. }
  730. extern __inline void
  731. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  732. _mm256_storeu_epi64 (void *__P, __m256i __A)
  733. {
  734. *(__m256i_u *) __P = (__m256i_u) __A;
  735. }
  736. extern __inline void
  737. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  738. _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
  739. {
  740. __builtin_ia32_storedqudi256_mask ((long long *) __P,
  741. (__v4di) __A,
  742. (__mmask8) __U);
  743. }
  744. extern __inline void
  745. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  746. _mm_storeu_epi64 (void *__P, __m128i __A)
  747. {
  748. *(__m128i_u *) __P = (__m128i_u) __A;
  749. }
  750. extern __inline void
  751. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  752. _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
  753. {
  754. __builtin_ia32_storedqudi128_mask ((long long *) __P,
  755. (__v2di) __A,
  756. (__mmask8) __U);
  757. }
  758. extern __inline __m256i
  759. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  760. _mm256_loadu_epi32 (void const *__P)
  761. {
  762. return (__m256i) (*(__v8si_u *) __P);
  763. }
  764. extern __inline __m256i
  765. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  766. _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
  767. {
  768. return (__m256i) __builtin_ia32_loaddqusi256_mask ((const int *) __P,
  769. (__v8si) __W,
  770. (__mmask8) __U);
  771. }
  772. extern __inline __m256i
  773. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  774. _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
  775. {
  776. return (__m256i) __builtin_ia32_loaddqusi256_mask ((const int *) __P,
  777. (__v8si)
  778. _mm256_setzero_si256 (),
  779. (__mmask8) __U);
  780. }
  781. extern __inline __m128i
  782. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  783. _mm_loadu_epi32 (void const *__P)
  784. {
  785. return (__m128i) (*(__v4si_u *) __P);
  786. }
  787. extern __inline __m128i
  788. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  789. _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
  790. {
  791. return (__m128i) __builtin_ia32_loaddqusi128_mask ((const int *) __P,
  792. (__v4si) __W,
  793. (__mmask8) __U);
  794. }
  795. extern __inline __m128i
  796. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  797. _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
  798. {
  799. return (__m128i) __builtin_ia32_loaddqusi128_mask ((const int *) __P,
  800. (__v4si)
  801. _mm_setzero_si128 (),
  802. (__mmask8) __U);
  803. }
  804. extern __inline void
  805. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  806. _mm256_storeu_epi32 (void *__P, __m256i __A)
  807. {
  808. *(__m256i_u *) __P = (__m256i_u) __A;
  809. }
  810. extern __inline void
  811. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  812. _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
  813. {
  814. __builtin_ia32_storedqusi256_mask ((int *) __P,
  815. (__v8si) __A,
  816. (__mmask8) __U);
  817. }
  818. extern __inline void
  819. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  820. _mm_storeu_epi32 (void *__P, __m128i __A)
  821. {
  822. *(__m128i_u *) __P = (__m128i_u) __A;
  823. }
  824. extern __inline void
  825. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  826. _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
  827. {
  828. __builtin_ia32_storedqusi128_mask ((int *) __P,
  829. (__v4si) __A,
  830. (__mmask8) __U);
  831. }
  832. extern __inline __m256i
  833. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  834. _mm256_mask_abs_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
  835. {
  836. return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
  837. (__v8si) __W,
  838. (__mmask8) __U);
  839. }
  840. extern __inline __m256i
  841. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  842. _mm256_maskz_abs_epi32 (__mmask8 __U, __m256i __A)
  843. {
  844. return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
  845. (__v8si)
  846. _mm256_setzero_si256 (),
  847. (__mmask8) __U);
  848. }
  849. extern __inline __m128i
  850. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  851. _mm_mask_abs_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
  852. {
  853. return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
  854. (__v4si) __W,
  855. (__mmask8) __U);
  856. }
  857. extern __inline __m128i
  858. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  859. _mm_maskz_abs_epi32 (__mmask8 __U, __m128i __A)
  860. {
  861. return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
  862. (__v4si)
  863. _mm_setzero_si128 (),
  864. (__mmask8) __U);
  865. }
  866. extern __inline __m256i
  867. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  868. _mm256_abs_epi64 (__m256i __A)
  869. {
  870. return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
  871. (__v4di)
  872. _mm256_setzero_si256 (),
  873. (__mmask8) -1);
  874. }
  875. extern __inline __m256i
  876. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  877. _mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
  878. {
  879. return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
  880. (__v4di) __W,
  881. (__mmask8) __U);
  882. }
  883. extern __inline __m256i
  884. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  885. _mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A)
  886. {
  887. return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
  888. (__v4di)
  889. _mm256_setzero_si256 (),
  890. (__mmask8) __U);
  891. }
  892. extern __inline __m128i
  893. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  894. _mm_abs_epi64 (__m128i __A)
  895. {
  896. return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
  897. (__v2di)
  898. _mm_setzero_si128 (),
  899. (__mmask8) -1);
  900. }
  901. extern __inline __m128i
  902. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  903. _mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
  904. {
  905. return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
  906. (__v2di) __W,
  907. (__mmask8) __U);
  908. }
  909. extern __inline __m128i
  910. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  911. _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A)
  912. {
  913. return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
  914. (__v2di)
  915. _mm_setzero_si128 (),
  916. (__mmask8) __U);
  917. }
  918. extern __inline __m128i
  919. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  920. _mm256_cvtpd_epu32 (__m256d __A)
  921. {
  922. return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
  923. (__v4si)
  924. _mm_setzero_si128 (),
  925. (__mmask8) -1);
  926. }
  927. extern __inline __m128i
  928. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  929. _mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
  930. {
  931. return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
  932. (__v4si) __W,
  933. (__mmask8) __U);
  934. }
  935. extern __inline __m128i
  936. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  937. _mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A)
  938. {
  939. return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
  940. (__v4si)
  941. _mm_setzero_si128 (),
  942. (__mmask8) __U);
  943. }
  944. extern __inline __m128i
  945. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  946. _mm_cvtpd_epu32 (__m128d __A)
  947. {
  948. return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
  949. (__v4si)
  950. _mm_setzero_si128 (),
  951. (__mmask8) -1);
  952. }
  953. extern __inline __m128i
  954. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  955. _mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
  956. {
  957. return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
  958. (__v4si) __W,
  959. (__mmask8) __U);
  960. }
  961. extern __inline __m128i
  962. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  963. _mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A)
  964. {
  965. return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
  966. (__v4si)
  967. _mm_setzero_si128 (),
  968. (__mmask8) __U);
  969. }
  970. extern __inline __m256i
  971. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  972. _mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
  973. {
  974. return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
  975. (__v8si) __W,
  976. (__mmask8) __U);
  977. }
  978. extern __inline __m256i
  979. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  980. _mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A)
  981. {
  982. return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
  983. (__v8si)
  984. _mm256_setzero_si256 (),
  985. (__mmask8) __U);
  986. }
  987. extern __inline __m128i
  988. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  989. _mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
  990. {
  991. return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
  992. (__v4si) __W,
  993. (__mmask8) __U);
  994. }
  995. extern __inline __m128i
  996. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  997. _mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A)
  998. {
  999. return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
  1000. (__v4si)
  1001. _mm_setzero_si128 (),
  1002. (__mmask8) __U);
  1003. }
  1004. extern __inline __m256i
  1005. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1006. _mm256_cvttps_epu32 (__m256 __A)
  1007. {
  1008. return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
  1009. (__v8si)
  1010. _mm256_setzero_si256 (),
  1011. (__mmask8) -1);
  1012. }
  1013. extern __inline __m256i
  1014. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1015. _mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
  1016. {
  1017. return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
  1018. (__v8si) __W,
  1019. (__mmask8) __U);
  1020. }
  1021. extern __inline __m256i
  1022. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1023. _mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A)
  1024. {
  1025. return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
  1026. (__v8si)
  1027. _mm256_setzero_si256 (),
  1028. (__mmask8) __U);
  1029. }
  1030. extern __inline __m128i
  1031. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1032. _mm_cvttps_epu32 (__m128 __A)
  1033. {
  1034. return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
  1035. (__v4si)
  1036. _mm_setzero_si128 (),
  1037. (__mmask8) -1);
  1038. }
  1039. extern __inline __m128i
  1040. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1041. _mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
  1042. {
  1043. return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
  1044. (__v4si) __W,
  1045. (__mmask8) __U);
  1046. }
  1047. extern __inline __m128i
  1048. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1049. _mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A)
  1050. {
  1051. return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
  1052. (__v4si)
  1053. _mm_setzero_si128 (),
  1054. (__mmask8) __U);
  1055. }
  1056. extern __inline __m128i
  1057. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1058. _mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
  1059. {
  1060. return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
  1061. (__v4si) __W,
  1062. (__mmask8) __U);
  1063. }
  1064. extern __inline __m128i
  1065. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1066. _mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A)
  1067. {
  1068. return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
  1069. (__v4si)
  1070. _mm_setzero_si128 (),
  1071. (__mmask8) __U);
  1072. }
  1073. extern __inline __m128i
  1074. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1075. _mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
  1076. {
  1077. return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
  1078. (__v4si) __W,
  1079. (__mmask8) __U);
  1080. }
  1081. extern __inline __m128i
  1082. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1083. _mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A)
  1084. {
  1085. return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
  1086. (__v4si)
  1087. _mm_setzero_si128 (),
  1088. (__mmask8) __U);
  1089. }
  1090. extern __inline __m128i
  1091. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1092. _mm256_cvttpd_epu32 (__m256d __A)
  1093. {
  1094. return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
  1095. (__v4si)
  1096. _mm_setzero_si128 (),
  1097. (__mmask8) -1);
  1098. }
  1099. extern __inline __m128i
  1100. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1101. _mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
  1102. {
  1103. return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
  1104. (__v4si) __W,
  1105. (__mmask8) __U);
  1106. }
  1107. extern __inline __m128i
  1108. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1109. _mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A)
  1110. {
  1111. return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
  1112. (__v4si)
  1113. _mm_setzero_si128 (),
  1114. (__mmask8) __U);
  1115. }
  1116. extern __inline __m128i
  1117. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1118. _mm_cvttpd_epu32 (__m128d __A)
  1119. {
  1120. return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
  1121. (__v4si)
  1122. _mm_setzero_si128 (),
  1123. (__mmask8) -1);
  1124. }
  1125. extern __inline __m128i
  1126. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1127. _mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
  1128. {
  1129. return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
  1130. (__v4si) __W,
  1131. (__mmask8) __U);
  1132. }
  1133. extern __inline __m128i
  1134. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1135. _mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A)
  1136. {
  1137. return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
  1138. (__v4si)
  1139. _mm_setzero_si128 (),
  1140. (__mmask8) __U);
  1141. }
  1142. extern __inline __m128i
  1143. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1144. _mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
  1145. {
  1146. return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
  1147. (__v4si) __W,
  1148. (__mmask8) __U);
  1149. }
  1150. extern __inline __m128i
  1151. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1152. _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A)
  1153. {
  1154. return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
  1155. (__v4si)
  1156. _mm_setzero_si128 (),
  1157. (__mmask8) __U);
  1158. }
  1159. extern __inline __m128i
  1160. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1161. _mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
  1162. {
  1163. return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
  1164. (__v4si) __W,
  1165. (__mmask8) __U);
  1166. }
  1167. extern __inline __m128i
  1168. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1169. _mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A)
  1170. {
  1171. return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
  1172. (__v4si)
  1173. _mm_setzero_si128 (),
  1174. (__mmask8) __U);
  1175. }
  1176. extern __inline __m256d
  1177. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1178. _mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A)
  1179. {
  1180. return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
  1181. (__v4df) __W,
  1182. (__mmask8) __U);
  1183. }
  1184. extern __inline __m256d
  1185. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1186. _mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
  1187. {
  1188. return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
  1189. (__v4df)
  1190. _mm256_setzero_pd (),
  1191. (__mmask8) __U);
  1192. }
  1193. extern __inline __m128d
  1194. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1195. _mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A)
  1196. {
  1197. return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
  1198. (__v2df) __W,
  1199. (__mmask8) __U);
  1200. }
  1201. extern __inline __m128d
  1202. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1203. _mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
  1204. {
  1205. return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
  1206. (__v2df)
  1207. _mm_setzero_pd (),
  1208. (__mmask8) __U);
  1209. }
  1210. extern __inline __m256d
  1211. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1212. _mm256_cvtepu32_pd (__m128i __A)
  1213. {
  1214. return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
  1215. (__v4df)
  1216. _mm256_setzero_pd (),
  1217. (__mmask8) -1);
  1218. }
  1219. extern __inline __m256d
  1220. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1221. _mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A)
  1222. {
  1223. return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
  1224. (__v4df) __W,
  1225. (__mmask8) __U);
  1226. }
  1227. extern __inline __m256d
  1228. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1229. _mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
  1230. {
  1231. return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
  1232. (__v4df)
  1233. _mm256_setzero_pd (),
  1234. (__mmask8) __U);
  1235. }
  1236. extern __inline __m128d
  1237. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1238. _mm_cvtepu32_pd (__m128i __A)
  1239. {
  1240. return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
  1241. (__v2df)
  1242. _mm_setzero_pd (),
  1243. (__mmask8) -1);
  1244. }
  1245. extern __inline __m128d
  1246. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1247. _mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A)
  1248. {
  1249. return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
  1250. (__v2df) __W,
  1251. (__mmask8) __U);
  1252. }
  1253. extern __inline __m128d
  1254. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1255. _mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
  1256. {
  1257. return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
  1258. (__v2df)
  1259. _mm_setzero_pd (),
  1260. (__mmask8) __U);
  1261. }
  1262. extern __inline __m256
  1263. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1264. _mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A)
  1265. {
  1266. return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
  1267. (__v8sf) __W,
  1268. (__mmask8) __U);
  1269. }
  1270. extern __inline __m256
  1271. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1272. _mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A)
  1273. {
  1274. return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
  1275. (__v8sf)
  1276. _mm256_setzero_ps (),
  1277. (__mmask8) __U);
  1278. }
  1279. extern __inline __m128
  1280. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1281. _mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A)
  1282. {
  1283. return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
  1284. (__v4sf) __W,
  1285. (__mmask8) __U);
  1286. }
  1287. extern __inline __m128
  1288. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1289. _mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A)
  1290. {
  1291. return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
  1292. (__v4sf)
  1293. _mm_setzero_ps (),
  1294. (__mmask8) __U);
  1295. }
  1296. extern __inline __m256
  1297. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1298. _mm256_cvtepu32_ps (__m256i __A)
  1299. {
  1300. return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
  1301. (__v8sf)
  1302. _mm256_setzero_ps (),
  1303. (__mmask8) -1);
  1304. }
  1305. extern __inline __m256
  1306. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1307. _mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A)
  1308. {
  1309. return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
  1310. (__v8sf) __W,
  1311. (__mmask8) __U);
  1312. }
  1313. extern __inline __m256
  1314. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1315. _mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A)
  1316. {
  1317. return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
  1318. (__v8sf)
  1319. _mm256_setzero_ps (),
  1320. (__mmask8) __U);
  1321. }
  1322. extern __inline __m128
  1323. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1324. _mm_cvtepu32_ps (__m128i __A)
  1325. {
  1326. return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
  1327. (__v4sf)
  1328. _mm_setzero_ps (),
  1329. (__mmask8) -1);
  1330. }
  1331. extern __inline __m128
  1332. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1333. _mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A)
  1334. {
  1335. return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
  1336. (__v4sf) __W,
  1337. (__mmask8) __U);
  1338. }
  1339. extern __inline __m128
  1340. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1341. _mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A)
  1342. {
  1343. return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
  1344. (__v4sf)
  1345. _mm_setzero_ps (),
  1346. (__mmask8) __U);
  1347. }
  1348. extern __inline __m256d
  1349. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1350. _mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A)
  1351. {
  1352. return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
  1353. (__v4df) __W,
  1354. (__mmask8) __U);
  1355. }
  1356. extern __inline __m256d
  1357. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1358. _mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
  1359. {
  1360. return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
  1361. (__v4df)
  1362. _mm256_setzero_pd (),
  1363. (__mmask8) __U);
  1364. }
  1365. extern __inline __m128d
  1366. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1367. _mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A)
  1368. {
  1369. return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
  1370. (__v2df) __W,
  1371. (__mmask8) __U);
  1372. }
  1373. extern __inline __m128d
  1374. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1375. _mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
  1376. {
  1377. return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
  1378. (__v2df)
  1379. _mm_setzero_pd (),
  1380. (__mmask8) __U);
  1381. }
  1382. extern __inline __m128i
  1383. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1384. _mm_cvtepi32_epi8 (__m128i __A)
  1385. {
  1386. return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
  1387. (__v16qi)
  1388. _mm_undefined_si128 (),
  1389. (__mmask8) -1);
  1390. }
  1391. extern __inline void
  1392. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1393. _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
  1394. {
  1395. __builtin_ia32_pmovdb128mem_mask ((unsigned int *) __P, (__v4si) __A, __M);
  1396. }
  1397. extern __inline __m128i
  1398. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1399. _mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
  1400. {
  1401. return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
  1402. (__v16qi) __O, __M);
  1403. }
  1404. extern __inline __m128i
  1405. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1406. _mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
  1407. {
  1408. return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
  1409. (__v16qi)
  1410. _mm_setzero_si128 (),
  1411. __M);
  1412. }
  1413. extern __inline __m128i
  1414. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1415. _mm256_cvtepi32_epi8 (__m256i __A)
  1416. {
  1417. return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
  1418. (__v16qi)
  1419. _mm_undefined_si128 (),
  1420. (__mmask8) -1);
  1421. }
  1422. extern __inline __m128i
  1423. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1424. _mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
  1425. {
  1426. return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
  1427. (__v16qi) __O, __M);
  1428. }
  1429. extern __inline void
  1430. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1431. _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
  1432. {
  1433. __builtin_ia32_pmovdb256mem_mask ((unsigned long long *) __P, (__v8si) __A, __M);
  1434. }
  1435. extern __inline __m128i
  1436. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1437. _mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
  1438. {
  1439. return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
  1440. (__v16qi)
  1441. _mm_setzero_si128 (),
  1442. __M);
  1443. }
  1444. extern __inline __m128i
  1445. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1446. _mm_cvtsepi32_epi8 (__m128i __A)
  1447. {
  1448. return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
  1449. (__v16qi)
  1450. _mm_undefined_si128 (),
  1451. (__mmask8) -1);
  1452. }
  1453. extern __inline void
  1454. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1455. _mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
  1456. {
  1457. __builtin_ia32_pmovsdb128mem_mask ((unsigned int *) __P, (__v4si) __A, __M);
  1458. }
  1459. extern __inline __m128i
  1460. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1461. _mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
  1462. {
  1463. return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
  1464. (__v16qi) __O, __M);
  1465. }
  1466. extern __inline __m128i
  1467. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1468. _mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
  1469. {
  1470. return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
  1471. (__v16qi)
  1472. _mm_setzero_si128 (),
  1473. __M);
  1474. }
  1475. extern __inline __m128i
  1476. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1477. _mm256_cvtsepi32_epi8 (__m256i __A)
  1478. {
  1479. return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
  1480. (__v16qi)
  1481. _mm_undefined_si128 (),
  1482. (__mmask8) -1);
  1483. }
  1484. extern __inline void
  1485. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1486. _mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
  1487. {
  1488. __builtin_ia32_pmovsdb256mem_mask ((unsigned long long *) __P, (__v8si) __A, __M);
  1489. }
  1490. extern __inline __m128i
  1491. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1492. _mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
  1493. {
  1494. return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
  1495. (__v16qi) __O, __M);
  1496. }
  1497. extern __inline __m128i
  1498. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1499. _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
  1500. {
  1501. return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
  1502. (__v16qi)
  1503. _mm_setzero_si128 (),
  1504. __M);
  1505. }
  1506. extern __inline __m128i
  1507. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1508. _mm_cvtusepi32_epi8 (__m128i __A)
  1509. {
  1510. return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
  1511. (__v16qi)
  1512. _mm_undefined_si128 (),
  1513. (__mmask8) -1);
  1514. }
  1515. extern __inline void
  1516. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1517. _mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
  1518. {
  1519. __builtin_ia32_pmovusdb128mem_mask ((unsigned int *) __P, (__v4si) __A, __M);
  1520. }
  1521. extern __inline __m128i
  1522. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1523. _mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
  1524. {
  1525. return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
  1526. (__v16qi) __O,
  1527. __M);
  1528. }
  1529. extern __inline __m128i
  1530. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1531. _mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
  1532. {
  1533. return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
  1534. (__v16qi)
  1535. _mm_setzero_si128 (),
  1536. __M);
  1537. }
  1538. extern __inline __m128i
  1539. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1540. _mm256_cvtusepi32_epi8 (__m256i __A)
  1541. {
  1542. return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
  1543. (__v16qi)
  1544. _mm_undefined_si128 (),
  1545. (__mmask8) -1);
  1546. }
  1547. extern __inline void
  1548. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1549. _mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
  1550. {
  1551. __builtin_ia32_pmovusdb256mem_mask ((unsigned long long *) __P, (__v8si) __A, __M);
  1552. }
  1553. extern __inline __m128i
  1554. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1555. _mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
  1556. {
  1557. return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
  1558. (__v16qi) __O,
  1559. __M);
  1560. }
  1561. extern __inline __m128i
  1562. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1563. _mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
  1564. {
  1565. return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
  1566. (__v16qi)
  1567. _mm_setzero_si128 (),
  1568. __M);
  1569. }
  1570. extern __inline __m128i
  1571. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1572. _mm_cvtepi32_epi16 (__m128i __A)
  1573. {
  1574. return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
  1575. (__v8hi)
  1576. _mm_setzero_si128 (),
  1577. (__mmask8) -1);
  1578. }
  1579. extern __inline void
  1580. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1581. _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
  1582. {
  1583. __builtin_ia32_pmovdw128mem_mask ((unsigned long long *) __P, (__v4si) __A, __M);
  1584. }
  1585. extern __inline __m128i
  1586. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1587. _mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
  1588. {
  1589. return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
  1590. (__v8hi) __O, __M);
  1591. }
  1592. extern __inline __m128i
  1593. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1594. _mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
  1595. {
  1596. return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
  1597. (__v8hi)
  1598. _mm_setzero_si128 (),
  1599. __M);
  1600. }
  1601. extern __inline __m128i
  1602. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1603. _mm256_cvtepi32_epi16 (__m256i __A)
  1604. {
  1605. return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
  1606. (__v8hi)
  1607. _mm_setzero_si128 (),
  1608. (__mmask8) -1);
  1609. }
  1610. extern __inline void
  1611. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1612. _mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
  1613. {
  1614. __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
  1615. }
  1616. extern __inline __m128i
  1617. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1618. _mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
  1619. {
  1620. return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
  1621. (__v8hi) __O, __M);
  1622. }
  1623. extern __inline __m128i
  1624. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1625. _mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
  1626. {
  1627. return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
  1628. (__v8hi)
  1629. _mm_setzero_si128 (),
  1630. __M);
  1631. }
  1632. extern __inline __m128i
  1633. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1634. _mm_cvtsepi32_epi16 (__m128i __A)
  1635. {
  1636. return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
  1637. (__v8hi)
  1638. _mm_setzero_si128 (),
  1639. (__mmask8) -1);
  1640. }
  1641. extern __inline void
  1642. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1643. _mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
  1644. {
  1645. __builtin_ia32_pmovsdw128mem_mask ((unsigned long long *) __P, (__v4si) __A, __M);
  1646. }
  1647. extern __inline __m128i
  1648. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1649. _mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
  1650. {
  1651. return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
  1652. (__v8hi)__O,
  1653. __M);
  1654. }
  1655. extern __inline __m128i
  1656. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1657. _mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
  1658. {
  1659. return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
  1660. (__v8hi)
  1661. _mm_setzero_si128 (),
  1662. __M);
  1663. }
  1664. extern __inline __m128i
  1665. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1666. _mm256_cvtsepi32_epi16 (__m256i __A)
  1667. {
  1668. return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
  1669. (__v8hi)
  1670. _mm_undefined_si128 (),
  1671. (__mmask8) -1);
  1672. }
  1673. extern __inline void
  1674. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1675. _mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
  1676. {
  1677. __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
  1678. }
  1679. extern __inline __m128i
  1680. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1681. _mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
  1682. {
  1683. return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
  1684. (__v8hi) __O, __M);
  1685. }
  1686. extern __inline __m128i
  1687. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1688. _mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
  1689. {
  1690. return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
  1691. (__v8hi)
  1692. _mm_setzero_si128 (),
  1693. __M);
  1694. }
  1695. extern __inline __m128i
  1696. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1697. _mm_cvtusepi32_epi16 (__m128i __A)
  1698. {
  1699. return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
  1700. (__v8hi)
  1701. _mm_undefined_si128 (),
  1702. (__mmask8) -1);
  1703. }
  1704. extern __inline void
  1705. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1706. _mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
  1707. {
  1708. __builtin_ia32_pmovusdw128mem_mask ((unsigned long long *) __P, (__v4si) __A, __M);
  1709. }
  1710. extern __inline __m128i
  1711. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1712. _mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
  1713. {
  1714. return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
  1715. (__v8hi) __O, __M);
  1716. }
  1717. extern __inline __m128i
  1718. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1719. _mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
  1720. {
  1721. return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
  1722. (__v8hi)
  1723. _mm_setzero_si128 (),
  1724. __M);
  1725. }
  1726. extern __inline __m128i
  1727. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1728. _mm256_cvtusepi32_epi16 (__m256i __A)
  1729. {
  1730. return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
  1731. (__v8hi)
  1732. _mm_undefined_si128 (),
  1733. (__mmask8) -1);
  1734. }
  1735. extern __inline void
  1736. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1737. _mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
  1738. {
  1739. __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
  1740. }
  1741. extern __inline __m128i
  1742. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1743. _mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
  1744. {
  1745. return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
  1746. (__v8hi) __O, __M);
  1747. }
  1748. extern __inline __m128i
  1749. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1750. _mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
  1751. {
  1752. return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
  1753. (__v8hi)
  1754. _mm_setzero_si128 (),
  1755. __M);
  1756. }
  1757. extern __inline __m128i
  1758. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1759. _mm_cvtepi64_epi8 (__m128i __A)
  1760. {
  1761. return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
  1762. (__v16qi)
  1763. _mm_undefined_si128 (),
  1764. (__mmask8) -1);
  1765. }
  1766. extern __inline void
  1767. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1768. _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
  1769. {
  1770. __builtin_ia32_pmovqb128mem_mask ((unsigned short *) __P, (__v2di) __A, __M);
  1771. }
  1772. extern __inline __m128i
  1773. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1774. _mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
  1775. {
  1776. return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
  1777. (__v16qi) __O, __M);
  1778. }
  1779. extern __inline __m128i
  1780. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1781. _mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
  1782. {
  1783. return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
  1784. (__v16qi)
  1785. _mm_setzero_si128 (),
  1786. __M);
  1787. }
  1788. extern __inline __m128i
  1789. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1790. _mm256_cvtepi64_epi8 (__m256i __A)
  1791. {
  1792. return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
  1793. (__v16qi)
  1794. _mm_undefined_si128 (),
  1795. (__mmask8) -1);
  1796. }
  1797. extern __inline void
  1798. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1799. _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
  1800. {
  1801. __builtin_ia32_pmovqb256mem_mask ((unsigned int *) __P, (__v4di) __A, __M);
  1802. }
  1803. extern __inline __m128i
  1804. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1805. _mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
  1806. {
  1807. return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
  1808. (__v16qi) __O, __M);
  1809. }
  1810. extern __inline __m128i
  1811. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1812. _mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
  1813. {
  1814. return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
  1815. (__v16qi)
  1816. _mm_setzero_si128 (),
  1817. __M);
  1818. }
  1819. extern __inline __m128i
  1820. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1821. _mm_cvtsepi64_epi8 (__m128i __A)
  1822. {
  1823. return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
  1824. (__v16qi)
  1825. _mm_undefined_si128 (),
  1826. (__mmask8) -1);
  1827. }
  1828. extern __inline void
  1829. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1830. _mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
  1831. {
  1832. __builtin_ia32_pmovsqb128mem_mask ((unsigned short *) __P, (__v2di) __A, __M);
  1833. }
  1834. extern __inline __m128i
  1835. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1836. _mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
  1837. {
  1838. return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
  1839. (__v16qi) __O, __M);
  1840. }
  1841. extern __inline __m128i
  1842. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1843. _mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
  1844. {
  1845. return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
  1846. (__v16qi)
  1847. _mm_setzero_si128 (),
  1848. __M);
  1849. }
  1850. extern __inline __m128i
  1851. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1852. _mm256_cvtsepi64_epi8 (__m256i __A)
  1853. {
  1854. return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
  1855. (__v16qi)
  1856. _mm_undefined_si128 (),
  1857. (__mmask8) -1);
  1858. }
  1859. extern __inline void
  1860. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1861. _mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
  1862. {
  1863. __builtin_ia32_pmovsqb256mem_mask ((unsigned int *) __P, (__v4di) __A, __M);
  1864. }
  1865. extern __inline __m128i
  1866. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1867. _mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
  1868. {
  1869. return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
  1870. (__v16qi) __O, __M);
  1871. }
  1872. extern __inline __m128i
  1873. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1874. _mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
  1875. {
  1876. return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
  1877. (__v16qi)
  1878. _mm_setzero_si128 (),
  1879. __M);
  1880. }
  1881. extern __inline __m128i
  1882. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1883. _mm_cvtusepi64_epi8 (__m128i __A)
  1884. {
  1885. return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
  1886. (__v16qi)
  1887. _mm_undefined_si128 (),
  1888. (__mmask8) -1);
  1889. }
  1890. extern __inline void
  1891. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1892. _mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
  1893. {
  1894. __builtin_ia32_pmovusqb128mem_mask ((unsigned short *) __P, (__v2di) __A, __M);
  1895. }
  1896. extern __inline __m128i
  1897. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1898. _mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
  1899. {
  1900. return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
  1901. (__v16qi) __O,
  1902. __M);
  1903. }
  1904. extern __inline __m128i
  1905. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1906. _mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
  1907. {
  1908. return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
  1909. (__v16qi)
  1910. _mm_setzero_si128 (),
  1911. __M);
  1912. }
  1913. extern __inline __m128i
  1914. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1915. _mm256_cvtusepi64_epi8 (__m256i __A)
  1916. {
  1917. return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
  1918. (__v16qi)
  1919. _mm_undefined_si128 (),
  1920. (__mmask8) -1);
  1921. }
  1922. extern __inline void
  1923. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1924. _mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
  1925. {
  1926. __builtin_ia32_pmovusqb256mem_mask ((unsigned int *) __P, (__v4di) __A, __M);
  1927. }
  1928. extern __inline __m128i
  1929. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1930. _mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
  1931. {
  1932. return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
  1933. (__v16qi) __O,
  1934. __M);
  1935. }
  1936. extern __inline __m128i
  1937. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1938. _mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
  1939. {
  1940. return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
  1941. (__v16qi)
  1942. _mm_setzero_si128 (),
  1943. __M);
  1944. }
  1945. extern __inline __m128i
  1946. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1947. _mm_cvtepi64_epi16 (__m128i __A)
  1948. {
  1949. return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
  1950. (__v8hi)
  1951. _mm_undefined_si128 (),
  1952. (__mmask8) -1);
  1953. }
  1954. extern __inline void
  1955. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1956. _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
  1957. {
  1958. __builtin_ia32_pmovqw128mem_mask ((unsigned int *) __P, (__v2di) __A, __M);
  1959. }
  1960. extern __inline __m128i
  1961. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1962. _mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
  1963. {
  1964. return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
  1965. (__v8hi)__O,
  1966. __M);
  1967. }
  1968. extern __inline __m128i
  1969. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1970. _mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
  1971. {
  1972. return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
  1973. (__v8hi)
  1974. _mm_setzero_si128 (),
  1975. __M);
  1976. }
  1977. extern __inline __m128i
  1978. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1979. _mm256_cvtepi64_epi16 (__m256i __A)
  1980. {
  1981. return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
  1982. (__v8hi)
  1983. _mm_undefined_si128 (),
  1984. (__mmask8) -1);
  1985. }
  1986. extern __inline void
  1987. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1988. _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
  1989. {
  1990. __builtin_ia32_pmovqw256mem_mask ((unsigned long long *) __P, (__v4di) __A, __M);
  1991. }
  1992. extern __inline __m128i
  1993. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1994. _mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
  1995. {
  1996. return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
  1997. (__v8hi) __O, __M);
  1998. }
  1999. extern __inline __m128i
  2000. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2001. _mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
  2002. {
  2003. return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
  2004. (__v8hi)
  2005. _mm_setzero_si128 (),
  2006. __M);
  2007. }
  2008. extern __inline __m128i
  2009. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2010. _mm_cvtsepi64_epi16 (__m128i __A)
  2011. {
  2012. return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
  2013. (__v8hi)
  2014. _mm_undefined_si128 (),
  2015. (__mmask8) -1);
  2016. }
  2017. extern __inline void
  2018. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2019. _mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
  2020. {
  2021. __builtin_ia32_pmovsqw128mem_mask ((unsigned int *) __P, (__v2di) __A, __M);
  2022. }
  2023. extern __inline __m128i
  2024. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2025. _mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
  2026. {
  2027. return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
  2028. (__v8hi) __O, __M);
  2029. }
  2030. extern __inline __m128i
  2031. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2032. _mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
  2033. {
  2034. return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
  2035. (__v8hi)
  2036. _mm_setzero_si128 (),
  2037. __M);
  2038. }
  2039. extern __inline __m128i
  2040. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2041. _mm256_cvtsepi64_epi16 (__m256i __A)
  2042. {
  2043. return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
  2044. (__v8hi)
  2045. _mm_undefined_si128 (),
  2046. (__mmask8) -1);
  2047. }
  2048. extern __inline void
  2049. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2050. _mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
  2051. {
  2052. __builtin_ia32_pmovsqw256mem_mask ((unsigned long long *) __P, (__v4di) __A, __M);
  2053. }
  2054. extern __inline __m128i
  2055. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2056. _mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
  2057. {
  2058. return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
  2059. (__v8hi) __O, __M);
  2060. }
  2061. extern __inline __m128i
  2062. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2063. _mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
  2064. {
  2065. return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
  2066. (__v8hi)
  2067. _mm_setzero_si128 (),
  2068. __M);
  2069. }
  2070. extern __inline __m128i
  2071. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2072. _mm_cvtusepi64_epi16 (__m128i __A)
  2073. {
  2074. return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
  2075. (__v8hi)
  2076. _mm_undefined_si128 (),
  2077. (__mmask8) -1);
  2078. }
  2079. extern __inline void
  2080. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2081. _mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
  2082. {
  2083. __builtin_ia32_pmovusqw128mem_mask ((unsigned int *) __P, (__v2di) __A, __M);
  2084. }
  2085. extern __inline __m128i
  2086. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2087. _mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
  2088. {
  2089. return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
  2090. (__v8hi) __O, __M);
  2091. }
  2092. extern __inline __m128i
  2093. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2094. _mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
  2095. {
  2096. return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
  2097. (__v8hi)
  2098. _mm_setzero_si128 (),
  2099. __M);
  2100. }
  2101. extern __inline __m128i
  2102. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2103. _mm256_cvtusepi64_epi16 (__m256i __A)
  2104. {
  2105. return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
  2106. (__v8hi)
  2107. _mm_undefined_si128 (),
  2108. (__mmask8) -1);
  2109. }
  2110. extern __inline void
  2111. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2112. _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
  2113. {
  2114. __builtin_ia32_pmovusqw256mem_mask ((unsigned long long *) __P, (__v4di) __A, __M);
  2115. }
  2116. extern __inline __m128i
  2117. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2118. _mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
  2119. {
  2120. return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
  2121. (__v8hi) __O, __M);
  2122. }
  2123. extern __inline __m128i
  2124. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2125. _mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
  2126. {
  2127. return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
  2128. (__v8hi)
  2129. _mm_setzero_si128 (),
  2130. __M);
  2131. }
  2132. extern __inline __m128i
  2133. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2134. _mm_cvtepi64_epi32 (__m128i __A)
  2135. {
  2136. return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
  2137. (__v4si)
  2138. _mm_undefined_si128 (),
  2139. (__mmask8) -1);
  2140. }
  2141. extern __inline void
  2142. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2143. _mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
  2144. {
  2145. __builtin_ia32_pmovqd128mem_mask ((unsigned long long *) __P,
  2146. (__v2di) __A, __M);
  2147. }
  2148. extern __inline __m128i
  2149. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2150. _mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
  2151. {
  2152. return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
  2153. (__v4si) __O, __M);
  2154. }
  2155. extern __inline __m128i
  2156. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2157. _mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
  2158. {
  2159. return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
  2160. (__v4si)
  2161. _mm_setzero_si128 (),
  2162. __M);
  2163. }
  2164. extern __inline __m128i
  2165. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2166. _mm256_cvtepi64_epi32 (__m256i __A)
  2167. {
  2168. return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
  2169. (__v4si)
  2170. _mm_undefined_si128 (),
  2171. (__mmask8) -1);
  2172. }
  2173. extern __inline void
  2174. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2175. _mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
  2176. {
  2177. __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
  2178. }
  2179. extern __inline __m128i
  2180. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2181. _mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
  2182. {
  2183. return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
  2184. (__v4si) __O, __M);
  2185. }
  2186. extern __inline __m128i
  2187. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2188. _mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
  2189. {
  2190. return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
  2191. (__v4si)
  2192. _mm_setzero_si128 (),
  2193. __M);
  2194. }
  2195. extern __inline __m128i
  2196. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2197. _mm_cvtsepi64_epi32 (__m128i __A)
  2198. {
  2199. return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
  2200. (__v4si)
  2201. _mm_undefined_si128 (),
  2202. (__mmask8) -1);
  2203. }
  2204. extern __inline void
  2205. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2206. _mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
  2207. {
  2208. __builtin_ia32_pmovsqd128mem_mask ((unsigned long long *) __P, (__v2di) __A, __M);
  2209. }
  2210. extern __inline __m128i
  2211. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2212. _mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
  2213. {
  2214. return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
  2215. (__v4si) __O, __M);
  2216. }
  2217. extern __inline __m128i
  2218. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2219. _mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
  2220. {
  2221. return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
  2222. (__v4si)
  2223. _mm_setzero_si128 (),
  2224. __M);
  2225. }
  2226. extern __inline __m128i
  2227. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2228. _mm256_cvtsepi64_epi32 (__m256i __A)
  2229. {
  2230. return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
  2231. (__v4si)
  2232. _mm_undefined_si128 (),
  2233. (__mmask8) -1);
  2234. }
  2235. extern __inline void
  2236. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2237. _mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
  2238. {
  2239. __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
  2240. }
  2241. extern __inline __m128i
  2242. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2243. _mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
  2244. {
  2245. return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
  2246. (__v4si)__O,
  2247. __M);
  2248. }
  2249. extern __inline __m128i
  2250. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2251. _mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
  2252. {
  2253. return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
  2254. (__v4si)
  2255. _mm_setzero_si128 (),
  2256. __M);
  2257. }
  2258. extern __inline __m128i
  2259. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2260. _mm_cvtusepi64_epi32 (__m128i __A)
  2261. {
  2262. return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
  2263. (__v4si)
  2264. _mm_undefined_si128 (),
  2265. (__mmask8) -1);
  2266. }
  2267. extern __inline void
  2268. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2269. _mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
  2270. {
  2271. __builtin_ia32_pmovusqd128mem_mask ((unsigned long long *) __P, (__v2di) __A, __M);
  2272. }
  2273. extern __inline __m128i
  2274. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2275. _mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
  2276. {
  2277. return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
  2278. (__v4si) __O, __M);
  2279. }
  2280. extern __inline __m128i
  2281. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2282. _mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
  2283. {
  2284. return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
  2285. (__v4si)
  2286. _mm_setzero_si128 (),
  2287. __M);
  2288. }
  2289. extern __inline __m128i
  2290. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2291. _mm256_cvtusepi64_epi32 (__m256i __A)
  2292. {
  2293. return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
  2294. (__v4si)
  2295. _mm_undefined_si128 (),
  2296. (__mmask8) -1);
  2297. }
  2298. extern __inline void
  2299. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2300. _mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
  2301. {
  2302. __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
  2303. }
  2304. extern __inline __m128i
  2305. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2306. _mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
  2307. {
  2308. return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
  2309. (__v4si) __O, __M);
  2310. }
  2311. extern __inline __m128i
  2312. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2313. _mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
  2314. {
  2315. return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
  2316. (__v4si)
  2317. _mm_setzero_si128 (),
  2318. __M);
  2319. }
  2320. extern __inline __m256
  2321. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2322. _mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
  2323. {
  2324. return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
  2325. (__v8sf) __O,
  2326. __M);
  2327. }
  2328. extern __inline __m256
  2329. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2330. _mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
  2331. {
  2332. return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
  2333. (__v8sf)
  2334. _mm256_setzero_ps (),
  2335. __M);
  2336. }
  2337. extern __inline __m128
  2338. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2339. _mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
  2340. {
  2341. return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
  2342. (__v4sf) __O,
  2343. __M);
  2344. }
  2345. extern __inline __m128
  2346. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2347. _mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
  2348. {
  2349. return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
  2350. (__v4sf)
  2351. _mm_setzero_ps (),
  2352. __M);
  2353. }
  2354. extern __inline __m256d
  2355. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2356. _mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
  2357. {
  2358. return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
  2359. (__v4df) __O,
  2360. __M);
  2361. }
  2362. extern __inline __m256d
  2363. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2364. _mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
  2365. {
  2366. return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
  2367. (__v4df)
  2368. _mm256_setzero_pd (),
  2369. __M);
  2370. }
  2371. extern __inline __m256i
  2372. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2373. _mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
  2374. {
  2375. return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
  2376. (__v8si) __O,
  2377. __M);
  2378. }
  2379. extern __inline __m256i
  2380. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2381. _mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
  2382. {
  2383. return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
  2384. (__v8si)
  2385. _mm256_setzero_si256 (),
  2386. __M);
  2387. }
  2388. extern __inline __m256i
  2389. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2390. _mm256_mask_set1_epi32 (__m256i __O, __mmask8 __M, int __A)
  2391. {
  2392. return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A, (__v8si) __O,
  2393. __M);
  2394. }
  2395. extern __inline __m256i
  2396. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2397. _mm256_maskz_set1_epi32 (__mmask8 __M, int __A)
  2398. {
  2399. return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A,
  2400. (__v8si)
  2401. _mm256_setzero_si256 (),
  2402. __M);
  2403. }
  2404. extern __inline __m128i
  2405. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2406. _mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
  2407. {
  2408. return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
  2409. (__v4si) __O,
  2410. __M);
  2411. }
  2412. extern __inline __m128i
  2413. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2414. _mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
  2415. {
  2416. return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
  2417. (__v4si)
  2418. _mm_setzero_si128 (),
  2419. __M);
  2420. }
  2421. extern __inline __m128i
  2422. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2423. _mm_mask_set1_epi32 (__m128i __O, __mmask8 __M, int __A)
  2424. {
  2425. return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A, (__v4si) __O,
  2426. __M);
  2427. }
  2428. extern __inline __m128i
  2429. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2430. _mm_maskz_set1_epi32 (__mmask8 __M, int __A)
  2431. {
  2432. return (__m128i)
  2433. __builtin_ia32_pbroadcastd128_gpr_mask (__A,
  2434. (__v4si) _mm_setzero_si128 (),
  2435. __M);
  2436. }
  2437. extern __inline __m256i
  2438. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2439. _mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
  2440. {
  2441. return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
  2442. (__v4di) __O,
  2443. __M);
  2444. }
  2445. extern __inline __m256i
  2446. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2447. _mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
  2448. {
  2449. return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
  2450. (__v4di)
  2451. _mm256_setzero_si256 (),
  2452. __M);
  2453. }
  2454. extern __inline __m256i
  2455. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2456. _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
  2457. {
  2458. return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) __O,
  2459. __M);
  2460. }
  2461. extern __inline __m256i
  2462. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2463. _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
  2464. {
  2465. return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A,
  2466. (__v4di)
  2467. _mm256_setzero_si256 (),
  2468. __M);
  2469. }
  2470. extern __inline __m128i
  2471. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2472. _mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
  2473. {
  2474. return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
  2475. (__v2di) __O,
  2476. __M);
  2477. }
  2478. extern __inline __m128i
  2479. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2480. _mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
  2481. {
  2482. return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
  2483. (__v2di)
  2484. _mm_setzero_si128 (),
  2485. __M);
  2486. }
  2487. extern __inline __m128i
  2488. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2489. _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
  2490. {
  2491. return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) __O,
  2492. __M);
  2493. }
  2494. extern __inline __m128i
  2495. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2496. _mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
  2497. {
  2498. return (__m128i)
  2499. __builtin_ia32_pbroadcastq128_gpr_mask (__A,
  2500. (__v2di) _mm_setzero_si128 (),
  2501. __M);
  2502. }
  2503. extern __inline __m256
  2504. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2505. _mm256_broadcast_f32x4 (__m128 __A)
  2506. {
  2507. return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
  2508. (__v8sf)_mm256_undefined_pd (),
  2509. (__mmask8) -1);
  2510. }
  2511. extern __inline __m256
  2512. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2513. _mm256_mask_broadcast_f32x4 (__m256 __O, __mmask8 __M, __m128 __A)
  2514. {
  2515. return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
  2516. (__v8sf) __O,
  2517. __M);
  2518. }
  2519. extern __inline __m256
  2520. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2521. _mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
  2522. {
  2523. return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
  2524. (__v8sf)
  2525. _mm256_setzero_ps (),
  2526. __M);
  2527. }
  2528. extern __inline __m256i
  2529. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2530. _mm256_broadcast_i32x4 (__m128i __A)
  2531. {
  2532. return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
  2533. __A,
  2534. (__v8si)_mm256_undefined_si256 (),
  2535. (__mmask8) -1);
  2536. }
  2537. extern __inline __m256i
  2538. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2539. _mm256_mask_broadcast_i32x4 (__m256i __O, __mmask8 __M, __m128i __A)
  2540. {
  2541. return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
  2542. __A,
  2543. (__v8si)
  2544. __O, __M);
  2545. }
  2546. extern __inline __m256i
  2547. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2548. _mm256_maskz_broadcast_i32x4 (__mmask8 __M, __m128i __A)
  2549. {
  2550. return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
  2551. __A,
  2552. (__v8si)
  2553. _mm256_setzero_si256 (),
  2554. __M);
  2555. }
  2556. extern __inline __m256i
  2557. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2558. _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
  2559. {
  2560. return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
  2561. (__v8si) __W,
  2562. (__mmask8) __U);
  2563. }
  2564. extern __inline __m256i
  2565. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2566. _mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
  2567. {
  2568. return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
  2569. (__v8si)
  2570. _mm256_setzero_si256 (),
  2571. (__mmask8) __U);
  2572. }
  2573. extern __inline __m128i
  2574. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2575. _mm_mask_cvtepi8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
  2576. {
  2577. return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
  2578. (__v4si) __W,
  2579. (__mmask8) __U);
  2580. }
  2581. extern __inline __m128i
  2582. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2583. _mm_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
  2584. {
  2585. return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
  2586. (__v4si)
  2587. _mm_setzero_si128 (),
  2588. (__mmask8) __U);
  2589. }
  2590. extern __inline __m256i
  2591. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2592. _mm256_mask_cvtepi8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
  2593. {
  2594. return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
  2595. (__v4di) __W,
  2596. (__mmask8) __U);
  2597. }
  2598. extern __inline __m256i
  2599. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2600. _mm256_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
  2601. {
  2602. return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
  2603. (__v4di)
  2604. _mm256_setzero_si256 (),
  2605. (__mmask8) __U);
  2606. }
  2607. extern __inline __m128i
  2608. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2609. _mm_mask_cvtepi8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
  2610. {
  2611. return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
  2612. (__v2di) __W,
  2613. (__mmask8) __U);
  2614. }
  2615. extern __inline __m128i
  2616. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2617. _mm_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
  2618. {
  2619. return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
  2620. (__v2di)
  2621. _mm_setzero_si128 (),
  2622. (__mmask8) __U);
  2623. }
  2624. extern __inline __m256i
  2625. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2626. _mm256_mask_cvtepi16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
  2627. {
  2628. return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
  2629. (__v8si) __W,
  2630. (__mmask8) __U);
  2631. }
  2632. extern __inline __m256i
  2633. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2634. _mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
  2635. {
  2636. return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
  2637. (__v8si)
  2638. _mm256_setzero_si256 (),
  2639. (__mmask8) __U);
  2640. }
  2641. extern __inline __m128i
  2642. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2643. _mm_mask_cvtepi16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
  2644. {
  2645. return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
  2646. (__v4si) __W,
  2647. (__mmask8) __U);
  2648. }
  2649. extern __inline __m128i
  2650. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2651. _mm_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
  2652. {
  2653. return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
  2654. (__v4si)
  2655. _mm_setzero_si128 (),
  2656. (__mmask8) __U);
  2657. }
  2658. extern __inline __m256i
  2659. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2660. _mm256_mask_cvtepi16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
  2661. {
  2662. return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
  2663. (__v4di) __W,
  2664. (__mmask8) __U);
  2665. }
  2666. extern __inline __m256i
  2667. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2668. _mm256_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
  2669. {
  2670. return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
  2671. (__v4di)
  2672. _mm256_setzero_si256 (),
  2673. (__mmask8) __U);
  2674. }
  2675. extern __inline __m128i
  2676. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2677. _mm_mask_cvtepi16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
  2678. {
  2679. return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
  2680. (__v2di) __W,
  2681. (__mmask8) __U);
  2682. }
  2683. extern __inline __m128i
  2684. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2685. _mm_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
  2686. {
  2687. return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
  2688. (__v2di)
  2689. _mm_setzero_si128 (),
  2690. (__mmask8) __U);
  2691. }
  2692. extern __inline __m256i
  2693. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2694. _mm256_mask_cvtepi32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
  2695. {
  2696. return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
  2697. (__v4di) __W,
  2698. (__mmask8) __U);
  2699. }
  2700. extern __inline __m256i
  2701. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2702. _mm256_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
  2703. {
  2704. return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
  2705. (__v4di)
  2706. _mm256_setzero_si256 (),
  2707. (__mmask8) __U);
  2708. }
  2709. extern __inline __m128i
  2710. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2711. _mm_mask_cvtepi32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
  2712. {
  2713. return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
  2714. (__v2di) __W,
  2715. (__mmask8) __U);
  2716. }
  2717. extern __inline __m128i
  2718. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2719. _mm_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
  2720. {
  2721. return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
  2722. (__v2di)
  2723. _mm_setzero_si128 (),
  2724. (__mmask8) __U);
  2725. }
  2726. extern __inline __m256i
  2727. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2728. _mm256_mask_cvtepu8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
  2729. {
  2730. return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
  2731. (__v8si) __W,
  2732. (__mmask8) __U);
  2733. }
  2734. extern __inline __m256i
  2735. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2736. _mm256_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
  2737. {
  2738. return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
  2739. (__v8si)
  2740. _mm256_setzero_si256 (),
  2741. (__mmask8) __U);
  2742. }
  2743. extern __inline __m128i
  2744. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2745. _mm_mask_cvtepu8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
  2746. {
  2747. return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
  2748. (__v4si) __W,
  2749. (__mmask8) __U);
  2750. }
  2751. extern __inline __m128i
  2752. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2753. _mm_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
  2754. {
  2755. return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
  2756. (__v4si)
  2757. _mm_setzero_si128 (),
  2758. (__mmask8) __U);
  2759. }
  2760. extern __inline __m256i
  2761. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2762. _mm256_mask_cvtepu8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
  2763. {
  2764. return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
  2765. (__v4di) __W,
  2766. (__mmask8) __U);
  2767. }
  2768. extern __inline __m256i
  2769. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2770. _mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
  2771. {
  2772. return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
  2773. (__v4di)
  2774. _mm256_setzero_si256 (),
  2775. (__mmask8) __U);
  2776. }
  2777. extern __inline __m128i
  2778. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2779. _mm_mask_cvtepu8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
  2780. {
  2781. return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
  2782. (__v2di) __W,
  2783. (__mmask8) __U);
  2784. }
  2785. extern __inline __m128i
  2786. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2787. _mm_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
  2788. {
  2789. return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
  2790. (__v2di)
  2791. _mm_setzero_si128 (),
  2792. (__mmask8) __U);
  2793. }
  2794. extern __inline __m256i
  2795. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2796. _mm256_mask_cvtepu16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
  2797. {
  2798. return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
  2799. (__v8si) __W,
  2800. (__mmask8) __U);
  2801. }
  2802. extern __inline __m256i
  2803. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2804. _mm256_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
  2805. {
  2806. return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
  2807. (__v8si)
  2808. _mm256_setzero_si256 (),
  2809. (__mmask8) __U);
  2810. }
  2811. extern __inline __m128i
  2812. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2813. _mm_mask_cvtepu16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
  2814. {
  2815. return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
  2816. (__v4si) __W,
  2817. (__mmask8) __U);
  2818. }
  2819. extern __inline __m128i
  2820. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2821. _mm_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
  2822. {
  2823. return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
  2824. (__v4si)
  2825. _mm_setzero_si128 (),
  2826. (__mmask8) __U);
  2827. }
  2828. extern __inline __m256i
  2829. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2830. _mm256_mask_cvtepu16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
  2831. {
  2832. return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
  2833. (__v4di) __W,
  2834. (__mmask8) __U);
  2835. }
  2836. extern __inline __m256i
  2837. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2838. _mm256_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
  2839. {
  2840. return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
  2841. (__v4di)
  2842. _mm256_setzero_si256 (),
  2843. (__mmask8) __U);
  2844. }
  2845. extern __inline __m128i
  2846. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2847. _mm_mask_cvtepu16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
  2848. {
  2849. return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
  2850. (__v2di) __W,
  2851. (__mmask8) __U);
  2852. }
  2853. extern __inline __m128i
  2854. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2855. _mm_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
  2856. {
  2857. return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
  2858. (__v2di)
  2859. _mm_setzero_si128 (),
  2860. (__mmask8) __U);
  2861. }
  2862. extern __inline __m256i
  2863. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2864. _mm256_mask_cvtepu32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
  2865. {
  2866. return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
  2867. (__v4di) __W,
  2868. (__mmask8) __U);
  2869. }
  2870. extern __inline __m256i
  2871. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2872. _mm256_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
  2873. {
  2874. return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
  2875. (__v4di)
  2876. _mm256_setzero_si256 (),
  2877. (__mmask8) __U);
  2878. }
  2879. extern __inline __m128i
  2880. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2881. _mm_mask_cvtepu32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
  2882. {
  2883. return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
  2884. (__v2di) __W,
  2885. (__mmask8) __U);
  2886. }
  2887. extern __inline __m128i
  2888. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2889. _mm_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
  2890. {
  2891. return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
  2892. (__v2di)
  2893. _mm_setzero_si128 (),
  2894. (__mmask8) __U);
  2895. }
  2896. extern __inline __m256d
  2897. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2898. _mm256_rcp14_pd (__m256d __A)
  2899. {
  2900. return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
  2901. (__v4df)
  2902. _mm256_setzero_pd (),
  2903. (__mmask8) -1);
  2904. }
  2905. extern __inline __m256d
  2906. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2907. _mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
  2908. {
  2909. return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
  2910. (__v4df) __W,
  2911. (__mmask8) __U);
  2912. }
  2913. extern __inline __m256d
  2914. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2915. _mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
  2916. {
  2917. return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
  2918. (__v4df)
  2919. _mm256_setzero_pd (),
  2920. (__mmask8) __U);
  2921. }
  2922. extern __inline __m128d
  2923. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2924. _mm_rcp14_pd (__m128d __A)
  2925. {
  2926. return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
  2927. (__v2df)
  2928. _mm_setzero_pd (),
  2929. (__mmask8) -1);
  2930. }
  2931. extern __inline __m128d
  2932. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2933. _mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
  2934. {
  2935. return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
  2936. (__v2df) __W,
  2937. (__mmask8) __U);
  2938. }
  2939. extern __inline __m128d
  2940. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2941. _mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
  2942. {
  2943. return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
  2944. (__v2df)
  2945. _mm_setzero_pd (),
  2946. (__mmask8) __U);
  2947. }
  2948. extern __inline __m256
  2949. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2950. _mm256_rcp14_ps (__m256 __A)
  2951. {
  2952. return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
  2953. (__v8sf)
  2954. _mm256_setzero_ps (),
  2955. (__mmask8) -1);
  2956. }
  2957. extern __inline __m256
  2958. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2959. _mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
  2960. {
  2961. return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
  2962. (__v8sf) __W,
  2963. (__mmask8) __U);
  2964. }
  2965. extern __inline __m256
  2966. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2967. _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
  2968. {
  2969. return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
  2970. (__v8sf)
  2971. _mm256_setzero_ps (),
  2972. (__mmask8) __U);
  2973. }
  2974. extern __inline __m128
  2975. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2976. _mm_rcp14_ps (__m128 __A)
  2977. {
  2978. return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
  2979. (__v4sf)
  2980. _mm_setzero_ps (),
  2981. (__mmask8) -1);
  2982. }
  2983. extern __inline __m128
  2984. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2985. _mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
  2986. {
  2987. return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
  2988. (__v4sf) __W,
  2989. (__mmask8) __U);
  2990. }
  2991. extern __inline __m128
  2992. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2993. _mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
  2994. {
  2995. return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
  2996. (__v4sf)
  2997. _mm_setzero_ps (),
  2998. (__mmask8) __U);
  2999. }
  3000. extern __inline __m256d
  3001. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3002. _mm256_rsqrt14_pd (__m256d __A)
  3003. {
  3004. return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
  3005. (__v4df)
  3006. _mm256_setzero_pd (),
  3007. (__mmask8) -1);
  3008. }
  3009. extern __inline __m256d
  3010. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3011. _mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
  3012. {
  3013. return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
  3014. (__v4df) __W,
  3015. (__mmask8) __U);
  3016. }
  3017. extern __inline __m256d
  3018. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3019. _mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
  3020. {
  3021. return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
  3022. (__v4df)
  3023. _mm256_setzero_pd (),
  3024. (__mmask8) __U);
  3025. }
  3026. extern __inline __m128d
  3027. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3028. _mm_rsqrt14_pd (__m128d __A)
  3029. {
  3030. return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
  3031. (__v2df)
  3032. _mm_setzero_pd (),
  3033. (__mmask8) -1);
  3034. }
  3035. extern __inline __m128d
  3036. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3037. _mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
  3038. {
  3039. return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
  3040. (__v2df) __W,
  3041. (__mmask8) __U);
  3042. }
  3043. extern __inline __m128d
  3044. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3045. _mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
  3046. {
  3047. return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
  3048. (__v2df)
  3049. _mm_setzero_pd (),
  3050. (__mmask8) __U);
  3051. }
  3052. extern __inline __m256
  3053. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3054. _mm256_rsqrt14_ps (__m256 __A)
  3055. {
  3056. return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
  3057. (__v8sf)
  3058. _mm256_setzero_ps (),
  3059. (__mmask8) -1);
  3060. }
  3061. extern __inline __m256
  3062. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3063. _mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
  3064. {
  3065. return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
  3066. (__v8sf) __W,
  3067. (__mmask8) __U);
  3068. }
  3069. extern __inline __m256
  3070. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3071. _mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
  3072. {
  3073. return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
  3074. (__v8sf)
  3075. _mm256_setzero_ps (),
  3076. (__mmask8) __U);
  3077. }
  3078. extern __inline __m128
  3079. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3080. _mm_rsqrt14_ps (__m128 __A)
  3081. {
  3082. return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
  3083. (__v4sf)
  3084. _mm_setzero_ps (),
  3085. (__mmask8) -1);
  3086. }
  3087. extern __inline __m128
  3088. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3089. _mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
  3090. {
  3091. return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
  3092. (__v4sf) __W,
  3093. (__mmask8) __U);
  3094. }
  3095. extern __inline __m128
  3096. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3097. _mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
  3098. {
  3099. return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
  3100. (__v4sf)
  3101. _mm_setzero_ps (),
  3102. (__mmask8) __U);
  3103. }
  3104. extern __inline __m256d
  3105. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3106. _mm256_mask_sqrt_pd (__m256d __W, __mmask8 __U, __m256d __A)
  3107. {
  3108. return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
  3109. (__v4df) __W,
  3110. (__mmask8) __U);
  3111. }
  3112. extern __inline __m256d
  3113. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3114. _mm256_maskz_sqrt_pd (__mmask8 __U, __m256d __A)
  3115. {
  3116. return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
  3117. (__v4df)
  3118. _mm256_setzero_pd (),
  3119. (__mmask8) __U);
  3120. }
  3121. extern __inline __m128d
  3122. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3123. _mm_mask_sqrt_pd (__m128d __W, __mmask8 __U, __m128d __A)
  3124. {
  3125. return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
  3126. (__v2df) __W,
  3127. (__mmask8) __U);
  3128. }
  3129. extern __inline __m128d
  3130. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3131. _mm_maskz_sqrt_pd (__mmask8 __U, __m128d __A)
  3132. {
  3133. return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
  3134. (__v2df)
  3135. _mm_setzero_pd (),
  3136. (__mmask8) __U);
  3137. }
  3138. extern __inline __m256
  3139. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3140. _mm256_mask_sqrt_ps (__m256 __W, __mmask8 __U, __m256 __A)
  3141. {
  3142. return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
  3143. (__v8sf) __W,
  3144. (__mmask8) __U);
  3145. }
  3146. extern __inline __m256
  3147. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3148. _mm256_maskz_sqrt_ps (__mmask8 __U, __m256 __A)
  3149. {
  3150. return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
  3151. (__v8sf)
  3152. _mm256_setzero_ps (),
  3153. (__mmask8) __U);
  3154. }
  3155. extern __inline __m128
  3156. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3157. _mm_mask_sqrt_ps (__m128 __W, __mmask8 __U, __m128 __A)
  3158. {
  3159. return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
  3160. (__v4sf) __W,
  3161. (__mmask8) __U);
  3162. }
  3163. extern __inline __m128
  3164. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3165. _mm_maskz_sqrt_ps (__mmask8 __U, __m128 __A)
  3166. {
  3167. return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
  3168. (__v4sf)
  3169. _mm_setzero_ps (),
  3170. (__mmask8) __U);
  3171. }
  3172. extern __inline __m256i
  3173. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3174. _mm256_mask_add_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  3175. __m256i __B)
  3176. {
  3177. return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
  3178. (__v8si) __B,
  3179. (__v8si) __W,
  3180. (__mmask8) __U);
  3181. }
  3182. extern __inline __m256i
  3183. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3184. _mm256_maskz_add_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
  3185. {
  3186. return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
  3187. (__v8si) __B,
  3188. (__v8si)
  3189. _mm256_setzero_si256 (),
  3190. (__mmask8) __U);
  3191. }
  3192. extern __inline __m256i
  3193. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3194. _mm256_mask_add_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  3195. __m256i __B)
  3196. {
  3197. return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
  3198. (__v4di) __B,
  3199. (__v4di) __W,
  3200. (__mmask8) __U);
  3201. }
  3202. extern __inline __m256i
  3203. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3204. _mm256_maskz_add_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
  3205. {
  3206. return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
  3207. (__v4di) __B,
  3208. (__v4di)
  3209. _mm256_setzero_si256 (),
  3210. (__mmask8) __U);
  3211. }
  3212. extern __inline __m256i
  3213. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3214. _mm256_mask_sub_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  3215. __m256i __B)
  3216. {
  3217. return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
  3218. (__v8si) __B,
  3219. (__v8si) __W,
  3220. (__mmask8) __U);
  3221. }
  3222. extern __inline __m256i
  3223. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3224. _mm256_maskz_sub_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
  3225. {
  3226. return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
  3227. (__v8si) __B,
  3228. (__v8si)
  3229. _mm256_setzero_si256 (),
  3230. (__mmask8) __U);
  3231. }
  3232. extern __inline __m256i
  3233. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3234. _mm256_mask_sub_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  3235. __m256i __B)
  3236. {
  3237. return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
  3238. (__v4di) __B,
  3239. (__v4di) __W,
  3240. (__mmask8) __U);
  3241. }
  3242. extern __inline __m256i
  3243. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3244. _mm256_maskz_sub_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
  3245. {
  3246. return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
  3247. (__v4di) __B,
  3248. (__v4di)
  3249. _mm256_setzero_si256 (),
  3250. (__mmask8) __U);
  3251. }
  3252. extern __inline __m128i
  3253. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3254. _mm_mask_add_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  3255. __m128i __B)
  3256. {
  3257. return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
  3258. (__v4si) __B,
  3259. (__v4si) __W,
  3260. (__mmask8) __U);
  3261. }
  3262. extern __inline __m128i
  3263. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3264. _mm_maskz_add_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
  3265. {
  3266. return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
  3267. (__v4si) __B,
  3268. (__v4si)
  3269. _mm_setzero_si128 (),
  3270. (__mmask8) __U);
  3271. }
  3272. extern __inline __m128i
  3273. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3274. _mm_mask_add_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
  3275. __m128i __B)
  3276. {
  3277. return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
  3278. (__v2di) __B,
  3279. (__v2di) __W,
  3280. (__mmask8) __U);
  3281. }
  3282. extern __inline __m128i
  3283. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3284. _mm_maskz_add_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
  3285. {
  3286. return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
  3287. (__v2di) __B,
  3288. (__v2di)
  3289. _mm_setzero_si128 (),
  3290. (__mmask8) __U);
  3291. }
  3292. extern __inline __m128i
  3293. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3294. _mm_mask_sub_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  3295. __m128i __B)
  3296. {
  3297. return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
  3298. (__v4si) __B,
  3299. (__v4si) __W,
  3300. (__mmask8) __U);
  3301. }
  3302. extern __inline __m128i
  3303. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3304. _mm_maskz_sub_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
  3305. {
  3306. return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
  3307. (__v4si) __B,
  3308. (__v4si)
  3309. _mm_setzero_si128 (),
  3310. (__mmask8) __U);
  3311. }
  3312. extern __inline __m128i
  3313. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3314. _mm_mask_sub_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
  3315. __m128i __B)
  3316. {
  3317. return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
  3318. (__v2di) __B,
  3319. (__v2di) __W,
  3320. (__mmask8) __U);
  3321. }
  3322. extern __inline __m128i
  3323. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3324. _mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
  3325. {
  3326. return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
  3327. (__v2di) __B,
  3328. (__v2di)
  3329. _mm_setzero_si128 (),
  3330. (__mmask8) __U);
  3331. }
  3332. extern __inline __m256
  3333. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3334. _mm256_getexp_ps (__m256 __A)
  3335. {
  3336. return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
  3337. (__v8sf)
  3338. _mm256_setzero_ps (),
  3339. (__mmask8) -1);
  3340. }
  3341. extern __inline __m256
  3342. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3343. _mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A)
  3344. {
  3345. return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
  3346. (__v8sf) __W,
  3347. (__mmask8) __U);
  3348. }
  3349. extern __inline __m256
  3350. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3351. _mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A)
  3352. {
  3353. return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
  3354. (__v8sf)
  3355. _mm256_setzero_ps (),
  3356. (__mmask8) __U);
  3357. }
  3358. extern __inline __m256d
  3359. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3360. _mm256_getexp_pd (__m256d __A)
  3361. {
  3362. return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
  3363. (__v4df)
  3364. _mm256_setzero_pd (),
  3365. (__mmask8) -1);
  3366. }
  3367. extern __inline __m256d
  3368. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3369. _mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A)
  3370. {
  3371. return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
  3372. (__v4df) __W,
  3373. (__mmask8) __U);
  3374. }
  3375. extern __inline __m256d
  3376. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3377. _mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A)
  3378. {
  3379. return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
  3380. (__v4df)
  3381. _mm256_setzero_pd (),
  3382. (__mmask8) __U);
  3383. }
  3384. extern __inline __m128
  3385. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3386. _mm_getexp_ps (__m128 __A)
  3387. {
  3388. return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
  3389. (__v4sf)
  3390. _mm_setzero_ps (),
  3391. (__mmask8) -1);
  3392. }
  3393. extern __inline __m128
  3394. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3395. _mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A)
  3396. {
  3397. return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
  3398. (__v4sf) __W,
  3399. (__mmask8) __U);
  3400. }
  3401. extern __inline __m128
  3402. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3403. _mm_maskz_getexp_ps (__mmask8 __U, __m128 __A)
  3404. {
  3405. return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
  3406. (__v4sf)
  3407. _mm_setzero_ps (),
  3408. (__mmask8) __U);
  3409. }
  3410. extern __inline __m128d
  3411. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3412. _mm_getexp_pd (__m128d __A)
  3413. {
  3414. return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
  3415. (__v2df)
  3416. _mm_setzero_pd (),
  3417. (__mmask8) -1);
  3418. }
  3419. extern __inline __m128d
  3420. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3421. _mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A)
  3422. {
  3423. return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
  3424. (__v2df) __W,
  3425. (__mmask8) __U);
  3426. }
  3427. extern __inline __m128d
  3428. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3429. _mm_maskz_getexp_pd (__mmask8 __U, __m128d __A)
  3430. {
  3431. return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
  3432. (__v2df)
  3433. _mm_setzero_pd (),
  3434. (__mmask8) __U);
  3435. }
  3436. extern __inline __m256i
  3437. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3438. _mm256_mask_srl_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  3439. __m128i __B)
  3440. {
  3441. return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
  3442. (__v4si) __B,
  3443. (__v8si) __W,
  3444. (__mmask8) __U);
  3445. }
  3446. extern __inline __m256i
  3447. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3448. _mm256_maskz_srl_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
  3449. {
  3450. return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
  3451. (__v4si) __B,
  3452. (__v8si)
  3453. _mm256_setzero_si256 (),
  3454. (__mmask8) __U);
  3455. }
  3456. extern __inline __m128i
  3457. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3458. _mm_mask_srl_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  3459. __m128i __B)
  3460. {
  3461. return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
  3462. (__v4si) __B,
  3463. (__v4si) __W,
  3464. (__mmask8) __U);
  3465. }
  3466. extern __inline __m128i
  3467. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3468. _mm_maskz_srl_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
  3469. {
  3470. return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
  3471. (__v4si) __B,
  3472. (__v4si)
  3473. _mm_setzero_si128 (),
  3474. (__mmask8) __U);
  3475. }
  3476. extern __inline __m256i
  3477. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3478. _mm256_mask_srl_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  3479. __m128i __B)
  3480. {
  3481. return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
  3482. (__v2di) __B,
  3483. (__v4di) __W,
  3484. (__mmask8) __U);
  3485. }
  3486. extern __inline __m256i
  3487. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3488. _mm256_maskz_srl_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
  3489. {
  3490. return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
  3491. (__v2di) __B,
  3492. (__v4di)
  3493. _mm256_setzero_si256 (),
  3494. (__mmask8) __U);
  3495. }
  3496. extern __inline __m128i
  3497. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3498. _mm_mask_srl_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
  3499. __m128i __B)
  3500. {
  3501. return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
  3502. (__v2di) __B,
  3503. (__v2di) __W,
  3504. (__mmask8) __U);
  3505. }
  3506. extern __inline __m128i
  3507. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3508. _mm_maskz_srl_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
  3509. {
  3510. return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
  3511. (__v2di) __B,
  3512. (__v2di)
  3513. _mm_setzero_si128 (),
  3514. (__mmask8) __U);
  3515. }
  3516. extern __inline __m256i
  3517. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3518. _mm256_mask_and_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  3519. __m256i __B)
  3520. {
  3521. return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
  3522. (__v8si) __B,
  3523. (__v8si) __W,
  3524. (__mmask8) __U);
  3525. }
  3526. extern __inline __m256i
  3527. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3528. _mm256_maskz_and_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
  3529. {
  3530. return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
  3531. (__v8si) __B,
  3532. (__v8si)
  3533. _mm256_setzero_si256 (),
  3534. (__mmask8) __U);
  3535. }
  3536. extern __inline __m256d
  3537. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3538. _mm256_scalef_pd (__m256d __A, __m256d __B)
  3539. {
  3540. return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
  3541. (__v4df) __B,
  3542. (__v4df)
  3543. _mm256_setzero_pd (),
  3544. (__mmask8) -1);
  3545. }
  3546. extern __inline __m256d
  3547. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3548. _mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
  3549. __m256d __B)
  3550. {
  3551. return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
  3552. (__v4df) __B,
  3553. (__v4df) __W,
  3554. (__mmask8) __U);
  3555. }
  3556. extern __inline __m256d
  3557. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3558. _mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B)
  3559. {
  3560. return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
  3561. (__v4df) __B,
  3562. (__v4df)
  3563. _mm256_setzero_pd (),
  3564. (__mmask8) __U);
  3565. }
  3566. extern __inline __m256
  3567. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3568. _mm256_scalef_ps (__m256 __A, __m256 __B)
  3569. {
  3570. return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
  3571. (__v8sf) __B,
  3572. (__v8sf)
  3573. _mm256_setzero_ps (),
  3574. (__mmask8) -1);
  3575. }
  3576. extern __inline __m256
  3577. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3578. _mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
  3579. __m256 __B)
  3580. {
  3581. return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
  3582. (__v8sf) __B,
  3583. (__v8sf) __W,
  3584. (__mmask8) __U);
  3585. }
  3586. extern __inline __m256
  3587. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3588. _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B)
  3589. {
  3590. return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
  3591. (__v8sf) __B,
  3592. (__v8sf)
  3593. _mm256_setzero_ps (),
  3594. (__mmask8) __U);
  3595. }
  3596. extern __inline __m128d
  3597. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3598. _mm_scalef_pd (__m128d __A, __m128d __B)
  3599. {
  3600. return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
  3601. (__v2df) __B,
  3602. (__v2df)
  3603. _mm_setzero_pd (),
  3604. (__mmask8) -1);
  3605. }
  3606. extern __inline __m128d
  3607. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3608. _mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
  3609. __m128d __B)
  3610. {
  3611. return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
  3612. (__v2df) __B,
  3613. (__v2df) __W,
  3614. (__mmask8) __U);
  3615. }
  3616. extern __inline __m128d
  3617. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3618. _mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B)
  3619. {
  3620. return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
  3621. (__v2df) __B,
  3622. (__v2df)
  3623. _mm_setzero_pd (),
  3624. (__mmask8) __U);
  3625. }
  3626. extern __inline __m128
  3627. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3628. _mm_scalef_ps (__m128 __A, __m128 __B)
  3629. {
  3630. return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
  3631. (__v4sf) __B,
  3632. (__v4sf)
  3633. _mm_setzero_ps (),
  3634. (__mmask8) -1);
  3635. }
  3636. extern __inline __m128
  3637. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3638. _mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  3639. {
  3640. return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
  3641. (__v4sf) __B,
  3642. (__v4sf) __W,
  3643. (__mmask8) __U);
  3644. }
  3645. extern __inline __m128
  3646. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3647. _mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B)
  3648. {
  3649. return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
  3650. (__v4sf) __B,
  3651. (__v4sf)
  3652. _mm_setzero_ps (),
  3653. (__mmask8) __U);
  3654. }
  3655. extern __inline __m256d
  3656. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3657. _mm256_mask_fmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
  3658. __m256d __C)
  3659. {
  3660. return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
  3661. (__v4df) __B,
  3662. (__v4df) __C,
  3663. (__mmask8) __U);
  3664. }
  3665. extern __inline __m256d
  3666. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3667. _mm256_mask3_fmadd_pd (__m256d __A, __m256d __B, __m256d __C,
  3668. __mmask8 __U)
  3669. {
  3670. return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A,
  3671. (__v4df) __B,
  3672. (__v4df) __C,
  3673. (__mmask8) __U);
  3674. }
  3675. extern __inline __m256d
  3676. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3677. _mm256_maskz_fmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
  3678. __m256d __C)
  3679. {
  3680. return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
  3681. (__v4df) __B,
  3682. (__v4df) __C,
  3683. (__mmask8) __U);
  3684. }
  3685. extern __inline __m128d
  3686. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3687. _mm_mask_fmadd_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
  3688. {
  3689. return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
  3690. (__v2df) __B,
  3691. (__v2df) __C,
  3692. (__mmask8) __U);
  3693. }
  3694. extern __inline __m128d
  3695. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3696. _mm_mask3_fmadd_pd (__m128d __A, __m128d __B, __m128d __C,
  3697. __mmask8 __U)
  3698. {
  3699. return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A,
  3700. (__v2df) __B,
  3701. (__v2df) __C,
  3702. (__mmask8) __U);
  3703. }
  3704. extern __inline __m128d
  3705. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3706. _mm_maskz_fmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
  3707. __m128d __C)
  3708. {
  3709. return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
  3710. (__v2df) __B,
  3711. (__v2df) __C,
  3712. (__mmask8) __U);
  3713. }
  3714. extern __inline __m256
  3715. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3716. _mm256_mask_fmadd_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
  3717. {
  3718. return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
  3719. (__v8sf) __B,
  3720. (__v8sf) __C,
  3721. (__mmask8) __U);
  3722. }
  3723. extern __inline __m256
  3724. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3725. _mm256_mask3_fmadd_ps (__m256 __A, __m256 __B, __m256 __C,
  3726. __mmask8 __U)
  3727. {
  3728. return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A,
  3729. (__v8sf) __B,
  3730. (__v8sf) __C,
  3731. (__mmask8) __U);
  3732. }
  3733. extern __inline __m256
  3734. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3735. _mm256_maskz_fmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
  3736. __m256 __C)
  3737. {
  3738. return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
  3739. (__v8sf) __B,
  3740. (__v8sf) __C,
  3741. (__mmask8) __U);
  3742. }
  3743. extern __inline __m128
  3744. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3745. _mm_mask_fmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
  3746. {
  3747. return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
  3748. (__v4sf) __B,
  3749. (__v4sf) __C,
  3750. (__mmask8) __U);
  3751. }
  3752. extern __inline __m128
  3753. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3754. _mm_mask3_fmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
  3755. {
  3756. return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A,
  3757. (__v4sf) __B,
  3758. (__v4sf) __C,
  3759. (__mmask8) __U);
  3760. }
  3761. extern __inline __m128
  3762. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3763. _mm_maskz_fmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
  3764. {
  3765. return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
  3766. (__v4sf) __B,
  3767. (__v4sf) __C,
  3768. (__mmask8) __U);
  3769. }
  3770. extern __inline __m256d
  3771. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3772. _mm256_mask_fmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
  3773. __m256d __C)
  3774. {
  3775. return (__m256d) __builtin_ia32_vfmsubpd256_mask ((__v4df) __A,
  3776. (__v4df) __B,
  3777. (__v4df) __C,
  3778. (__mmask8) __U);
  3779. }
  3780. extern __inline __m256d
  3781. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3782. _mm256_mask3_fmsub_pd (__m256d __A, __m256d __B, __m256d __C,
  3783. __mmask8 __U)
  3784. {
  3785. return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A,
  3786. (__v4df) __B,
  3787. (__v4df) __C,
  3788. (__mmask8) __U);
  3789. }
  3790. extern __inline __m256d
  3791. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3792. _mm256_maskz_fmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
  3793. __m256d __C)
  3794. {
  3795. return (__m256d) __builtin_ia32_vfmsubpd256_maskz ((__v4df) __A,
  3796. (__v4df) __B,
  3797. (__v4df) __C,
  3798. (__mmask8) __U);
  3799. }
  3800. extern __inline __m128d
  3801. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3802. _mm_mask_fmsub_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
  3803. {
  3804. return (__m128d) __builtin_ia32_vfmsubpd128_mask ((__v2df) __A,
  3805. (__v2df) __B,
  3806. (__v2df) __C,
  3807. (__mmask8) __U);
  3808. }
  3809. extern __inline __m128d
  3810. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3811. _mm_mask3_fmsub_pd (__m128d __A, __m128d __B, __m128d __C,
  3812. __mmask8 __U)
  3813. {
  3814. return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A,
  3815. (__v2df) __B,
  3816. (__v2df) __C,
  3817. (__mmask8) __U);
  3818. }
  3819. extern __inline __m128d
  3820. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3821. _mm_maskz_fmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
  3822. __m128d __C)
  3823. {
  3824. return (__m128d) __builtin_ia32_vfmsubpd128_maskz ((__v2df) __A,
  3825. (__v2df) __B,
  3826. (__v2df) __C,
  3827. (__mmask8) __U);
  3828. }
  3829. extern __inline __m256
  3830. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3831. _mm256_mask_fmsub_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
  3832. {
  3833. return (__m256) __builtin_ia32_vfmsubps256_mask ((__v8sf) __A,
  3834. (__v8sf) __B,
  3835. (__v8sf) __C,
  3836. (__mmask8) __U);
  3837. }
  3838. extern __inline __m256
  3839. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3840. _mm256_mask3_fmsub_ps (__m256 __A, __m256 __B, __m256 __C,
  3841. __mmask8 __U)
  3842. {
  3843. return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A,
  3844. (__v8sf) __B,
  3845. (__v8sf) __C,
  3846. (__mmask8) __U);
  3847. }
  3848. extern __inline __m256
  3849. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3850. _mm256_maskz_fmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
  3851. __m256 __C)
  3852. {
  3853. return (__m256) __builtin_ia32_vfmsubps256_maskz ((__v8sf) __A,
  3854. (__v8sf) __B,
  3855. (__v8sf) __C,
  3856. (__mmask8) __U);
  3857. }
  3858. extern __inline __m128
  3859. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3860. _mm_mask_fmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
  3861. {
  3862. return (__m128) __builtin_ia32_vfmsubps128_mask ((__v4sf) __A,
  3863. (__v4sf) __B,
  3864. (__v4sf) __C,
  3865. (__mmask8) __U);
  3866. }
  3867. extern __inline __m128
  3868. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3869. _mm_mask3_fmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
  3870. {
  3871. return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A,
  3872. (__v4sf) __B,
  3873. (__v4sf) __C,
  3874. (__mmask8) __U);
  3875. }
  3876. extern __inline __m128
  3877. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3878. _mm_maskz_fmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
  3879. {
  3880. return (__m128) __builtin_ia32_vfmsubps128_maskz ((__v4sf) __A,
  3881. (__v4sf) __B,
  3882. (__v4sf) __C,
  3883. (__mmask8) __U);
  3884. }
  3885. extern __inline __m256d
  3886. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3887. _mm256_mask_fmaddsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
  3888. __m256d __C)
  3889. {
  3890. return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
  3891. (__v4df) __B,
  3892. (__v4df) __C,
  3893. (__mmask8) __U);
  3894. }
  3895. extern __inline __m256d
  3896. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3897. _mm256_mask3_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C,
  3898. __mmask8 __U)
  3899. {
  3900. return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A,
  3901. (__v4df) __B,
  3902. (__v4df) __C,
  3903. (__mmask8)
  3904. __U);
  3905. }
  3906. extern __inline __m256d
  3907. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3908. _mm256_maskz_fmaddsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
  3909. __m256d __C)
  3910. {
  3911. return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
  3912. (__v4df) __B,
  3913. (__v4df) __C,
  3914. (__mmask8)
  3915. __U);
  3916. }
  3917. extern __inline __m128d
  3918. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3919. _mm_mask_fmaddsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
  3920. __m128d __C)
  3921. {
  3922. return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
  3923. (__v2df) __B,
  3924. (__v2df) __C,
  3925. (__mmask8) __U);
  3926. }
  3927. extern __inline __m128d
  3928. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3929. _mm_mask3_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C,
  3930. __mmask8 __U)
  3931. {
  3932. return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A,
  3933. (__v2df) __B,
  3934. (__v2df) __C,
  3935. (__mmask8)
  3936. __U);
  3937. }
  3938. extern __inline __m128d
  3939. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3940. _mm_maskz_fmaddsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
  3941. __m128d __C)
  3942. {
  3943. return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
  3944. (__v2df) __B,
  3945. (__v2df) __C,
  3946. (__mmask8)
  3947. __U);
  3948. }
  3949. extern __inline __m256
  3950. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3951. _mm256_mask_fmaddsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
  3952. __m256 __C)
  3953. {
  3954. return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
  3955. (__v8sf) __B,
  3956. (__v8sf) __C,
  3957. (__mmask8) __U);
  3958. }
  3959. extern __inline __m256
  3960. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3961. _mm256_mask3_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C,
  3962. __mmask8 __U)
  3963. {
  3964. return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A,
  3965. (__v8sf) __B,
  3966. (__v8sf) __C,
  3967. (__mmask8) __U);
  3968. }
  3969. extern __inline __m256
  3970. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3971. _mm256_maskz_fmaddsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
  3972. __m256 __C)
  3973. {
  3974. return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
  3975. (__v8sf) __B,
  3976. (__v8sf) __C,
  3977. (__mmask8) __U);
  3978. }
  3979. extern __inline __m128
  3980. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3981. _mm_mask_fmaddsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
  3982. {
  3983. return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
  3984. (__v4sf) __B,
  3985. (__v4sf) __C,
  3986. (__mmask8) __U);
  3987. }
  3988. extern __inline __m128
  3989. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3990. _mm_mask3_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C,
  3991. __mmask8 __U)
  3992. {
  3993. return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A,
  3994. (__v4sf) __B,
  3995. (__v4sf) __C,
  3996. (__mmask8) __U);
  3997. }
  3998. extern __inline __m128
  3999. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4000. _mm_maskz_fmaddsub_ps (__mmask8 __U, __m128 __A, __m128 __B,
  4001. __m128 __C)
  4002. {
  4003. return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
  4004. (__v4sf) __B,
  4005. (__v4sf) __C,
  4006. (__mmask8) __U);
  4007. }
  4008. extern __inline __m256d
  4009. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4010. _mm256_mask_fmsubadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
  4011. __m256d __C)
  4012. {
  4013. return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
  4014. (__v4df) __B,
  4015. -(__v4df) __C,
  4016. (__mmask8) __U);
  4017. }
  4018. extern __inline __m256d
  4019. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4020. _mm256_mask3_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C,
  4021. __mmask8 __U)
  4022. {
  4023. return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A,
  4024. (__v4df) __B,
  4025. (__v4df) __C,
  4026. (__mmask8)
  4027. __U);
  4028. }
  4029. extern __inline __m256d
  4030. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4031. _mm256_maskz_fmsubadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
  4032. __m256d __C)
  4033. {
  4034. return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
  4035. (__v4df) __B,
  4036. -(__v4df) __C,
  4037. (__mmask8)
  4038. __U);
  4039. }
  4040. extern __inline __m128d
  4041. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4042. _mm_mask_fmsubadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
  4043. __m128d __C)
  4044. {
  4045. return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
  4046. (__v2df) __B,
  4047. -(__v2df) __C,
  4048. (__mmask8) __U);
  4049. }
  4050. extern __inline __m128d
  4051. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4052. _mm_mask3_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C,
  4053. __mmask8 __U)
  4054. {
  4055. return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A,
  4056. (__v2df) __B,
  4057. (__v2df) __C,
  4058. (__mmask8)
  4059. __U);
  4060. }
  4061. extern __inline __m128d
  4062. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4063. _mm_maskz_fmsubadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
  4064. __m128d __C)
  4065. {
  4066. return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
  4067. (__v2df) __B,
  4068. -(__v2df) __C,
  4069. (__mmask8)
  4070. __U);
  4071. }
  4072. extern __inline __m256
  4073. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4074. _mm256_mask_fmsubadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
  4075. __m256 __C)
  4076. {
  4077. return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
  4078. (__v8sf) __B,
  4079. -(__v8sf) __C,
  4080. (__mmask8) __U);
  4081. }
  4082. extern __inline __m256
  4083. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4084. _mm256_mask3_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C,
  4085. __mmask8 __U)
  4086. {
  4087. return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A,
  4088. (__v8sf) __B,
  4089. (__v8sf) __C,
  4090. (__mmask8) __U);
  4091. }
  4092. extern __inline __m256
  4093. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4094. _mm256_maskz_fmsubadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
  4095. __m256 __C)
  4096. {
  4097. return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
  4098. (__v8sf) __B,
  4099. -(__v8sf) __C,
  4100. (__mmask8) __U);
  4101. }
  4102. extern __inline __m128
  4103. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4104. _mm_mask_fmsubadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
  4105. {
  4106. return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
  4107. (__v4sf) __B,
  4108. -(__v4sf) __C,
  4109. (__mmask8) __U);
  4110. }
  4111. extern __inline __m128
  4112. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4113. _mm_mask3_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C,
  4114. __mmask8 __U)
  4115. {
  4116. return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A,
  4117. (__v4sf) __B,
  4118. (__v4sf) __C,
  4119. (__mmask8) __U);
  4120. }
  4121. extern __inline __m128
  4122. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4123. _mm_maskz_fmsubadd_ps (__mmask8 __U, __m128 __A, __m128 __B,
  4124. __m128 __C)
  4125. {
  4126. return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
  4127. (__v4sf) __B,
  4128. -(__v4sf) __C,
  4129. (__mmask8) __U);
  4130. }
  4131. extern __inline __m256d
  4132. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4133. _mm256_mask_fnmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
  4134. __m256d __C)
  4135. {
  4136. return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A,
  4137. (__v4df) __B,
  4138. (__v4df) __C,
  4139. (__mmask8) __U);
  4140. }
  4141. extern __inline __m256d
  4142. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4143. _mm256_mask3_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C,
  4144. __mmask8 __U)
  4145. {
  4146. return (__m256d) __builtin_ia32_vfnmaddpd256_mask3 ((__v4df) __A,
  4147. (__v4df) __B,
  4148. (__v4df) __C,
  4149. (__mmask8) __U);
  4150. }
  4151. extern __inline __m256d
  4152. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4153. _mm256_maskz_fnmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
  4154. __m256d __C)
  4155. {
  4156. return (__m256d) __builtin_ia32_vfnmaddpd256_maskz ((__v4df) __A,
  4157. (__v4df) __B,
  4158. (__v4df) __C,
  4159. (__mmask8) __U);
  4160. }
  4161. extern __inline __m128d
  4162. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4163. _mm_mask_fnmadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
  4164. __m128d __C)
  4165. {
  4166. return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A,
  4167. (__v2df) __B,
  4168. (__v2df) __C,
  4169. (__mmask8) __U);
  4170. }
  4171. extern __inline __m128d
  4172. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4173. _mm_mask3_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C,
  4174. __mmask8 __U)
  4175. {
  4176. return (__m128d) __builtin_ia32_vfnmaddpd128_mask3 ((__v2df) __A,
  4177. (__v2df) __B,
  4178. (__v2df) __C,
  4179. (__mmask8) __U);
  4180. }
  4181. extern __inline __m128d
  4182. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4183. _mm_maskz_fnmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
  4184. __m128d __C)
  4185. {
  4186. return (__m128d) __builtin_ia32_vfnmaddpd128_maskz ((__v2df) __A,
  4187. (__v2df) __B,
  4188. (__v2df) __C,
  4189. (__mmask8) __U);
  4190. }
  4191. extern __inline __m256
  4192. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4193. _mm256_mask_fnmadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
  4194. __m256 __C)
  4195. {
  4196. return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A,
  4197. (__v8sf) __B,
  4198. (__v8sf) __C,
  4199. (__mmask8) __U);
  4200. }
  4201. extern __inline __m256
  4202. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4203. _mm256_mask3_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C,
  4204. __mmask8 __U)
  4205. {
  4206. return (__m256) __builtin_ia32_vfnmaddps256_mask3 ((__v8sf) __A,
  4207. (__v8sf) __B,
  4208. (__v8sf) __C,
  4209. (__mmask8) __U);
  4210. }
  4211. extern __inline __m256
  4212. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4213. _mm256_maskz_fnmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
  4214. __m256 __C)
  4215. {
  4216. return (__m256) __builtin_ia32_vfnmaddps256_maskz ((__v8sf) __A,
  4217. (__v8sf) __B,
  4218. (__v8sf) __C,
  4219. (__mmask8) __U);
  4220. }
  4221. extern __inline __m128
  4222. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4223. _mm_mask_fnmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
  4224. {
  4225. return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A,
  4226. (__v4sf) __B,
  4227. (__v4sf) __C,
  4228. (__mmask8) __U);
  4229. }
  4230. extern __inline __m128
  4231. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4232. _mm_mask3_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
  4233. {
  4234. return (__m128) __builtin_ia32_vfnmaddps128_mask3 ((__v4sf) __A,
  4235. (__v4sf) __B,
  4236. (__v4sf) __C,
  4237. (__mmask8) __U);
  4238. }
  4239. extern __inline __m128
  4240. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4241. _mm_maskz_fnmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
  4242. {
  4243. return (__m128) __builtin_ia32_vfnmaddps128_maskz ((__v4sf) __A,
  4244. (__v4sf) __B,
  4245. (__v4sf) __C,
  4246. (__mmask8) __U);
  4247. }
  4248. extern __inline __m256d
  4249. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4250. _mm256_mask_fnmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
  4251. __m256d __C)
  4252. {
  4253. return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A,
  4254. (__v4df) __B,
  4255. (__v4df) __C,
  4256. (__mmask8) __U);
  4257. }
  4258. extern __inline __m256d
  4259. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4260. _mm256_mask3_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C,
  4261. __mmask8 __U)
  4262. {
  4263. return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A,
  4264. (__v4df) __B,
  4265. (__v4df) __C,
  4266. (__mmask8) __U);
  4267. }
  4268. extern __inline __m256d
  4269. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4270. _mm256_maskz_fnmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
  4271. __m256d __C)
  4272. {
  4273. return (__m256d) __builtin_ia32_vfnmsubpd256_maskz ((__v4df) __A,
  4274. (__v4df) __B,
  4275. (__v4df) __C,
  4276. (__mmask8) __U);
  4277. }
  4278. extern __inline __m128d
  4279. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4280. _mm_mask_fnmsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
  4281. __m128d __C)
  4282. {
  4283. return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A,
  4284. (__v2df) __B,
  4285. (__v2df) __C,
  4286. (__mmask8) __U);
  4287. }
  4288. extern __inline __m128d
  4289. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4290. _mm_mask3_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C,
  4291. __mmask8 __U)
  4292. {
  4293. return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A,
  4294. (__v2df) __B,
  4295. (__v2df) __C,
  4296. (__mmask8) __U);
  4297. }
  4298. extern __inline __m128d
  4299. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4300. _mm_maskz_fnmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
  4301. __m128d __C)
  4302. {
  4303. return (__m128d) __builtin_ia32_vfnmsubpd128_maskz ((__v2df) __A,
  4304. (__v2df) __B,
  4305. (__v2df) __C,
  4306. (__mmask8) __U);
  4307. }
  4308. extern __inline __m256
  4309. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4310. _mm256_mask_fnmsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
  4311. __m256 __C)
  4312. {
  4313. return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A,
  4314. (__v8sf) __B,
  4315. (__v8sf) __C,
  4316. (__mmask8) __U);
  4317. }
  4318. extern __inline __m256
  4319. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4320. _mm256_mask3_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C,
  4321. __mmask8 __U)
  4322. {
  4323. return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A,
  4324. (__v8sf) __B,
  4325. (__v8sf) __C,
  4326. (__mmask8) __U);
  4327. }
  4328. extern __inline __m256
  4329. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4330. _mm256_maskz_fnmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
  4331. __m256 __C)
  4332. {
  4333. return (__m256) __builtin_ia32_vfnmsubps256_maskz ((__v8sf) __A,
  4334. (__v8sf) __B,
  4335. (__v8sf) __C,
  4336. (__mmask8) __U);
  4337. }
  4338. extern __inline __m128
  4339. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4340. _mm_mask_fnmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
  4341. {
  4342. return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A,
  4343. (__v4sf) __B,
  4344. (__v4sf) __C,
  4345. (__mmask8) __U);
  4346. }
  4347. extern __inline __m128
  4348. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4349. _mm_mask3_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
  4350. {
  4351. return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A,
  4352. (__v4sf) __B,
  4353. (__v4sf) __C,
  4354. (__mmask8) __U);
  4355. }
  4356. extern __inline __m128
  4357. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4358. _mm_maskz_fnmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
  4359. {
  4360. return (__m128) __builtin_ia32_vfnmsubps128_maskz ((__v4sf) __A,
  4361. (__v4sf) __B,
  4362. (__v4sf) __C,
  4363. (__mmask8) __U);
  4364. }
  4365. extern __inline __m128i
  4366. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4367. _mm_mask_and_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  4368. __m128i __B)
  4369. {
  4370. return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
  4371. (__v4si) __B,
  4372. (__v4si) __W,
  4373. (__mmask8) __U);
  4374. }
  4375. extern __inline __m128i
  4376. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4377. _mm_maskz_and_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
  4378. {
  4379. return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
  4380. (__v4si) __B,
  4381. (__v4si)
  4382. _mm_setzero_si128 (),
  4383. (__mmask8) __U);
  4384. }
  4385. extern __inline __m256i
  4386. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4387. _mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  4388. __m256i __B)
  4389. {
  4390. return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
  4391. (__v8si) __B,
  4392. (__v8si) __W,
  4393. (__mmask8) __U);
  4394. }
  4395. extern __inline __m256i
  4396. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4397. _mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
  4398. {
  4399. return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
  4400. (__v8si) __B,
  4401. (__v8si)
  4402. _mm256_setzero_si256 (),
  4403. (__mmask8) __U);
  4404. }
  4405. extern __inline __m128i
  4406. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4407. _mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  4408. __m128i __B)
  4409. {
  4410. return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
  4411. (__v4si) __B,
  4412. (__v4si) __W,
  4413. (__mmask8) __U);
  4414. }
  4415. extern __inline __m128i
  4416. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4417. _mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
  4418. {
  4419. return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
  4420. (__v4si) __B,
  4421. (__v4si)
  4422. _mm_setzero_si128 (),
  4423. (__mmask8) __U);
  4424. }
  4425. extern __inline __m256i
  4426. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4427. _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  4428. __m256i __B)
  4429. {
  4430. return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
  4431. (__v8si) __B,
  4432. (__v8si) __W,
  4433. (__mmask8) __U);
  4434. }
  4435. extern __inline __m256i
  4436. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4437. _mm256_maskz_or_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
  4438. {
  4439. return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
  4440. (__v8si) __B,
  4441. (__v8si)
  4442. _mm256_setzero_si256 (),
  4443. (__mmask8) __U);
  4444. }
  4445. extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  4446. _mm256_or_epi32 (__m256i __A, __m256i __B)
  4447. {
  4448. return (__m256i) ((__v8su)__A | (__v8su)__B);
  4449. }
  4450. extern __inline __m128i
  4451. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4452. _mm_mask_or_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
  4453. {
  4454. return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
  4455. (__v4si) __B,
  4456. (__v4si) __W,
  4457. (__mmask8) __U);
  4458. }
  4459. extern __inline __m128i
  4460. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4461. _mm_maskz_or_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
  4462. {
  4463. return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
  4464. (__v4si) __B,
  4465. (__v4si)
  4466. _mm_setzero_si128 (),
  4467. (__mmask8) __U);
  4468. }
  4469. extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  4470. _mm_or_epi32 (__m128i __A, __m128i __B)
  4471. {
  4472. return (__m128i) ((__v4su)__A | (__v4su)__B);
  4473. }
  4474. extern __inline __m256i
  4475. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4476. _mm256_mask_xor_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  4477. __m256i __B)
  4478. {
  4479. return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
  4480. (__v8si) __B,
  4481. (__v8si) __W,
  4482. (__mmask8) __U);
  4483. }
  4484. extern __inline __m256i
  4485. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4486. _mm256_maskz_xor_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
  4487. {
  4488. return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
  4489. (__v8si) __B,
  4490. (__v8si)
  4491. _mm256_setzero_si256 (),
  4492. (__mmask8) __U);
  4493. }
  4494. extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  4495. _mm256_xor_epi32 (__m256i __A, __m256i __B)
  4496. {
  4497. return (__m256i) ((__v8su)__A ^ (__v8su)__B);
  4498. }
  4499. extern __inline __m128i
  4500. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4501. _mm_mask_xor_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  4502. __m128i __B)
  4503. {
  4504. return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
  4505. (__v4si) __B,
  4506. (__v4si) __W,
  4507. (__mmask8) __U);
  4508. }
  4509. extern __inline __m128i
  4510. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4511. _mm_maskz_xor_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
  4512. {
  4513. return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
  4514. (__v4si) __B,
  4515. (__v4si)
  4516. _mm_setzero_si128 (),
  4517. (__mmask8) __U);
  4518. }
  4519. extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  4520. _mm_xor_epi32 (__m128i __A, __m128i __B)
  4521. {
  4522. return (__m128i) ((__v4su)__A ^ (__v4su)__B);
  4523. }
  4524. extern __inline __m128
  4525. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4526. _mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A)
  4527. {
  4528. return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
  4529. (__v4sf) __W,
  4530. (__mmask8) __U);
  4531. }
  4532. extern __inline __m128
  4533. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4534. _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A)
  4535. {
  4536. return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
  4537. (__v4sf)
  4538. _mm_setzero_ps (),
  4539. (__mmask8) __U);
  4540. }
  4541. extern __inline __m128
  4542. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4543. _mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A)
  4544. {
  4545. return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
  4546. (__v4sf) __W,
  4547. (__mmask8) __U);
  4548. }
  4549. extern __inline __m128
  4550. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4551. _mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A)
  4552. {
  4553. return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
  4554. (__v4sf)
  4555. _mm_setzero_ps (),
  4556. (__mmask8) __U);
  4557. }
  4558. extern __inline __m256i
  4559. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4560. _mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
  4561. {
  4562. return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
  4563. (__v8si) __W,
  4564. (__mmask8) __U);
  4565. }
  4566. extern __inline __m256i
  4567. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4568. _mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A)
  4569. {
  4570. return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
  4571. (__v8si)
  4572. _mm256_setzero_si256 (),
  4573. (__mmask8) __U);
  4574. }
  4575. extern __inline __m128i
  4576. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4577. _mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
  4578. {
  4579. return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
  4580. (__v4si) __W,
  4581. (__mmask8) __U);
  4582. }
  4583. extern __inline __m128i
  4584. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4585. _mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A)
  4586. {
  4587. return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
  4588. (__v4si)
  4589. _mm_setzero_si128 (),
  4590. (__mmask8) __U);
  4591. }
  4592. extern __inline __m256i
  4593. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4594. _mm256_cvtps_epu32 (__m256 __A)
  4595. {
  4596. return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
  4597. (__v8si)
  4598. _mm256_setzero_si256 (),
  4599. (__mmask8) -1);
  4600. }
  4601. extern __inline __m256i
  4602. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4603. _mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
  4604. {
  4605. return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
  4606. (__v8si) __W,
  4607. (__mmask8) __U);
  4608. }
  4609. extern __inline __m256i
  4610. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4611. _mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A)
  4612. {
  4613. return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
  4614. (__v8si)
  4615. _mm256_setzero_si256 (),
  4616. (__mmask8) __U);
  4617. }
  4618. extern __inline __m128i
  4619. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4620. _mm_cvtps_epu32 (__m128 __A)
  4621. {
  4622. return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
  4623. (__v4si)
  4624. _mm_setzero_si128 (),
  4625. (__mmask8) -1);
  4626. }
  4627. extern __inline __m128i
  4628. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4629. _mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
  4630. {
  4631. return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
  4632. (__v4si) __W,
  4633. (__mmask8) __U);
  4634. }
  4635. extern __inline __m128i
  4636. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4637. _mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A)
  4638. {
  4639. return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
  4640. (__v4si)
  4641. _mm_setzero_si128 (),
  4642. (__mmask8) __U);
  4643. }
  4644. extern __inline __m256d
  4645. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4646. _mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
  4647. {
  4648. return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
  4649. (__v4df) __W,
  4650. (__mmask8) __U);
  4651. }
  4652. extern __inline __m256d
  4653. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4654. _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
  4655. {
  4656. return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
  4657. (__v4df)
  4658. _mm256_setzero_pd (),
  4659. (__mmask8) __U);
  4660. }
  4661. extern __inline __m128d
  4662. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4663. _mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
  4664. {
  4665. return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
  4666. (__v2df) __W,
  4667. (__mmask8) __U);
  4668. }
  4669. extern __inline __m128d
  4670. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4671. _mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
  4672. {
  4673. return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
  4674. (__v2df)
  4675. _mm_setzero_pd (),
  4676. (__mmask8) __U);
  4677. }
  4678. extern __inline __m256
  4679. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4680. _mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
  4681. {
  4682. return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
  4683. (__v8sf) __W,
  4684. (__mmask8) __U);
  4685. }
  4686. extern __inline __m256
  4687. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4688. _mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
  4689. {
  4690. return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
  4691. (__v8sf)
  4692. _mm256_setzero_ps (),
  4693. (__mmask8) __U);
  4694. }
  4695. extern __inline __m128
  4696. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4697. _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
  4698. {
  4699. return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
  4700. (__v4sf) __W,
  4701. (__mmask8) __U);
  4702. }
  4703. extern __inline __m128
  4704. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4705. _mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
  4706. {
  4707. return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
  4708. (__v4sf)
  4709. _mm_setzero_ps (),
  4710. (__mmask8) __U);
  4711. }
  4712. extern __inline __m256
  4713. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4714. _mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
  4715. {
  4716. return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
  4717. (__v8sf) __W,
  4718. (__mmask8) __U);
  4719. }
  4720. extern __inline __m256
  4721. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4722. _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
  4723. {
  4724. return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
  4725. (__v8sf)
  4726. _mm256_setzero_ps (),
  4727. (__mmask8) __U);
  4728. }
  4729. extern __inline __m128
  4730. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4731. _mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
  4732. {
  4733. return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
  4734. (__v4sf) __W,
  4735. (__mmask8) __U);
  4736. }
  4737. extern __inline __m128
  4738. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4739. _mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
  4740. {
  4741. return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
  4742. (__v4sf)
  4743. _mm_setzero_ps (),
  4744. (__mmask8) __U);
  4745. }
  4746. extern __inline __m128i
  4747. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4748. _mm_mask_unpackhi_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  4749. __m128i __B)
  4750. {
  4751. return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
  4752. (__v4si) __B,
  4753. (__v4si) __W,
  4754. (__mmask8) __U);
  4755. }
  4756. extern __inline __m128i
  4757. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4758. _mm_maskz_unpackhi_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
  4759. {
  4760. return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
  4761. (__v4si) __B,
  4762. (__v4si)
  4763. _mm_setzero_si128 (),
  4764. (__mmask8) __U);
  4765. }
  4766. extern __inline __m256i
  4767. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4768. _mm256_mask_unpackhi_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  4769. __m256i __B)
  4770. {
  4771. return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
  4772. (__v8si) __B,
  4773. (__v8si) __W,
  4774. (__mmask8) __U);
  4775. }
  4776. extern __inline __m256i
  4777. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4778. _mm256_maskz_unpackhi_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
  4779. {
  4780. return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
  4781. (__v8si) __B,
  4782. (__v8si)
  4783. _mm256_setzero_si256 (),
  4784. (__mmask8) __U);
  4785. }
  4786. extern __inline __m128i
  4787. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4788. _mm_mask_unpackhi_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
  4789. __m128i __B)
  4790. {
  4791. return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
  4792. (__v2di) __B,
  4793. (__v2di) __W,
  4794. (__mmask8) __U);
  4795. }
  4796. extern __inline __m128i
  4797. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4798. _mm_maskz_unpackhi_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
  4799. {
  4800. return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
  4801. (__v2di) __B,
  4802. (__v2di)
  4803. _mm_setzero_si128 (),
  4804. (__mmask8) __U);
  4805. }
  4806. extern __inline __m256i
  4807. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4808. _mm256_mask_unpackhi_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  4809. __m256i __B)
  4810. {
  4811. return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
  4812. (__v4di) __B,
  4813. (__v4di) __W,
  4814. (__mmask8) __U);
  4815. }
  4816. extern __inline __m256i
  4817. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4818. _mm256_maskz_unpackhi_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
  4819. {
  4820. return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
  4821. (__v4di) __B,
  4822. (__v4di)
  4823. _mm256_setzero_si256 (),
  4824. (__mmask8) __U);
  4825. }
  4826. extern __inline __m128i
  4827. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4828. _mm_mask_unpacklo_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  4829. __m128i __B)
  4830. {
  4831. return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
  4832. (__v4si) __B,
  4833. (__v4si) __W,
  4834. (__mmask8) __U);
  4835. }
  4836. extern __inline __m128i
  4837. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4838. _mm_maskz_unpacklo_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
  4839. {
  4840. return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
  4841. (__v4si) __B,
  4842. (__v4si)
  4843. _mm_setzero_si128 (),
  4844. (__mmask8) __U);
  4845. }
  4846. extern __inline __m256i
  4847. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4848. _mm256_mask_unpacklo_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  4849. __m256i __B)
  4850. {
  4851. return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
  4852. (__v8si) __B,
  4853. (__v8si) __W,
  4854. (__mmask8) __U);
  4855. }
  4856. extern __inline __m256i
  4857. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4858. _mm256_maskz_unpacklo_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
  4859. {
  4860. return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
  4861. (__v8si) __B,
  4862. (__v8si)
  4863. _mm256_setzero_si256 (),
  4864. (__mmask8) __U);
  4865. }
  4866. extern __inline __m128i
  4867. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4868. _mm_mask_unpacklo_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
  4869. __m128i __B)
  4870. {
  4871. return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
  4872. (__v2di) __B,
  4873. (__v2di) __W,
  4874. (__mmask8) __U);
  4875. }
  4876. extern __inline __m128i
  4877. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4878. _mm_maskz_unpacklo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
  4879. {
  4880. return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
  4881. (__v2di) __B,
  4882. (__v2di)
  4883. _mm_setzero_si128 (),
  4884. (__mmask8) __U);
  4885. }
  4886. extern __inline __m256i
  4887. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4888. _mm256_mask_unpacklo_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  4889. __m256i __B)
  4890. {
  4891. return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
  4892. (__v4di) __B,
  4893. (__v4di) __W,
  4894. (__mmask8) __U);
  4895. }
  4896. extern __inline __m256i
  4897. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4898. _mm256_maskz_unpacklo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
  4899. {
  4900. return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
  4901. (__v4di) __B,
  4902. (__v4di)
  4903. _mm256_setzero_si256 (),
  4904. (__mmask8) __U);
  4905. }
  4906. extern __inline __mmask8
  4907. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4908. _mm_cmpeq_epu32_mask (__m128i __A, __m128i __B)
  4909. {
  4910. return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
  4911. (__v4si) __B, 0,
  4912. (__mmask8) -1);
  4913. }
  4914. extern __inline __mmask8
  4915. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4916. _mm_cmpeq_epi32_mask (__m128i __A, __m128i __B)
  4917. {
  4918. return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
  4919. (__v4si) __B,
  4920. (__mmask8) -1);
  4921. }
  4922. extern __inline __mmask8
  4923. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4924. _mm_mask_cmpeq_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B)
  4925. {
  4926. return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
  4927. (__v4si) __B, 0, __U);
  4928. }
  4929. extern __inline __mmask8
  4930. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4931. _mm_mask_cmpeq_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
  4932. {
  4933. return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
  4934. (__v4si) __B, __U);
  4935. }
  4936. extern __inline __mmask8
  4937. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4938. _mm256_cmpeq_epu32_mask (__m256i __A, __m256i __B)
  4939. {
  4940. return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
  4941. (__v8si) __B, 0,
  4942. (__mmask8) -1);
  4943. }
  4944. extern __inline __mmask8
  4945. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4946. _mm256_cmpeq_epi32_mask (__m256i __A, __m256i __B)
  4947. {
  4948. return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
  4949. (__v8si) __B,
  4950. (__mmask8) -1);
  4951. }
  4952. extern __inline __mmask8
  4953. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4954. _mm256_mask_cmpeq_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B)
  4955. {
  4956. return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
  4957. (__v8si) __B, 0, __U);
  4958. }
  4959. extern __inline __mmask8
  4960. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4961. _mm256_mask_cmpeq_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
  4962. {
  4963. return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
  4964. (__v8si) __B, __U);
  4965. }
  4966. extern __inline __mmask8
  4967. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4968. _mm_cmpeq_epu64_mask (__m128i __A, __m128i __B)
  4969. {
  4970. return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
  4971. (__v2di) __B, 0,
  4972. (__mmask8) -1);
  4973. }
  4974. extern __inline __mmask8
  4975. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4976. _mm_cmpeq_epi64_mask (__m128i __A, __m128i __B)
  4977. {
  4978. return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
  4979. (__v2di) __B,
  4980. (__mmask8) -1);
  4981. }
  4982. extern __inline __mmask8
  4983. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4984. _mm_mask_cmpeq_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B)
  4985. {
  4986. return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
  4987. (__v2di) __B, 0, __U);
  4988. }
  4989. extern __inline __mmask8
  4990. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4991. _mm_mask_cmpeq_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
  4992. {
  4993. return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
  4994. (__v2di) __B, __U);
  4995. }
  4996. extern __inline __mmask8
  4997. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4998. _mm256_cmpeq_epu64_mask (__m256i __A, __m256i __B)
  4999. {
  5000. return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
  5001. (__v4di) __B, 0,
  5002. (__mmask8) -1);
  5003. }
  5004. extern __inline __mmask8
  5005. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5006. _mm256_cmpeq_epi64_mask (__m256i __A, __m256i __B)
  5007. {
  5008. return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
  5009. (__v4di) __B,
  5010. (__mmask8) -1);
  5011. }
  5012. extern __inline __mmask8
  5013. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5014. _mm256_mask_cmpeq_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B)
  5015. {
  5016. return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
  5017. (__v4di) __B, 0, __U);
  5018. }
  5019. extern __inline __mmask8
  5020. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5021. _mm256_mask_cmpeq_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
  5022. {
  5023. return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
  5024. (__v4di) __B, __U);
  5025. }
  5026. extern __inline __mmask8
  5027. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5028. _mm_cmpgt_epu32_mask (__m128i __A, __m128i __B)
  5029. {
  5030. return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
  5031. (__v4si) __B, 6,
  5032. (__mmask8) -1);
  5033. }
  5034. extern __inline __mmask8
  5035. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5036. _mm_cmpgt_epi32_mask (__m128i __A, __m128i __B)
  5037. {
  5038. return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
  5039. (__v4si) __B,
  5040. (__mmask8) -1);
  5041. }
  5042. extern __inline __mmask8
  5043. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5044. _mm_mask_cmpgt_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B)
  5045. {
  5046. return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
  5047. (__v4si) __B, 6, __U);
  5048. }
  5049. extern __inline __mmask8
  5050. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5051. _mm_mask_cmpgt_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
  5052. {
  5053. return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
  5054. (__v4si) __B, __U);
  5055. }
  5056. extern __inline __mmask8
  5057. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5058. _mm256_cmpgt_epu32_mask (__m256i __A, __m256i __B)
  5059. {
  5060. return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
  5061. (__v8si) __B, 6,
  5062. (__mmask8) -1);
  5063. }
  5064. extern __inline __mmask8
  5065. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5066. _mm256_cmpgt_epi32_mask (__m256i __A, __m256i __B)
  5067. {
  5068. return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
  5069. (__v8si) __B,
  5070. (__mmask8) -1);
  5071. }
  5072. extern __inline __mmask8
  5073. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5074. _mm256_mask_cmpgt_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B)
  5075. {
  5076. return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
  5077. (__v8si) __B, 6, __U);
  5078. }
  5079. extern __inline __mmask8
  5080. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5081. _mm256_mask_cmpgt_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
  5082. {
  5083. return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
  5084. (__v8si) __B, __U);
  5085. }
  5086. extern __inline __mmask8
  5087. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5088. _mm_cmpgt_epu64_mask (__m128i __A, __m128i __B)
  5089. {
  5090. return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
  5091. (__v2di) __B, 6,
  5092. (__mmask8) -1);
  5093. }
  5094. extern __inline __mmask8
  5095. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5096. _mm_cmpgt_epi64_mask (__m128i __A, __m128i __B)
  5097. {
  5098. return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
  5099. (__v2di) __B,
  5100. (__mmask8) -1);
  5101. }
  5102. extern __inline __mmask8
  5103. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5104. _mm_mask_cmpgt_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B)
  5105. {
  5106. return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
  5107. (__v2di) __B, 6, __U);
  5108. }
  5109. extern __inline __mmask8
  5110. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5111. _mm_mask_cmpgt_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
  5112. {
  5113. return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
  5114. (__v2di) __B, __U);
  5115. }
  5116. extern __inline __mmask8
  5117. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5118. _mm256_cmpgt_epu64_mask (__m256i __A, __m256i __B)
  5119. {
  5120. return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
  5121. (__v4di) __B, 6,
  5122. (__mmask8) -1);
  5123. }
  5124. extern __inline __mmask8
  5125. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5126. _mm256_cmpgt_epi64_mask (__m256i __A, __m256i __B)
  5127. {
  5128. return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
  5129. (__v4di) __B,
  5130. (__mmask8) -1);
  5131. }
  5132. extern __inline __mmask8
  5133. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5134. _mm256_mask_cmpgt_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B)
  5135. {
  5136. return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
  5137. (__v4di) __B, 6, __U);
  5138. }
  5139. extern __inline __mmask8
  5140. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5141. _mm256_mask_cmpgt_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
  5142. {
  5143. return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
  5144. (__v4di) __B, __U);
  5145. }
  5146. extern __inline __mmask8
  5147. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5148. _mm_test_epi32_mask (__m128i __A, __m128i __B)
  5149. {
  5150. return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
  5151. (__v4si) __B,
  5152. (__mmask8) -1);
  5153. }
  5154. extern __inline __mmask8
  5155. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5156. _mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
  5157. {
  5158. return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
  5159. (__v4si) __B, __U);
  5160. }
  5161. extern __inline __mmask8
  5162. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5163. _mm256_test_epi32_mask (__m256i __A, __m256i __B)
  5164. {
  5165. return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
  5166. (__v8si) __B,
  5167. (__mmask8) -1);
  5168. }
  5169. extern __inline __mmask8
  5170. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5171. _mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
  5172. {
  5173. return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
  5174. (__v8si) __B, __U);
  5175. }
  5176. extern __inline __mmask8
  5177. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5178. _mm_test_epi64_mask (__m128i __A, __m128i __B)
  5179. {
  5180. return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
  5181. (__v2di) __B,
  5182. (__mmask8) -1);
  5183. }
  5184. extern __inline __mmask8
  5185. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5186. _mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
  5187. {
  5188. return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
  5189. (__v2di) __B, __U);
  5190. }
  5191. extern __inline __mmask8
  5192. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5193. _mm256_test_epi64_mask (__m256i __A, __m256i __B)
  5194. {
  5195. return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
  5196. (__v4di) __B,
  5197. (__mmask8) -1);
  5198. }
  5199. extern __inline __mmask8
  5200. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5201. _mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
  5202. {
  5203. return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
  5204. (__v4di) __B, __U);
  5205. }
  5206. extern __inline __mmask8
  5207. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5208. _mm_testn_epi32_mask (__m128i __A, __m128i __B)
  5209. {
  5210. return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
  5211. (__v4si) __B,
  5212. (__mmask8) -1);
  5213. }
  5214. extern __inline __mmask8
  5215. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5216. _mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
  5217. {
  5218. return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
  5219. (__v4si) __B, __U);
  5220. }
  5221. extern __inline __mmask8
  5222. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5223. _mm256_testn_epi32_mask (__m256i __A, __m256i __B)
  5224. {
  5225. return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
  5226. (__v8si) __B,
  5227. (__mmask8) -1);
  5228. }
  5229. extern __inline __mmask8
  5230. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5231. _mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
  5232. {
  5233. return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
  5234. (__v8si) __B, __U);
  5235. }
  5236. extern __inline __mmask8
  5237. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5238. _mm_testn_epi64_mask (__m128i __A, __m128i __B)
  5239. {
  5240. return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
  5241. (__v2di) __B,
  5242. (__mmask8) -1);
  5243. }
  5244. extern __inline __mmask8
  5245. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5246. _mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
  5247. {
  5248. return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
  5249. (__v2di) __B, __U);
  5250. }
  5251. extern __inline __mmask8
  5252. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5253. _mm256_testn_epi64_mask (__m256i __A, __m256i __B)
  5254. {
  5255. return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
  5256. (__v4di) __B,
  5257. (__mmask8) -1);
  5258. }
  5259. extern __inline __mmask8
  5260. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5261. _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
  5262. {
  5263. return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
  5264. (__v4di) __B, __U);
  5265. }
  5266. extern __inline __m256d
  5267. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5268. _mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A)
  5269. {
  5270. return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
  5271. (__v4df) __W,
  5272. (__mmask8) __U);
  5273. }
  5274. extern __inline __m256d
  5275. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5276. _mm256_maskz_compress_pd (__mmask8 __U, __m256d __A)
  5277. {
  5278. return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
  5279. (__v4df)
  5280. _mm256_setzero_pd (),
  5281. (__mmask8) __U);
  5282. }
  5283. extern __inline void
  5284. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5285. _mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A)
  5286. {
  5287. __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
  5288. (__v4df) __A,
  5289. (__mmask8) __U);
  5290. }
  5291. extern __inline __m128d
  5292. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5293. _mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A)
  5294. {
  5295. return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
  5296. (__v2df) __W,
  5297. (__mmask8) __U);
  5298. }
  5299. extern __inline __m128d
  5300. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5301. _mm_maskz_compress_pd (__mmask8 __U, __m128d __A)
  5302. {
  5303. return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
  5304. (__v2df)
  5305. _mm_setzero_pd (),
  5306. (__mmask8) __U);
  5307. }
  5308. extern __inline void
  5309. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5310. _mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A)
  5311. {
  5312. __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
  5313. (__v2df) __A,
  5314. (__mmask8) __U);
  5315. }
  5316. extern __inline __m256
  5317. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5318. _mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A)
  5319. {
  5320. return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
  5321. (__v8sf) __W,
  5322. (__mmask8) __U);
  5323. }
  5324. extern __inline __m256
  5325. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5326. _mm256_maskz_compress_ps (__mmask8 __U, __m256 __A)
  5327. {
  5328. return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
  5329. (__v8sf)
  5330. _mm256_setzero_ps (),
  5331. (__mmask8) __U);
  5332. }
  5333. extern __inline void
  5334. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5335. _mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A)
  5336. {
  5337. __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
  5338. (__v8sf) __A,
  5339. (__mmask8) __U);
  5340. }
  5341. extern __inline __m128
  5342. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5343. _mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A)
  5344. {
  5345. return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
  5346. (__v4sf) __W,
  5347. (__mmask8) __U);
  5348. }
  5349. extern __inline __m128
  5350. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5351. _mm_maskz_compress_ps (__mmask8 __U, __m128 __A)
  5352. {
  5353. return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
  5354. (__v4sf)
  5355. _mm_setzero_ps (),
  5356. (__mmask8) __U);
  5357. }
  5358. extern __inline void
  5359. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5360. _mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A)
  5361. {
  5362. __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
  5363. (__v4sf) __A,
  5364. (__mmask8) __U);
  5365. }
  5366. extern __inline __m256i
  5367. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5368. _mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
  5369. {
  5370. return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
  5371. (__v4di) __W,
  5372. (__mmask8) __U);
  5373. }
  5374. extern __inline __m256i
  5375. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5376. _mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A)
  5377. {
  5378. return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
  5379. (__v4di)
  5380. _mm256_setzero_si256 (),
  5381. (__mmask8) __U);
  5382. }
  5383. extern __inline void
  5384. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5385. _mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A)
  5386. {
  5387. __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
  5388. (__v4di) __A,
  5389. (__mmask8) __U);
  5390. }
  5391. extern __inline __m128i
  5392. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5393. _mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
  5394. {
  5395. return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
  5396. (__v2di) __W,
  5397. (__mmask8) __U);
  5398. }
  5399. extern __inline __m128i
  5400. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5401. _mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A)
  5402. {
  5403. return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
  5404. (__v2di)
  5405. _mm_setzero_si128 (),
  5406. (__mmask8) __U);
  5407. }
  5408. extern __inline void
  5409. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5410. _mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A)
  5411. {
  5412. __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
  5413. (__v2di) __A,
  5414. (__mmask8) __U);
  5415. }
  5416. extern __inline __m256i
  5417. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5418. _mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
  5419. {
  5420. return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
  5421. (__v8si) __W,
  5422. (__mmask8) __U);
  5423. }
  5424. extern __inline __m256i
  5425. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5426. _mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A)
  5427. {
  5428. return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
  5429. (__v8si)
  5430. _mm256_setzero_si256 (),
  5431. (__mmask8) __U);
  5432. }
  5433. extern __inline void
  5434. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5435. _mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A)
  5436. {
  5437. __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
  5438. (__v8si) __A,
  5439. (__mmask8) __U);
  5440. }
  5441. extern __inline __m128i
  5442. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5443. _mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
  5444. {
  5445. return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
  5446. (__v4si) __W,
  5447. (__mmask8) __U);
  5448. }
  5449. extern __inline __m128i
  5450. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5451. _mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A)
  5452. {
  5453. return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
  5454. (__v4si)
  5455. _mm_setzero_si128 (),
  5456. (__mmask8) __U);
  5457. }
  5458. extern __inline void
  5459. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5460. _mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A)
  5461. {
  5462. __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
  5463. (__v4si) __A,
  5464. (__mmask8) __U);
  5465. }
  5466. extern __inline __m256d
  5467. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5468. _mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A)
  5469. {
  5470. return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
  5471. (__v4df) __W,
  5472. (__mmask8) __U);
  5473. }
  5474. extern __inline __m256d
  5475. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5476. _mm256_maskz_expand_pd (__mmask8 __U, __m256d __A)
  5477. {
  5478. return (__m256d) __builtin_ia32_expanddf256_maskz ((__v4df) __A,
  5479. (__v4df)
  5480. _mm256_setzero_pd (),
  5481. (__mmask8) __U);
  5482. }
  5483. extern __inline __m256d
  5484. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5485. _mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P)
  5486. {
  5487. return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
  5488. (__v4df) __W,
  5489. (__mmask8)
  5490. __U);
  5491. }
  5492. extern __inline __m256d
  5493. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5494. _mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
  5495. {
  5496. return (__m256d) __builtin_ia32_expandloaddf256_maskz ((__v4df *) __P,
  5497. (__v4df)
  5498. _mm256_setzero_pd (),
  5499. (__mmask8)
  5500. __U);
  5501. }
  5502. extern __inline __m128d
  5503. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5504. _mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A)
  5505. {
  5506. return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
  5507. (__v2df) __W,
  5508. (__mmask8) __U);
  5509. }
  5510. extern __inline __m128d
  5511. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5512. _mm_maskz_expand_pd (__mmask8 __U, __m128d __A)
  5513. {
  5514. return (__m128d) __builtin_ia32_expanddf128_maskz ((__v2df) __A,
  5515. (__v2df)
  5516. _mm_setzero_pd (),
  5517. (__mmask8) __U);
  5518. }
  5519. extern __inline __m128d
  5520. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5521. _mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P)
  5522. {
  5523. return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
  5524. (__v2df) __W,
  5525. (__mmask8)
  5526. __U);
  5527. }
  5528. extern __inline __m128d
  5529. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5530. _mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
  5531. {
  5532. return (__m128d) __builtin_ia32_expandloaddf128_maskz ((__v2df *) __P,
  5533. (__v2df)
  5534. _mm_setzero_pd (),
  5535. (__mmask8)
  5536. __U);
  5537. }
  5538. extern __inline __m256
  5539. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5540. _mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A)
  5541. {
  5542. return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
  5543. (__v8sf) __W,
  5544. (__mmask8) __U);
  5545. }
  5546. extern __inline __m256
  5547. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5548. _mm256_maskz_expand_ps (__mmask8 __U, __m256 __A)
  5549. {
  5550. return (__m256) __builtin_ia32_expandsf256_maskz ((__v8sf) __A,
  5551. (__v8sf)
  5552. _mm256_setzero_ps (),
  5553. (__mmask8) __U);
  5554. }
  5555. extern __inline __m256
  5556. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5557. _mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P)
  5558. {
  5559. return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
  5560. (__v8sf) __W,
  5561. (__mmask8) __U);
  5562. }
  5563. extern __inline __m256
  5564. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5565. _mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
  5566. {
  5567. return (__m256) __builtin_ia32_expandloadsf256_maskz ((__v8sf *) __P,
  5568. (__v8sf)
  5569. _mm256_setzero_ps (),
  5570. (__mmask8)
  5571. __U);
  5572. }
  5573. extern __inline __m128
  5574. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5575. _mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A)
  5576. {
  5577. return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
  5578. (__v4sf) __W,
  5579. (__mmask8) __U);
  5580. }
  5581. extern __inline __m128
  5582. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5583. _mm_maskz_expand_ps (__mmask8 __U, __m128 __A)
  5584. {
  5585. return (__m128) __builtin_ia32_expandsf128_maskz ((__v4sf) __A,
  5586. (__v4sf)
  5587. _mm_setzero_ps (),
  5588. (__mmask8) __U);
  5589. }
  5590. extern __inline __m128
  5591. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5592. _mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P)
  5593. {
  5594. return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
  5595. (__v4sf) __W,
  5596. (__mmask8) __U);
  5597. }
  5598. extern __inline __m128
  5599. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5600. _mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
  5601. {
  5602. return (__m128) __builtin_ia32_expandloadsf128_maskz ((__v4sf *) __P,
  5603. (__v4sf)
  5604. _mm_setzero_ps (),
  5605. (__mmask8)
  5606. __U);
  5607. }
  5608. extern __inline __m256i
  5609. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5610. _mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
  5611. {
  5612. return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
  5613. (__v4di) __W,
  5614. (__mmask8) __U);
  5615. }
  5616. extern __inline __m256i
  5617. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5618. _mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A)
  5619. {
  5620. return (__m256i) __builtin_ia32_expanddi256_maskz ((__v4di) __A,
  5621. (__v4di)
  5622. _mm256_setzero_si256 (),
  5623. (__mmask8) __U);
  5624. }
  5625. extern __inline __m256i
  5626. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5627. _mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
  5628. void const *__P)
  5629. {
  5630. return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
  5631. (__v4di) __W,
  5632. (__mmask8)
  5633. __U);
  5634. }
  5635. extern __inline __m256i
  5636. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5637. _mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
  5638. {
  5639. return (__m256i) __builtin_ia32_expandloaddi256_maskz ((__v4di *) __P,
  5640. (__v4di)
  5641. _mm256_setzero_si256 (),
  5642. (__mmask8)
  5643. __U);
  5644. }
  5645. extern __inline __m128i
  5646. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5647. _mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
  5648. {
  5649. return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
  5650. (__v2di) __W,
  5651. (__mmask8) __U);
  5652. }
  5653. extern __inline __m128i
  5654. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5655. _mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A)
  5656. {
  5657. return (__m128i) __builtin_ia32_expanddi128_maskz ((__v2di) __A,
  5658. (__v2di)
  5659. _mm_setzero_si128 (),
  5660. (__mmask8) __U);
  5661. }
  5662. extern __inline __m128i
  5663. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5664. _mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
  5665. {
  5666. return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
  5667. (__v2di) __W,
  5668. (__mmask8)
  5669. __U);
  5670. }
  5671. extern __inline __m128i
  5672. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5673. _mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
  5674. {
  5675. return (__m128i) __builtin_ia32_expandloaddi128_maskz ((__v2di *) __P,
  5676. (__v2di)
  5677. _mm_setzero_si128 (),
  5678. (__mmask8)
  5679. __U);
  5680. }
  5681. extern __inline __m256i
  5682. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5683. _mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
  5684. {
  5685. return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
  5686. (__v8si) __W,
  5687. (__mmask8) __U);
  5688. }
  5689. extern __inline __m256i
  5690. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5691. _mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A)
  5692. {
  5693. return (__m256i) __builtin_ia32_expandsi256_maskz ((__v8si) __A,
  5694. (__v8si)
  5695. _mm256_setzero_si256 (),
  5696. (__mmask8) __U);
  5697. }
  5698. extern __inline __m256i
  5699. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5700. _mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
  5701. void const *__P)
  5702. {
  5703. return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
  5704. (__v8si) __W,
  5705. (__mmask8)
  5706. __U);
  5707. }
  5708. extern __inline __m256i
  5709. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5710. _mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
  5711. {
  5712. return (__m256i) __builtin_ia32_expandloadsi256_maskz ((__v8si *) __P,
  5713. (__v8si)
  5714. _mm256_setzero_si256 (),
  5715. (__mmask8)
  5716. __U);
  5717. }
  5718. extern __inline __m128i
  5719. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5720. _mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
  5721. {
  5722. return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
  5723. (__v4si) __W,
  5724. (__mmask8) __U);
  5725. }
  5726. extern __inline __m128i
  5727. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5728. _mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A)
  5729. {
  5730. return (__m128i) __builtin_ia32_expandsi128_maskz ((__v4si) __A,
  5731. (__v4si)
  5732. _mm_setzero_si128 (),
  5733. (__mmask8) __U);
  5734. }
  5735. extern __inline __m128i
  5736. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5737. _mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
  5738. {
  5739. return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
  5740. (__v4si) __W,
  5741. (__mmask8)
  5742. __U);
  5743. }
  5744. extern __inline __m128i
  5745. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5746. _mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
  5747. {
  5748. return (__m128i) __builtin_ia32_expandloadsi128_maskz ((__v4si *) __P,
  5749. (__v4si)
  5750. _mm_setzero_si128 (),
  5751. (__mmask8)
  5752. __U);
  5753. }
  5754. extern __inline __m256d
  5755. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5756. _mm256_permutex2var_pd (__m256d __A, __m256i __I, __m256d __B)
  5757. {
  5758. return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
  5759. /* idx */ ,
  5760. (__v4df) __A,
  5761. (__v4df) __B,
  5762. (__mmask8) -1);
  5763. }
  5764. extern __inline __m256d
  5765. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5766. _mm256_mask_permutex2var_pd (__m256d __A, __mmask8 __U, __m256i __I,
  5767. __m256d __B)
  5768. {
  5769. return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
  5770. /* idx */ ,
  5771. (__v4df) __A,
  5772. (__v4df) __B,
  5773. (__mmask8)
  5774. __U);
  5775. }
  5776. extern __inline __m256d
  5777. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5778. _mm256_mask2_permutex2var_pd (__m256d __A, __m256i __I, __mmask8 __U,
  5779. __m256d __B)
  5780. {
  5781. return (__m256d) __builtin_ia32_vpermi2varpd256_mask ((__v4df) __A,
  5782. (__v4di) __I
  5783. /* idx */ ,
  5784. (__v4df) __B,
  5785. (__mmask8)
  5786. __U);
  5787. }
  5788. extern __inline __m256d
  5789. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5790. _mm256_maskz_permutex2var_pd (__mmask8 __U, __m256d __A, __m256i __I,
  5791. __m256d __B)
  5792. {
  5793. return (__m256d) __builtin_ia32_vpermt2varpd256_maskz ((__v4di) __I
  5794. /* idx */ ,
  5795. (__v4df) __A,
  5796. (__v4df) __B,
  5797. (__mmask8)
  5798. __U);
  5799. }
  5800. extern __inline __m256
  5801. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5802. _mm256_permutex2var_ps (__m256 __A, __m256i __I, __m256 __B)
  5803. {
  5804. return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
  5805. /* idx */ ,
  5806. (__v8sf) __A,
  5807. (__v8sf) __B,
  5808. (__mmask8) -1);
  5809. }
  5810. extern __inline __m256
  5811. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5812. _mm256_mask_permutex2var_ps (__m256 __A, __mmask8 __U, __m256i __I,
  5813. __m256 __B)
  5814. {
  5815. return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
  5816. /* idx */ ,
  5817. (__v8sf) __A,
  5818. (__v8sf) __B,
  5819. (__mmask8) __U);
  5820. }
  5821. extern __inline __m256
  5822. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5823. _mm256_mask2_permutex2var_ps (__m256 __A, __m256i __I, __mmask8 __U,
  5824. __m256 __B)
  5825. {
  5826. return (__m256) __builtin_ia32_vpermi2varps256_mask ((__v8sf) __A,
  5827. (__v8si) __I
  5828. /* idx */ ,
  5829. (__v8sf) __B,
  5830. (__mmask8) __U);
  5831. }
  5832. extern __inline __m256
  5833. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5834. _mm256_maskz_permutex2var_ps (__mmask8 __U, __m256 __A, __m256i __I,
  5835. __m256 __B)
  5836. {
  5837. return (__m256) __builtin_ia32_vpermt2varps256_maskz ((__v8si) __I
  5838. /* idx */ ,
  5839. (__v8sf) __A,
  5840. (__v8sf) __B,
  5841. (__mmask8)
  5842. __U);
  5843. }
  5844. extern __inline __m128i
  5845. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5846. _mm_permutex2var_epi64 (__m128i __A, __m128i __I, __m128i __B)
  5847. {
  5848. return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
  5849. /* idx */ ,
  5850. (__v2di) __A,
  5851. (__v2di) __B,
  5852. (__mmask8) -1);
  5853. }
  5854. extern __inline __m128i
  5855. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5856. _mm_mask_permutex2var_epi64 (__m128i __A, __mmask8 __U, __m128i __I,
  5857. __m128i __B)
  5858. {
  5859. return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
  5860. /* idx */ ,
  5861. (__v2di) __A,
  5862. (__v2di) __B,
  5863. (__mmask8) __U);
  5864. }
  5865. extern __inline __m128i
  5866. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5867. _mm_mask2_permutex2var_epi64 (__m128i __A, __m128i __I, __mmask8 __U,
  5868. __m128i __B)
  5869. {
  5870. return (__m128i) __builtin_ia32_vpermi2varq128_mask ((__v2di) __A,
  5871. (__v2di) __I
  5872. /* idx */ ,
  5873. (__v2di) __B,
  5874. (__mmask8) __U);
  5875. }
  5876. extern __inline __m128i
  5877. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5878. _mm_maskz_permutex2var_epi64 (__mmask8 __U, __m128i __A, __m128i __I,
  5879. __m128i __B)
  5880. {
  5881. return (__m128i) __builtin_ia32_vpermt2varq128_maskz ((__v2di) __I
  5882. /* idx */ ,
  5883. (__v2di) __A,
  5884. (__v2di) __B,
  5885. (__mmask8)
  5886. __U);
  5887. }
  5888. extern __inline __m128i
  5889. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5890. _mm_permutex2var_epi32 (__m128i __A, __m128i __I, __m128i __B)
  5891. {
  5892. return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
  5893. /* idx */ ,
  5894. (__v4si) __A,
  5895. (__v4si) __B,
  5896. (__mmask8) -1);
  5897. }
  5898. extern __inline __m128i
  5899. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5900. _mm_mask_permutex2var_epi32 (__m128i __A, __mmask8 __U, __m128i __I,
  5901. __m128i __B)
  5902. {
  5903. return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
  5904. /* idx */ ,
  5905. (__v4si) __A,
  5906. (__v4si) __B,
  5907. (__mmask8) __U);
  5908. }
  5909. extern __inline __m128i
  5910. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5911. _mm_mask2_permutex2var_epi32 (__m128i __A, __m128i __I, __mmask8 __U,
  5912. __m128i __B)
  5913. {
  5914. return (__m128i) __builtin_ia32_vpermi2vard128_mask ((__v4si) __A,
  5915. (__v4si) __I
  5916. /* idx */ ,
  5917. (__v4si) __B,
  5918. (__mmask8) __U);
  5919. }
  5920. extern __inline __m128i
  5921. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5922. _mm_maskz_permutex2var_epi32 (__mmask8 __U, __m128i __A, __m128i __I,
  5923. __m128i __B)
  5924. {
  5925. return (__m128i) __builtin_ia32_vpermt2vard128_maskz ((__v4si) __I
  5926. /* idx */ ,
  5927. (__v4si) __A,
  5928. (__v4si) __B,
  5929. (__mmask8)
  5930. __U);
  5931. }
  5932. extern __inline __m256i
  5933. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5934. _mm256_permutex2var_epi64 (__m256i __A, __m256i __I, __m256i __B)
  5935. {
  5936. return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
  5937. /* idx */ ,
  5938. (__v4di) __A,
  5939. (__v4di) __B,
  5940. (__mmask8) -1);
  5941. }
  5942. extern __inline __m256i
  5943. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5944. _mm256_mask_permutex2var_epi64 (__m256i __A, __mmask8 __U, __m256i __I,
  5945. __m256i __B)
  5946. {
  5947. return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
  5948. /* idx */ ,
  5949. (__v4di) __A,
  5950. (__v4di) __B,
  5951. (__mmask8) __U);
  5952. }
  5953. extern __inline __m256i
  5954. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5955. _mm256_mask2_permutex2var_epi64 (__m256i __A, __m256i __I,
  5956. __mmask8 __U, __m256i __B)
  5957. {
  5958. return (__m256i) __builtin_ia32_vpermi2varq256_mask ((__v4di) __A,
  5959. (__v4di) __I
  5960. /* idx */ ,
  5961. (__v4di) __B,
  5962. (__mmask8) __U);
  5963. }
  5964. extern __inline __m256i
  5965. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5966. _mm256_maskz_permutex2var_epi64 (__mmask8 __U, __m256i __A,
  5967. __m256i __I, __m256i __B)
  5968. {
  5969. return (__m256i) __builtin_ia32_vpermt2varq256_maskz ((__v4di) __I
  5970. /* idx */ ,
  5971. (__v4di) __A,
  5972. (__v4di) __B,
  5973. (__mmask8)
  5974. __U);
  5975. }
  5976. extern __inline __m256i
  5977. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5978. _mm256_permutex2var_epi32 (__m256i __A, __m256i __I, __m256i __B)
  5979. {
  5980. return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
  5981. /* idx */ ,
  5982. (__v8si) __A,
  5983. (__v8si) __B,
  5984. (__mmask8) -1);
  5985. }
  5986. extern __inline __m256i
  5987. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5988. _mm256_mask_permutex2var_epi32 (__m256i __A, __mmask8 __U, __m256i __I,
  5989. __m256i __B)
  5990. {
  5991. return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
  5992. /* idx */ ,
  5993. (__v8si) __A,
  5994. (__v8si) __B,
  5995. (__mmask8) __U);
  5996. }
  5997. extern __inline __m256i
  5998. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5999. _mm256_mask2_permutex2var_epi32 (__m256i __A, __m256i __I,
  6000. __mmask8 __U, __m256i __B)
  6001. {
  6002. return (__m256i) __builtin_ia32_vpermi2vard256_mask ((__v8si) __A,
  6003. (__v8si) __I
  6004. /* idx */ ,
  6005. (__v8si) __B,
  6006. (__mmask8) __U);
  6007. }
  6008. extern __inline __m256i
  6009. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6010. _mm256_maskz_permutex2var_epi32 (__mmask8 __U, __m256i __A,
  6011. __m256i __I, __m256i __B)
  6012. {
  6013. return (__m256i) __builtin_ia32_vpermt2vard256_maskz ((__v8si) __I
  6014. /* idx */ ,
  6015. (__v8si) __A,
  6016. (__v8si) __B,
  6017. (__mmask8)
  6018. __U);
  6019. }
  6020. extern __inline __m128d
  6021. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6022. _mm_permutex2var_pd (__m128d __A, __m128i __I, __m128d __B)
  6023. {
  6024. return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
  6025. /* idx */ ,
  6026. (__v2df) __A,
  6027. (__v2df) __B,
  6028. (__mmask8) -1);
  6029. }
  6030. extern __inline __m128d
  6031. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6032. _mm_mask_permutex2var_pd (__m128d __A, __mmask8 __U, __m128i __I,
  6033. __m128d __B)
  6034. {
  6035. return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
  6036. /* idx */ ,
  6037. (__v2df) __A,
  6038. (__v2df) __B,
  6039. (__mmask8)
  6040. __U);
  6041. }
  6042. extern __inline __m128d
  6043. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6044. _mm_mask2_permutex2var_pd (__m128d __A, __m128i __I, __mmask8 __U,
  6045. __m128d __B)
  6046. {
  6047. return (__m128d) __builtin_ia32_vpermi2varpd128_mask ((__v2df) __A,
  6048. (__v2di) __I
  6049. /* idx */ ,
  6050. (__v2df) __B,
  6051. (__mmask8)
  6052. __U);
  6053. }
  6054. extern __inline __m128d
  6055. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6056. _mm_maskz_permutex2var_pd (__mmask8 __U, __m128d __A, __m128i __I,
  6057. __m128d __B)
  6058. {
  6059. return (__m128d) __builtin_ia32_vpermt2varpd128_maskz ((__v2di) __I
  6060. /* idx */ ,
  6061. (__v2df) __A,
  6062. (__v2df) __B,
  6063. (__mmask8)
  6064. __U);
  6065. }
  6066. extern __inline __m128
  6067. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6068. _mm_permutex2var_ps (__m128 __A, __m128i __I, __m128 __B)
  6069. {
  6070. return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
  6071. /* idx */ ,
  6072. (__v4sf) __A,
  6073. (__v4sf) __B,
  6074. (__mmask8) -1);
  6075. }
  6076. extern __inline __m128
  6077. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6078. _mm_mask_permutex2var_ps (__m128 __A, __mmask8 __U, __m128i __I,
  6079. __m128 __B)
  6080. {
  6081. return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
  6082. /* idx */ ,
  6083. (__v4sf) __A,
  6084. (__v4sf) __B,
  6085. (__mmask8) __U);
  6086. }
  6087. extern __inline __m128
  6088. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6089. _mm_mask2_permutex2var_ps (__m128 __A, __m128i __I, __mmask8 __U,
  6090. __m128 __B)
  6091. {
  6092. return (__m128) __builtin_ia32_vpermi2varps128_mask ((__v4sf) __A,
  6093. (__v4si) __I
  6094. /* idx */ ,
  6095. (__v4sf) __B,
  6096. (__mmask8) __U);
  6097. }
  6098. extern __inline __m128
  6099. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6100. _mm_maskz_permutex2var_ps (__mmask8 __U, __m128 __A, __m128i __I,
  6101. __m128 __B)
  6102. {
  6103. return (__m128) __builtin_ia32_vpermt2varps128_maskz ((__v4si) __I
  6104. /* idx */ ,
  6105. (__v4sf) __A,
  6106. (__v4sf) __B,
  6107. (__mmask8)
  6108. __U);
  6109. }
  6110. extern __inline __m128i
  6111. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6112. _mm_srav_epi64 (__m128i __X, __m128i __Y)
  6113. {
  6114. return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
  6115. (__v2di) __Y,
  6116. (__v2di)
  6117. _mm_setzero_si128 (),
  6118. (__mmask8) -1);
  6119. }
  6120. extern __inline __m128i
  6121. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6122. _mm_mask_srav_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
  6123. __m128i __Y)
  6124. {
  6125. return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
  6126. (__v2di) __Y,
  6127. (__v2di) __W,
  6128. (__mmask8) __U);
  6129. }
  6130. extern __inline __m128i
  6131. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6132. _mm_maskz_srav_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
  6133. {
  6134. return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
  6135. (__v2di) __Y,
  6136. (__v2di)
  6137. _mm_setzero_si128 (),
  6138. (__mmask8) __U);
  6139. }
  6140. extern __inline __m256i
  6141. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6142. _mm256_mask_sllv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
  6143. __m256i __Y)
  6144. {
  6145. return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
  6146. (__v8si) __Y,
  6147. (__v8si) __W,
  6148. (__mmask8) __U);
  6149. }
  6150. extern __inline __m256i
  6151. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6152. _mm256_maskz_sllv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
  6153. {
  6154. return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
  6155. (__v8si) __Y,
  6156. (__v8si)
  6157. _mm256_setzero_si256 (),
  6158. (__mmask8) __U);
  6159. }
  6160. extern __inline __m128i
  6161. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6162. _mm_mask_sllv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
  6163. __m128i __Y)
  6164. {
  6165. return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
  6166. (__v4si) __Y,
  6167. (__v4si) __W,
  6168. (__mmask8) __U);
  6169. }
  6170. extern __inline __m128i
  6171. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6172. _mm_maskz_sllv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
  6173. {
  6174. return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
  6175. (__v4si) __Y,
  6176. (__v4si)
  6177. _mm_setzero_si128 (),
  6178. (__mmask8) __U);
  6179. }
  6180. extern __inline __m256i
  6181. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6182. _mm256_mask_sllv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
  6183. __m256i __Y)
  6184. {
  6185. return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
  6186. (__v4di) __Y,
  6187. (__v4di) __W,
  6188. (__mmask8) __U);
  6189. }
  6190. extern __inline __m256i
  6191. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6192. _mm256_maskz_sllv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
  6193. {
  6194. return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
  6195. (__v4di) __Y,
  6196. (__v4di)
  6197. _mm256_setzero_si256 (),
  6198. (__mmask8) __U);
  6199. }
  6200. extern __inline __m128i
  6201. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6202. _mm_mask_sllv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
  6203. __m128i __Y)
  6204. {
  6205. return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
  6206. (__v2di) __Y,
  6207. (__v2di) __W,
  6208. (__mmask8) __U);
  6209. }
  6210. extern __inline __m128i
  6211. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6212. _mm_maskz_sllv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
  6213. {
  6214. return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
  6215. (__v2di) __Y,
  6216. (__v2di)
  6217. _mm_setzero_si128 (),
  6218. (__mmask8) __U);
  6219. }
  6220. extern __inline __m256i
  6221. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6222. _mm256_mask_srav_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
  6223. __m256i __Y)
  6224. {
  6225. return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
  6226. (__v8si) __Y,
  6227. (__v8si) __W,
  6228. (__mmask8) __U);
  6229. }
  6230. extern __inline __m256i
  6231. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6232. _mm256_maskz_srav_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
  6233. {
  6234. return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
  6235. (__v8si) __Y,
  6236. (__v8si)
  6237. _mm256_setzero_si256 (),
  6238. (__mmask8) __U);
  6239. }
  6240. extern __inline __m128i
  6241. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6242. _mm_mask_srav_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
  6243. __m128i __Y)
  6244. {
  6245. return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
  6246. (__v4si) __Y,
  6247. (__v4si) __W,
  6248. (__mmask8) __U);
  6249. }
  6250. extern __inline __m128i
  6251. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6252. _mm_maskz_srav_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
  6253. {
  6254. return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
  6255. (__v4si) __Y,
  6256. (__v4si)
  6257. _mm_setzero_si128 (),
  6258. (__mmask8) __U);
  6259. }
  6260. extern __inline __m256i
  6261. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6262. _mm256_mask_srlv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
  6263. __m256i __Y)
  6264. {
  6265. return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
  6266. (__v8si) __Y,
  6267. (__v8si) __W,
  6268. (__mmask8) __U);
  6269. }
  6270. extern __inline __m256i
  6271. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6272. _mm256_maskz_srlv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
  6273. {
  6274. return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
  6275. (__v8si) __Y,
  6276. (__v8si)
  6277. _mm256_setzero_si256 (),
  6278. (__mmask8) __U);
  6279. }
  6280. extern __inline __m128i
  6281. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6282. _mm_mask_srlv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
  6283. __m128i __Y)
  6284. {
  6285. return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
  6286. (__v4si) __Y,
  6287. (__v4si) __W,
  6288. (__mmask8) __U);
  6289. }
  6290. extern __inline __m128i
  6291. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6292. _mm_maskz_srlv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
  6293. {
  6294. return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
  6295. (__v4si) __Y,
  6296. (__v4si)
  6297. _mm_setzero_si128 (),
  6298. (__mmask8) __U);
  6299. }
  6300. extern __inline __m256i
  6301. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6302. _mm256_mask_srlv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
  6303. __m256i __Y)
  6304. {
  6305. return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
  6306. (__v4di) __Y,
  6307. (__v4di) __W,
  6308. (__mmask8) __U);
  6309. }
  6310. extern __inline __m256i
  6311. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6312. _mm256_maskz_srlv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
  6313. {
  6314. return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
  6315. (__v4di) __Y,
  6316. (__v4di)
  6317. _mm256_setzero_si256 (),
  6318. (__mmask8) __U);
  6319. }
  6320. extern __inline __m128i
  6321. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6322. _mm_mask_srlv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
  6323. __m128i __Y)
  6324. {
  6325. return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
  6326. (__v2di) __Y,
  6327. (__v2di) __W,
  6328. (__mmask8) __U);
  6329. }
  6330. extern __inline __m128i
  6331. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6332. _mm_maskz_srlv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
  6333. {
  6334. return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
  6335. (__v2di) __Y,
  6336. (__v2di)
  6337. _mm_setzero_si128 (),
  6338. (__mmask8) __U);
  6339. }
  6340. extern __inline __m256i
  6341. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6342. _mm256_rolv_epi32 (__m256i __A, __m256i __B)
  6343. {
  6344. return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
  6345. (__v8si) __B,
  6346. (__v8si)
  6347. _mm256_setzero_si256 (),
  6348. (__mmask8) -1);
  6349. }
  6350. extern __inline __m256i
  6351. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6352. _mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  6353. __m256i __B)
  6354. {
  6355. return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
  6356. (__v8si) __B,
  6357. (__v8si) __W,
  6358. (__mmask8) __U);
  6359. }
  6360. extern __inline __m256i
  6361. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6362. _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
  6363. {
  6364. return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
  6365. (__v8si) __B,
  6366. (__v8si)
  6367. _mm256_setzero_si256 (),
  6368. (__mmask8) __U);
  6369. }
  6370. extern __inline __m128i
  6371. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6372. _mm_rolv_epi32 (__m128i __A, __m128i __B)
  6373. {
  6374. return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
  6375. (__v4si) __B,
  6376. (__v4si)
  6377. _mm_setzero_si128 (),
  6378. (__mmask8) -1);
  6379. }
  6380. extern __inline __m128i
  6381. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6382. _mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  6383. __m128i __B)
  6384. {
  6385. return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
  6386. (__v4si) __B,
  6387. (__v4si) __W,
  6388. (__mmask8) __U);
  6389. }
  6390. extern __inline __m128i
  6391. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6392. _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
  6393. {
  6394. return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
  6395. (__v4si) __B,
  6396. (__v4si)
  6397. _mm_setzero_si128 (),
  6398. (__mmask8) __U);
  6399. }
  6400. extern __inline __m256i
  6401. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6402. _mm256_rorv_epi32 (__m256i __A, __m256i __B)
  6403. {
  6404. return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
  6405. (__v8si) __B,
  6406. (__v8si)
  6407. _mm256_setzero_si256 (),
  6408. (__mmask8) -1);
  6409. }
  6410. extern __inline __m256i
  6411. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6412. _mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  6413. __m256i __B)
  6414. {
  6415. return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
  6416. (__v8si) __B,
  6417. (__v8si) __W,
  6418. (__mmask8) __U);
  6419. }
  6420. extern __inline __m256i
  6421. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6422. _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
  6423. {
  6424. return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
  6425. (__v8si) __B,
  6426. (__v8si)
  6427. _mm256_setzero_si256 (),
  6428. (__mmask8) __U);
  6429. }
  6430. extern __inline __m128i
  6431. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6432. _mm_rorv_epi32 (__m128i __A, __m128i __B)
  6433. {
  6434. return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
  6435. (__v4si) __B,
  6436. (__v4si)
  6437. _mm_setzero_si128 (),
  6438. (__mmask8) -1);
  6439. }
  6440. extern __inline __m128i
  6441. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6442. _mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  6443. __m128i __B)
  6444. {
  6445. return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
  6446. (__v4si) __B,
  6447. (__v4si) __W,
  6448. (__mmask8) __U);
  6449. }
  6450. extern __inline __m128i
  6451. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6452. _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
  6453. {
  6454. return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
  6455. (__v4si) __B,
  6456. (__v4si)
  6457. _mm_setzero_si128 (),
  6458. (__mmask8) __U);
  6459. }
  6460. extern __inline __m256i
  6461. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6462. _mm256_rolv_epi64 (__m256i __A, __m256i __B)
  6463. {
  6464. return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
  6465. (__v4di) __B,
  6466. (__v4di)
  6467. _mm256_setzero_si256 (),
  6468. (__mmask8) -1);
  6469. }
  6470. extern __inline __m256i
  6471. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6472. _mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  6473. __m256i __B)
  6474. {
  6475. return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
  6476. (__v4di) __B,
  6477. (__v4di) __W,
  6478. (__mmask8) __U);
  6479. }
  6480. extern __inline __m256i
  6481. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6482. _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
  6483. {
  6484. return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
  6485. (__v4di) __B,
  6486. (__v4di)
  6487. _mm256_setzero_si256 (),
  6488. (__mmask8) __U);
  6489. }
  6490. extern __inline __m128i
  6491. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6492. _mm_rolv_epi64 (__m128i __A, __m128i __B)
  6493. {
  6494. return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
  6495. (__v2di) __B,
  6496. (__v2di)
  6497. _mm_setzero_si128 (),
  6498. (__mmask8) -1);
  6499. }
  6500. extern __inline __m128i
  6501. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6502. _mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
  6503. __m128i __B)
  6504. {
  6505. return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
  6506. (__v2di) __B,
  6507. (__v2di) __W,
  6508. (__mmask8) __U);
  6509. }
  6510. extern __inline __m128i
  6511. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6512. _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
  6513. {
  6514. return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
  6515. (__v2di) __B,
  6516. (__v2di)
  6517. _mm_setzero_si128 (),
  6518. (__mmask8) __U);
  6519. }
  6520. extern __inline __m256i
  6521. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6522. _mm256_rorv_epi64 (__m256i __A, __m256i __B)
  6523. {
  6524. return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
  6525. (__v4di) __B,
  6526. (__v4di)
  6527. _mm256_setzero_si256 (),
  6528. (__mmask8) -1);
  6529. }
  6530. extern __inline __m256i
  6531. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6532. _mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  6533. __m256i __B)
  6534. {
  6535. return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
  6536. (__v4di) __B,
  6537. (__v4di) __W,
  6538. (__mmask8) __U);
  6539. }
  6540. extern __inline __m256i
  6541. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6542. _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
  6543. {
  6544. return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
  6545. (__v4di) __B,
  6546. (__v4di)
  6547. _mm256_setzero_si256 (),
  6548. (__mmask8) __U);
  6549. }
  6550. extern __inline __m128i
  6551. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6552. _mm_rorv_epi64 (__m128i __A, __m128i __B)
  6553. {
  6554. return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
  6555. (__v2di) __B,
  6556. (__v2di)
  6557. _mm_setzero_si128 (),
  6558. (__mmask8) -1);
  6559. }
  6560. extern __inline __m128i
  6561. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6562. _mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
  6563. __m128i __B)
  6564. {
  6565. return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
  6566. (__v2di) __B,
  6567. (__v2di) __W,
  6568. (__mmask8) __U);
  6569. }
  6570. extern __inline __m128i
  6571. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6572. _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
  6573. {
  6574. return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
  6575. (__v2di) __B,
  6576. (__v2di)
  6577. _mm_setzero_si128 (),
  6578. (__mmask8) __U);
  6579. }
  6580. extern __inline __m256i
  6581. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6582. _mm256_srav_epi64 (__m256i __X, __m256i __Y)
  6583. {
  6584. return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
  6585. (__v4di) __Y,
  6586. (__v4di)
  6587. _mm256_setzero_si256 (),
  6588. (__mmask8) -1);
  6589. }
  6590. extern __inline __m256i
  6591. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6592. _mm256_mask_srav_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
  6593. __m256i __Y)
  6594. {
  6595. return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
  6596. (__v4di) __Y,
  6597. (__v4di) __W,
  6598. (__mmask8) __U);
  6599. }
  6600. extern __inline __m256i
  6601. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6602. _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
  6603. {
  6604. return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
  6605. (__v4di) __Y,
  6606. (__v4di)
  6607. _mm256_setzero_si256 (),
  6608. (__mmask8) __U);
  6609. }
  6610. extern __inline __m256i
  6611. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6612. _mm256_mask_and_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  6613. __m256i __B)
  6614. {
  6615. return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
  6616. (__v4di) __B,
  6617. (__v4di) __W, __U);
  6618. }
  6619. extern __inline __m256i
  6620. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6621. _mm256_maskz_and_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
  6622. {
  6623. return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
  6624. (__v4di) __B,
  6625. (__v4di)
  6626. _mm256_setzero_pd (),
  6627. __U);
  6628. }
  6629. extern __inline __m128i
  6630. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6631. _mm_mask_and_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
  6632. __m128i __B)
  6633. {
  6634. return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
  6635. (__v2di) __B,
  6636. (__v2di) __W, __U);
  6637. }
  6638. extern __inline __m128i
  6639. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6640. _mm_maskz_and_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
  6641. {
  6642. return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
  6643. (__v2di) __B,
  6644. (__v2di)
  6645. _mm_setzero_pd (),
  6646. __U);
  6647. }
  6648. extern __inline __m256i
  6649. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6650. _mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  6651. __m256i __B)
  6652. {
  6653. return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
  6654. (__v4di) __B,
  6655. (__v4di) __W, __U);
  6656. }
  6657. extern __inline __m256i
  6658. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6659. _mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
  6660. {
  6661. return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
  6662. (__v4di) __B,
  6663. (__v4di)
  6664. _mm256_setzero_pd (),
  6665. __U);
  6666. }
  6667. extern __inline __m128i
  6668. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6669. _mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
  6670. __m128i __B)
  6671. {
  6672. return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
  6673. (__v2di) __B,
  6674. (__v2di) __W, __U);
  6675. }
  6676. extern __inline __m128i
  6677. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6678. _mm_maskz_andnot_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
  6679. {
  6680. return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
  6681. (__v2di) __B,
  6682. (__v2di)
  6683. _mm_setzero_pd (),
  6684. __U);
  6685. }
  6686. extern __inline __m256i
  6687. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6688. _mm256_mask_or_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  6689. __m256i __B)
  6690. {
  6691. return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
  6692. (__v4di) __B,
  6693. (__v4di) __W,
  6694. (__mmask8) __U);
  6695. }
  6696. extern __inline __m256i
  6697. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6698. _mm256_maskz_or_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
  6699. {
  6700. return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
  6701. (__v4di) __B,
  6702. (__v4di)
  6703. _mm256_setzero_si256 (),
  6704. (__mmask8) __U);
  6705. }
  6706. extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  6707. _mm256_or_epi64 (__m256i __A, __m256i __B)
  6708. {
  6709. return (__m256i) ((__v4du)__A | (__v4du)__B);
  6710. }
  6711. extern __inline __m128i
  6712. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6713. _mm_mask_or_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
  6714. {
  6715. return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
  6716. (__v2di) __B,
  6717. (__v2di) __W,
  6718. (__mmask8) __U);
  6719. }
  6720. extern __inline __m128i
  6721. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6722. _mm_maskz_or_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
  6723. {
  6724. return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
  6725. (__v2di) __B,
  6726. (__v2di)
  6727. _mm_setzero_si128 (),
  6728. (__mmask8) __U);
  6729. }
  6730. extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  6731. _mm_or_epi64 (__m128i __A, __m128i __B)
  6732. {
  6733. return (__m128i) ((__v2du)__A | (__v2du)__B);
  6734. }
  6735. extern __inline __m256i
  6736. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6737. _mm256_mask_xor_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  6738. __m256i __B)
  6739. {
  6740. return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
  6741. (__v4di) __B,
  6742. (__v4di) __W,
  6743. (__mmask8) __U);
  6744. }
  6745. extern __inline __m256i
  6746. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6747. _mm256_maskz_xor_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
  6748. {
  6749. return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
  6750. (__v4di) __B,
  6751. (__v4di)
  6752. _mm256_setzero_si256 (),
  6753. (__mmask8) __U);
  6754. }
  6755. extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  6756. _mm256_xor_epi64 (__m256i __A, __m256i __B)
  6757. {
  6758. return (__m256i) ((__v4du)__A ^ (__v4du)__B);
  6759. }
  6760. extern __inline __m128i
  6761. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6762. _mm_mask_xor_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
  6763. __m128i __B)
  6764. {
  6765. return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
  6766. (__v2di) __B,
  6767. (__v2di) __W,
  6768. (__mmask8) __U);
  6769. }
  6770. extern __inline __m128i
  6771. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6772. _mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
  6773. {
  6774. return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
  6775. (__v2di) __B,
  6776. (__v2di)
  6777. _mm_setzero_si128 (),
  6778. (__mmask8) __U);
  6779. }
  6780. extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  6781. _mm_xor_epi64 (__m128i __A, __m128i __B)
  6782. {
  6783. return (__m128i) ((__v2du)__A ^ (__v2du)__B);
  6784. }
  6785. extern __inline __m256d
  6786. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6787. _mm256_mask_max_pd (__m256d __W, __mmask8 __U, __m256d __A,
  6788. __m256d __B)
  6789. {
  6790. return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
  6791. (__v4df) __B,
  6792. (__v4df) __W,
  6793. (__mmask8) __U);
  6794. }
  6795. extern __inline __m256d
  6796. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6797. _mm256_maskz_max_pd (__mmask8 __U, __m256d __A, __m256d __B)
  6798. {
  6799. return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
  6800. (__v4df) __B,
  6801. (__v4df)
  6802. _mm256_setzero_pd (),
  6803. (__mmask8) __U);
  6804. }
  6805. extern __inline __m256
  6806. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6807. _mm256_mask_max_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
  6808. {
  6809. return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
  6810. (__v8sf) __B,
  6811. (__v8sf) __W,
  6812. (__mmask8) __U);
  6813. }
  6814. extern __inline __m256
  6815. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6816. _mm256_maskz_max_ps (__mmask8 __U, __m256 __A, __m256 __B)
  6817. {
  6818. return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
  6819. (__v8sf) __B,
  6820. (__v8sf)
  6821. _mm256_setzero_ps (),
  6822. (__mmask8) __U);
  6823. }
  6824. extern __inline __m128
  6825. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6826. _mm_mask_div_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  6827. {
  6828. return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
  6829. (__v4sf) __B,
  6830. (__v4sf) __W,
  6831. (__mmask8) __U);
  6832. }
  6833. extern __inline __m128
  6834. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6835. _mm_maskz_div_ps (__mmask8 __U, __m128 __A, __m128 __B)
  6836. {
  6837. return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
  6838. (__v4sf) __B,
  6839. (__v4sf)
  6840. _mm_setzero_ps (),
  6841. (__mmask8) __U);
  6842. }
  6843. extern __inline __m128d
  6844. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6845. _mm_mask_div_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
  6846. {
  6847. return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
  6848. (__v2df) __B,
  6849. (__v2df) __W,
  6850. (__mmask8) __U);
  6851. }
  6852. extern __inline __m128d
  6853. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6854. _mm_maskz_div_pd (__mmask8 __U, __m128d __A, __m128d __B)
  6855. {
  6856. return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
  6857. (__v2df) __B,
  6858. (__v2df)
  6859. _mm_setzero_pd (),
  6860. (__mmask8) __U);
  6861. }
  6862. extern __inline __m256d
  6863. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6864. _mm256_mask_min_pd (__m256d __W, __mmask8 __U, __m256d __A,
  6865. __m256d __B)
  6866. {
  6867. return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
  6868. (__v4df) __B,
  6869. (__v4df) __W,
  6870. (__mmask8) __U);
  6871. }
  6872. extern __inline __m256d
  6873. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6874. _mm256_mask_div_pd (__m256d __W, __mmask8 __U, __m256d __A,
  6875. __m256d __B)
  6876. {
  6877. return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
  6878. (__v4df) __B,
  6879. (__v4df) __W,
  6880. (__mmask8) __U);
  6881. }
  6882. extern __inline __m256d
  6883. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6884. _mm256_maskz_min_pd (__mmask8 __U, __m256d __A, __m256d __B)
  6885. {
  6886. return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
  6887. (__v4df) __B,
  6888. (__v4df)
  6889. _mm256_setzero_pd (),
  6890. (__mmask8) __U);
  6891. }
  6892. extern __inline __m256
  6893. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6894. _mm256_mask_min_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
  6895. {
  6896. return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
  6897. (__v8sf) __B,
  6898. (__v8sf) __W,
  6899. (__mmask8) __U);
  6900. }
  6901. extern __inline __m256d
  6902. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6903. _mm256_maskz_div_pd (__mmask8 __U, __m256d __A, __m256d __B)
  6904. {
  6905. return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
  6906. (__v4df) __B,
  6907. (__v4df)
  6908. _mm256_setzero_pd (),
  6909. (__mmask8) __U);
  6910. }
  6911. extern __inline __m256
  6912. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6913. _mm256_mask_div_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
  6914. {
  6915. return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
  6916. (__v8sf) __B,
  6917. (__v8sf) __W,
  6918. (__mmask8) __U);
  6919. }
  6920. extern __inline __m256
  6921. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6922. _mm256_maskz_min_ps (__mmask8 __U, __m256 __A, __m256 __B)
  6923. {
  6924. return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
  6925. (__v8sf) __B,
  6926. (__v8sf)
  6927. _mm256_setzero_ps (),
  6928. (__mmask8) __U);
  6929. }
  6930. extern __inline __m256
  6931. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6932. _mm256_maskz_div_ps (__mmask8 __U, __m256 __A, __m256 __B)
  6933. {
  6934. return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
  6935. (__v8sf) __B,
  6936. (__v8sf)
  6937. _mm256_setzero_ps (),
  6938. (__mmask8) __U);
  6939. }
  6940. extern __inline __m128
  6941. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6942. _mm_mask_min_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  6943. {
  6944. return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
  6945. (__v4sf) __B,
  6946. (__v4sf) __W,
  6947. (__mmask8) __U);
  6948. }
  6949. extern __inline __m128
  6950. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6951. _mm_mask_mul_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  6952. {
  6953. return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
  6954. (__v4sf) __B,
  6955. (__v4sf) __W,
  6956. (__mmask8) __U);
  6957. }
  6958. extern __inline __m128
  6959. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6960. _mm_maskz_min_ps (__mmask8 __U, __m128 __A, __m128 __B)
  6961. {
  6962. return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
  6963. (__v4sf) __B,
  6964. (__v4sf)
  6965. _mm_setzero_ps (),
  6966. (__mmask8) __U);
  6967. }
  6968. extern __inline __m128
  6969. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6970. _mm_maskz_mul_ps (__mmask8 __U, __m128 __A, __m128 __B)
  6971. {
  6972. return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
  6973. (__v4sf) __B,
  6974. (__v4sf)
  6975. _mm_setzero_ps (),
  6976. (__mmask8) __U);
  6977. }
  6978. extern __inline __m128
  6979. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6980. _mm_mask_max_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  6981. {
  6982. return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
  6983. (__v4sf) __B,
  6984. (__v4sf) __W,
  6985. (__mmask8) __U);
  6986. }
  6987. extern __inline __m128
  6988. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6989. _mm_maskz_max_ps (__mmask8 __U, __m128 __A, __m128 __B)
  6990. {
  6991. return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
  6992. (__v4sf) __B,
  6993. (__v4sf)
  6994. _mm_setzero_ps (),
  6995. (__mmask8) __U);
  6996. }
  6997. extern __inline __m128d
  6998. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6999. _mm_mask_min_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
  7000. {
  7001. return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
  7002. (__v2df) __B,
  7003. (__v2df) __W,
  7004. (__mmask8) __U);
  7005. }
  7006. extern __inline __m128d
  7007. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7008. _mm_maskz_min_pd (__mmask8 __U, __m128d __A, __m128d __B)
  7009. {
  7010. return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
  7011. (__v2df) __B,
  7012. (__v2df)
  7013. _mm_setzero_pd (),
  7014. (__mmask8) __U);
  7015. }
  7016. extern __inline __m128d
  7017. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7018. _mm_mask_max_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
  7019. {
  7020. return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
  7021. (__v2df) __B,
  7022. (__v2df) __W,
  7023. (__mmask8) __U);
  7024. }
  7025. extern __inline __m128d
  7026. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7027. _mm_maskz_max_pd (__mmask8 __U, __m128d __A, __m128d __B)
  7028. {
  7029. return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
  7030. (__v2df) __B,
  7031. (__v2df)
  7032. _mm_setzero_pd (),
  7033. (__mmask8) __U);
  7034. }
  7035. extern __inline __m128d
  7036. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7037. _mm_mask_mul_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
  7038. {
  7039. return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
  7040. (__v2df) __B,
  7041. (__v2df) __W,
  7042. (__mmask8) __U);
  7043. }
  7044. extern __inline __m128d
  7045. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7046. _mm_maskz_mul_pd (__mmask8 __U, __m128d __A, __m128d __B)
  7047. {
  7048. return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
  7049. (__v2df) __B,
  7050. (__v2df)
  7051. _mm_setzero_pd (),
  7052. (__mmask8) __U);
  7053. }
  7054. extern __inline __m256
  7055. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7056. _mm256_mask_mul_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
  7057. {
  7058. return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
  7059. (__v8sf) __B,
  7060. (__v8sf) __W,
  7061. (__mmask8) __U);
  7062. }
  7063. extern __inline __m256
  7064. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7065. _mm256_maskz_mul_ps (__mmask8 __U, __m256 __A, __m256 __B)
  7066. {
  7067. return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
  7068. (__v8sf) __B,
  7069. (__v8sf)
  7070. _mm256_setzero_ps (),
  7071. (__mmask8) __U);
  7072. }
  7073. extern __inline __m256d
  7074. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7075. _mm256_mask_mul_pd (__m256d __W, __mmask8 __U, __m256d __A,
  7076. __m256d __B)
  7077. {
  7078. return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
  7079. (__v4df) __B,
  7080. (__v4df) __W,
  7081. (__mmask8) __U);
  7082. }
  7083. extern __inline __m256d
  7084. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7085. _mm256_maskz_mul_pd (__mmask8 __U, __m256d __A, __m256d __B)
  7086. {
  7087. return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
  7088. (__v4df) __B,
  7089. (__v4df)
  7090. _mm256_setzero_pd (),
  7091. (__mmask8) __U);
  7092. }
  7093. extern __inline __m256i
  7094. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7095. _mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
  7096. {
  7097. return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
  7098. (__v4di) __B,
  7099. (__v4di)
  7100. _mm256_setzero_si256 (),
  7101. __M);
  7102. }
  7103. extern __inline __m256i
  7104. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7105. _mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
  7106. __m256i __B)
  7107. {
  7108. return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
  7109. (__v4di) __B,
  7110. (__v4di) __W, __M);
  7111. }
  7112. extern __inline __m256i
  7113. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7114. _mm256_min_epi64 (__m256i __A, __m256i __B)
  7115. {
  7116. return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
  7117. (__v4di) __B,
  7118. (__v4di)
  7119. _mm256_setzero_si256 (),
  7120. (__mmask8) -1);
  7121. }
  7122. extern __inline __m256i
  7123. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7124. _mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
  7125. __m256i __B)
  7126. {
  7127. return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
  7128. (__v4di) __B,
  7129. (__v4di) __W, __M);
  7130. }
  7131. extern __inline __m256i
  7132. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7133. _mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
  7134. {
  7135. return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
  7136. (__v4di) __B,
  7137. (__v4di)
  7138. _mm256_setzero_si256 (),
  7139. __M);
  7140. }
  7141. extern __inline __m256i
  7142. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7143. _mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
  7144. {
  7145. return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
  7146. (__v4di) __B,
  7147. (__v4di)
  7148. _mm256_setzero_si256 (),
  7149. __M);
  7150. }
  7151. extern __inline __m256i
  7152. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7153. _mm256_max_epi64 (__m256i __A, __m256i __B)
  7154. {
  7155. return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
  7156. (__v4di) __B,
  7157. (__v4di)
  7158. _mm256_setzero_si256 (),
  7159. (__mmask8) -1);
  7160. }
  7161. extern __inline __m256i
  7162. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7163. _mm256_max_epu64 (__m256i __A, __m256i __B)
  7164. {
  7165. return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
  7166. (__v4di) __B,
  7167. (__v4di)
  7168. _mm256_setzero_si256 (),
  7169. (__mmask8) -1);
  7170. }
  7171. extern __inline __m256i
  7172. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7173. _mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
  7174. __m256i __B)
  7175. {
  7176. return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
  7177. (__v4di) __B,
  7178. (__v4di) __W, __M);
  7179. }
  7180. extern __inline __m256i
  7181. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7182. _mm256_min_epu64 (__m256i __A, __m256i __B)
  7183. {
  7184. return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
  7185. (__v4di) __B,
  7186. (__v4di)
  7187. _mm256_setzero_si256 (),
  7188. (__mmask8) -1);
  7189. }
  7190. extern __inline __m256i
  7191. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7192. _mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
  7193. __m256i __B)
  7194. {
  7195. return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
  7196. (__v4di) __B,
  7197. (__v4di) __W, __M);
  7198. }
  7199. extern __inline __m256i
  7200. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7201. _mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
  7202. {
  7203. return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
  7204. (__v4di) __B,
  7205. (__v4di)
  7206. _mm256_setzero_si256 (),
  7207. __M);
  7208. }
  7209. extern __inline __m256i
  7210. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7211. _mm256_maskz_max_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
  7212. {
  7213. return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
  7214. (__v8si) __B,
  7215. (__v8si)
  7216. _mm256_setzero_si256 (),
  7217. __M);
  7218. }
  7219. extern __inline __m256i
  7220. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7221. _mm256_mask_max_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
  7222. __m256i __B)
  7223. {
  7224. return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
  7225. (__v8si) __B,
  7226. (__v8si) __W, __M);
  7227. }
  7228. extern __inline __m256i
  7229. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7230. _mm256_maskz_min_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
  7231. {
  7232. return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
  7233. (__v8si) __B,
  7234. (__v8si)
  7235. _mm256_setzero_si256 (),
  7236. __M);
  7237. }
  7238. extern __inline __m256i
  7239. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7240. _mm256_mask_min_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
  7241. __m256i __B)
  7242. {
  7243. return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
  7244. (__v8si) __B,
  7245. (__v8si) __W, __M);
  7246. }
  7247. extern __inline __m256i
  7248. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7249. _mm256_maskz_max_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
  7250. {
  7251. return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
  7252. (__v8si) __B,
  7253. (__v8si)
  7254. _mm256_setzero_si256 (),
  7255. __M);
  7256. }
  7257. extern __inline __m256i
  7258. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7259. _mm256_mask_max_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
  7260. __m256i __B)
  7261. {
  7262. return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
  7263. (__v8si) __B,
  7264. (__v8si) __W, __M);
  7265. }
  7266. extern __inline __m256i
  7267. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7268. _mm256_maskz_min_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
  7269. {
  7270. return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
  7271. (__v8si) __B,
  7272. (__v8si)
  7273. _mm256_setzero_si256 (),
  7274. __M);
  7275. }
  7276. extern __inline __m256i
  7277. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7278. _mm256_mask_min_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
  7279. __m256i __B)
  7280. {
  7281. return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
  7282. (__v8si) __B,
  7283. (__v8si) __W, __M);
  7284. }
  7285. extern __inline __m128i
  7286. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7287. _mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
  7288. {
  7289. return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
  7290. (__v2di) __B,
  7291. (__v2di)
  7292. _mm_setzero_si128 (),
  7293. __M);
  7294. }
  7295. extern __inline __m128i
  7296. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7297. _mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
  7298. __m128i __B)
  7299. {
  7300. return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
  7301. (__v2di) __B,
  7302. (__v2di) __W, __M);
  7303. }
  7304. extern __inline __m128i
  7305. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7306. _mm_min_epi64 (__m128i __A, __m128i __B)
  7307. {
  7308. return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
  7309. (__v2di) __B,
  7310. (__v2di)
  7311. _mm_setzero_si128 (),
  7312. (__mmask8) -1);
  7313. }
  7314. extern __inline __m128i
  7315. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7316. _mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
  7317. __m128i __B)
  7318. {
  7319. return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
  7320. (__v2di) __B,
  7321. (__v2di) __W, __M);
  7322. }
  7323. extern __inline __m128i
  7324. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7325. _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
  7326. {
  7327. return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
  7328. (__v2di) __B,
  7329. (__v2di)
  7330. _mm_setzero_si128 (),
  7331. __M);
  7332. }
  7333. extern __inline __m128i
  7334. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7335. _mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
  7336. {
  7337. return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
  7338. (__v2di) __B,
  7339. (__v2di)
  7340. _mm_setzero_si128 (),
  7341. __M);
  7342. }
  7343. extern __inline __m128i
  7344. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7345. _mm_max_epi64 (__m128i __A, __m128i __B)
  7346. {
  7347. return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
  7348. (__v2di) __B,
  7349. (__v2di)
  7350. _mm_setzero_si128 (),
  7351. (__mmask8) -1);
  7352. }
  7353. extern __inline __m128i
  7354. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7355. _mm_max_epu64 (__m128i __A, __m128i __B)
  7356. {
  7357. return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
  7358. (__v2di) __B,
  7359. (__v2di)
  7360. _mm_setzero_si128 (),
  7361. (__mmask8) -1);
  7362. }
  7363. extern __inline __m128i
  7364. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7365. _mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
  7366. __m128i __B)
  7367. {
  7368. return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
  7369. (__v2di) __B,
  7370. (__v2di) __W, __M);
  7371. }
  7372. extern __inline __m128i
  7373. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7374. _mm_min_epu64 (__m128i __A, __m128i __B)
  7375. {
  7376. return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
  7377. (__v2di) __B,
  7378. (__v2di)
  7379. _mm_setzero_si128 (),
  7380. (__mmask8) -1);
  7381. }
  7382. extern __inline __m128i
  7383. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7384. _mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
  7385. __m128i __B)
  7386. {
  7387. return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
  7388. (__v2di) __B,
  7389. (__v2di) __W, __M);
  7390. }
  7391. extern __inline __m128i
  7392. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7393. _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
  7394. {
  7395. return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
  7396. (__v2di) __B,
  7397. (__v2di)
  7398. _mm_setzero_si128 (),
  7399. __M);
  7400. }
  7401. extern __inline __m128i
  7402. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7403. _mm_maskz_max_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
  7404. {
  7405. return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
  7406. (__v4si) __B,
  7407. (__v4si)
  7408. _mm_setzero_si128 (),
  7409. __M);
  7410. }
  7411. extern __inline __m128i
  7412. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7413. _mm_mask_max_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
  7414. __m128i __B)
  7415. {
  7416. return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
  7417. (__v4si) __B,
  7418. (__v4si) __W, __M);
  7419. }
  7420. extern __inline __m128i
  7421. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7422. _mm_maskz_min_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
  7423. {
  7424. return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
  7425. (__v4si) __B,
  7426. (__v4si)
  7427. _mm_setzero_si128 (),
  7428. __M);
  7429. }
  7430. extern __inline __m128i
  7431. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7432. _mm_mask_min_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
  7433. __m128i __B)
  7434. {
  7435. return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
  7436. (__v4si) __B,
  7437. (__v4si) __W, __M);
  7438. }
  7439. extern __inline __m128i
  7440. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7441. _mm_maskz_max_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
  7442. {
  7443. return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
  7444. (__v4si) __B,
  7445. (__v4si)
  7446. _mm_setzero_si128 (),
  7447. __M);
  7448. }
  7449. extern __inline __m128i
  7450. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7451. _mm_mask_max_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
  7452. __m128i __B)
  7453. {
  7454. return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
  7455. (__v4si) __B,
  7456. (__v4si) __W, __M);
  7457. }
  7458. extern __inline __m128i
  7459. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7460. _mm_maskz_min_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
  7461. {
  7462. return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
  7463. (__v4si) __B,
  7464. (__v4si)
  7465. _mm_setzero_si128 (),
  7466. __M);
  7467. }
  7468. extern __inline __m128i
  7469. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7470. _mm_mask_min_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
  7471. __m128i __B)
  7472. {
  7473. return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
  7474. (__v4si) __B,
  7475. (__v4si) __W, __M);
  7476. }
  7477. #ifndef __AVX512CD__
  7478. #pragma GCC push_options
  7479. #pragma GCC target("avx512vl,avx512cd")
  7480. #define __DISABLE_AVX512VLCD__
  7481. #endif
  7482. extern __inline __m128i
  7483. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7484. _mm_broadcastmb_epi64 (__mmask8 __A)
  7485. {
  7486. return (__m128i) __builtin_ia32_broadcastmb128 (__A);
  7487. }
  7488. extern __inline __m256i
  7489. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7490. _mm256_broadcastmb_epi64 (__mmask8 __A)
  7491. {
  7492. return (__m256i) __builtin_ia32_broadcastmb256 (__A);
  7493. }
  7494. extern __inline __m128i
  7495. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7496. _mm_broadcastmw_epi32 (__mmask16 __A)
  7497. {
  7498. return (__m128i) __builtin_ia32_broadcastmw128 (__A);
  7499. }
  7500. extern __inline __m256i
  7501. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7502. _mm256_broadcastmw_epi32 (__mmask16 __A)
  7503. {
  7504. return (__m256i) __builtin_ia32_broadcastmw256 (__A);
  7505. }
  7506. extern __inline __m256i
  7507. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7508. _mm256_lzcnt_epi32 (__m256i __A)
  7509. {
  7510. return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
  7511. (__v8si)
  7512. _mm256_setzero_si256 (),
  7513. (__mmask8) -1);
  7514. }
  7515. extern __inline __m256i
  7516. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7517. _mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
  7518. {
  7519. return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
  7520. (__v8si) __W,
  7521. (__mmask8) __U);
  7522. }
  7523. extern __inline __m256i
  7524. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7525. _mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A)
  7526. {
  7527. return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
  7528. (__v8si)
  7529. _mm256_setzero_si256 (),
  7530. (__mmask8) __U);
  7531. }
  7532. extern __inline __m256i
  7533. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7534. _mm256_lzcnt_epi64 (__m256i __A)
  7535. {
  7536. return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
  7537. (__v4di)
  7538. _mm256_setzero_si256 (),
  7539. (__mmask8) -1);
  7540. }
  7541. extern __inline __m256i
  7542. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7543. _mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
  7544. {
  7545. return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
  7546. (__v4di) __W,
  7547. (__mmask8) __U);
  7548. }
  7549. extern __inline __m256i
  7550. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7551. _mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A)
  7552. {
  7553. return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
  7554. (__v4di)
  7555. _mm256_setzero_si256 (),
  7556. (__mmask8) __U);
  7557. }
  7558. extern __inline __m256i
  7559. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7560. _mm256_conflict_epi64 (__m256i __A)
  7561. {
  7562. return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
  7563. (__v4di)
  7564. _mm256_setzero_si256 (),
  7565. (__mmask8) -1);
  7566. }
  7567. extern __inline __m256i
  7568. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7569. _mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
  7570. {
  7571. return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
  7572. (__v4di) __W,
  7573. (__mmask8)
  7574. __U);
  7575. }
  7576. extern __inline __m256i
  7577. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7578. _mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
  7579. {
  7580. return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
  7581. (__v4di)
  7582. _mm256_setzero_si256 (),
  7583. (__mmask8)
  7584. __U);
  7585. }
  7586. extern __inline __m256i
  7587. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7588. _mm256_conflict_epi32 (__m256i __A)
  7589. {
  7590. return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
  7591. (__v8si)
  7592. _mm256_setzero_si256 (),
  7593. (__mmask8) -1);
  7594. }
  7595. extern __inline __m256i
  7596. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7597. _mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
  7598. {
  7599. return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
  7600. (__v8si) __W,
  7601. (__mmask8)
  7602. __U);
  7603. }
  7604. extern __inline __m256i
  7605. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7606. _mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
  7607. {
  7608. return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
  7609. (__v8si)
  7610. _mm256_setzero_si256 (),
  7611. (__mmask8)
  7612. __U);
  7613. }
  7614. extern __inline __m128i
  7615. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7616. _mm_lzcnt_epi32 (__m128i __A)
  7617. {
  7618. return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
  7619. (__v4si)
  7620. _mm_setzero_si128 (),
  7621. (__mmask8) -1);
  7622. }
  7623. extern __inline __m128i
  7624. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7625. _mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
  7626. {
  7627. return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
  7628. (__v4si) __W,
  7629. (__mmask8) __U);
  7630. }
  7631. extern __inline __m128i
  7632. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7633. _mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A)
  7634. {
  7635. return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
  7636. (__v4si)
  7637. _mm_setzero_si128 (),
  7638. (__mmask8) __U);
  7639. }
  7640. extern __inline __m128i
  7641. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7642. _mm_lzcnt_epi64 (__m128i __A)
  7643. {
  7644. return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
  7645. (__v2di)
  7646. _mm_setzero_si128 (),
  7647. (__mmask8) -1);
  7648. }
  7649. extern __inline __m128i
  7650. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7651. _mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
  7652. {
  7653. return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
  7654. (__v2di) __W,
  7655. (__mmask8) __U);
  7656. }
  7657. extern __inline __m128i
  7658. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7659. _mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A)
  7660. {
  7661. return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
  7662. (__v2di)
  7663. _mm_setzero_si128 (),
  7664. (__mmask8) __U);
  7665. }
  7666. extern __inline __m128i
  7667. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7668. _mm_conflict_epi64 (__m128i __A)
  7669. {
  7670. return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
  7671. (__v2di)
  7672. _mm_setzero_si128 (),
  7673. (__mmask8) -1);
  7674. }
  7675. extern __inline __m128i
  7676. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7677. _mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
  7678. {
  7679. return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
  7680. (__v2di) __W,
  7681. (__mmask8)
  7682. __U);
  7683. }
  7684. extern __inline __m128i
  7685. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7686. _mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A)
  7687. {
  7688. return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
  7689. (__v2di)
  7690. _mm_setzero_si128 (),
  7691. (__mmask8)
  7692. __U);
  7693. }
  7694. extern __inline __m128i
  7695. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7696. _mm_conflict_epi32 (__m128i __A)
  7697. {
  7698. return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
  7699. (__v4si)
  7700. _mm_setzero_si128 (),
  7701. (__mmask8) -1);
  7702. }
  7703. extern __inline __m128i
  7704. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7705. _mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
  7706. {
  7707. return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
  7708. (__v4si) __W,
  7709. (__mmask8)
  7710. __U);
  7711. }
  7712. extern __inline __m128i
  7713. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7714. _mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
  7715. {
  7716. return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
  7717. (__v4si)
  7718. _mm_setzero_si128 (),
  7719. (__mmask8)
  7720. __U);
  7721. }
  7722. #ifdef __DISABLE_AVX512VLCD__
  7723. #pragma GCC pop_options
  7724. #endif
  7725. extern __inline __m256d
  7726. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7727. _mm256_mask_unpacklo_pd (__m256d __W, __mmask8 __U, __m256d __A,
  7728. __m256d __B)
  7729. {
  7730. return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
  7731. (__v4df) __B,
  7732. (__v4df) __W,
  7733. (__mmask8) __U);
  7734. }
  7735. extern __inline __m256d
  7736. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7737. _mm256_maskz_unpacklo_pd (__mmask8 __U, __m256d __A, __m256d __B)
  7738. {
  7739. return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
  7740. (__v4df) __B,
  7741. (__v4df)
  7742. _mm256_setzero_pd (),
  7743. (__mmask8) __U);
  7744. }
  7745. extern __inline __m128d
  7746. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7747. _mm_mask_unpacklo_pd (__m128d __W, __mmask8 __U, __m128d __A,
  7748. __m128d __B)
  7749. {
  7750. return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
  7751. (__v2df) __B,
  7752. (__v2df) __W,
  7753. (__mmask8) __U);
  7754. }
  7755. extern __inline __m128d
  7756. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7757. _mm_maskz_unpacklo_pd (__mmask8 __U, __m128d __A, __m128d __B)
  7758. {
  7759. return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
  7760. (__v2df) __B,
  7761. (__v2df)
  7762. _mm_setzero_pd (),
  7763. (__mmask8) __U);
  7764. }
  7765. extern __inline __m256
  7766. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7767. _mm256_mask_unpacklo_ps (__m256 __W, __mmask8 __U, __m256 __A,
  7768. __m256 __B)
  7769. {
  7770. return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
  7771. (__v8sf) __B,
  7772. (__v8sf) __W,
  7773. (__mmask8) __U);
  7774. }
  7775. extern __inline __m256d
  7776. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7777. _mm256_mask_unpackhi_pd (__m256d __W, __mmask8 __U, __m256d __A,
  7778. __m256d __B)
  7779. {
  7780. return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
  7781. (__v4df) __B,
  7782. (__v4df) __W,
  7783. (__mmask8) __U);
  7784. }
  7785. extern __inline __m256d
  7786. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7787. _mm256_maskz_unpackhi_pd (__mmask8 __U, __m256d __A, __m256d __B)
  7788. {
  7789. return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
  7790. (__v4df) __B,
  7791. (__v4df)
  7792. _mm256_setzero_pd (),
  7793. (__mmask8) __U);
  7794. }
  7795. extern __inline __m128d
  7796. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7797. _mm_mask_unpackhi_pd (__m128d __W, __mmask8 __U, __m128d __A,
  7798. __m128d __B)
  7799. {
  7800. return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
  7801. (__v2df) __B,
  7802. (__v2df) __W,
  7803. (__mmask8) __U);
  7804. }
  7805. extern __inline __m128d
  7806. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7807. _mm_maskz_unpackhi_pd (__mmask8 __U, __m128d __A, __m128d __B)
  7808. {
  7809. return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
  7810. (__v2df) __B,
  7811. (__v2df)
  7812. _mm_setzero_pd (),
  7813. (__mmask8) __U);
  7814. }
  7815. extern __inline __m256
  7816. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7817. _mm256_mask_unpackhi_ps (__m256 __W, __mmask8 __U, __m256 __A,
  7818. __m256 __B)
  7819. {
  7820. return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
  7821. (__v8sf) __B,
  7822. (__v8sf) __W,
  7823. (__mmask8) __U);
  7824. }
  7825. extern __inline __m256
  7826. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7827. _mm256_maskz_unpackhi_ps (__mmask8 __U, __m256 __A, __m256 __B)
  7828. {
  7829. return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
  7830. (__v8sf) __B,
  7831. (__v8sf)
  7832. _mm256_setzero_ps (),
  7833. (__mmask8) __U);
  7834. }
  7835. extern __inline __m128
  7836. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7837. _mm_mask_unpackhi_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  7838. {
  7839. return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
  7840. (__v4sf) __B,
  7841. (__v4sf) __W,
  7842. (__mmask8) __U);
  7843. }
  7844. extern __inline __m128
  7845. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7846. _mm_maskz_unpackhi_ps (__mmask8 __U, __m128 __A, __m128 __B)
  7847. {
  7848. return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
  7849. (__v4sf) __B,
  7850. (__v4sf)
  7851. _mm_setzero_ps (),
  7852. (__mmask8) __U);
  7853. }
  7854. extern __inline __m128
  7855. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7856. _mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
  7857. {
  7858. return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
  7859. (__v4sf) __W,
  7860. (__mmask8) __U);
  7861. }
  7862. extern __inline __m128
  7863. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7864. _mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
  7865. {
  7866. return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
  7867. (__v4sf)
  7868. _mm_setzero_ps (),
  7869. (__mmask8) __U);
  7870. }
  7871. extern __inline __m256
  7872. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7873. _mm256_maskz_unpacklo_ps (__mmask8 __U, __m256 __A, __m256 __B)
  7874. {
  7875. return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
  7876. (__v8sf) __B,
  7877. (__v8sf)
  7878. _mm256_setzero_ps (),
  7879. (__mmask8) __U);
  7880. }
  7881. extern __inline __m256
  7882. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7883. _mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
  7884. {
  7885. return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
  7886. (__v8sf) __W,
  7887. (__mmask8) __U);
  7888. }
  7889. extern __inline __m256
  7890. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7891. _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
  7892. {
  7893. return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
  7894. (__v8sf)
  7895. _mm256_setzero_ps (),
  7896. (__mmask8) __U);
  7897. }
  7898. extern __inline __m128
  7899. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7900. _mm_mask_unpacklo_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  7901. {
  7902. return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
  7903. (__v4sf) __B,
  7904. (__v4sf) __W,
  7905. (__mmask8) __U);
  7906. }
  7907. extern __inline __m128
  7908. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7909. _mm_maskz_unpacklo_ps (__mmask8 __U, __m128 __A, __m128 __B)
  7910. {
  7911. return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
  7912. (__v4sf) __B,
  7913. (__v4sf)
  7914. _mm_setzero_ps (),
  7915. (__mmask8) __U);
  7916. }
  7917. extern __inline __m256i
  7918. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7919. _mm256_mask_sra_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  7920. __m128i __B)
  7921. {
  7922. return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
  7923. (__v4si) __B,
  7924. (__v8si) __W,
  7925. (__mmask8) __U);
  7926. }
  7927. extern __inline __m256i
  7928. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7929. _mm256_maskz_sra_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
  7930. {
  7931. return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
  7932. (__v4si) __B,
  7933. (__v8si)
  7934. _mm256_setzero_si256 (),
  7935. (__mmask8) __U);
  7936. }
  7937. extern __inline __m128i
  7938. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7939. _mm_mask_sra_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  7940. __m128i __B)
  7941. {
  7942. return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
  7943. (__v4si) __B,
  7944. (__v4si) __W,
  7945. (__mmask8) __U);
  7946. }
  7947. extern __inline __m128i
  7948. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7949. _mm_maskz_sra_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
  7950. {
  7951. return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
  7952. (__v4si) __B,
  7953. (__v4si)
  7954. _mm_setzero_si128 (),
  7955. (__mmask8) __U);
  7956. }
  7957. extern __inline __m256i
  7958. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7959. _mm256_sra_epi64 (__m256i __A, __m128i __B)
  7960. {
  7961. return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
  7962. (__v2di) __B,
  7963. (__v4di)
  7964. _mm256_setzero_si256 (),
  7965. (__mmask8) -1);
  7966. }
  7967. extern __inline __m256i
  7968. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7969. _mm256_mask_sra_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  7970. __m128i __B)
  7971. {
  7972. return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
  7973. (__v2di) __B,
  7974. (__v4di) __W,
  7975. (__mmask8) __U);
  7976. }
  7977. extern __inline __m256i
  7978. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7979. _mm256_maskz_sra_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
  7980. {
  7981. return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
  7982. (__v2di) __B,
  7983. (__v4di)
  7984. _mm256_setzero_si256 (),
  7985. (__mmask8) __U);
  7986. }
  7987. extern __inline __m128i
  7988. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7989. _mm_sra_epi64 (__m128i __A, __m128i __B)
  7990. {
  7991. return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
  7992. (__v2di) __B,
  7993. (__v2di)
  7994. _mm_setzero_si128 (),
  7995. (__mmask8) -1);
  7996. }
  7997. extern __inline __m128i
  7998. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7999. _mm_mask_sra_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
  8000. __m128i __B)
  8001. {
  8002. return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
  8003. (__v2di) __B,
  8004. (__v2di) __W,
  8005. (__mmask8) __U);
  8006. }
  8007. extern __inline __m128i
  8008. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8009. _mm_maskz_sra_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
  8010. {
  8011. return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
  8012. (__v2di) __B,
  8013. (__v2di)
  8014. _mm_setzero_si128 (),
  8015. (__mmask8) __U);
  8016. }
  8017. extern __inline __m128i
  8018. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8019. _mm_mask_sll_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  8020. __m128i __B)
  8021. {
  8022. return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
  8023. (__v4si) __B,
  8024. (__v4si) __W,
  8025. (__mmask8) __U);
  8026. }
  8027. extern __inline __m128i
  8028. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8029. _mm_maskz_sll_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
  8030. {
  8031. return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
  8032. (__v4si) __B,
  8033. (__v4si)
  8034. _mm_setzero_si128 (),
  8035. (__mmask8) __U);
  8036. }
  8037. extern __inline __m128i
  8038. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8039. _mm_mask_sll_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
  8040. __m128i __B)
  8041. {
  8042. return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
  8043. (__v2di) __B,
  8044. (__v2di) __W,
  8045. (__mmask8) __U);
  8046. }
  8047. extern __inline __m128i
  8048. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8049. _mm_maskz_sll_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
  8050. {
  8051. return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
  8052. (__v2di) __B,
  8053. (__v2di)
  8054. _mm_setzero_si128 (),
  8055. (__mmask8) __U);
  8056. }
  8057. extern __inline __m256i
  8058. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8059. _mm256_mask_sll_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  8060. __m128i __B)
  8061. {
  8062. return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
  8063. (__v4si) __B,
  8064. (__v8si) __W,
  8065. (__mmask8) __U);
  8066. }
  8067. extern __inline __m256i
  8068. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8069. _mm256_maskz_sll_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
  8070. {
  8071. return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
  8072. (__v4si) __B,
  8073. (__v8si)
  8074. _mm256_setzero_si256 (),
  8075. (__mmask8) __U);
  8076. }
  8077. extern __inline __m256i
  8078. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8079. _mm256_mask_sll_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  8080. __m128i __B)
  8081. {
  8082. return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
  8083. (__v2di) __B,
  8084. (__v4di) __W,
  8085. (__mmask8) __U);
  8086. }
  8087. extern __inline __m256i
  8088. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8089. _mm256_maskz_sll_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
  8090. {
  8091. return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
  8092. (__v2di) __B,
  8093. (__v4di)
  8094. _mm256_setzero_si256 (),
  8095. (__mmask8) __U);
  8096. }
  8097. extern __inline __m256
  8098. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8099. _mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X,
  8100. __m256 __Y)
  8101. {
  8102. return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
  8103. (__v8si) __X,
  8104. (__v8sf) __W,
  8105. (__mmask8) __U);
  8106. }
  8107. extern __inline __m256
  8108. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8109. _mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y)
  8110. {
  8111. return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
  8112. (__v8si) __X,
  8113. (__v8sf)
  8114. _mm256_setzero_ps (),
  8115. (__mmask8) __U);
  8116. }
  8117. extern __inline __m256d
  8118. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8119. _mm256_permutexvar_pd (__m256i __X, __m256d __Y)
  8120. {
  8121. return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
  8122. (__v4di) __X,
  8123. (__v4df)
  8124. _mm256_setzero_pd (),
  8125. (__mmask8) -1);
  8126. }
  8127. extern __inline __m256d
  8128. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8129. _mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
  8130. __m256d __Y)
  8131. {
  8132. return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
  8133. (__v4di) __X,
  8134. (__v4df) __W,
  8135. (__mmask8) __U);
  8136. }
  8137. extern __inline __m256d
  8138. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8139. _mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
  8140. {
  8141. return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
  8142. (__v4di) __X,
  8143. (__v4df)
  8144. _mm256_setzero_pd (),
  8145. (__mmask8) __U);
  8146. }
  8147. extern __inline __m256d
  8148. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8149. _mm256_mask_permutevar_pd (__m256d __W, __mmask8 __U, __m256d __A,
  8150. __m256i __C)
  8151. {
  8152. return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
  8153. (__v4di) __C,
  8154. (__v4df) __W,
  8155. (__mmask8)
  8156. __U);
  8157. }
  8158. extern __inline __m256d
  8159. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8160. _mm256_maskz_permutevar_pd (__mmask8 __U, __m256d __A, __m256i __C)
  8161. {
  8162. return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
  8163. (__v4di) __C,
  8164. (__v4df)
  8165. _mm256_setzero_pd (),
  8166. (__mmask8)
  8167. __U);
  8168. }
  8169. extern __inline __m256
  8170. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8171. _mm256_mask_permutevar_ps (__m256 __W, __mmask8 __U, __m256 __A,
  8172. __m256i __C)
  8173. {
  8174. return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
  8175. (__v8si) __C,
  8176. (__v8sf) __W,
  8177. (__mmask8) __U);
  8178. }
  8179. extern __inline __m256
  8180. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8181. _mm256_maskz_permutevar_ps (__mmask8 __U, __m256 __A, __m256i __C)
  8182. {
  8183. return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
  8184. (__v8si) __C,
  8185. (__v8sf)
  8186. _mm256_setzero_ps (),
  8187. (__mmask8) __U);
  8188. }
  8189. extern __inline __m128d
  8190. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8191. _mm_mask_permutevar_pd (__m128d __W, __mmask8 __U, __m128d __A,
  8192. __m128i __C)
  8193. {
  8194. return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
  8195. (__v2di) __C,
  8196. (__v2df) __W,
  8197. (__mmask8) __U);
  8198. }
  8199. extern __inline __m128d
  8200. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8201. _mm_maskz_permutevar_pd (__mmask8 __U, __m128d __A, __m128i __C)
  8202. {
  8203. return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
  8204. (__v2di) __C,
  8205. (__v2df)
  8206. _mm_setzero_pd (),
  8207. (__mmask8) __U);
  8208. }
  8209. extern __inline __m128
  8210. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8211. _mm_mask_permutevar_ps (__m128 __W, __mmask8 __U, __m128 __A,
  8212. __m128i __C)
  8213. {
  8214. return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
  8215. (__v4si) __C,
  8216. (__v4sf) __W,
  8217. (__mmask8) __U);
  8218. }
  8219. extern __inline __m128
  8220. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8221. _mm_maskz_permutevar_ps (__mmask8 __U, __m128 __A, __m128i __C)
  8222. {
  8223. return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
  8224. (__v4si) __C,
  8225. (__v4sf)
  8226. _mm_setzero_ps (),
  8227. (__mmask8) __U);
  8228. }
  8229. extern __inline __m256i
  8230. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8231. _mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
  8232. {
  8233. return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
  8234. (__v8si) __B,
  8235. (__v8si)
  8236. _mm256_setzero_si256 (),
  8237. __M);
  8238. }
  8239. extern __inline __m256i
  8240. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8241. _mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
  8242. {
  8243. return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
  8244. (__v4di) __X,
  8245. (__v4di)
  8246. _mm256_setzero_si256 (),
  8247. __M);
  8248. }
  8249. extern __inline __m256i
  8250. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8251. _mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
  8252. __m256i __B)
  8253. {
  8254. return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
  8255. (__v8si) __B,
  8256. (__v8si) __W, __M);
  8257. }
  8258. extern __inline __m128i
  8259. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8260. _mm_maskz_mullo_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
  8261. {
  8262. return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
  8263. (__v4si) __B,
  8264. (__v4si)
  8265. _mm_setzero_si128 (),
  8266. __M);
  8267. }
  8268. extern __inline __m128i
  8269. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8270. _mm_mask_mullo_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
  8271. __m128i __B)
  8272. {
  8273. return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
  8274. (__v4si) __B,
  8275. (__v4si) __W, __M);
  8276. }
  8277. extern __inline __m256i
  8278. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8279. _mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
  8280. __m256i __Y)
  8281. {
  8282. return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
  8283. (__v8si) __Y,
  8284. (__v4di) __W, __M);
  8285. }
  8286. extern __inline __m256i
  8287. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8288. _mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
  8289. {
  8290. return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
  8291. (__v8si) __Y,
  8292. (__v4di)
  8293. _mm256_setzero_si256 (),
  8294. __M);
  8295. }
  8296. extern __inline __m128i
  8297. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8298. _mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X,
  8299. __m128i __Y)
  8300. {
  8301. return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
  8302. (__v4si) __Y,
  8303. (__v2di) __W, __M);
  8304. }
  8305. extern __inline __m128i
  8306. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8307. _mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y)
  8308. {
  8309. return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
  8310. (__v4si) __Y,
  8311. (__v2di)
  8312. _mm_setzero_si128 (),
  8313. __M);
  8314. }
  8315. extern __inline __m256i
  8316. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8317. _mm256_permutexvar_epi64 (__m256i __X, __m256i __Y)
  8318. {
  8319. return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
  8320. (__v4di) __X,
  8321. (__v4di)
  8322. _mm256_setzero_si256 (),
  8323. (__mmask8) -1);
  8324. }
  8325. extern __inline __m256i
  8326. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8327. _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
  8328. __m256i __Y)
  8329. {
  8330. return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
  8331. (__v4di) __X,
  8332. (__v4di) __W,
  8333. __M);
  8334. }
  8335. extern __inline __m256i
  8336. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8337. _mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X,
  8338. __m256i __Y)
  8339. {
  8340. return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
  8341. (__v8si) __Y,
  8342. (__v4di) __W, __M);
  8343. }
  8344. extern __inline __m256i
  8345. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8346. _mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
  8347. {
  8348. return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
  8349. (__v8si) __X,
  8350. (__v8si)
  8351. _mm256_setzero_si256 (),
  8352. __M);
  8353. }
  8354. extern __inline __m256i
  8355. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8356. _mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y)
  8357. {
  8358. return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
  8359. (__v8si) __Y,
  8360. (__v4di)
  8361. _mm256_setzero_si256 (),
  8362. __M);
  8363. }
  8364. extern __inline __m128i
  8365. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8366. _mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X,
  8367. __m128i __Y)
  8368. {
  8369. return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
  8370. (__v4si) __Y,
  8371. (__v2di) __W, __M);
  8372. }
  8373. extern __inline __m128i
  8374. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8375. _mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y)
  8376. {
  8377. return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
  8378. (__v4si) __Y,
  8379. (__v2di)
  8380. _mm_setzero_si128 (),
  8381. __M);
  8382. }
  8383. extern __inline __m256i
  8384. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8385. _mm256_permutexvar_epi32 (__m256i __X, __m256i __Y)
  8386. {
  8387. return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
  8388. (__v8si) __X,
  8389. (__v8si)
  8390. _mm256_setzero_si256 (),
  8391. (__mmask8) -1);
  8392. }
  8393. extern __inline __m256i
  8394. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8395. _mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
  8396. __m256i __Y)
  8397. {
  8398. return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
  8399. (__v8si) __X,
  8400. (__v8si) __W,
  8401. __M);
  8402. }
  8403. extern __inline __mmask8
  8404. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8405. _mm256_mask_cmpneq_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
  8406. {
  8407. return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
  8408. (__v8si) __Y, 4,
  8409. (__mmask8) __M);
  8410. }
  8411. extern __inline __mmask8
  8412. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8413. _mm256_cmpneq_epu32_mask (__m256i __X, __m256i __Y)
  8414. {
  8415. return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
  8416. (__v8si) __Y, 4,
  8417. (__mmask8) -1);
  8418. }
  8419. extern __inline __mmask8
  8420. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8421. _mm256_mask_cmplt_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
  8422. {
  8423. return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
  8424. (__v8si) __Y, 1,
  8425. (__mmask8) __M);
  8426. }
  8427. extern __inline __mmask8
  8428. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8429. _mm256_cmplt_epu32_mask (__m256i __X, __m256i __Y)
  8430. {
  8431. return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
  8432. (__v8si) __Y, 1,
  8433. (__mmask8) -1);
  8434. }
  8435. extern __inline __mmask8
  8436. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8437. _mm256_mask_cmpge_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
  8438. {
  8439. return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
  8440. (__v8si) __Y, 5,
  8441. (__mmask8) __M);
  8442. }
  8443. extern __inline __mmask8
  8444. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8445. _mm256_cmpge_epu32_mask (__m256i __X, __m256i __Y)
  8446. {
  8447. return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
  8448. (__v8si) __Y, 5,
  8449. (__mmask8) -1);
  8450. }
  8451. extern __inline __mmask8
  8452. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8453. _mm256_mask_cmple_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
  8454. {
  8455. return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
  8456. (__v8si) __Y, 2,
  8457. (__mmask8) __M);
  8458. }
  8459. extern __inline __mmask8
  8460. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8461. _mm256_cmple_epu32_mask (__m256i __X, __m256i __Y)
  8462. {
  8463. return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
  8464. (__v8si) __Y, 2,
  8465. (__mmask8) -1);
  8466. }
  8467. extern __inline __mmask8
  8468. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8469. _mm256_mask_cmpneq_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
  8470. {
  8471. return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
  8472. (__v4di) __Y, 4,
  8473. (__mmask8) __M);
  8474. }
  8475. extern __inline __mmask8
  8476. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8477. _mm256_cmpneq_epu64_mask (__m256i __X, __m256i __Y)
  8478. {
  8479. return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
  8480. (__v4di) __Y, 4,
  8481. (__mmask8) -1);
  8482. }
  8483. extern __inline __mmask8
  8484. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8485. _mm256_mask_cmplt_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
  8486. {
  8487. return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
  8488. (__v4di) __Y, 1,
  8489. (__mmask8) __M);
  8490. }
  8491. extern __inline __mmask8
  8492. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8493. _mm256_cmplt_epu64_mask (__m256i __X, __m256i __Y)
  8494. {
  8495. return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
  8496. (__v4di) __Y, 1,
  8497. (__mmask8) -1);
  8498. }
  8499. extern __inline __mmask8
  8500. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8501. _mm256_mask_cmpge_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
  8502. {
  8503. return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
  8504. (__v4di) __Y, 5,
  8505. (__mmask8) __M);
  8506. }
  8507. extern __inline __mmask8
  8508. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8509. _mm256_cmpge_epu64_mask (__m256i __X, __m256i __Y)
  8510. {
  8511. return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
  8512. (__v4di) __Y, 5,
  8513. (__mmask8) -1);
  8514. }
  8515. extern __inline __mmask8
  8516. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8517. _mm256_mask_cmple_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
  8518. {
  8519. return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
  8520. (__v4di) __Y, 2,
  8521. (__mmask8) __M);
  8522. }
  8523. extern __inline __mmask8
  8524. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8525. _mm256_cmple_epu64_mask (__m256i __X, __m256i __Y)
  8526. {
  8527. return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
  8528. (__v4di) __Y, 2,
  8529. (__mmask8) -1);
  8530. }
  8531. extern __inline __mmask8
  8532. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8533. _mm256_mask_cmpneq_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
  8534. {
  8535. return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
  8536. (__v8si) __Y, 4,
  8537. (__mmask8) __M);
  8538. }
  8539. extern __inline __mmask8
  8540. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8541. _mm256_cmpneq_epi32_mask (__m256i __X, __m256i __Y)
  8542. {
  8543. return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
  8544. (__v8si) __Y, 4,
  8545. (__mmask8) -1);
  8546. }
  8547. extern __inline __mmask8
  8548. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8549. _mm256_mask_cmplt_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
  8550. {
  8551. return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
  8552. (__v8si) __Y, 1,
  8553. (__mmask8) __M);
  8554. }
  8555. extern __inline __mmask8
  8556. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8557. _mm256_cmplt_epi32_mask (__m256i __X, __m256i __Y)
  8558. {
  8559. return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
  8560. (__v8si) __Y, 1,
  8561. (__mmask8) -1);
  8562. }
  8563. extern __inline __mmask8
  8564. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8565. _mm256_mask_cmpge_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
  8566. {
  8567. return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
  8568. (__v8si) __Y, 5,
  8569. (__mmask8) __M);
  8570. }
  8571. extern __inline __mmask8
  8572. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8573. _mm256_cmpge_epi32_mask (__m256i __X, __m256i __Y)
  8574. {
  8575. return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
  8576. (__v8si) __Y, 5,
  8577. (__mmask8) -1);
  8578. }
  8579. extern __inline __mmask8
  8580. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8581. _mm256_mask_cmple_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
  8582. {
  8583. return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
  8584. (__v8si) __Y, 2,
  8585. (__mmask8) __M);
  8586. }
  8587. extern __inline __mmask8
  8588. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8589. _mm256_cmple_epi32_mask (__m256i __X, __m256i __Y)
  8590. {
  8591. return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
  8592. (__v8si) __Y, 2,
  8593. (__mmask8) -1);
  8594. }
  8595. extern __inline __mmask8
  8596. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8597. _mm256_mask_cmpneq_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
  8598. {
  8599. return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
  8600. (__v4di) __Y, 4,
  8601. (__mmask8) __M);
  8602. }
  8603. extern __inline __mmask8
  8604. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8605. _mm256_cmpneq_epi64_mask (__m256i __X, __m256i __Y)
  8606. {
  8607. return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
  8608. (__v4di) __Y, 4,
  8609. (__mmask8) -1);
  8610. }
  8611. extern __inline __mmask8
  8612. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8613. _mm256_mask_cmplt_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
  8614. {
  8615. return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
  8616. (__v4di) __Y, 1,
  8617. (__mmask8) __M);
  8618. }
  8619. extern __inline __mmask8
  8620. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8621. _mm256_cmplt_epi64_mask (__m256i __X, __m256i __Y)
  8622. {
  8623. return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
  8624. (__v4di) __Y, 1,
  8625. (__mmask8) -1);
  8626. }
  8627. extern __inline __mmask8
  8628. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8629. _mm256_mask_cmpge_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
  8630. {
  8631. return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
  8632. (__v4di) __Y, 5,
  8633. (__mmask8) __M);
  8634. }
  8635. extern __inline __mmask8
  8636. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8637. _mm256_cmpge_epi64_mask (__m256i __X, __m256i __Y)
  8638. {
  8639. return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
  8640. (__v4di) __Y, 5,
  8641. (__mmask8) -1);
  8642. }
  8643. extern __inline __mmask8
  8644. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8645. _mm256_mask_cmple_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
  8646. {
  8647. return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
  8648. (__v4di) __Y, 2,
  8649. (__mmask8) __M);
  8650. }
  8651. extern __inline __mmask8
  8652. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8653. _mm256_cmple_epi64_mask (__m256i __X, __m256i __Y)
  8654. {
  8655. return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
  8656. (__v4di) __Y, 2,
  8657. (__mmask8) -1);
  8658. }
  8659. extern __inline __mmask8
  8660. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8661. _mm_mask_cmpneq_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
  8662. {
  8663. return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
  8664. (__v4si) __Y, 4,
  8665. (__mmask8) __M);
  8666. }
  8667. extern __inline __mmask8
  8668. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8669. _mm_cmpneq_epu32_mask (__m128i __X, __m128i __Y)
  8670. {
  8671. return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
  8672. (__v4si) __Y, 4,
  8673. (__mmask8) -1);
  8674. }
  8675. extern __inline __mmask8
  8676. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8677. _mm_mask_cmplt_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
  8678. {
  8679. return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
  8680. (__v4si) __Y, 1,
  8681. (__mmask8) __M);
  8682. }
  8683. extern __inline __mmask8
  8684. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8685. _mm_cmplt_epu32_mask (__m128i __X, __m128i __Y)
  8686. {
  8687. return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
  8688. (__v4si) __Y, 1,
  8689. (__mmask8) -1);
  8690. }
  8691. extern __inline __mmask8
  8692. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8693. _mm_mask_cmpge_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
  8694. {
  8695. return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
  8696. (__v4si) __Y, 5,
  8697. (__mmask8) __M);
  8698. }
  8699. extern __inline __mmask8
  8700. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8701. _mm_cmpge_epu32_mask (__m128i __X, __m128i __Y)
  8702. {
  8703. return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
  8704. (__v4si) __Y, 5,
  8705. (__mmask8) -1);
  8706. }
  8707. extern __inline __mmask8
  8708. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8709. _mm_mask_cmple_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
  8710. {
  8711. return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
  8712. (__v4si) __Y, 2,
  8713. (__mmask8) __M);
  8714. }
  8715. extern __inline __mmask8
  8716. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8717. _mm_cmple_epu32_mask (__m128i __X, __m128i __Y)
  8718. {
  8719. return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
  8720. (__v4si) __Y, 2,
  8721. (__mmask8) -1);
  8722. }
  8723. extern __inline __mmask8
  8724. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8725. _mm_mask_cmpneq_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
  8726. {
  8727. return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
  8728. (__v2di) __Y, 4,
  8729. (__mmask8) __M);
  8730. }
  8731. extern __inline __mmask8
  8732. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8733. _mm_cmpneq_epu64_mask (__m128i __X, __m128i __Y)
  8734. {
  8735. return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
  8736. (__v2di) __Y, 4,
  8737. (__mmask8) -1);
  8738. }
  8739. extern __inline __mmask8
  8740. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8741. _mm_mask_cmplt_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
  8742. {
  8743. return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
  8744. (__v2di) __Y, 1,
  8745. (__mmask8) __M);
  8746. }
  8747. extern __inline __mmask8
  8748. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8749. _mm_cmplt_epu64_mask (__m128i __X, __m128i __Y)
  8750. {
  8751. return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
  8752. (__v2di) __Y, 1,
  8753. (__mmask8) -1);
  8754. }
  8755. extern __inline __mmask8
  8756. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8757. _mm_mask_cmpge_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
  8758. {
  8759. return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
  8760. (__v2di) __Y, 5,
  8761. (__mmask8) __M);
  8762. }
  8763. extern __inline __mmask8
  8764. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8765. _mm_cmpge_epu64_mask (__m128i __X, __m128i __Y)
  8766. {
  8767. return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
  8768. (__v2di) __Y, 5,
  8769. (__mmask8) -1);
  8770. }
  8771. extern __inline __mmask8
  8772. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8773. _mm_mask_cmple_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
  8774. {
  8775. return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
  8776. (__v2di) __Y, 2,
  8777. (__mmask8) __M);
  8778. }
  8779. extern __inline __mmask8
  8780. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8781. _mm_cmple_epu64_mask (__m128i __X, __m128i __Y)
  8782. {
  8783. return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
  8784. (__v2di) __Y, 2,
  8785. (__mmask8) -1);
  8786. }
  8787. extern __inline __mmask8
  8788. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8789. _mm_mask_cmpneq_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
  8790. {
  8791. return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
  8792. (__v4si) __Y, 4,
  8793. (__mmask8) __M);
  8794. }
  8795. extern __inline __mmask8
  8796. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8797. _mm_cmpneq_epi32_mask (__m128i __X, __m128i __Y)
  8798. {
  8799. return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
  8800. (__v4si) __Y, 4,
  8801. (__mmask8) -1);
  8802. }
  8803. extern __inline __mmask8
  8804. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8805. _mm_mask_cmplt_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
  8806. {
  8807. return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
  8808. (__v4si) __Y, 1,
  8809. (__mmask8) __M);
  8810. }
  8811. extern __inline __mmask8
  8812. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8813. _mm_cmplt_epi32_mask (__m128i __X, __m128i __Y)
  8814. {
  8815. return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
  8816. (__v4si) __Y, 1,
  8817. (__mmask8) -1);
  8818. }
  8819. extern __inline __mmask8
  8820. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8821. _mm_mask_cmpge_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
  8822. {
  8823. return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
  8824. (__v4si) __Y, 5,
  8825. (__mmask8) __M);
  8826. }
  8827. extern __inline __mmask8
  8828. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8829. _mm_cmpge_epi32_mask (__m128i __X, __m128i __Y)
  8830. {
  8831. return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
  8832. (__v4si) __Y, 5,
  8833. (__mmask8) -1);
  8834. }
  8835. extern __inline __mmask8
  8836. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8837. _mm_mask_cmple_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
  8838. {
  8839. return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
  8840. (__v4si) __Y, 2,
  8841. (__mmask8) __M);
  8842. }
  8843. extern __inline __mmask8
  8844. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8845. _mm_cmple_epi32_mask (__m128i __X, __m128i __Y)
  8846. {
  8847. return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
  8848. (__v4si) __Y, 2,
  8849. (__mmask8) -1);
  8850. }
  8851. extern __inline __mmask8
  8852. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8853. _mm_mask_cmpneq_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
  8854. {
  8855. return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
  8856. (__v2di) __Y, 4,
  8857. (__mmask8) __M);
  8858. }
  8859. extern __inline __mmask8
  8860. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8861. _mm_cmpneq_epi64_mask (__m128i __X, __m128i __Y)
  8862. {
  8863. return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
  8864. (__v2di) __Y, 4,
  8865. (__mmask8) -1);
  8866. }
  8867. extern __inline __mmask8
  8868. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8869. _mm_mask_cmplt_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
  8870. {
  8871. return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
  8872. (__v2di) __Y, 1,
  8873. (__mmask8) __M);
  8874. }
  8875. extern __inline __mmask8
  8876. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8877. _mm_cmplt_epi64_mask (__m128i __X, __m128i __Y)
  8878. {
  8879. return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
  8880. (__v2di) __Y, 1,
  8881. (__mmask8) -1);
  8882. }
  8883. extern __inline __mmask8
  8884. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8885. _mm_mask_cmpge_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
  8886. {
  8887. return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
  8888. (__v2di) __Y, 5,
  8889. (__mmask8) __M);
  8890. }
  8891. extern __inline __mmask8
  8892. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8893. _mm_cmpge_epi64_mask (__m128i __X, __m128i __Y)
  8894. {
  8895. return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
  8896. (__v2di) __Y, 5,
  8897. (__mmask8) -1);
  8898. }
  8899. extern __inline __mmask8
  8900. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8901. _mm_mask_cmple_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
  8902. {
  8903. return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
  8904. (__v2di) __Y, 2,
  8905. (__mmask8) __M);
  8906. }
  8907. extern __inline __mmask8
  8908. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8909. _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
  8910. {
  8911. return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
  8912. (__v2di) __Y, 2,
  8913. (__mmask8) -1);
  8914. }
  8915. #ifdef __OPTIMIZE__
  8916. extern __inline __m256i
  8917. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8918. _mm256_permutex_epi64 (__m256i __X, const int __I)
  8919. {
  8920. return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
  8921. __I,
  8922. (__v4di)
  8923. _mm256_setzero_si256(),
  8924. (__mmask8) -1);
  8925. }
  8926. extern __inline __m256i
  8927. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8928. _mm256_mask_permutex_epi64 (__m256i __W, __mmask8 __M,
  8929. __m256i __X, const int __I)
  8930. {
  8931. return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
  8932. __I,
  8933. (__v4di) __W,
  8934. (__mmask8) __M);
  8935. }
  8936. extern __inline __m256i
  8937. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8938. _mm256_maskz_permutex_epi64 (__mmask8 __M, __m256i __X, const int __I)
  8939. {
  8940. return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
  8941. __I,
  8942. (__v4di)
  8943. _mm256_setzero_si256 (),
  8944. (__mmask8) __M);
  8945. }
  8946. extern __inline __m256d
  8947. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8948. _mm256_mask_shuffle_pd (__m256d __W, __mmask8 __U, __m256d __A,
  8949. __m256d __B, const int __imm)
  8950. {
  8951. return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
  8952. (__v4df) __B, __imm,
  8953. (__v4df) __W,
  8954. (__mmask8) __U);
  8955. }
  8956. extern __inline __m256d
  8957. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8958. _mm256_maskz_shuffle_pd (__mmask8 __U, __m256d __A, __m256d __B,
  8959. const int __imm)
  8960. {
  8961. return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
  8962. (__v4df) __B, __imm,
  8963. (__v4df)
  8964. _mm256_setzero_pd (),
  8965. (__mmask8) __U);
  8966. }
  8967. extern __inline __m128d
  8968. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8969. _mm_mask_shuffle_pd (__m128d __W, __mmask8 __U, __m128d __A,
  8970. __m128d __B, const int __imm)
  8971. {
  8972. return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
  8973. (__v2df) __B, __imm,
  8974. (__v2df) __W,
  8975. (__mmask8) __U);
  8976. }
  8977. extern __inline __m128d
  8978. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8979. _mm_maskz_shuffle_pd (__mmask8 __U, __m128d __A, __m128d __B,
  8980. const int __imm)
  8981. {
  8982. return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
  8983. (__v2df) __B, __imm,
  8984. (__v2df)
  8985. _mm_setzero_pd (),
  8986. (__mmask8) __U);
  8987. }
  8988. extern __inline __m256
  8989. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8990. _mm256_mask_shuffle_ps (__m256 __W, __mmask8 __U, __m256 __A,
  8991. __m256 __B, const int __imm)
  8992. {
  8993. return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
  8994. (__v8sf) __B, __imm,
  8995. (__v8sf) __W,
  8996. (__mmask8) __U);
  8997. }
  8998. extern __inline __m256
  8999. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9000. _mm256_maskz_shuffle_ps (__mmask8 __U, __m256 __A, __m256 __B,
  9001. const int __imm)
  9002. {
  9003. return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
  9004. (__v8sf) __B, __imm,
  9005. (__v8sf)
  9006. _mm256_setzero_ps (),
  9007. (__mmask8) __U);
  9008. }
  9009. extern __inline __m128
  9010. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9011. _mm_mask_shuffle_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
  9012. const int __imm)
  9013. {
  9014. return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
  9015. (__v4sf) __B, __imm,
  9016. (__v4sf) __W,
  9017. (__mmask8) __U);
  9018. }
  9019. extern __inline __m128
  9020. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9021. _mm_maskz_shuffle_ps (__mmask8 __U, __m128 __A, __m128 __B,
  9022. const int __imm)
  9023. {
  9024. return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
  9025. (__v4sf) __B, __imm,
  9026. (__v4sf)
  9027. _mm_setzero_ps (),
  9028. (__mmask8) __U);
  9029. }
  9030. extern __inline __m256i
  9031. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9032. _mm256_inserti32x4 (__m256i __A, __m128i __B, const int __imm)
  9033. {
  9034. return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
  9035. (__v4si) __B,
  9036. __imm,
  9037. (__v8si)
  9038. _mm256_setzero_si256 (),
  9039. (__mmask8) -1);
  9040. }
  9041. extern __inline __m256i
  9042. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9043. _mm256_mask_inserti32x4 (__m256i __W, __mmask8 __U, __m256i __A,
  9044. __m128i __B, const int __imm)
  9045. {
  9046. return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
  9047. (__v4si) __B,
  9048. __imm,
  9049. (__v8si) __W,
  9050. (__mmask8)
  9051. __U);
  9052. }
  9053. extern __inline __m256i
  9054. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9055. _mm256_maskz_inserti32x4 (__mmask8 __U, __m256i __A, __m128i __B,
  9056. const int __imm)
  9057. {
  9058. return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
  9059. (__v4si) __B,
  9060. __imm,
  9061. (__v8si)
  9062. _mm256_setzero_si256 (),
  9063. (__mmask8)
  9064. __U);
  9065. }
  9066. extern __inline __m256
  9067. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9068. _mm256_insertf32x4 (__m256 __A, __m128 __B, const int __imm)
  9069. {
  9070. return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
  9071. (__v4sf) __B,
  9072. __imm,
  9073. (__v8sf)
  9074. _mm256_setzero_ps (),
  9075. (__mmask8) -1);
  9076. }
  9077. extern __inline __m256
  9078. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9079. _mm256_mask_insertf32x4 (__m256 __W, __mmask8 __U, __m256 __A,
  9080. __m128 __B, const int __imm)
  9081. {
  9082. return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
  9083. (__v4sf) __B,
  9084. __imm,
  9085. (__v8sf) __W,
  9086. (__mmask8) __U);
  9087. }
  9088. extern __inline __m256
  9089. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9090. _mm256_maskz_insertf32x4 (__mmask8 __U, __m256 __A, __m128 __B,
  9091. const int __imm)
  9092. {
  9093. return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
  9094. (__v4sf) __B,
  9095. __imm,
  9096. (__v8sf)
  9097. _mm256_setzero_ps (),
  9098. (__mmask8) __U);
  9099. }
  9100. extern __inline __m128i
  9101. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9102. _mm256_extracti32x4_epi32 (__m256i __A, const int __imm)
  9103. {
  9104. return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
  9105. __imm,
  9106. (__v4si)
  9107. _mm_setzero_si128 (),
  9108. (__mmask8) -1);
  9109. }
  9110. extern __inline __m128i
  9111. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9112. _mm256_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m256i __A,
  9113. const int __imm)
  9114. {
  9115. return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
  9116. __imm,
  9117. (__v4si) __W,
  9118. (__mmask8)
  9119. __U);
  9120. }
  9121. extern __inline __m128i
  9122. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9123. _mm256_maskz_extracti32x4_epi32 (__mmask8 __U, __m256i __A,
  9124. const int __imm)
  9125. {
  9126. return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
  9127. __imm,
  9128. (__v4si)
  9129. _mm_setzero_si128 (),
  9130. (__mmask8)
  9131. __U);
  9132. }
  9133. extern __inline __m128
  9134. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9135. _mm256_extractf32x4_ps (__m256 __A, const int __imm)
  9136. {
  9137. return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
  9138. __imm,
  9139. (__v4sf)
  9140. _mm_setzero_ps (),
  9141. (__mmask8) -1);
  9142. }
  9143. extern __inline __m128
  9144. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9145. _mm256_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m256 __A,
  9146. const int __imm)
  9147. {
  9148. return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
  9149. __imm,
  9150. (__v4sf) __W,
  9151. (__mmask8)
  9152. __U);
  9153. }
  9154. extern __inline __m128
  9155. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9156. _mm256_maskz_extractf32x4_ps (__mmask8 __U, __m256 __A,
  9157. const int __imm)
  9158. {
  9159. return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
  9160. __imm,
  9161. (__v4sf)
  9162. _mm_setzero_ps (),
  9163. (__mmask8)
  9164. __U);
  9165. }
  9166. extern __inline __m256i
  9167. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9168. _mm256_shuffle_i64x2 (__m256i __A, __m256i __B, const int __imm)
  9169. {
  9170. return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
  9171. (__v4di) __B,
  9172. __imm,
  9173. (__v4di)
  9174. _mm256_setzero_si256 (),
  9175. (__mmask8) -1);
  9176. }
  9177. extern __inline __m256i
  9178. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9179. _mm256_mask_shuffle_i64x2 (__m256i __W, __mmask8 __U, __m256i __A,
  9180. __m256i __B, const int __imm)
  9181. {
  9182. return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
  9183. (__v4di) __B,
  9184. __imm,
  9185. (__v4di) __W,
  9186. (__mmask8) __U);
  9187. }
  9188. extern __inline __m256i
  9189. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9190. _mm256_maskz_shuffle_i64x2 (__mmask8 __U, __m256i __A, __m256i __B,
  9191. const int __imm)
  9192. {
  9193. return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
  9194. (__v4di) __B,
  9195. __imm,
  9196. (__v4di)
  9197. _mm256_setzero_si256 (),
  9198. (__mmask8) __U);
  9199. }
  9200. extern __inline __m256i
  9201. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9202. _mm256_shuffle_i32x4 (__m256i __A, __m256i __B, const int __imm)
  9203. {
  9204. return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
  9205. (__v8si) __B,
  9206. __imm,
  9207. (__v8si)
  9208. _mm256_setzero_si256 (),
  9209. (__mmask8) -1);
  9210. }
  9211. extern __inline __m256i
  9212. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9213. _mm256_mask_shuffle_i32x4 (__m256i __W, __mmask8 __U, __m256i __A,
  9214. __m256i __B, const int __imm)
  9215. {
  9216. return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
  9217. (__v8si) __B,
  9218. __imm,
  9219. (__v8si) __W,
  9220. (__mmask8) __U);
  9221. }
  9222. extern __inline __m256i
  9223. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9224. _mm256_maskz_shuffle_i32x4 (__mmask8 __U, __m256i __A, __m256i __B,
  9225. const int __imm)
  9226. {
  9227. return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
  9228. (__v8si) __B,
  9229. __imm,
  9230. (__v8si)
  9231. _mm256_setzero_si256 (),
  9232. (__mmask8) __U);
  9233. }
  9234. extern __inline __m256d
  9235. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9236. _mm256_shuffle_f64x2 (__m256d __A, __m256d __B, const int __imm)
  9237. {
  9238. return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
  9239. (__v4df) __B,
  9240. __imm,
  9241. (__v4df)
  9242. _mm256_setzero_pd (),
  9243. (__mmask8) -1);
  9244. }
  9245. extern __inline __m256d
  9246. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9247. _mm256_mask_shuffle_f64x2 (__m256d __W, __mmask8 __U, __m256d __A,
  9248. __m256d __B, const int __imm)
  9249. {
  9250. return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
  9251. (__v4df) __B,
  9252. __imm,
  9253. (__v4df) __W,
  9254. (__mmask8) __U);
  9255. }
  9256. extern __inline __m256d
  9257. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9258. _mm256_maskz_shuffle_f64x2 (__mmask8 __U, __m256d __A, __m256d __B,
  9259. const int __imm)
  9260. {
  9261. return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
  9262. (__v4df) __B,
  9263. __imm,
  9264. (__v4df)
  9265. _mm256_setzero_pd (),
  9266. (__mmask8) __U);
  9267. }
  9268. extern __inline __m256
  9269. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9270. _mm256_shuffle_f32x4 (__m256 __A, __m256 __B, const int __imm)
  9271. {
  9272. return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
  9273. (__v8sf) __B,
  9274. __imm,
  9275. (__v8sf)
  9276. _mm256_setzero_ps (),
  9277. (__mmask8) -1);
  9278. }
  9279. extern __inline __m256
  9280. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9281. _mm256_mask_shuffle_f32x4 (__m256 __W, __mmask8 __U, __m256 __A,
  9282. __m256 __B, const int __imm)
  9283. {
  9284. return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
  9285. (__v8sf) __B,
  9286. __imm,
  9287. (__v8sf) __W,
  9288. (__mmask8) __U);
  9289. }
  9290. extern __inline __m256
  9291. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9292. _mm256_maskz_shuffle_f32x4 (__mmask8 __U, __m256 __A, __m256 __B,
  9293. const int __imm)
  9294. {
  9295. return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
  9296. (__v8sf) __B,
  9297. __imm,
  9298. (__v8sf)
  9299. _mm256_setzero_ps (),
  9300. (__mmask8) __U);
  9301. }
  9302. extern __inline __m256d
  9303. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9304. _mm256_fixupimm_pd (__m256d __A, __m256d __B, __m256i __C,
  9305. const int __imm)
  9306. {
  9307. return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A,
  9308. (__v4df) __B,
  9309. (__v4di) __C,
  9310. __imm,
  9311. (__mmask8) -1);
  9312. }
  9313. extern __inline __m256d
  9314. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9315. _mm256_mask_fixupimm_pd (__m256d __A, __mmask8 __U, __m256d __B,
  9316. __m256i __C, const int __imm)
  9317. {
  9318. return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A,
  9319. (__v4df) __B,
  9320. (__v4di) __C,
  9321. __imm,
  9322. (__mmask8) __U);
  9323. }
  9324. extern __inline __m256d
  9325. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9326. _mm256_maskz_fixupimm_pd (__mmask8 __U, __m256d __A, __m256d __B,
  9327. __m256i __C, const int __imm)
  9328. {
  9329. return (__m256d) __builtin_ia32_fixupimmpd256_maskz ((__v4df) __A,
  9330. (__v4df) __B,
  9331. (__v4di) __C,
  9332. __imm,
  9333. (__mmask8) __U);
  9334. }
  9335. extern __inline __m256
  9336. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9337. _mm256_fixupimm_ps (__m256 __A, __m256 __B, __m256i __C,
  9338. const int __imm)
  9339. {
  9340. return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A,
  9341. (__v8sf) __B,
  9342. (__v8si) __C,
  9343. __imm,
  9344. (__mmask8) -1);
  9345. }
  9346. extern __inline __m256
  9347. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9348. _mm256_mask_fixupimm_ps (__m256 __A, __mmask8 __U, __m256 __B,
  9349. __m256i __C, const int __imm)
  9350. {
  9351. return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A,
  9352. (__v8sf) __B,
  9353. (__v8si) __C,
  9354. __imm,
  9355. (__mmask8) __U);
  9356. }
  9357. extern __inline __m256
  9358. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9359. _mm256_maskz_fixupimm_ps (__mmask8 __U, __m256 __A, __m256 __B,
  9360. __m256i __C, const int __imm)
  9361. {
  9362. return (__m256) __builtin_ia32_fixupimmps256_maskz ((__v8sf) __A,
  9363. (__v8sf) __B,
  9364. (__v8si) __C,
  9365. __imm,
  9366. (__mmask8) __U);
  9367. }
  9368. extern __inline __m128d
  9369. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9370. _mm_fixupimm_pd (__m128d __A, __m128d __B, __m128i __C,
  9371. const int __imm)
  9372. {
  9373. return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A,
  9374. (__v2df) __B,
  9375. (__v2di) __C,
  9376. __imm,
  9377. (__mmask8) -1);
  9378. }
  9379. extern __inline __m128d
  9380. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9381. _mm_mask_fixupimm_pd (__m128d __A, __mmask8 __U, __m128d __B,
  9382. __m128i __C, const int __imm)
  9383. {
  9384. return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A,
  9385. (__v2df) __B,
  9386. (__v2di) __C,
  9387. __imm,
  9388. (__mmask8) __U);
  9389. }
  9390. extern __inline __m128d
  9391. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9392. _mm_maskz_fixupimm_pd (__mmask8 __U, __m128d __A, __m128d __B,
  9393. __m128i __C, const int __imm)
  9394. {
  9395. return (__m128d) __builtin_ia32_fixupimmpd128_maskz ((__v2df) __A,
  9396. (__v2df) __B,
  9397. (__v2di) __C,
  9398. __imm,
  9399. (__mmask8) __U);
  9400. }
  9401. extern __inline __m128
  9402. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9403. _mm_fixupimm_ps (__m128 __A, __m128 __B, __m128i __C, const int __imm)
  9404. {
  9405. return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A,
  9406. (__v4sf) __B,
  9407. (__v4si) __C,
  9408. __imm,
  9409. (__mmask8) -1);
  9410. }
  9411. extern __inline __m128
  9412. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9413. _mm_mask_fixupimm_ps (__m128 __A, __mmask8 __U, __m128 __B,
  9414. __m128i __C, const int __imm)
  9415. {
  9416. return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A,
  9417. (__v4sf) __B,
  9418. (__v4si) __C,
  9419. __imm,
  9420. (__mmask8) __U);
  9421. }
  9422. extern __inline __m128
  9423. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9424. _mm_maskz_fixupimm_ps (__mmask8 __U, __m128 __A, __m128 __B,
  9425. __m128i __C, const int __imm)
  9426. {
  9427. return (__m128) __builtin_ia32_fixupimmps128_maskz ((__v4sf) __A,
  9428. (__v4sf) __B,
  9429. (__v4si) __C,
  9430. __imm,
  9431. (__mmask8) __U);
  9432. }
  9433. extern __inline __m256i
  9434. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9435. _mm256_mask_srli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  9436. const int __imm)
  9437. {
  9438. return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
  9439. (__v8si) __W,
  9440. (__mmask8) __U);
  9441. }
  9442. extern __inline __m256i
  9443. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9444. _mm256_maskz_srli_epi32 (__mmask8 __U, __m256i __A, const int __imm)
  9445. {
  9446. return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
  9447. (__v8si)
  9448. _mm256_setzero_si256 (),
  9449. (__mmask8) __U);
  9450. }
  9451. extern __inline __m128i
  9452. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9453. _mm_mask_srli_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  9454. const int __imm)
  9455. {
  9456. return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
  9457. (__v4si) __W,
  9458. (__mmask8) __U);
  9459. }
  9460. extern __inline __m128i
  9461. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9462. _mm_maskz_srli_epi32 (__mmask8 __U, __m128i __A, const int __imm)
  9463. {
  9464. return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
  9465. (__v4si)
  9466. _mm_setzero_si128 (),
  9467. (__mmask8) __U);
  9468. }
  9469. extern __inline __m256i
  9470. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9471. _mm256_mask_srli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  9472. const int __imm)
  9473. {
  9474. return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
  9475. (__v4di) __W,
  9476. (__mmask8) __U);
  9477. }
  9478. extern __inline __m256i
  9479. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9480. _mm256_maskz_srli_epi64 (__mmask8 __U, __m256i __A, const int __imm)
  9481. {
  9482. return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
  9483. (__v4di)
  9484. _mm256_setzero_si256 (),
  9485. (__mmask8) __U);
  9486. }
  9487. extern __inline __m128i
  9488. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9489. _mm_mask_srli_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
  9490. const int __imm)
  9491. {
  9492. return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
  9493. (__v2di) __W,
  9494. (__mmask8) __U);
  9495. }
  9496. extern __inline __m128i
  9497. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9498. _mm_maskz_srli_epi64 (__mmask8 __U, __m128i __A, const int __imm)
  9499. {
  9500. return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
  9501. (__v2di)
  9502. _mm_setzero_si128 (),
  9503. (__mmask8) __U);
  9504. }
  9505. extern __inline __m256i
  9506. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9507. _mm256_ternarylogic_epi64 (__m256i __A, __m256i __B, __m256i __C,
  9508. const int __imm)
  9509. {
  9510. return (__m256i)
  9511. __builtin_ia32_pternlogq256_mask ((__v4di) __A,
  9512. (__v4di) __B,
  9513. (__v4di) __C,
  9514. (unsigned char) __imm,
  9515. (__mmask8) -1);
  9516. }
  9517. extern __inline __m256i
  9518. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9519. _mm256_mask_ternarylogic_epi64 (__m256i __A, __mmask8 __U,
  9520. __m256i __B, __m256i __C,
  9521. const int __imm)
  9522. {
  9523. return (__m256i)
  9524. __builtin_ia32_pternlogq256_mask ((__v4di) __A,
  9525. (__v4di) __B,
  9526. (__v4di) __C,
  9527. (unsigned char) __imm,
  9528. (__mmask8) __U);
  9529. }
  9530. extern __inline __m256i
  9531. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9532. _mm256_maskz_ternarylogic_epi64 (__mmask8 __U, __m256i __A,
  9533. __m256i __B, __m256i __C,
  9534. const int __imm)
  9535. {
  9536. return (__m256i)
  9537. __builtin_ia32_pternlogq256_maskz ((__v4di) __A,
  9538. (__v4di) __B,
  9539. (__v4di) __C,
  9540. (unsigned char) __imm,
  9541. (__mmask8) __U);
  9542. }
  9543. extern __inline __m256i
  9544. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9545. _mm256_ternarylogic_epi32 (__m256i __A, __m256i __B, __m256i __C,
  9546. const int __imm)
  9547. {
  9548. return (__m256i)
  9549. __builtin_ia32_pternlogd256_mask ((__v8si) __A,
  9550. (__v8si) __B,
  9551. (__v8si) __C,
  9552. (unsigned char) __imm,
  9553. (__mmask8) -1);
  9554. }
  9555. extern __inline __m256i
  9556. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9557. _mm256_mask_ternarylogic_epi32 (__m256i __A, __mmask8 __U,
  9558. __m256i __B, __m256i __C,
  9559. const int __imm)
  9560. {
  9561. return (__m256i)
  9562. __builtin_ia32_pternlogd256_mask ((__v8si) __A,
  9563. (__v8si) __B,
  9564. (__v8si) __C,
  9565. (unsigned char) __imm,
  9566. (__mmask8) __U);
  9567. }
  9568. extern __inline __m256i
  9569. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9570. _mm256_maskz_ternarylogic_epi32 (__mmask8 __U, __m256i __A,
  9571. __m256i __B, __m256i __C,
  9572. const int __imm)
  9573. {
  9574. return (__m256i)
  9575. __builtin_ia32_pternlogd256_maskz ((__v8si) __A,
  9576. (__v8si) __B,
  9577. (__v8si) __C,
  9578. (unsigned char) __imm,
  9579. (__mmask8) __U);
  9580. }
  9581. extern __inline __m128i
  9582. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9583. _mm_ternarylogic_epi64 (__m128i __A, __m128i __B, __m128i __C,
  9584. const int __imm)
  9585. {
  9586. return (__m128i)
  9587. __builtin_ia32_pternlogq128_mask ((__v2di) __A,
  9588. (__v2di) __B,
  9589. (__v2di) __C,
  9590. (unsigned char) __imm,
  9591. (__mmask8) -1);
  9592. }
  9593. extern __inline __m128i
  9594. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9595. _mm_mask_ternarylogic_epi64 (__m128i __A, __mmask8 __U,
  9596. __m128i __B, __m128i __C,
  9597. const int __imm)
  9598. {
  9599. return (__m128i)
  9600. __builtin_ia32_pternlogq128_mask ((__v2di) __A,
  9601. (__v2di) __B,
  9602. (__v2di) __C,
  9603. (unsigned char) __imm,
  9604. (__mmask8) __U);
  9605. }
  9606. extern __inline __m128i
  9607. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9608. _mm_maskz_ternarylogic_epi64 (__mmask8 __U, __m128i __A,
  9609. __m128i __B, __m128i __C,
  9610. const int __imm)
  9611. {
  9612. return (__m128i)
  9613. __builtin_ia32_pternlogq128_maskz ((__v2di) __A,
  9614. (__v2di) __B,
  9615. (__v2di) __C,
  9616. (unsigned char) __imm,
  9617. (__mmask8) __U);
  9618. }
  9619. extern __inline __m128i
  9620. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9621. _mm_ternarylogic_epi32 (__m128i __A, __m128i __B, __m128i __C,
  9622. const int __imm)
  9623. {
  9624. return (__m128i)
  9625. __builtin_ia32_pternlogd128_mask ((__v4si) __A,
  9626. (__v4si) __B,
  9627. (__v4si) __C,
  9628. (unsigned char) __imm,
  9629. (__mmask8) -1);
  9630. }
  9631. extern __inline __m128i
  9632. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9633. _mm_mask_ternarylogic_epi32 (__m128i __A, __mmask8 __U,
  9634. __m128i __B, __m128i __C,
  9635. const int __imm)
  9636. {
  9637. return (__m128i)
  9638. __builtin_ia32_pternlogd128_mask ((__v4si) __A,
  9639. (__v4si) __B,
  9640. (__v4si) __C,
  9641. (unsigned char) __imm,
  9642. (__mmask8) __U);
  9643. }
  9644. extern __inline __m128i
  9645. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9646. _mm_maskz_ternarylogic_epi32 (__mmask8 __U, __m128i __A,
  9647. __m128i __B, __m128i __C,
  9648. const int __imm)
  9649. {
  9650. return (__m128i)
  9651. __builtin_ia32_pternlogd128_maskz ((__v4si) __A,
  9652. (__v4si) __B,
  9653. (__v4si) __C,
  9654. (unsigned char) __imm,
  9655. (__mmask8) __U);
  9656. }
  9657. extern __inline __m256
  9658. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9659. _mm256_roundscale_ps (__m256 __A, const int __imm)
  9660. {
  9661. return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
  9662. __imm,
  9663. (__v8sf)
  9664. _mm256_setzero_ps (),
  9665. (__mmask8) -1);
  9666. }
  9667. extern __inline __m256
  9668. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9669. _mm256_mask_roundscale_ps (__m256 __W, __mmask8 __U, __m256 __A,
  9670. const int __imm)
  9671. {
  9672. return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
  9673. __imm,
  9674. (__v8sf) __W,
  9675. (__mmask8) __U);
  9676. }
  9677. extern __inline __m256
  9678. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9679. _mm256_maskz_roundscale_ps (__mmask8 __U, __m256 __A, const int __imm)
  9680. {
  9681. return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
  9682. __imm,
  9683. (__v8sf)
  9684. _mm256_setzero_ps (),
  9685. (__mmask8) __U);
  9686. }
  9687. extern __inline __m256d
  9688. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9689. _mm256_roundscale_pd (__m256d __A, const int __imm)
  9690. {
  9691. return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
  9692. __imm,
  9693. (__v4df)
  9694. _mm256_setzero_pd (),
  9695. (__mmask8) -1);
  9696. }
  9697. extern __inline __m256d
  9698. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9699. _mm256_mask_roundscale_pd (__m256d __W, __mmask8 __U, __m256d __A,
  9700. const int __imm)
  9701. {
  9702. return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
  9703. __imm,
  9704. (__v4df) __W,
  9705. (__mmask8) __U);
  9706. }
  9707. extern __inline __m256d
  9708. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9709. _mm256_maskz_roundscale_pd (__mmask8 __U, __m256d __A, const int __imm)
  9710. {
  9711. return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
  9712. __imm,
  9713. (__v4df)
  9714. _mm256_setzero_pd (),
  9715. (__mmask8) __U);
  9716. }
  9717. extern __inline __m128
  9718. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9719. _mm_roundscale_ps (__m128 __A, const int __imm)
  9720. {
  9721. return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
  9722. __imm,
  9723. (__v4sf)
  9724. _mm_setzero_ps (),
  9725. (__mmask8) -1);
  9726. }
  9727. extern __inline __m128
  9728. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9729. _mm_mask_roundscale_ps (__m128 __W, __mmask8 __U, __m128 __A,
  9730. const int __imm)
  9731. {
  9732. return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
  9733. __imm,
  9734. (__v4sf) __W,
  9735. (__mmask8) __U);
  9736. }
  9737. extern __inline __m128
  9738. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9739. _mm_maskz_roundscale_ps (__mmask8 __U, __m128 __A, const int __imm)
  9740. {
  9741. return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
  9742. __imm,
  9743. (__v4sf)
  9744. _mm_setzero_ps (),
  9745. (__mmask8) __U);
  9746. }
  9747. extern __inline __m128d
  9748. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9749. _mm_roundscale_pd (__m128d __A, const int __imm)
  9750. {
  9751. return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
  9752. __imm,
  9753. (__v2df)
  9754. _mm_setzero_pd (),
  9755. (__mmask8) -1);
  9756. }
  9757. extern __inline __m128d
  9758. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9759. _mm_mask_roundscale_pd (__m128d __W, __mmask8 __U, __m128d __A,
  9760. const int __imm)
  9761. {
  9762. return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
  9763. __imm,
  9764. (__v2df) __W,
  9765. (__mmask8) __U);
  9766. }
  9767. extern __inline __m128d
  9768. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9769. _mm_maskz_roundscale_pd (__mmask8 __U, __m128d __A, const int __imm)
  9770. {
  9771. return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
  9772. __imm,
  9773. (__v2df)
  9774. _mm_setzero_pd (),
  9775. (__mmask8) __U);
  9776. }
  9777. extern __inline __m256
  9778. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9779. _mm256_getmant_ps (__m256 __A, _MM_MANTISSA_NORM_ENUM __B,
  9780. _MM_MANTISSA_SIGN_ENUM __C)
  9781. {
  9782. return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
  9783. (__C << 2) | __B,
  9784. (__v8sf)
  9785. _mm256_setzero_ps (),
  9786. (__mmask8) -1);
  9787. }
  9788. extern __inline __m256
  9789. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9790. _mm256_mask_getmant_ps (__m256 __W, __mmask8 __U, __m256 __A,
  9791. _MM_MANTISSA_NORM_ENUM __B,
  9792. _MM_MANTISSA_SIGN_ENUM __C)
  9793. {
  9794. return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
  9795. (__C << 2) | __B,
  9796. (__v8sf) __W,
  9797. (__mmask8) __U);
  9798. }
  9799. extern __inline __m256
  9800. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9801. _mm256_maskz_getmant_ps (__mmask8 __U, __m256 __A,
  9802. _MM_MANTISSA_NORM_ENUM __B,
  9803. _MM_MANTISSA_SIGN_ENUM __C)
  9804. {
  9805. return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
  9806. (__C << 2) | __B,
  9807. (__v8sf)
  9808. _mm256_setzero_ps (),
  9809. (__mmask8) __U);
  9810. }
  9811. extern __inline __m128
  9812. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9813. _mm_getmant_ps (__m128 __A, _MM_MANTISSA_NORM_ENUM __B,
  9814. _MM_MANTISSA_SIGN_ENUM __C)
  9815. {
  9816. return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
  9817. (__C << 2) | __B,
  9818. (__v4sf)
  9819. _mm_setzero_ps (),
  9820. (__mmask8) -1);
  9821. }
  9822. extern __inline __m128
  9823. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9824. _mm_mask_getmant_ps (__m128 __W, __mmask8 __U, __m128 __A,
  9825. _MM_MANTISSA_NORM_ENUM __B,
  9826. _MM_MANTISSA_SIGN_ENUM __C)
  9827. {
  9828. return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
  9829. (__C << 2) | __B,
  9830. (__v4sf) __W,
  9831. (__mmask8) __U);
  9832. }
  9833. extern __inline __m128
  9834. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9835. _mm_maskz_getmant_ps (__mmask8 __U, __m128 __A,
  9836. _MM_MANTISSA_NORM_ENUM __B,
  9837. _MM_MANTISSA_SIGN_ENUM __C)
  9838. {
  9839. return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
  9840. (__C << 2) | __B,
  9841. (__v4sf)
  9842. _mm_setzero_ps (),
  9843. (__mmask8) __U);
  9844. }
  9845. extern __inline __m256d
  9846. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9847. _mm256_getmant_pd (__m256d __A, _MM_MANTISSA_NORM_ENUM __B,
  9848. _MM_MANTISSA_SIGN_ENUM __C)
  9849. {
  9850. return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
  9851. (__C << 2) | __B,
  9852. (__v4df)
  9853. _mm256_setzero_pd (),
  9854. (__mmask8) -1);
  9855. }
  9856. extern __inline __m256d
  9857. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9858. _mm256_mask_getmant_pd (__m256d __W, __mmask8 __U, __m256d __A,
  9859. _MM_MANTISSA_NORM_ENUM __B,
  9860. _MM_MANTISSA_SIGN_ENUM __C)
  9861. {
  9862. return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
  9863. (__C << 2) | __B,
  9864. (__v4df) __W,
  9865. (__mmask8) __U);
  9866. }
  9867. extern __inline __m256d
  9868. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9869. _mm256_maskz_getmant_pd (__mmask8 __U, __m256d __A,
  9870. _MM_MANTISSA_NORM_ENUM __B,
  9871. _MM_MANTISSA_SIGN_ENUM __C)
  9872. {
  9873. return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
  9874. (__C << 2) | __B,
  9875. (__v4df)
  9876. _mm256_setzero_pd (),
  9877. (__mmask8) __U);
  9878. }
  9879. extern __inline __m128d
  9880. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9881. _mm_getmant_pd (__m128d __A, _MM_MANTISSA_NORM_ENUM __B,
  9882. _MM_MANTISSA_SIGN_ENUM __C)
  9883. {
  9884. return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
  9885. (__C << 2) | __B,
  9886. (__v2df)
  9887. _mm_setzero_pd (),
  9888. (__mmask8) -1);
  9889. }
  9890. extern __inline __m128d
  9891. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9892. _mm_mask_getmant_pd (__m128d __W, __mmask8 __U, __m128d __A,
  9893. _MM_MANTISSA_NORM_ENUM __B,
  9894. _MM_MANTISSA_SIGN_ENUM __C)
  9895. {
  9896. return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
  9897. (__C << 2) | __B,
  9898. (__v2df) __W,
  9899. (__mmask8) __U);
  9900. }
  9901. extern __inline __m128d
  9902. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9903. _mm_maskz_getmant_pd (__mmask8 __U, __m128d __A,
  9904. _MM_MANTISSA_NORM_ENUM __B,
  9905. _MM_MANTISSA_SIGN_ENUM __C)
  9906. {
  9907. return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
  9908. (__C << 2) | __B,
  9909. (__v2df)
  9910. _mm_setzero_pd (),
  9911. (__mmask8) __U);
  9912. }
  9913. extern __inline __m256
  9914. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9915. _mm256_mmask_i32gather_ps (__m256 __v1_old, __mmask8 __mask,
  9916. __m256i __index, void const *__addr,
  9917. int __scale)
  9918. {
  9919. return (__m256) __builtin_ia32_gather3siv8sf ((__v8sf) __v1_old,
  9920. __addr,
  9921. (__v8si) __index,
  9922. __mask, __scale);
  9923. }
  9924. extern __inline __m128
  9925. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9926. _mm_mmask_i32gather_ps (__m128 __v1_old, __mmask8 __mask,
  9927. __m128i __index, void const *__addr,
  9928. int __scale)
  9929. {
  9930. return (__m128) __builtin_ia32_gather3siv4sf ((__v4sf) __v1_old,
  9931. __addr,
  9932. (__v4si) __index,
  9933. __mask, __scale);
  9934. }
  9935. extern __inline __m256d
  9936. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9937. _mm256_mmask_i32gather_pd (__m256d __v1_old, __mmask8 __mask,
  9938. __m128i __index, void const *__addr,
  9939. int __scale)
  9940. {
  9941. return (__m256d) __builtin_ia32_gather3siv4df ((__v4df) __v1_old,
  9942. __addr,
  9943. (__v4si) __index,
  9944. __mask, __scale);
  9945. }
  9946. extern __inline __m128d
  9947. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9948. _mm_mmask_i32gather_pd (__m128d __v1_old, __mmask8 __mask,
  9949. __m128i __index, void const *__addr,
  9950. int __scale)
  9951. {
  9952. return (__m128d) __builtin_ia32_gather3siv2df ((__v2df) __v1_old,
  9953. __addr,
  9954. (__v4si) __index,
  9955. __mask, __scale);
  9956. }
  9957. extern __inline __m128
  9958. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9959. _mm256_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
  9960. __m256i __index, void const *__addr,
  9961. int __scale)
  9962. {
  9963. return (__m128) __builtin_ia32_gather3div8sf ((__v4sf) __v1_old,
  9964. __addr,
  9965. (__v4di) __index,
  9966. __mask, __scale);
  9967. }
  9968. extern __inline __m128
  9969. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9970. _mm_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
  9971. __m128i __index, void const *__addr,
  9972. int __scale)
  9973. {
  9974. return (__m128) __builtin_ia32_gather3div4sf ((__v4sf) __v1_old,
  9975. __addr,
  9976. (__v2di) __index,
  9977. __mask, __scale);
  9978. }
  9979. extern __inline __m256d
  9980. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9981. _mm256_mmask_i64gather_pd (__m256d __v1_old, __mmask8 __mask,
  9982. __m256i __index, void const *__addr,
  9983. int __scale)
  9984. {
  9985. return (__m256d) __builtin_ia32_gather3div4df ((__v4df) __v1_old,
  9986. __addr,
  9987. (__v4di) __index,
  9988. __mask, __scale);
  9989. }
  9990. extern __inline __m128d
  9991. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9992. _mm_mmask_i64gather_pd (__m128d __v1_old, __mmask8 __mask,
  9993. __m128i __index, void const *__addr,
  9994. int __scale)
  9995. {
  9996. return (__m128d) __builtin_ia32_gather3div2df ((__v2df) __v1_old,
  9997. __addr,
  9998. (__v2di) __index,
  9999. __mask, __scale);
  10000. }
  10001. extern __inline __m256i
  10002. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10003. _mm256_mmask_i32gather_epi32 (__m256i __v1_old, __mmask8 __mask,
  10004. __m256i __index, void const *__addr,
  10005. int __scale)
  10006. {
  10007. return (__m256i) __builtin_ia32_gather3siv8si ((__v8si) __v1_old,
  10008. __addr,
  10009. (__v8si) __index,
  10010. __mask, __scale);
  10011. }
  10012. extern __inline __m128i
  10013. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10014. _mm_mmask_i32gather_epi32 (__m128i __v1_old, __mmask8 __mask,
  10015. __m128i __index, void const *__addr,
  10016. int __scale)
  10017. {
  10018. return (__m128i) __builtin_ia32_gather3siv4si ((__v4si) __v1_old,
  10019. __addr,
  10020. (__v4si) __index,
  10021. __mask, __scale);
  10022. }
  10023. extern __inline __m256i
  10024. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10025. _mm256_mmask_i32gather_epi64 (__m256i __v1_old, __mmask8 __mask,
  10026. __m128i __index, void const *__addr,
  10027. int __scale)
  10028. {
  10029. return (__m256i) __builtin_ia32_gather3siv4di ((__v4di) __v1_old,
  10030. __addr,
  10031. (__v4si) __index,
  10032. __mask, __scale);
  10033. }
  10034. extern __inline __m128i
  10035. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10036. _mm_mmask_i32gather_epi64 (__m128i __v1_old, __mmask8 __mask,
  10037. __m128i __index, void const *__addr,
  10038. int __scale)
  10039. {
  10040. return (__m128i) __builtin_ia32_gather3siv2di ((__v2di) __v1_old,
  10041. __addr,
  10042. (__v4si) __index,
  10043. __mask, __scale);
  10044. }
  10045. extern __inline __m128i
  10046. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10047. _mm256_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
  10048. __m256i __index, void const *__addr,
  10049. int __scale)
  10050. {
  10051. return (__m128i) __builtin_ia32_gather3div8si ((__v4si) __v1_old,
  10052. __addr,
  10053. (__v4di) __index,
  10054. __mask, __scale);
  10055. }
  10056. extern __inline __m128i
  10057. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10058. _mm_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
  10059. __m128i __index, void const *__addr,
  10060. int __scale)
  10061. {
  10062. return (__m128i) __builtin_ia32_gather3div4si ((__v4si) __v1_old,
  10063. __addr,
  10064. (__v2di) __index,
  10065. __mask, __scale);
  10066. }
  10067. extern __inline __m256i
  10068. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10069. _mm256_mmask_i64gather_epi64 (__m256i __v1_old, __mmask8 __mask,
  10070. __m256i __index, void const *__addr,
  10071. int __scale)
  10072. {
  10073. return (__m256i) __builtin_ia32_gather3div4di ((__v4di) __v1_old,
  10074. __addr,
  10075. (__v4di) __index,
  10076. __mask, __scale);
  10077. }
  10078. extern __inline __m128i
  10079. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10080. _mm_mmask_i64gather_epi64 (__m128i __v1_old, __mmask8 __mask,
  10081. __m128i __index, void const *__addr,
  10082. int __scale)
  10083. {
  10084. return (__m128i) __builtin_ia32_gather3div2di ((__v2di) __v1_old,
  10085. __addr,
  10086. (__v2di) __index,
  10087. __mask, __scale);
  10088. }
  10089. extern __inline void
  10090. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10091. _mm256_i32scatter_ps (void *__addr, __m256i __index,
  10092. __m256 __v1, const int __scale)
  10093. {
  10094. __builtin_ia32_scattersiv8sf (__addr, (__mmask8) 0xFF,
  10095. (__v8si) __index, (__v8sf) __v1,
  10096. __scale);
  10097. }
  10098. extern __inline void
  10099. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10100. _mm256_mask_i32scatter_ps (void *__addr, __mmask8 __mask,
  10101. __m256i __index, __m256 __v1,
  10102. const int __scale)
  10103. {
  10104. __builtin_ia32_scattersiv8sf (__addr, __mask, (__v8si) __index,
  10105. (__v8sf) __v1, __scale);
  10106. }
  10107. extern __inline void
  10108. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10109. _mm_i32scatter_ps (void *__addr, __m128i __index, __m128 __v1,
  10110. const int __scale)
  10111. {
  10112. __builtin_ia32_scattersiv4sf (__addr, (__mmask8) 0xFF,
  10113. (__v4si) __index, (__v4sf) __v1,
  10114. __scale);
  10115. }
  10116. extern __inline void
  10117. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10118. _mm_mask_i32scatter_ps (void *__addr, __mmask8 __mask,
  10119. __m128i __index, __m128 __v1,
  10120. const int __scale)
  10121. {
  10122. __builtin_ia32_scattersiv4sf (__addr, __mask, (__v4si) __index,
  10123. (__v4sf) __v1, __scale);
  10124. }
  10125. extern __inline void
  10126. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10127. _mm256_i32scatter_pd (void *__addr, __m128i __index,
  10128. __m256d __v1, const int __scale)
  10129. {
  10130. __builtin_ia32_scattersiv4df (__addr, (__mmask8) 0xFF,
  10131. (__v4si) __index, (__v4df) __v1,
  10132. __scale);
  10133. }
  10134. extern __inline void
  10135. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10136. _mm256_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
  10137. __m128i __index, __m256d __v1,
  10138. const int __scale)
  10139. {
  10140. __builtin_ia32_scattersiv4df (__addr, __mask, (__v4si) __index,
  10141. (__v4df) __v1, __scale);
  10142. }
  10143. extern __inline void
  10144. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10145. _mm_i32scatter_pd (void *__addr, __m128i __index,
  10146. __m128d __v1, const int __scale)
  10147. {
  10148. __builtin_ia32_scattersiv2df (__addr, (__mmask8) 0xFF,
  10149. (__v4si) __index, (__v2df) __v1,
  10150. __scale);
  10151. }
  10152. extern __inline void
  10153. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10154. _mm_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
  10155. __m128i __index, __m128d __v1,
  10156. const int __scale)
  10157. {
  10158. __builtin_ia32_scattersiv2df (__addr, __mask, (__v4si) __index,
  10159. (__v2df) __v1, __scale);
  10160. }
  10161. extern __inline void
  10162. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10163. _mm256_i64scatter_ps (void *__addr, __m256i __index,
  10164. __m128 __v1, const int __scale)
  10165. {
  10166. __builtin_ia32_scatterdiv8sf (__addr, (__mmask8) 0xFF,
  10167. (__v4di) __index, (__v4sf) __v1,
  10168. __scale);
  10169. }
  10170. extern __inline void
  10171. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10172. _mm256_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
  10173. __m256i __index, __m128 __v1,
  10174. const int __scale)
  10175. {
  10176. __builtin_ia32_scatterdiv8sf (__addr, __mask, (__v4di) __index,
  10177. (__v4sf) __v1, __scale);
  10178. }
  10179. extern __inline void
  10180. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10181. _mm_i64scatter_ps (void *__addr, __m128i __index, __m128 __v1,
  10182. const int __scale)
  10183. {
  10184. __builtin_ia32_scatterdiv4sf (__addr, (__mmask8) 0xFF,
  10185. (__v2di) __index, (__v4sf) __v1,
  10186. __scale);
  10187. }
  10188. extern __inline void
  10189. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10190. _mm_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
  10191. __m128i __index, __m128 __v1,
  10192. const int __scale)
  10193. {
  10194. __builtin_ia32_scatterdiv4sf (__addr, __mask, (__v2di) __index,
  10195. (__v4sf) __v1, __scale);
  10196. }
  10197. extern __inline void
  10198. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10199. _mm256_i64scatter_pd (void *__addr, __m256i __index,
  10200. __m256d __v1, const int __scale)
  10201. {
  10202. __builtin_ia32_scatterdiv4df (__addr, (__mmask8) 0xFF,
  10203. (__v4di) __index, (__v4df) __v1,
  10204. __scale);
  10205. }
  10206. extern __inline void
  10207. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10208. _mm256_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
  10209. __m256i __index, __m256d __v1,
  10210. const int __scale)
  10211. {
  10212. __builtin_ia32_scatterdiv4df (__addr, __mask, (__v4di) __index,
  10213. (__v4df) __v1, __scale);
  10214. }
  10215. extern __inline void
  10216. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10217. _mm_i64scatter_pd (void *__addr, __m128i __index,
  10218. __m128d __v1, const int __scale)
  10219. {
  10220. __builtin_ia32_scatterdiv2df (__addr, (__mmask8) 0xFF,
  10221. (__v2di) __index, (__v2df) __v1,
  10222. __scale);
  10223. }
  10224. extern __inline void
  10225. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10226. _mm_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
  10227. __m128i __index, __m128d __v1,
  10228. const int __scale)
  10229. {
  10230. __builtin_ia32_scatterdiv2df (__addr, __mask, (__v2di) __index,
  10231. (__v2df) __v1, __scale);
  10232. }
  10233. extern __inline void
  10234. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10235. _mm256_i32scatter_epi32 (void *__addr, __m256i __index,
  10236. __m256i __v1, const int __scale)
  10237. {
  10238. __builtin_ia32_scattersiv8si (__addr, (__mmask8) 0xFF,
  10239. (__v8si) __index, (__v8si) __v1,
  10240. __scale);
  10241. }
  10242. extern __inline void
  10243. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10244. _mm256_mask_i32scatter_epi32 (void *__addr, __mmask8 __mask,
  10245. __m256i __index, __m256i __v1,
  10246. const int __scale)
  10247. {
  10248. __builtin_ia32_scattersiv8si (__addr, __mask, (__v8si) __index,
  10249. (__v8si) __v1, __scale);
  10250. }
  10251. extern __inline void
  10252. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10253. _mm_i32scatter_epi32 (void *__addr, __m128i __index,
  10254. __m128i __v1, const int __scale)
  10255. {
  10256. __builtin_ia32_scattersiv4si (__addr, (__mmask8) 0xFF,
  10257. (__v4si) __index, (__v4si) __v1,
  10258. __scale);
  10259. }
  10260. extern __inline void
  10261. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10262. _mm_mask_i32scatter_epi32 (void *__addr, __mmask8 __mask,
  10263. __m128i __index, __m128i __v1,
  10264. const int __scale)
  10265. {
  10266. __builtin_ia32_scattersiv4si (__addr, __mask, (__v4si) __index,
  10267. (__v4si) __v1, __scale);
  10268. }
  10269. extern __inline void
  10270. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10271. _mm256_i32scatter_epi64 (void *__addr, __m128i __index,
  10272. __m256i __v1, const int __scale)
  10273. {
  10274. __builtin_ia32_scattersiv4di (__addr, (__mmask8) 0xFF,
  10275. (__v4si) __index, (__v4di) __v1,
  10276. __scale);
  10277. }
  10278. extern __inline void
  10279. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10280. _mm256_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
  10281. __m128i __index, __m256i __v1,
  10282. const int __scale)
  10283. {
  10284. __builtin_ia32_scattersiv4di (__addr, __mask, (__v4si) __index,
  10285. (__v4di) __v1, __scale);
  10286. }
  10287. extern __inline void
  10288. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10289. _mm_i32scatter_epi64 (void *__addr, __m128i __index,
  10290. __m128i __v1, const int __scale)
  10291. {
  10292. __builtin_ia32_scattersiv2di (__addr, (__mmask8) 0xFF,
  10293. (__v4si) __index, (__v2di) __v1,
  10294. __scale);
  10295. }
  10296. extern __inline void
  10297. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10298. _mm_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
  10299. __m128i __index, __m128i __v1,
  10300. const int __scale)
  10301. {
  10302. __builtin_ia32_scattersiv2di (__addr, __mask, (__v4si) __index,
  10303. (__v2di) __v1, __scale);
  10304. }
  10305. extern __inline void
  10306. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10307. _mm256_i64scatter_epi32 (void *__addr, __m256i __index,
  10308. __m128i __v1, const int __scale)
  10309. {
  10310. __builtin_ia32_scatterdiv8si (__addr, (__mmask8) 0xFF,
  10311. (__v4di) __index, (__v4si) __v1,
  10312. __scale);
  10313. }
  10314. extern __inline void
  10315. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10316. _mm256_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
  10317. __m256i __index, __m128i __v1,
  10318. const int __scale)
  10319. {
  10320. __builtin_ia32_scatterdiv8si (__addr, __mask, (__v4di) __index,
  10321. (__v4si) __v1, __scale);
  10322. }
  10323. extern __inline void
  10324. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10325. _mm_i64scatter_epi32 (void *__addr, __m128i __index,
  10326. __m128i __v1, const int __scale)
  10327. {
  10328. __builtin_ia32_scatterdiv4si (__addr, (__mmask8) 0xFF,
  10329. (__v2di) __index, (__v4si) __v1,
  10330. __scale);
  10331. }
  10332. extern __inline void
  10333. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10334. _mm_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
  10335. __m128i __index, __m128i __v1,
  10336. const int __scale)
  10337. {
  10338. __builtin_ia32_scatterdiv4si (__addr, __mask, (__v2di) __index,
  10339. (__v4si) __v1, __scale);
  10340. }
  10341. extern __inline void
  10342. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10343. _mm256_i64scatter_epi64 (void *__addr, __m256i __index,
  10344. __m256i __v1, const int __scale)
  10345. {
  10346. __builtin_ia32_scatterdiv4di (__addr, (__mmask8) 0xFF,
  10347. (__v4di) __index, (__v4di) __v1,
  10348. __scale);
  10349. }
  10350. extern __inline void
  10351. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10352. _mm256_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
  10353. __m256i __index, __m256i __v1,
  10354. const int __scale)
  10355. {
  10356. __builtin_ia32_scatterdiv4di (__addr, __mask, (__v4di) __index,
  10357. (__v4di) __v1, __scale);
  10358. }
  10359. extern __inline void
  10360. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10361. _mm_i64scatter_epi64 (void *__addr, __m128i __index,
  10362. __m128i __v1, const int __scale)
  10363. {
  10364. __builtin_ia32_scatterdiv2di (__addr, (__mmask8) 0xFF,
  10365. (__v2di) __index, (__v2di) __v1,
  10366. __scale);
  10367. }
  10368. extern __inline void
  10369. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10370. _mm_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
  10371. __m128i __index, __m128i __v1,
  10372. const int __scale)
  10373. {
  10374. __builtin_ia32_scatterdiv2di (__addr, __mask, (__v2di) __index,
  10375. (__v2di) __v1, __scale);
  10376. }
  10377. extern __inline __m256i
  10378. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10379. _mm256_mask_shuffle_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  10380. _MM_PERM_ENUM __mask)
  10381. {
  10382. return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
  10383. (__v8si) __W,
  10384. (__mmask8) __U);
  10385. }
  10386. extern __inline __m256i
  10387. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10388. _mm256_maskz_shuffle_epi32 (__mmask8 __U, __m256i __A,
  10389. _MM_PERM_ENUM __mask)
  10390. {
  10391. return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
  10392. (__v8si)
  10393. _mm256_setzero_si256 (),
  10394. (__mmask8) __U);
  10395. }
  10396. extern __inline __m128i
  10397. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10398. _mm_mask_shuffle_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  10399. _MM_PERM_ENUM __mask)
  10400. {
  10401. return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
  10402. (__v4si) __W,
  10403. (__mmask8) __U);
  10404. }
  10405. extern __inline __m128i
  10406. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10407. _mm_maskz_shuffle_epi32 (__mmask8 __U, __m128i __A,
  10408. _MM_PERM_ENUM __mask)
  10409. {
  10410. return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
  10411. (__v4si)
  10412. _mm_setzero_si128 (),
  10413. (__mmask8) __U);
  10414. }
  10415. extern __inline __m256i
  10416. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10417. _mm256_rol_epi32 (__m256i __A, const int __B)
  10418. {
  10419. return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
  10420. (__v8si)
  10421. _mm256_setzero_si256 (),
  10422. (__mmask8) -1);
  10423. }
  10424. extern __inline __m256i
  10425. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10426. _mm256_mask_rol_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  10427. const int __B)
  10428. {
  10429. return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
  10430. (__v8si) __W,
  10431. (__mmask8) __U);
  10432. }
  10433. extern __inline __m256i
  10434. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10435. _mm256_maskz_rol_epi32 (__mmask8 __U, __m256i __A, const int __B)
  10436. {
  10437. return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
  10438. (__v8si)
  10439. _mm256_setzero_si256 (),
  10440. (__mmask8) __U);
  10441. }
  10442. extern __inline __m128i
  10443. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10444. _mm_rol_epi32 (__m128i __A, const int __B)
  10445. {
  10446. return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
  10447. (__v4si)
  10448. _mm_setzero_si128 (),
  10449. (__mmask8) -1);
  10450. }
  10451. extern __inline __m128i
  10452. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10453. _mm_mask_rol_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  10454. const int __B)
  10455. {
  10456. return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
  10457. (__v4si) __W,
  10458. (__mmask8) __U);
  10459. }
  10460. extern __inline __m128i
  10461. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10462. _mm_maskz_rol_epi32 (__mmask8 __U, __m128i __A, const int __B)
  10463. {
  10464. return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
  10465. (__v4si)
  10466. _mm_setzero_si128 (),
  10467. (__mmask8) __U);
  10468. }
  10469. extern __inline __m256i
  10470. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10471. _mm256_ror_epi32 (__m256i __A, const int __B)
  10472. {
  10473. return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
  10474. (__v8si)
  10475. _mm256_setzero_si256 (),
  10476. (__mmask8) -1);
  10477. }
  10478. extern __inline __m256i
  10479. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10480. _mm256_mask_ror_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  10481. const int __B)
  10482. {
  10483. return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
  10484. (__v8si) __W,
  10485. (__mmask8) __U);
  10486. }
  10487. extern __inline __m256i
  10488. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10489. _mm256_maskz_ror_epi32 (__mmask8 __U, __m256i __A, const int __B)
  10490. {
  10491. return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
  10492. (__v8si)
  10493. _mm256_setzero_si256 (),
  10494. (__mmask8) __U);
  10495. }
  10496. extern __inline __m128i
  10497. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10498. _mm_ror_epi32 (__m128i __A, const int __B)
  10499. {
  10500. return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
  10501. (__v4si)
  10502. _mm_setzero_si128 (),
  10503. (__mmask8) -1);
  10504. }
  10505. extern __inline __m128i
  10506. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10507. _mm_mask_ror_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  10508. const int __B)
  10509. {
  10510. return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
  10511. (__v4si) __W,
  10512. (__mmask8) __U);
  10513. }
  10514. extern __inline __m128i
  10515. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10516. _mm_maskz_ror_epi32 (__mmask8 __U, __m128i __A, const int __B)
  10517. {
  10518. return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
  10519. (__v4si)
  10520. _mm_setzero_si128 (),
  10521. (__mmask8) __U);
  10522. }
  10523. extern __inline __m256i
  10524. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10525. _mm256_rol_epi64 (__m256i __A, const int __B)
  10526. {
  10527. return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
  10528. (__v4di)
  10529. _mm256_setzero_si256 (),
  10530. (__mmask8) -1);
  10531. }
  10532. extern __inline __m256i
  10533. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10534. _mm256_mask_rol_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  10535. const int __B)
  10536. {
  10537. return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
  10538. (__v4di) __W,
  10539. (__mmask8) __U);
  10540. }
  10541. extern __inline __m256i
  10542. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10543. _mm256_maskz_rol_epi64 (__mmask8 __U, __m256i __A, const int __B)
  10544. {
  10545. return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
  10546. (__v4di)
  10547. _mm256_setzero_si256 (),
  10548. (__mmask8) __U);
  10549. }
  10550. extern __inline __m128i
  10551. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10552. _mm_rol_epi64 (__m128i __A, const int __B)
  10553. {
  10554. return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
  10555. (__v2di)
  10556. _mm_setzero_si128 (),
  10557. (__mmask8) -1);
  10558. }
  10559. extern __inline __m128i
  10560. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10561. _mm_mask_rol_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
  10562. const int __B)
  10563. {
  10564. return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
  10565. (__v2di) __W,
  10566. (__mmask8) __U);
  10567. }
  10568. extern __inline __m128i
  10569. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10570. _mm_maskz_rol_epi64 (__mmask8 __U, __m128i __A, const int __B)
  10571. {
  10572. return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
  10573. (__v2di)
  10574. _mm_setzero_si128 (),
  10575. (__mmask8) __U);
  10576. }
  10577. extern __inline __m256i
  10578. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10579. _mm256_ror_epi64 (__m256i __A, const int __B)
  10580. {
  10581. return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
  10582. (__v4di)
  10583. _mm256_setzero_si256 (),
  10584. (__mmask8) -1);
  10585. }
  10586. extern __inline __m256i
  10587. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10588. _mm256_mask_ror_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  10589. const int __B)
  10590. {
  10591. return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
  10592. (__v4di) __W,
  10593. (__mmask8) __U);
  10594. }
  10595. extern __inline __m256i
  10596. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10597. _mm256_maskz_ror_epi64 (__mmask8 __U, __m256i __A, const int __B)
  10598. {
  10599. return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
  10600. (__v4di)
  10601. _mm256_setzero_si256 (),
  10602. (__mmask8) __U);
  10603. }
  10604. extern __inline __m128i
  10605. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10606. _mm_ror_epi64 (__m128i __A, const int __B)
  10607. {
  10608. return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
  10609. (__v2di)
  10610. _mm_setzero_si128 (),
  10611. (__mmask8) -1);
  10612. }
  10613. extern __inline __m128i
  10614. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10615. _mm_mask_ror_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
  10616. const int __B)
  10617. {
  10618. return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
  10619. (__v2di) __W,
  10620. (__mmask8) __U);
  10621. }
  10622. extern __inline __m128i
  10623. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10624. _mm_maskz_ror_epi64 (__mmask8 __U, __m128i __A, const int __B)
  10625. {
  10626. return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
  10627. (__v2di)
  10628. _mm_setzero_si128 (),
  10629. (__mmask8) __U);
  10630. }
  10631. extern __inline __m128i
  10632. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10633. _mm_alignr_epi32 (__m128i __A, __m128i __B, const int __imm)
  10634. {
  10635. return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
  10636. (__v4si) __B, __imm,
  10637. (__v4si)
  10638. _mm_setzero_si128 (),
  10639. (__mmask8) -1);
  10640. }
  10641. extern __inline __m128i
  10642. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10643. _mm_mask_alignr_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  10644. __m128i __B, const int __imm)
  10645. {
  10646. return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
  10647. (__v4si) __B, __imm,
  10648. (__v4si) __W,
  10649. (__mmask8) __U);
  10650. }
  10651. extern __inline __m128i
  10652. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10653. _mm_maskz_alignr_epi32 (__mmask8 __U, __m128i __A, __m128i __B,
  10654. const int __imm)
  10655. {
  10656. return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
  10657. (__v4si) __B, __imm,
  10658. (__v4si)
  10659. _mm_setzero_si128 (),
  10660. (__mmask8) __U);
  10661. }
  10662. extern __inline __m128i
  10663. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10664. _mm_alignr_epi64 (__m128i __A, __m128i __B, const int __imm)
  10665. {
  10666. return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
  10667. (__v2di) __B, __imm,
  10668. (__v2di)
  10669. _mm_setzero_si128 (),
  10670. (__mmask8) -1);
  10671. }
  10672. extern __inline __m128i
  10673. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10674. _mm_mask_alignr_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
  10675. __m128i __B, const int __imm)
  10676. {
  10677. return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
  10678. (__v2di) __B, __imm,
  10679. (__v2di) __W,
  10680. (__mmask8) __U);
  10681. }
  10682. extern __inline __m128i
  10683. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10684. _mm_maskz_alignr_epi64 (__mmask8 __U, __m128i __A, __m128i __B,
  10685. const int __imm)
  10686. {
  10687. return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
  10688. (__v2di) __B, __imm,
  10689. (__v2di)
  10690. _mm_setzero_si128 (),
  10691. (__mmask8) __U);
  10692. }
  10693. extern __inline __m256i
  10694. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10695. _mm256_alignr_epi32 (__m256i __A, __m256i __B, const int __imm)
  10696. {
  10697. return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
  10698. (__v8si) __B, __imm,
  10699. (__v8si)
  10700. _mm256_setzero_si256 (),
  10701. (__mmask8) -1);
  10702. }
  10703. extern __inline __m256i
  10704. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10705. _mm256_mask_alignr_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  10706. __m256i __B, const int __imm)
  10707. {
  10708. return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
  10709. (__v8si) __B, __imm,
  10710. (__v8si) __W,
  10711. (__mmask8) __U);
  10712. }
  10713. extern __inline __m256i
  10714. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10715. _mm256_maskz_alignr_epi32 (__mmask8 __U, __m256i __A, __m256i __B,
  10716. const int __imm)
  10717. {
  10718. return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
  10719. (__v8si) __B, __imm,
  10720. (__v8si)
  10721. _mm256_setzero_si256 (),
  10722. (__mmask8) __U);
  10723. }
  10724. extern __inline __m256i
  10725. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10726. _mm256_alignr_epi64 (__m256i __A, __m256i __B, const int __imm)
  10727. {
  10728. return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
  10729. (__v4di) __B, __imm,
  10730. (__v4di)
  10731. _mm256_setzero_si256 (),
  10732. (__mmask8) -1);
  10733. }
  10734. extern __inline __m256i
  10735. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10736. _mm256_mask_alignr_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  10737. __m256i __B, const int __imm)
  10738. {
  10739. return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
  10740. (__v4di) __B, __imm,
  10741. (__v4di) __W,
  10742. (__mmask8) __U);
  10743. }
  10744. extern __inline __m256i
  10745. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10746. _mm256_maskz_alignr_epi64 (__mmask8 __U, __m256i __A, __m256i __B,
  10747. const int __imm)
  10748. {
  10749. return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
  10750. (__v4di) __B, __imm,
  10751. (__v4di)
  10752. _mm256_setzero_si256 (),
  10753. (__mmask8) __U);
  10754. }
  10755. extern __inline __m128i
  10756. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10757. _mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A,
  10758. const int __I)
  10759. {
  10760. return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
  10761. (__v8hi) __W,
  10762. (__mmask8) __U);
  10763. }
  10764. extern __inline __m128i
  10765. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10766. _mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A, const int __I)
  10767. {
  10768. return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
  10769. (__v8hi)
  10770. _mm_setzero_si128 (),
  10771. (__mmask8) __U);
  10772. }
  10773. extern __inline __m128i
  10774. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10775. _mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A,
  10776. const int __I)
  10777. {
  10778. return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
  10779. (__v8hi) __W,
  10780. (__mmask8) __U);
  10781. }
  10782. extern __inline __m128i
  10783. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10784. _mm256_maskz_cvtps_ph (__mmask8 __U, __m256 __A, const int __I)
  10785. {
  10786. return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
  10787. (__v8hi)
  10788. _mm_setzero_si128 (),
  10789. (__mmask8) __U);
  10790. }
  10791. extern __inline __m256i
  10792. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10793. _mm256_mask_srai_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  10794. const int __imm)
  10795. {
  10796. return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
  10797. (__v8si) __W,
  10798. (__mmask8) __U);
  10799. }
  10800. extern __inline __m256i
  10801. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10802. _mm256_maskz_srai_epi32 (__mmask8 __U, __m256i __A, const int __imm)
  10803. {
  10804. return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
  10805. (__v8si)
  10806. _mm256_setzero_si256 (),
  10807. (__mmask8) __U);
  10808. }
  10809. extern __inline __m128i
  10810. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10811. _mm_mask_srai_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  10812. const int __imm)
  10813. {
  10814. return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
  10815. (__v4si) __W,
  10816. (__mmask8) __U);
  10817. }
  10818. extern __inline __m128i
  10819. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10820. _mm_maskz_srai_epi32 (__mmask8 __U, __m128i __A, const int __imm)
  10821. {
  10822. return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
  10823. (__v4si)
  10824. _mm_setzero_si128 (),
  10825. (__mmask8) __U);
  10826. }
  10827. extern __inline __m256i
  10828. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10829. _mm256_srai_epi64 (__m256i __A, const int __imm)
  10830. {
  10831. return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
  10832. (__v4di)
  10833. _mm256_setzero_si256 (),
  10834. (__mmask8) -1);
  10835. }
  10836. extern __inline __m256i
  10837. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10838. _mm256_mask_srai_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  10839. const int __imm)
  10840. {
  10841. return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
  10842. (__v4di) __W,
  10843. (__mmask8) __U);
  10844. }
  10845. extern __inline __m256i
  10846. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10847. _mm256_maskz_srai_epi64 (__mmask8 __U, __m256i __A, const int __imm)
  10848. {
  10849. return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
  10850. (__v4di)
  10851. _mm256_setzero_si256 (),
  10852. (__mmask8) __U);
  10853. }
  10854. extern __inline __m128i
  10855. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10856. _mm_srai_epi64 (__m128i __A, const int __imm)
  10857. {
  10858. return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
  10859. (__v2di)
  10860. _mm_setzero_si128 (),
  10861. (__mmask8) -1);
  10862. }
  10863. extern __inline __m128i
  10864. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10865. _mm_mask_srai_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
  10866. const int __imm)
  10867. {
  10868. return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
  10869. (__v2di) __W,
  10870. (__mmask8) __U);
  10871. }
  10872. extern __inline __m128i
  10873. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10874. _mm_maskz_srai_epi64 (__mmask8 __U, __m128i __A, const int __imm)
  10875. {
  10876. return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
  10877. (__v2di)
  10878. _mm_setzero_si128 (),
  10879. (__mmask8) __U);
  10880. }
  10881. extern __inline __m128i
  10882. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10883. _mm_mask_slli_epi32 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
  10884. {
  10885. return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
  10886. (__v4si) __W,
  10887. (__mmask8) __U);
  10888. }
  10889. extern __inline __m128i
  10890. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10891. _mm_maskz_slli_epi32 (__mmask8 __U, __m128i __A, int __B)
  10892. {
  10893. return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
  10894. (__v4si)
  10895. _mm_setzero_si128 (),
  10896. (__mmask8) __U);
  10897. }
  10898. extern __inline __m128i
  10899. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10900. _mm_mask_slli_epi64 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
  10901. {
  10902. return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
  10903. (__v2di) __W,
  10904. (__mmask8) __U);
  10905. }
  10906. extern __inline __m128i
  10907. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10908. _mm_maskz_slli_epi64 (__mmask8 __U, __m128i __A, int __B)
  10909. {
  10910. return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
  10911. (__v2di)
  10912. _mm_setzero_si128 (),
  10913. (__mmask8) __U);
  10914. }
  10915. extern __inline __m256i
  10916. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10917. _mm256_mask_slli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  10918. int __B)
  10919. {
  10920. return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
  10921. (__v8si) __W,
  10922. (__mmask8) __U);
  10923. }
  10924. extern __inline __m256i
  10925. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10926. _mm256_maskz_slli_epi32 (__mmask8 __U, __m256i __A, int __B)
  10927. {
  10928. return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
  10929. (__v8si)
  10930. _mm256_setzero_si256 (),
  10931. (__mmask8) __U);
  10932. }
  10933. extern __inline __m256i
  10934. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10935. _mm256_mask_slli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  10936. int __B)
  10937. {
  10938. return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
  10939. (__v4di) __W,
  10940. (__mmask8) __U);
  10941. }
  10942. extern __inline __m256i
  10943. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10944. _mm256_maskz_slli_epi64 (__mmask8 __U, __m256i __A, int __B)
  10945. {
  10946. return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
  10947. (__v4di)
  10948. _mm256_setzero_si256 (),
  10949. (__mmask8) __U);
  10950. }
  10951. extern __inline __m256d
  10952. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10953. _mm256_mask_permutex_pd (__m256d __W, __mmask8 __U, __m256d __X,
  10954. const int __imm)
  10955. {
  10956. return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
  10957. (__v4df) __W,
  10958. (__mmask8) __U);
  10959. }
  10960. extern __inline __m256d
  10961. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10962. _mm256_maskz_permutex_pd (__mmask8 __U, __m256d __X, const int __imm)
  10963. {
  10964. return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
  10965. (__v4df)
  10966. _mm256_setzero_pd (),
  10967. (__mmask8) __U);
  10968. }
  10969. extern __inline __m256d
  10970. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10971. _mm256_mask_permute_pd (__m256d __W, __mmask8 __U, __m256d __X,
  10972. const int __C)
  10973. {
  10974. return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
  10975. (__v4df) __W,
  10976. (__mmask8) __U);
  10977. }
  10978. extern __inline __m256d
  10979. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10980. _mm256_maskz_permute_pd (__mmask8 __U, __m256d __X, const int __C)
  10981. {
  10982. return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
  10983. (__v4df)
  10984. _mm256_setzero_pd (),
  10985. (__mmask8) __U);
  10986. }
  10987. extern __inline __m128d
  10988. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10989. _mm_mask_permute_pd (__m128d __W, __mmask8 __U, __m128d __X,
  10990. const int __C)
  10991. {
  10992. return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
  10993. (__v2df) __W,
  10994. (__mmask8) __U);
  10995. }
  10996. extern __inline __m128d
  10997. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10998. _mm_maskz_permute_pd (__mmask8 __U, __m128d __X, const int __C)
  10999. {
  11000. return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
  11001. (__v2df)
  11002. _mm_setzero_pd (),
  11003. (__mmask8) __U);
  11004. }
  11005. extern __inline __m256
  11006. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11007. _mm256_mask_permute_ps (__m256 __W, __mmask8 __U, __m256 __X,
  11008. const int __C)
  11009. {
  11010. return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
  11011. (__v8sf) __W,
  11012. (__mmask8) __U);
  11013. }
  11014. extern __inline __m256
  11015. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11016. _mm256_maskz_permute_ps (__mmask8 __U, __m256 __X, const int __C)
  11017. {
  11018. return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
  11019. (__v8sf)
  11020. _mm256_setzero_ps (),
  11021. (__mmask8) __U);
  11022. }
  11023. extern __inline __m128
  11024. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11025. _mm_mask_permute_ps (__m128 __W, __mmask8 __U, __m128 __X,
  11026. const int __C)
  11027. {
  11028. return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
  11029. (__v4sf) __W,
  11030. (__mmask8) __U);
  11031. }
  11032. extern __inline __m128
  11033. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11034. _mm_maskz_permute_ps (__mmask8 __U, __m128 __X, const int __C)
  11035. {
  11036. return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
  11037. (__v4sf)
  11038. _mm_setzero_ps (),
  11039. (__mmask8) __U);
  11040. }
  11041. extern __inline __m256d
  11042. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11043. _mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W)
  11044. {
  11045. return (__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) __A,
  11046. (__v4df) __W,
  11047. (__mmask8) __U);
  11048. }
  11049. extern __inline __m256
  11050. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11051. _mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W)
  11052. {
  11053. return (__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) __A,
  11054. (__v8sf) __W,
  11055. (__mmask8) __U);
  11056. }
  11057. extern __inline __m256i
  11058. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11059. _mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W)
  11060. {
  11061. return (__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) __A,
  11062. (__v4di) __W,
  11063. (__mmask8) __U);
  11064. }
  11065. extern __inline __m256i
  11066. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11067. _mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W)
  11068. {
  11069. return (__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) __A,
  11070. (__v8si) __W,
  11071. (__mmask8) __U);
  11072. }
  11073. extern __inline __m128d
  11074. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11075. _mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W)
  11076. {
  11077. return (__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) __A,
  11078. (__v2df) __W,
  11079. (__mmask8) __U);
  11080. }
  11081. extern __inline __m128
  11082. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11083. _mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W)
  11084. {
  11085. return (__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) __A,
  11086. (__v4sf) __W,
  11087. (__mmask8) __U);
  11088. }
  11089. extern __inline __m128i
  11090. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11091. _mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W)
  11092. {
  11093. return (__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) __A,
  11094. (__v2di) __W,
  11095. (__mmask8) __U);
  11096. }
  11097. extern __inline __m128i
  11098. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11099. _mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W)
  11100. {
  11101. return (__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) __A,
  11102. (__v4si) __W,
  11103. (__mmask8) __U);
  11104. }
  11105. extern __inline __mmask8
  11106. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11107. _mm256_cmp_epi64_mask (__m256i __X, __m256i __Y, const int __P)
  11108. {
  11109. return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
  11110. (__v4di) __Y, __P,
  11111. (__mmask8) -1);
  11112. }
  11113. extern __inline __mmask8
  11114. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11115. _mm256_cmp_epi32_mask (__m256i __X, __m256i __Y, const int __P)
  11116. {
  11117. return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
  11118. (__v8si) __Y, __P,
  11119. (__mmask8) -1);
  11120. }
  11121. extern __inline __mmask8
  11122. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11123. _mm256_cmp_epu64_mask (__m256i __X, __m256i __Y, const int __P)
  11124. {
  11125. return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
  11126. (__v4di) __Y, __P,
  11127. (__mmask8) -1);
  11128. }
  11129. extern __inline __mmask8
  11130. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11131. _mm256_cmp_epu32_mask (__m256i __X, __m256i __Y, const int __P)
  11132. {
  11133. return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
  11134. (__v8si) __Y, __P,
  11135. (__mmask8) -1);
  11136. }
  11137. extern __inline __mmask8
  11138. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11139. _mm256_cmp_pd_mask (__m256d __X, __m256d __Y, const int __P)
  11140. {
  11141. return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
  11142. (__v4df) __Y, __P,
  11143. (__mmask8) -1);
  11144. }
  11145. extern __inline __mmask8
  11146. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11147. _mm256_cmp_ps_mask (__m256 __X, __m256 __Y, const int __P)
  11148. {
  11149. return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
  11150. (__v8sf) __Y, __P,
  11151. (__mmask8) -1);
  11152. }
  11153. extern __inline __mmask8
  11154. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11155. _mm256_mask_cmp_epi64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
  11156. const int __P)
  11157. {
  11158. return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
  11159. (__v4di) __Y, __P,
  11160. (__mmask8) __U);
  11161. }
  11162. extern __inline __mmask8
  11163. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11164. _mm256_mask_cmp_epi32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
  11165. const int __P)
  11166. {
  11167. return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
  11168. (__v8si) __Y, __P,
  11169. (__mmask8) __U);
  11170. }
  11171. extern __inline __mmask8
  11172. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11173. _mm256_mask_cmp_epu64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
  11174. const int __P)
  11175. {
  11176. return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
  11177. (__v4di) __Y, __P,
  11178. (__mmask8) __U);
  11179. }
  11180. extern __inline __mmask8
  11181. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11182. _mm256_mask_cmp_epu32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
  11183. const int __P)
  11184. {
  11185. return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
  11186. (__v8si) __Y, __P,
  11187. (__mmask8) __U);
  11188. }
  11189. extern __inline __mmask8
  11190. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11191. _mm256_mask_cmp_pd_mask (__mmask8 __U, __m256d __X, __m256d __Y,
  11192. const int __P)
  11193. {
  11194. return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
  11195. (__v4df) __Y, __P,
  11196. (__mmask8) __U);
  11197. }
  11198. extern __inline __mmask8
  11199. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11200. _mm256_mask_cmp_ps_mask (__mmask8 __U, __m256 __X, __m256 __Y,
  11201. const int __P)
  11202. {
  11203. return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
  11204. (__v8sf) __Y, __P,
  11205. (__mmask8) __U);
  11206. }
  11207. extern __inline __mmask8
  11208. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11209. _mm_cmp_epi64_mask (__m128i __X, __m128i __Y, const int __P)
  11210. {
  11211. return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
  11212. (__v2di) __Y, __P,
  11213. (__mmask8) -1);
  11214. }
  11215. extern __inline __mmask8
  11216. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11217. _mm_cmp_epi32_mask (__m128i __X, __m128i __Y, const int __P)
  11218. {
  11219. return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
  11220. (__v4si) __Y, __P,
  11221. (__mmask8) -1);
  11222. }
  11223. extern __inline __mmask8
  11224. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11225. _mm_cmp_epu64_mask (__m128i __X, __m128i __Y, const int __P)
  11226. {
  11227. return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
  11228. (__v2di) __Y, __P,
  11229. (__mmask8) -1);
  11230. }
  11231. extern __inline __mmask8
  11232. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11233. _mm_cmp_epu32_mask (__m128i __X, __m128i __Y, const int __P)
  11234. {
  11235. return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
  11236. (__v4si) __Y, __P,
  11237. (__mmask8) -1);
  11238. }
  11239. extern __inline __mmask8
  11240. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11241. _mm_cmp_pd_mask (__m128d __X, __m128d __Y, const int __P)
  11242. {
  11243. return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
  11244. (__v2df) __Y, __P,
  11245. (__mmask8) -1);
  11246. }
  11247. extern __inline __mmask8
  11248. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11249. _mm_cmp_ps_mask (__m128 __X, __m128 __Y, const int __P)
  11250. {
  11251. return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
  11252. (__v4sf) __Y, __P,
  11253. (__mmask8) -1);
  11254. }
  11255. extern __inline __mmask8
  11256. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11257. _mm_mask_cmp_epi64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
  11258. const int __P)
  11259. {
  11260. return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
  11261. (__v2di) __Y, __P,
  11262. (__mmask8) __U);
  11263. }
  11264. extern __inline __mmask8
  11265. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11266. _mm_mask_cmp_epi32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
  11267. const int __P)
  11268. {
  11269. return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
  11270. (__v4si) __Y, __P,
  11271. (__mmask8) __U);
  11272. }
  11273. extern __inline __mmask8
  11274. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11275. _mm_mask_cmp_epu64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
  11276. const int __P)
  11277. {
  11278. return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
  11279. (__v2di) __Y, __P,
  11280. (__mmask8) __U);
  11281. }
  11282. extern __inline __mmask8
  11283. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11284. _mm_mask_cmp_epu32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
  11285. const int __P)
  11286. {
  11287. return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
  11288. (__v4si) __Y, __P,
  11289. (__mmask8) __U);
  11290. }
  11291. extern __inline __mmask8
  11292. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11293. _mm_mask_cmp_pd_mask (__mmask8 __U, __m128d __X, __m128d __Y,
  11294. const int __P)
  11295. {
  11296. return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
  11297. (__v2df) __Y, __P,
  11298. (__mmask8) __U);
  11299. }
  11300. extern __inline __mmask8
  11301. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11302. _mm_mask_cmp_ps_mask (__mmask8 __U, __m128 __X, __m128 __Y,
  11303. const int __P)
  11304. {
  11305. return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
  11306. (__v4sf) __Y, __P,
  11307. (__mmask8) __U);
  11308. }
  11309. extern __inline __m256d
  11310. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11311. _mm256_permutex_pd (__m256d __X, const int __M)
  11312. {
  11313. return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __M,
  11314. (__v4df)
  11315. _mm256_undefined_pd (),
  11316. (__mmask8) -1);
  11317. }
  11318. #else
  /* Macro definition of _mm256_permutex_pd, placed on the #else branch of
     the enclosing conditional.  Expands to __builtin_ia32_permdf256_mask on
     X and (int) M, with _mm256_undefined_pd () as the third operand and
     (__mmask8)-1 as the mask.  */
  #define _mm256_permutex_pd(X, M) \
    ((__m256d) __builtin_ia32_permdf256_mask ( \
       (__v4df)(__m256d)(X), (int)(M), \
       (__v4df)(__m256d) _mm256_undefined_pd (), (__mmask8)-1))
  /* permutex_epi64 family: all three expand to
     __builtin_ia32_permdi256_mask on X and (int) I.  The plain and maskz
     forms pass _mm256_setzero_si256 () as the third operand, the mask form
     passes W; the plain form uses (__mmask8) -1 as the mask, the other two
     pass M.  */
  #define _mm256_permutex_epi64(X, I) \
    ((__m256i) __builtin_ia32_permdi256_mask ( \
       (__v4di)(__m256i)(X), (int)(I), \
       (__v4di)(__m256i) (_mm256_setzero_si256 ()), (__mmask8) -1))
  #define _mm256_maskz_permutex_epi64(M, X, I) \
    ((__m256i) __builtin_ia32_permdi256_mask ( \
       (__v4di)(__m256i)(X), (int)(I), \
       (__v4di)(__m256i) (_mm256_setzero_si256 ()), (__mmask8)(M)))
  #define _mm256_mask_permutex_epi64(W, M, X, I) \
    ((__m256i) __builtin_ia32_permdi256_mask ( \
       (__v4di)(__m256i)(X), (int)(I), \
       (__v4di)(__m256i)(W), (__mmask8)(M)))
  /* insertf32x4 family: all three expand to
     __builtin_ia32_insertf32x4_256_mask on X, Y and (int) C.  The plain and
     maskz forms pass _mm256_setzero_ps () as the fourth operand, the mask
     form passes W; the plain form uses (__mmask8)-1 as the mask, the other
     two pass U.  */
  #define _mm256_insertf32x4(X, Y, C) \
    ((__m256) __builtin_ia32_insertf32x4_256_mask ( \
       (__v8sf)(__m256) (X), (__v4sf)(__m128) (Y), (int) (C), \
       (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)-1))
  #define _mm256_mask_insertf32x4(W, U, X, Y, C) \
    ((__m256) __builtin_ia32_insertf32x4_256_mask ( \
       (__v8sf)(__m256) (X), (__v4sf)(__m128) (Y), (int) (C), \
       (__v8sf)(__m256)(W), (__mmask8)(U)))
  #define _mm256_maskz_insertf32x4(U, X, Y, C) \
    ((__m256) __builtin_ia32_insertf32x4_256_mask ( \
       (__v8sf)(__m256) (X), (__v4sf)(__m128) (Y), (int) (C), \
       (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)(U)))
  /* inserti32x4 family: all three expand to
     __builtin_ia32_inserti32x4_256_mask on X, Y and (int) C.  The plain and
     maskz forms pass _mm256_setzero_si256 () as the fourth operand, the
     mask form passes W; the plain form uses (__mmask8)-1 as the mask, the
     other two pass U.  */
  #define _mm256_inserti32x4(X, Y, C) \
    ((__m256i) __builtin_ia32_inserti32x4_256_mask ( \
       (__v8si)(__m256i) (X), (__v4si)(__m128i) (Y), (int) (C), \
       (__v8si)(__m256i)_mm256_setzero_si256 (), (__mmask8)-1))
  #define _mm256_mask_inserti32x4(W, U, X, Y, C) \
    ((__m256i) __builtin_ia32_inserti32x4_256_mask ( \
       (__v8si)(__m256i) (X), (__v4si)(__m128i) (Y), (int) (C), \
       (__v8si)(__m256i)(W), (__mmask8)(U)))
  #define _mm256_maskz_inserti32x4(U, X, Y, C) \
    ((__m256i) __builtin_ia32_inserti32x4_256_mask ( \
       (__v8si)(__m256i) (X), (__v4si)(__m128i) (Y), (int) (C), \
       (__v8si)(__m256i)_mm256_setzero_si256 (), (__mmask8)(U)))
  /* extractf32x4 family: all three expand to
     __builtin_ia32_extractf32x4_256_mask on X and (int) C.  The plain and
     maskz forms pass _mm_setzero_ps () as the third operand, the mask form
     passes W; the plain form uses (__mmask8)-1 as the mask, the other two
     pass U.  */
  #define _mm256_extractf32x4_ps(X, C) \
    ((__m128) __builtin_ia32_extractf32x4_256_mask ( \
       (__v8sf)(__m256) (X), (int) (C), \
       (__v4sf)(__m128)_mm_setzero_ps (), (__mmask8)-1))
  #define _mm256_mask_extractf32x4_ps(W, U, X, C) \
    ((__m128) __builtin_ia32_extractf32x4_256_mask ( \
       (__v8sf)(__m256) (X), (int) (C), \
       (__v4sf)(__m128)(W), (__mmask8)(U)))
  #define _mm256_maskz_extractf32x4_ps(U, X, C) \
    ((__m128) __builtin_ia32_extractf32x4_256_mask ( \
       (__v8sf)(__m256) (X), (int) (C), \
       (__v4sf)(__m128)_mm_setzero_ps (), (__mmask8)(U)))
  /* extracti32x4 family: all three expand to
     __builtin_ia32_extracti32x4_256_mask on X and (int) C.  The plain and
     maskz forms pass _mm_setzero_si128 () as the third operand, the mask
     form passes W; the plain form uses (__mmask8)-1 as the mask, the other
     two pass U.  */
  #define _mm256_extracti32x4_epi32(X, C) \
    ((__m128i) __builtin_ia32_extracti32x4_256_mask ( \
       (__v8si)(__m256i) (X), (int) (C), \
       (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)-1))
  #define _mm256_mask_extracti32x4_epi32(W, U, X, C) \
    ((__m128i) __builtin_ia32_extracti32x4_256_mask ( \
       (__v8si)(__m256i) (X), (int) (C), \
       (__v4si)(__m128i)(W), (__mmask8)(U)))
  #define _mm256_maskz_extracti32x4_epi32(U, X, C) \
    ((__m128i) __builtin_ia32_extracti32x4_256_mask ( \
       (__v8si)(__m256i) (X), (int) (C), \
       (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)(U)))
  /* shuffle_i64x2 family: all three expand to
     __builtin_ia32_shuf_i64x2_256_mask on X, Y and (int) C.  The plain and
     maskz forms pass _mm256_setzero_si256 () as the fourth operand, the
     mask form passes W; the plain form uses (__mmask8)-1 as the mask, the
     other two pass U.  */
  #define _mm256_shuffle_i64x2(X, Y, C) \
    ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ( \
       (__v4di)(__m256i)(X), (__v4di)(__m256i)(Y), (int)(C), \
       (__v4di)(__m256i)_mm256_setzero_si256 (), (__mmask8)-1))
  #define _mm256_mask_shuffle_i64x2(W, U, X, Y, C) \
    ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ( \
       (__v4di)(__m256i)(X), (__v4di)(__m256i)(Y), (int)(C), \
       (__v4di)(__m256i)(W), (__mmask8)(U)))
  #define _mm256_maskz_shuffle_i64x2(U, X, Y, C) \
    ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ( \
       (__v4di)(__m256i)(X), (__v4di)(__m256i)(Y), (int)(C), \
       (__v4di)(__m256i)_mm256_setzero_si256 (), (__mmask8)(U)))
  /* shuffle_i32x4 family: all three expand to
     __builtin_ia32_shuf_i32x4_256_mask on X, Y and (int) C.  The plain and
     maskz forms pass _mm256_setzero_si256 () as the fourth operand, the
     mask form passes W; the plain form uses (__mmask8)-1 as the mask, the
     other two pass U.  */
  #define _mm256_shuffle_i32x4(X, Y, C) \
    ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ( \
       (__v8si)(__m256i)(X), (__v8si)(__m256i)(Y), (int)(C), \
       (__v8si)(__m256i) _mm256_setzero_si256 (), (__mmask8)-1))
  #define _mm256_mask_shuffle_i32x4(W, U, X, Y, C) \
    ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ( \
       (__v8si)(__m256i)(X), (__v8si)(__m256i)(Y), (int)(C), \
       (__v8si)(__m256i)(W), (__mmask8)(U)))
  #define _mm256_maskz_shuffle_i32x4(U, X, Y, C) \
    ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ( \
       (__v8si)(__m256i)(X), (__v8si)(__m256i)(Y), (int)(C), \
       (__v8si)(__m256i) _mm256_setzero_si256 (), (__mmask8)(U)))
  /* shuffle_f64x2 family: all three expand to
     __builtin_ia32_shuf_f64x2_256_mask on X, Y and (int) C.  The plain and
     maskz forms pass _mm256_setzero_pd () as the fourth operand, the mask
     form passes W; the plain form uses (__mmask8)-1 as the mask, the other
     two pass U.  */
  #define _mm256_shuffle_f64x2(X, Y, C) \
    ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ( \
       (__v4df)(__m256d)(X), (__v4df)(__m256d)(Y), (int)(C), \
       (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)-1))
  #define _mm256_mask_shuffle_f64x2(W, U, X, Y, C) \
    ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ( \
       (__v4df)(__m256d)(X), (__v4df)(__m256d)(Y), (int)(C), \
       (__v4df)(__m256d)(W), (__mmask8)(U)))
  #define _mm256_maskz_shuffle_f64x2(U, X, Y, C) \
    ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ( \
       (__v4df)(__m256d)(X), (__v4df)(__m256d)(Y), (int)(C), \
       (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)(U)))
  /* shuffle_f32x4 family: all three expand to
     __builtin_ia32_shuf_f32x4_256_mask on X, Y and (int) C.  The plain and
     maskz forms pass _mm256_setzero_ps () as the fourth operand, the mask
     form passes W; the plain form uses (__mmask8)-1 as the mask, the other
     two pass U.  */
  #define _mm256_shuffle_f32x4(X, Y, C) \
    ((__m256) __builtin_ia32_shuf_f32x4_256_mask ( \
       (__v8sf)(__m256)(X), (__v8sf)(__m256)(Y), (int)(C), \
       (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)-1))
  #define _mm256_mask_shuffle_f32x4(W, U, X, Y, C) \
    ((__m256) __builtin_ia32_shuf_f32x4_256_mask ( \
       (__v8sf)(__m256)(X), (__v8sf)(__m256)(Y), (int)(C), \
       (__v8sf)(__m256)(W), (__mmask8)(U)))
  #define _mm256_maskz_shuffle_f32x4(U, X, Y, C) \
    ((__m256) __builtin_ia32_shuf_f32x4_256_mask ( \
       (__v8sf)(__m256)(X), (__v8sf)(__m256)(Y), (int)(C), \
       (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)(U)))
  /* Both macros expand to __builtin_ia32_shufpd256_mask on A, B and
     (int) C with mask U.  The mask form passes W as the fourth operand,
     the maskz form passes _mm256_setzero_pd ().  */
  #define _mm256_mask_shuffle_pd(W, U, A, B, C) \
    ((__m256d)__builtin_ia32_shufpd256_mask ( \
       (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \
       (__v4df)(__m256d)(W), (__mmask8)(U)))
  #define _mm256_maskz_shuffle_pd(U, A, B, C) \
    ((__m256d)__builtin_ia32_shufpd256_mask ( \
       (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \
       (__v4df)(__m256d) _mm256_setzero_pd (), (__mmask8)(U)))
  /* Both macros expand to __builtin_ia32_shufpd128_mask on A, B and
     (int) C with mask U.  The mask form passes W as the fourth operand,
     the maskz form passes _mm_setzero_pd ().  */
  #define _mm_mask_shuffle_pd(W, U, A, B, C) \
    ((__m128d)__builtin_ia32_shufpd128_mask ( \
       (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), \
       (__v2df)(__m128d)(W), (__mmask8)(U)))
  #define _mm_maskz_shuffle_pd(U, A, B, C) \
    ((__m128d)__builtin_ia32_shufpd128_mask ( \
       (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), \
       (__v2df)(__m128d)_mm_setzero_pd (), (__mmask8)(U)))
  /* Both macros expand to __builtin_ia32_shufps256_mask on A, B and
     (int) C with mask U.  The mask form passes W as the fourth operand,
     the maskz form passes _mm256_setzero_ps ().  */
  #define _mm256_mask_shuffle_ps(W, U, A, B, C) \
    ((__m256) __builtin_ia32_shufps256_mask ( \
       (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \
       (__v8sf)(__m256)(W), (__mmask8)(U)))
  #define _mm256_maskz_shuffle_ps(U, A, B, C) \
    ((__m256) __builtin_ia32_shufps256_mask ( \
       (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \
       (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)(U)))
  /* Both macros expand to __builtin_ia32_shufps128_mask on A, B and
     (int) C with mask U.  The mask form passes W as the fourth operand,
     the maskz form passes _mm_setzero_ps ().  */
  #define _mm_mask_shuffle_ps(W, U, A, B, C) \
    ((__m128) __builtin_ia32_shufps128_mask ( \
       (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), \
       (__v4sf)(__m128)(W), (__mmask8)(U)))
  #define _mm_maskz_shuffle_ps(U, A, B, C) \
    ((__m128) __builtin_ia32_shufps128_mask ( \
       (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), \
       (__v4sf)(__m128)_mm_setzero_ps (), (__mmask8)(U)))
  /* 256-bit fixupimm_pd family.  The plain and mask forms expand to
     __builtin_ia32_fixupimmpd256_mask (mask (__mmask8)(-1) and U
     respectively); the maskz form expands to
     __builtin_ia32_fixupimmpd256_maskz with mask U.  Note the differing
     parameter orders: (X, U, Y, Z, C) for mask, (U, X, Y, Z, C) for
     maskz.  */
  #define _mm256_fixupimm_pd(X, Y, Z, C) \
    ((__m256d)__builtin_ia32_fixupimmpd256_mask ( \
       (__v4df)(__m256d)(X), (__v4df)(__m256d)(Y), \
       (__v4di)(__m256i)(Z), (int)(C), (__mmask8)(-1)))
  #define _mm256_mask_fixupimm_pd(X, U, Y, Z, C) \
    ((__m256d)__builtin_ia32_fixupimmpd256_mask ( \
       (__v4df)(__m256d)(X), (__v4df)(__m256d)(Y), \
       (__v4di)(__m256i)(Z), (int)(C), (__mmask8)(U)))
  #define _mm256_maskz_fixupimm_pd(U, X, Y, Z, C) \
    ((__m256d)__builtin_ia32_fixupimmpd256_maskz ( \
       (__v4df)(__m256d)(X), (__v4df)(__m256d)(Y), \
       (__v4di)(__m256i)(Z), (int)(C), (__mmask8)(U)))
  /* 256-bit fixupimm_ps family.  The plain and mask forms expand to
     __builtin_ia32_fixupimmps256_mask (mask (__mmask8)(-1) and U
     respectively); the maskz form expands to
     __builtin_ia32_fixupimmps256_maskz with mask U.  Note the differing
     parameter orders: (X, U, Y, Z, C) for mask, (U, X, Y, Z, C) for
     maskz.  */
  #define _mm256_fixupimm_ps(X, Y, Z, C) \
    ((__m256)__builtin_ia32_fixupimmps256_mask ( \
       (__v8sf)(__m256)(X), (__v8sf)(__m256)(Y), \
       (__v8si)(__m256i)(Z), (int)(C), (__mmask8)(-1)))
  #define _mm256_mask_fixupimm_ps(X, U, Y, Z, C) \
    ((__m256)__builtin_ia32_fixupimmps256_mask ( \
       (__v8sf)(__m256)(X), (__v8sf)(__m256)(Y), \
       (__v8si)(__m256i)(Z), (int)(C), (__mmask8)(U)))
  #define _mm256_maskz_fixupimm_ps(U, X, Y, Z, C) \
    ((__m256)__builtin_ia32_fixupimmps256_maskz ( \
       (__v8sf)(__m256)(X), (__v8sf)(__m256)(Y), \
       (__v8si)(__m256i)(Z), (int)(C), (__mmask8)(U)))
  /* 128-bit fixupimm_pd family.  The plain and mask forms expand to
     __builtin_ia32_fixupimmpd128_mask (mask (__mmask8)(-1) and U
     respectively); the maskz form expands to
     __builtin_ia32_fixupimmpd128_maskz with mask U.  Note the differing
     parameter orders: (X, U, Y, Z, C) for mask, (U, X, Y, Z, C) for
     maskz.  */
  #define _mm_fixupimm_pd(X, Y, Z, C) \
    ((__m128d)__builtin_ia32_fixupimmpd128_mask ( \
       (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
       (__v2di)(__m128i)(Z), (int)(C), (__mmask8)(-1)))
  #define _mm_mask_fixupimm_pd(X, U, Y, Z, C) \
    ((__m128d)__builtin_ia32_fixupimmpd128_mask ( \
       (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
       (__v2di)(__m128i)(Z), (int)(C), (__mmask8)(U)))
  #define _mm_maskz_fixupimm_pd(U, X, Y, Z, C) \
    ((__m128d)__builtin_ia32_fixupimmpd128_maskz ( \
       (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
       (__v2di)(__m128i)(Z), (int)(C), (__mmask8)(U)))
  /* 128-bit fixupimm_ps family.  The plain and mask forms expand to
     __builtin_ia32_fixupimmps128_mask (mask (__mmask8)(-1) and U
     respectively); the maskz form expands to
     __builtin_ia32_fixupimmps128_maskz with mask U.  Note the differing
     parameter orders: (X, U, Y, Z, C) for mask, (U, X, Y, Z, C) for
     maskz.  */
  #define _mm_fixupimm_ps(X, Y, Z, C) \
    ((__m128)__builtin_ia32_fixupimmps128_mask ( \
       (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
       (__v4si)(__m128i)(Z), (int)(C), (__mmask8)(-1)))
  #define _mm_mask_fixupimm_ps(X, U, Y, Z, C) \
    ((__m128)__builtin_ia32_fixupimmps128_mask ( \
       (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
       (__v4si)(__m128i)(Z), (int)(C), (__mmask8)(U)))
  #define _mm_maskz_fixupimm_ps(U, X, Y, Z, C) \
    ((__m128)__builtin_ia32_fixupimmps128_maskz ( \
       (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
       (__v4si)(__m128i)(Z), (int)(C), (__mmask8)(U)))
  /* srli_epi32 family.  The 256-bit macros expand to
     __builtin_ia32_psrldi256_mask and the 128-bit ones to
     __builtin_ia32_psrldi128_mask, each on A and (int) B with mask U.
     The mask forms pass W as the third operand; the maskz forms pass a
     zero vector from _mm256_setzero_si256 () / _mm_setzero_si128 ().  */
  #define _mm256_mask_srli_epi32(W, U, A, B) \
    ((__m256i) __builtin_ia32_psrldi256_mask ( \
       (__v8si)(__m256i)(A), (int)(B), \
       (__v8si)(__m256i)(W), (__mmask8)(U)))
  #define _mm256_maskz_srli_epi32(U, A, B) \
    ((__m256i) __builtin_ia32_psrldi256_mask ( \
       (__v8si)(__m256i)(A), (int)(B), \
       (__v8si)_mm256_setzero_si256 (), (__mmask8)(U)))
  #define _mm_mask_srli_epi32(W, U, A, B) \
    ((__m128i) __builtin_ia32_psrldi128_mask ( \
       (__v4si)(__m128i)(A), (int)(B), \
       (__v4si)(__m128i)(W), (__mmask8)(U)))
  #define _mm_maskz_srli_epi32(U, A, B) \
    ((__m128i) __builtin_ia32_psrldi128_mask ( \
       (__v4si)(__m128i)(A), (int)(B), \
       (__v4si)_mm_setzero_si128 (), (__mmask8)(U)))
  /* srli_epi64 family.  The 256-bit macros expand to
     __builtin_ia32_psrlqi256_mask and the 128-bit ones to
     __builtin_ia32_psrlqi128_mask, each on A and (int) B with mask U.
     The mask forms pass W as the third operand; the maskz forms pass a
     zero vector from _mm256_setzero_si256 () / _mm_setzero_si128 ().  */
  #define _mm256_mask_srli_epi64(W, U, A, B) \
    ((__m256i) __builtin_ia32_psrlqi256_mask ( \
       (__v4di)(__m256i)(A), (int)(B), \
       (__v4di)(__m256i)(W), (__mmask8)(U)))
  #define _mm256_maskz_srli_epi64(U, A, B) \
    ((__m256i) __builtin_ia32_psrlqi256_mask ( \
       (__v4di)(__m256i)(A), (int)(B), \
       (__v4di)_mm256_setzero_si256 (), (__mmask8)(U)))
  #define _mm_mask_srli_epi64(W, U, A, B) \
    ((__m128i) __builtin_ia32_psrlqi128_mask ( \
       (__v2di)(__m128i)(A), (int)(B), \
       (__v2di)(__m128i)(W), (__mmask8)(U)))
  #define _mm_maskz_srli_epi64(U, A, B) \
    ((__m128i) __builtin_ia32_psrlqi128_mask ( \
       (__v2di)(__m128i)(A), (int)(B), \
       (__v2di)_mm_setzero_si128 (), (__mmask8)(U)))
  /* 256-bit slli family.  The epi32 macros expand to
     __builtin_ia32_pslldi256_mask and the epi64 ones to
     __builtin_ia32_psllqi256_mask, each on X and (int) C with mask U.
     The mask forms pass W as the third operand; the maskz forms pass
     _mm256_setzero_si256 ().  */
  #define _mm256_mask_slli_epi32(W, U, X, C) \
    ((__m256i)__builtin_ia32_pslldi256_mask ( \
       (__v8si)(__m256i)(X), (int)(C), \
       (__v8si)(__m256i)(W), (__mmask8)(U)))
  #define _mm256_maskz_slli_epi32(U, X, C) \
    ((__m256i)__builtin_ia32_pslldi256_mask ( \
       (__v8si)(__m256i)(X), (int)(C), \
       (__v8si)(__m256i)_mm256_setzero_si256 (), (__mmask8)(U)))
  #define _mm256_mask_slli_epi64(W, U, X, C) \
    ((__m256i)__builtin_ia32_psllqi256_mask ( \
       (__v4di)(__m256i)(X), (int)(C), \
       (__v4di)(__m256i)(W), (__mmask8)(U)))
  #define _mm256_maskz_slli_epi64(U, X, C) \
    ((__m256i)__builtin_ia32_psllqi256_mask ( \
       (__v4di)(__m256i)(X), (int)(C), \
       (__v4di)(__m256i)_mm256_setzero_si256 (), (__mmask8)(U)))
  /* 128-bit slli family.  The epi32 macros expand to
     __builtin_ia32_pslldi128_mask and the epi64 ones to
     __builtin_ia32_psllqi128_mask, each on X and (int) C with mask U.
     The mask forms pass W as the third operand; the maskz forms pass
     _mm_setzero_si128 ().  */
  #define _mm_mask_slli_epi32(W, U, X, C) \
    ((__m128i)__builtin_ia32_pslldi128_mask ( \
       (__v4si)(__m128i)(X), (int)(C), \
       (__v4si)(__m128i)(W), (__mmask8)(U)))
  #define _mm_maskz_slli_epi32(U, X, C) \
    ((__m128i)__builtin_ia32_pslldi128_mask ( \
       (__v4si)(__m128i)(X), (int)(C), \
       (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)(U)))
  #define _mm_mask_slli_epi64(W, U, X, C) \
    ((__m128i)__builtin_ia32_psllqi128_mask ( \
       (__v2di)(__m128i)(X), (int)(C), \
       (__v2di)(__m128i)(W), (__mmask8)(U)))
  #define _mm_maskz_slli_epi64(U, X, C) \
    ((__m128i)__builtin_ia32_psllqi128_mask ( \
       (__v2di)(__m128i)(X), (int)(C), \
       (__v2di)(__m128i)_mm_setzero_si128 (), (__mmask8)(U)))
  /* 256-bit ternarylogic_epi64 family.  The plain and mask forms expand to
     __builtin_ia32_pternlogq256_mask (mask (__mmask8) -1 and U
     respectively); the maskz form expands to
     __builtin_ia32_pternlogq256_maskz with mask U.  I is cast to
     unsigned char in every form.  */
  #define _mm256_ternarylogic_epi64(A, B, C, I) \
    ((__m256i) __builtin_ia32_pternlogq256_mask ( \
       (__v4di) (__m256i) (A), (__v4di) (__m256i) (B), \
       (__v4di) (__m256i) (C), (unsigned char) (I), (__mmask8) -1))
  #define _mm256_mask_ternarylogic_epi64(A, U, B, C, I) \
    ((__m256i) __builtin_ia32_pternlogq256_mask ( \
       (__v4di) (__m256i) (A), (__v4di) (__m256i) (B), \
       (__v4di) (__m256i) (C), (unsigned char) (I), (__mmask8) (U)))
  #define _mm256_maskz_ternarylogic_epi64(U, A, B, C, I) \
    ((__m256i) __builtin_ia32_pternlogq256_maskz ( \
       (__v4di) (__m256i) (A), (__v4di) (__m256i) (B), \
       (__v4di) (__m256i) (C), (unsigned char) (I), (__mmask8) (U)))
  /* 256-bit ternarylogic_epi32 family.  The plain and mask forms expand to
     __builtin_ia32_pternlogd256_mask (mask (__mmask8) -1 and U
     respectively); the maskz form expands to
     __builtin_ia32_pternlogd256_maskz with mask U.  I is cast to
     unsigned char in every form.  */
  #define _mm256_ternarylogic_epi32(A, B, C, I) \
    ((__m256i) __builtin_ia32_pternlogd256_mask ( \
       (__v8si) (__m256i) (A), (__v8si) (__m256i) (B), \
       (__v8si) (__m256i) (C), (unsigned char) (I), (__mmask8) -1))
  #define _mm256_mask_ternarylogic_epi32(A, U, B, C, I) \
    ((__m256i) __builtin_ia32_pternlogd256_mask ( \
       (__v8si) (__m256i) (A), (__v8si) (__m256i) (B), \
       (__v8si) (__m256i) (C), (unsigned char) (I), (__mmask8) (U)))
  #define _mm256_maskz_ternarylogic_epi32(U, A, B, C, I) \
    ((__m256i) __builtin_ia32_pternlogd256_maskz ( \
       (__v8si) (__m256i) (A), (__v8si) (__m256i) (B), \
       (__v8si) (__m256i) (C), (unsigned char) (I), (__mmask8) (U)))
  /* 128-bit ternarylogic_epi64 family.  The plain and mask forms expand to
     __builtin_ia32_pternlogq128_mask (mask (__mmask8) -1 and U
     respectively); the maskz form expands to
     __builtin_ia32_pternlogq128_maskz with mask U.  I is cast to
     unsigned char in every form.  */
  #define _mm_ternarylogic_epi64(A, B, C, I) \
    ((__m128i) __builtin_ia32_pternlogq128_mask ( \
       (__v2di) (__m128i) (A), (__v2di) (__m128i) (B), \
       (__v2di) (__m128i) (C), (unsigned char) (I), (__mmask8) -1))
  #define _mm_mask_ternarylogic_epi64(A, U, B, C, I) \
    ((__m128i) __builtin_ia32_pternlogq128_mask ( \
       (__v2di) (__m128i) (A), (__v2di) (__m128i) (B), \
       (__v2di) (__m128i) (C), (unsigned char) (I), (__mmask8) (U)))
  #define _mm_maskz_ternarylogic_epi64(U, A, B, C, I) \
    ((__m128i) __builtin_ia32_pternlogq128_maskz ( \
       (__v2di) (__m128i) (A), (__v2di) (__m128i) (B), \
       (__v2di) (__m128i) (C), (unsigned char) (I), (__mmask8) (U)))
  /* 128-bit ternarylogic_epi32 family.  The plain and mask forms expand to
     __builtin_ia32_pternlogd128_mask (mask (__mmask8) -1 and U
     respectively); the maskz form expands to
     __builtin_ia32_pternlogd128_maskz with mask U.  I is cast to
     unsigned char in every form.  */
  #define _mm_ternarylogic_epi32(A, B, C, I) \
    ((__m128i) __builtin_ia32_pternlogd128_mask ( \
       (__v4si) (__m128i) (A), (__v4si) (__m128i) (B), \
       (__v4si) (__m128i) (C), (unsigned char) (I), (__mmask8) -1))
  #define _mm_mask_ternarylogic_epi32(A, U, B, C, I) \
    ((__m128i) __builtin_ia32_pternlogd128_mask ( \
       (__v4si) (__m128i) (A), (__v4si) (__m128i) (B), \
       (__v4si) (__m128i) (C), (unsigned char) (I), (__mmask8) (U)))
  #define _mm_maskz_ternarylogic_epi32(U, A, B, C, I) \
    ((__m128i) __builtin_ia32_pternlogd128_maskz ( \
       (__v4si) (__m128i) (A), (__v4si) (__m128i) (B), \
       (__v4si) (__m128i) (C), (unsigned char) (I), (__mmask8) (U)))
  /* 256-bit roundscale_ps family: all three expand to
     __builtin_ia32_rndscaleps_256_mask on A and (int) B.  The plain and
     maskz forms pass _mm256_setzero_ps () as the third operand, the mask
     form passes W; the plain form uses (__mmask8)-1 as the mask, the other
     two pass U.  */
  #define _mm256_roundscale_ps(A, B) \
    ((__m256) __builtin_ia32_rndscaleps_256_mask ( \
       (__v8sf)(__m256)(A), (int)(B), \
       (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)-1))
  #define _mm256_mask_roundscale_ps(W, U, A, B) \
    ((__m256) __builtin_ia32_rndscaleps_256_mask ( \
       (__v8sf)(__m256)(A), (int)(B), \
       (__v8sf)(__m256)(W), (__mmask8)(U)))
  #define _mm256_maskz_roundscale_ps(U, A, B) \
    ((__m256) __builtin_ia32_rndscaleps_256_mask ( \
       (__v8sf)(__m256)(A), (int)(B), \
       (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)(U)))
  /* 256-bit roundscale_pd family: all three expand to
     __builtin_ia32_rndscalepd_256_mask on A and (int) B.  The plain and
     maskz forms pass _mm256_setzero_pd () as the third operand, the mask
     form passes W; the plain form uses (__mmask8)-1 as the mask, the other
     two pass U.  */
  #define _mm256_roundscale_pd(A, B) \
    ((__m256d) __builtin_ia32_rndscalepd_256_mask ( \
       (__v4df)(__m256d)(A), (int)(B), \
       (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)-1))
  #define _mm256_mask_roundscale_pd(W, U, A, B) \
    ((__m256d) __builtin_ia32_rndscalepd_256_mask ( \
       (__v4df)(__m256d)(A), (int)(B), \
       (__v4df)(__m256d)(W), (__mmask8)(U)))
  #define _mm256_maskz_roundscale_pd(U, A, B) \
    ((__m256d) __builtin_ia32_rndscalepd_256_mask ( \
       (__v4df)(__m256d)(A), (int)(B), \
       (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)(U)))
  /* 128-bit roundscale_ps family: all three expand to
     __builtin_ia32_rndscaleps_128_mask on A and (int) B.  The plain and
     maskz forms pass _mm_setzero_ps () as the third operand, the mask form
     passes W; the plain form uses (__mmask8)-1 as the mask, the other two
     pass U.  */
  #define _mm_roundscale_ps(A, B) \
    ((__m128) __builtin_ia32_rndscaleps_128_mask ( \
       (__v4sf)(__m128)(A), (int)(B), \
       (__v4sf)(__m128)_mm_setzero_ps (), (__mmask8)-1))
  #define _mm_mask_roundscale_ps(W, U, A, B) \
    ((__m128) __builtin_ia32_rndscaleps_128_mask ( \
       (__v4sf)(__m128)(A), (int)(B), \
       (__v4sf)(__m128)(W), (__mmask8)(U)))
  #define _mm_maskz_roundscale_ps(U, A, B) \
    ((__m128) __builtin_ia32_rndscaleps_128_mask ( \
       (__v4sf)(__m128)(A), (int)(B), \
       (__v4sf)(__m128)_mm_setzero_ps (), (__mmask8)(U)))
  /* 128-bit roundscale_pd family: all three expand to
     __builtin_ia32_rndscalepd_128_mask on A and (int) B.  The plain and
     maskz forms pass _mm_setzero_pd () as the third operand, the mask form
     passes W; the plain form uses (__mmask8)-1 as the mask, the other two
     pass U.  */
  #define _mm_roundscale_pd(A, B) \
    ((__m128d) __builtin_ia32_rndscalepd_128_mask ( \
       (__v2df)(__m128d)(A), (int)(B), \
       (__v2df)(__m128d)_mm_setzero_pd (), (__mmask8)-1))
  #define _mm_mask_roundscale_pd(W, U, A, B) \
    ((__m128d) __builtin_ia32_rndscalepd_128_mask ( \
       (__v2df)(__m128d)(A), (int)(B), \
       (__v2df)(__m128d)(W), (__mmask8)(U)))
  #define _mm_maskz_roundscale_pd(U, A, B) \
    ((__m128d) __builtin_ia32_rndscalepd_128_mask ( \
       (__v2df)(__m128d)(A), (int)(B), \
       (__v2df)(__m128d)_mm_setzero_pd (), (__mmask8)(U)))
  /* 256-bit getmant_ps family: all three expand to
     __builtin_ia32_getmantps256_mask on X, with the integer operand built
     as ((C) << 2) | (B).  The plain and maskz forms pass
     _mm256_setzero_ps () as the third operand, the mask form passes W; the
     plain form uses (__mmask8)-1 as the mask, the other two pass U.  */
  #define _mm256_getmant_ps(X, B, C) \
    ((__m256) __builtin_ia32_getmantps256_mask ( \
       (__v8sf)(__m256) (X), (int)(((C)<<2) | (B)), \
       (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)-1))
  #define _mm256_mask_getmant_ps(W, U, X, B, C) \
    ((__m256) __builtin_ia32_getmantps256_mask ( \
       (__v8sf)(__m256) (X), (int)(((C)<<2) | (B)), \
       (__v8sf)(__m256)(W), (__mmask8)(U)))
  #define _mm256_maskz_getmant_ps(U, X, B, C) \
    ((__m256) __builtin_ia32_getmantps256_mask ( \
       (__v8sf)(__m256) (X), (int)(((C)<<2) | (B)), \
       (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)(U)))
  /* 128-bit getmant_ps family: all three expand to
     __builtin_ia32_getmantps128_mask on X, with the integer operand built
     as ((C) << 2) | (B).  The plain and maskz forms pass _mm_setzero_ps ()
     as the third operand, the mask form passes W; the plain form uses
     (__mmask8)-1 as the mask, the other two pass U.  */
  #define _mm_getmant_ps(X, B, C) \
    ((__m128) __builtin_ia32_getmantps128_mask ( \
       (__v4sf)(__m128) (X), (int)(((C)<<2) | (B)), \
       (__v4sf)(__m128)_mm_setzero_ps (), (__mmask8)-1))
  #define _mm_mask_getmant_ps(W, U, X, B, C) \
    ((__m128) __builtin_ia32_getmantps128_mask ( \
       (__v4sf)(__m128) (X), (int)(((C)<<2) | (B)), \
       (__v4sf)(__m128)(W), (__mmask8)(U)))
  #define _mm_maskz_getmant_ps(U, X, B, C) \
    ((__m128) __builtin_ia32_getmantps128_mask ( \
       (__v4sf)(__m128) (X), (int)(((C)<<2) | (B)), \
       (__v4sf)(__m128)_mm_setzero_ps (), (__mmask8)(U)))
  /* 256-bit getmant_pd family: all three expand to
     __builtin_ia32_getmantpd256_mask on X, with the integer operand built
     as ((C) << 2) | (B).  The plain and maskz forms pass
     _mm256_setzero_pd () as the third operand, the mask form passes W; the
     plain form uses (__mmask8)-1 as the mask, the other two pass U.  */
  #define _mm256_getmant_pd(X, B, C) \
    ((__m256d) __builtin_ia32_getmantpd256_mask ( \
       (__v4df)(__m256d) (X), (int)(((C)<<2) | (B)), \
       (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)-1))
  #define _mm256_mask_getmant_pd(W, U, X, B, C) \
    ((__m256d) __builtin_ia32_getmantpd256_mask ( \
       (__v4df)(__m256d) (X), (int)(((C)<<2) | (B)), \
       (__v4df)(__m256d)(W), (__mmask8)(U)))
  #define _mm256_maskz_getmant_pd(U, X, B, C) \
    ((__m256d) __builtin_ia32_getmantpd256_mask ( \
       (__v4df)(__m256d) (X), (int)(((C)<<2) | (B)), \
       (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)(U)))
  /* 128-bit getmant_pd family: all three expand to
     __builtin_ia32_getmantpd128_mask on X, with the integer operand built
     as ((C) << 2) | (B).  The plain and maskz forms pass _mm_setzero_pd ()
     as the third operand, the mask form passes W; the plain form uses
     (__mmask8)-1 as the mask, the other two pass U.  */
  #define _mm_getmant_pd(X, B, C) \
    ((__m128d) __builtin_ia32_getmantpd128_mask ( \
       (__v2df)(__m128d) (X), (int)(((C)<<2) | (B)), \
       (__v2df)(__m128d)_mm_setzero_pd (), (__mmask8)-1))
  #define _mm_mask_getmant_pd(W, U, X, B, C) \
    ((__m128d) __builtin_ia32_getmantpd128_mask ( \
       (__v2df)(__m128d) (X), (int)(((C)<<2) | (B)), \
       (__v2df)(__m128d)(W), (__mmask8)(U)))
  #define _mm_maskz_getmant_pd(U, X, B, C) \
    ((__m128d) __builtin_ia32_getmantpd128_mask ( \
       (__v2df)(__m128d) (X), (int)(((C)<<2) | (B)), \
       (__v2df)(__m128d)_mm_setzero_pd (), (__mmask8)(U)))
  /* i32gather_ps macros.  They expand to __builtin_ia32_gather3siv8sf
     (256-bit) and __builtin_ia32_gather3siv4sf (128-bit).  The macro takes
     (V1OLD, MASK, INDEX, ADDR, SCALE) but the builtin receives the operands
     in the order V1OLD, ADDR, INDEX, MASK, SCALE.
     The whole expansion is wrapped in parentheses so that the leading cast
     applies to the complete builtin call even when the invocation is
     followed by a postfix operator such as a subscript.  */
  #define _mm256_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
    ((__m256) __builtin_ia32_gather3siv8sf ((__v8sf)(__m256) (V1OLD), \
                                            (void const *) (ADDR), \
                                            (__v8si)(__m256i) (INDEX), \
                                            (__mmask8) (MASK), \
                                            (int) (SCALE)))
  #define _mm_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
    ((__m128) __builtin_ia32_gather3siv4sf ((__v4sf)(__m128) (V1OLD), \
                                            (void const *) (ADDR), \
                                            (__v4si)(__m128i) (INDEX), \
                                            (__mmask8) (MASK), \
                                            (int) (SCALE)))
  /* i32gather_pd macros.  They expand to __builtin_ia32_gather3siv4df
     (256-bit) and __builtin_ia32_gather3siv2df (128-bit); both take INDEX
     as a __v4si.  The macro takes (V1OLD, MASK, INDEX, ADDR, SCALE) but the
     builtin receives the operands in the order V1OLD, ADDR, INDEX, MASK,
     SCALE.
     The whole expansion is wrapped in parentheses so that the leading cast
     applies to the complete builtin call even when the invocation is
     followed by a postfix operator such as a subscript.  */
  #define _mm256_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
    ((__m256d) __builtin_ia32_gather3siv4df ((__v4df)(__m256d) (V1OLD), \
                                             (void const *) (ADDR), \
                                             (__v4si)(__m128i) (INDEX), \
                                             (__mmask8) (MASK), \
                                             (int) (SCALE)))
  #define _mm_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
    ((__m128d) __builtin_ia32_gather3siv2df ((__v2df)(__m128d) (V1OLD), \
                                             (void const *) (ADDR), \
                                             (__v4si)(__m128i) (INDEX), \
                                             (__mmask8) (MASK), \
                                             (int) (SCALE)))
  /* i64gather_ps macros.  They expand to __builtin_ia32_gather3div8sf
     (INDEX as __v4di) and __builtin_ia32_gather3div4sf (INDEX as __v2di);
     both take V1OLD as a __v4sf and yield an __m128.  The macro takes
     (V1OLD, MASK, INDEX, ADDR, SCALE) but the builtin receives the operands
     in the order V1OLD, ADDR, INDEX, MASK, SCALE.
     The whole expansion is wrapped in parentheses so that the leading cast
     applies to the complete builtin call even when the invocation is
     followed by a postfix operator such as a subscript.  */
  #define _mm256_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
    ((__m128) __builtin_ia32_gather3div8sf ((__v4sf)(__m128) (V1OLD), \
                                            (void const *) (ADDR), \
                                            (__v4di)(__m256i) (INDEX), \
                                            (__mmask8) (MASK), \
                                            (int) (SCALE)))
  #define _mm_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
    ((__m128) __builtin_ia32_gather3div4sf ((__v4sf)(__m128) (V1OLD), \
                                            (void const *) (ADDR), \
                                            (__v2di)(__m128i) (INDEX), \
                                            (__mmask8) (MASK), \
                                            (int) (SCALE)))
  /* i64gather_pd macros.  They expand to __builtin_ia32_gather3div4df
     (256-bit) and __builtin_ia32_gather3div2df (128-bit).  The macro takes
     (V1OLD, MASK, INDEX, ADDR, SCALE) but the builtin receives the operands
     in the order V1OLD, ADDR, INDEX, MASK, SCALE.
     The whole expansion is wrapped in parentheses so that the leading cast
     applies to the complete builtin call even when the invocation is
     followed by a postfix operator such as a subscript.  */
  #define _mm256_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
    ((__m256d) __builtin_ia32_gather3div4df ((__v4df)(__m256d) (V1OLD), \
                                             (void const *) (ADDR), \
                                             (__v4di)(__m256i) (INDEX), \
                                             (__mmask8) (MASK), \
                                             (int) (SCALE)))
  #define _mm_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
    ((__m128d) __builtin_ia32_gather3div2df ((__v2df)(__m128d) (V1OLD), \
                                             (void const *) (ADDR), \
                                             (__v2di)(__m128i) (INDEX), \
                                             (__mmask8) (MASK), \
                                             (int) (SCALE)))
  /* i32gather_epi32 macros.  They expand to __builtin_ia32_gather3siv8si
     (256-bit) and __builtin_ia32_gather3siv4si (128-bit).  The macro takes
     (V1OLD, MASK, INDEX, ADDR, SCALE) but the builtin receives the operands
     in the order V1OLD, ADDR, INDEX, MASK, SCALE.
     The whole expansion is wrapped in parentheses so that the leading cast
     applies to the complete builtin call even when the invocation is
     followed by a postfix operator such as a subscript.  */
  #define _mm256_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
    ((__m256i) __builtin_ia32_gather3siv8si ((__v8si)(__m256i) (V1OLD), \
                                             (void const *) (ADDR), \
                                             (__v8si)(__m256i) (INDEX), \
                                             (__mmask8) (MASK), \
                                             (int) (SCALE)))
  #define _mm_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
    ((__m128i) __builtin_ia32_gather3siv4si ((__v4si)(__m128i) (V1OLD), \
                                             (void const *) (ADDR), \
                                             (__v4si)(__m128i) (INDEX), \
                                             (__mmask8) (MASK), \
                                             (int) (SCALE)))
  11854. #define _mm256_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  11855. (__m256i) __builtin_ia32_gather3siv4di ((__v4di)(__m256i) (V1OLD), \
  11856. (void const *) (ADDR), \
  11857. (__v4si)(__m128i) (INDEX), \
  11858. (__mmask8) (MASK), \
  11859. (int) (SCALE))
  11860. #define _mm_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  11861. (__m128i) __builtin_ia32_gather3siv2di ((__v2di)(__m128i) (V1OLD), \
  11862. (void const *) (ADDR), \
  11863. (__v4si)(__m128i) (INDEX), \
  11864. (__mmask8) (MASK), \
  11865. (int) (SCALE))
  11866. #define _mm256_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  11867. (__m128i) __builtin_ia32_gather3div8si ((__v4si)(__m128i) (V1OLD), \
  11868. (void const *) (ADDR), \
  11869. (__v4di)(__m256i) (INDEX), \
  11870. (__mmask8) (MASK), \
  11871. (int) (SCALE))
  11872. #define _mm_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  11873. (__m128i) __builtin_ia32_gather3div4si ((__v4si)(__m128i) (V1OLD), \
  11874. (void const *) (ADDR), \
  11875. (__v2di)(__m128i) (INDEX), \
  11876. (__mmask8) (MASK), \
  11877. (int) (SCALE))
  11878. #define _mm256_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  11879. (__m256i) __builtin_ia32_gather3div4di ((__v4di)(__m256i) (V1OLD), \
  11880. (void const *) (ADDR), \
  11881. (__v4di)(__m256i) (INDEX), \
  11882. (__mmask8) (MASK), \
  11883. (int) (SCALE))
  11884. #define _mm_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  11885. (__m128i) __builtin_ia32_gather3div2di ((__v2di)(__m128i) (V1OLD), \
  11886. (void const *) (ADDR), \
  11887. (__v2di)(__m128i) (INDEX), \
  11888. (__mmask8) (MASK), \
  11889. (int) (SCALE))
  /* Scatters of packed float/double using 32-bit indices.
     The _mask_ forms forward (ADDR, MASK, INDEX, V1, SCALE) to the matching
     __builtin_ia32_scattersiv* builtin after casting each operand.  The
     unmasked forms are the same operation with the constant mask
     (__mmask8) 0xFF, so they simply reuse the _mask_ forms.  */
  #define _mm256_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
    __builtin_ia32_scattersiv8sf ((void *) (ADDR), (__mmask8) (MASK), \
        (__v8si)(__m256i) (INDEX), (__v8sf)(__m256) (V1), (int) (SCALE))
  #define _mm256_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
    _mm256_mask_i32scatter_ps ((ADDR), (__mmask8) 0xFF, (INDEX), (V1), (SCALE))

  #define _mm_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
    __builtin_ia32_scattersiv4sf ((void *) (ADDR), (__mmask8) (MASK), \
        (__v4si)(__m128i) (INDEX), (__v4sf)(__m128) (V1), (int) (SCALE))
  #define _mm_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
    _mm_mask_i32scatter_ps ((ADDR), (__mmask8) 0xFF, (INDEX), (V1), (SCALE))

  #define _mm256_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
    __builtin_ia32_scattersiv4df ((void *) (ADDR), (__mmask8) (MASK), \
        (__v4si)(__m128i) (INDEX), (__v4df)(__m256d) (V1), (int) (SCALE))
  #define _mm256_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
    _mm256_mask_i32scatter_pd ((ADDR), (__mmask8) 0xFF, (INDEX), (V1), (SCALE))

  #define _mm_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
    __builtin_ia32_scattersiv2df ((void *) (ADDR), (__mmask8) (MASK), \
        (__v4si)(__m128i) (INDEX), (__v2df)(__m128d) (V1), (int) (SCALE))
  #define _mm_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
    _mm_mask_i32scatter_pd ((ADDR), (__mmask8) 0xFF, (INDEX), (V1), (SCALE))
  /* Scatters of packed float/double using 64-bit indices.
     The _mask_ forms forward (ADDR, MASK, INDEX, V1, SCALE) to the matching
     __builtin_ia32_scatterdiv* builtin after casting each operand.  The
     unmasked forms are the same operation with the constant mask
     (__mmask8) 0xFF, so they simply reuse the _mask_ forms.
     Note that both float variants take V1 as an __m128.  */
  #define _mm256_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
    __builtin_ia32_scatterdiv8sf ((void *) (ADDR), (__mmask8) (MASK), \
        (__v4di)(__m256i) (INDEX), (__v4sf)(__m128) (V1), (int) (SCALE))
  #define _mm256_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
    _mm256_mask_i64scatter_ps ((ADDR), (__mmask8) 0xFF, (INDEX), (V1), (SCALE))

  #define _mm_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
    __builtin_ia32_scatterdiv4sf ((void *) (ADDR), (__mmask8) (MASK), \
        (__v2di)(__m128i) (INDEX), (__v4sf)(__m128) (V1), (int) (SCALE))
  #define _mm_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
    _mm_mask_i64scatter_ps ((ADDR), (__mmask8) 0xFF, (INDEX), (V1), (SCALE))

  #define _mm256_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
    __builtin_ia32_scatterdiv4df ((void *) (ADDR), (__mmask8) (MASK), \
        (__v4di)(__m256i) (INDEX), (__v4df)(__m256d) (V1), (int) (SCALE))
  #define _mm256_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
    _mm256_mask_i64scatter_pd ((ADDR), (__mmask8) 0xFF, (INDEX), (V1), (SCALE))

  #define _mm_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
    __builtin_ia32_scatterdiv2df ((void *) (ADDR), (__mmask8) (MASK), \
        (__v2di)(__m128i) (INDEX), (__v2df)(__m128d) (V1), (int) (SCALE))
  #define _mm_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
    _mm_mask_i64scatter_pd ((ADDR), (__mmask8) 0xFF, (INDEX), (V1), (SCALE))
  /* Scatters of packed 32-/64-bit integers using 32-bit indices.
     The _mask_ forms forward (ADDR, MASK, INDEX, V1, SCALE) to the matching
     __builtin_ia32_scattersiv* builtin after casting each operand.  The
     unmasked forms are the same operation with the constant mask
     (__mmask8) 0xFF, so they simply reuse the _mask_ forms.  */
  #define _mm256_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
    __builtin_ia32_scattersiv8si ((void *) (ADDR), (__mmask8) (MASK), \
        (__v8si)(__m256i) (INDEX), (__v8si)(__m256i) (V1), (int) (SCALE))
  #define _mm256_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
    _mm256_mask_i32scatter_epi32 ((ADDR), (__mmask8) 0xFF, (INDEX), (V1), \
                                  (SCALE))

  #define _mm_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
    __builtin_ia32_scattersiv4si ((void *) (ADDR), (__mmask8) (MASK), \
        (__v4si)(__m128i) (INDEX), (__v4si)(__m128i) (V1), (int) (SCALE))
  #define _mm_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
    _mm_mask_i32scatter_epi32 ((ADDR), (__mmask8) 0xFF, (INDEX), (V1), (SCALE))

  #define _mm256_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
    __builtin_ia32_scattersiv4di ((void *) (ADDR), (__mmask8) (MASK), \
        (__v4si)(__m128i) (INDEX), (__v4di)(__m256i) (V1), (int) (SCALE))
  #define _mm256_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
    _mm256_mask_i32scatter_epi64 ((ADDR), (__mmask8) 0xFF, (INDEX), (V1), \
                                  (SCALE))

  #define _mm_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
    __builtin_ia32_scattersiv2di ((void *) (ADDR), (__mmask8) (MASK), \
        (__v4si)(__m128i) (INDEX), (__v2di)(__m128i) (V1), (int) (SCALE))
  #define _mm_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
    _mm_mask_i32scatter_epi64 ((ADDR), (__mmask8) 0xFF, (INDEX), (V1), (SCALE))
  /* Scatters of packed 32-/64-bit integers using 64-bit indices.
     The _mask_ forms forward (ADDR, MASK, INDEX, V1, SCALE) to the matching
     __builtin_ia32_scatterdiv* builtin after casting each operand.  The
     unmasked forms are the same operation with the constant mask
     (__mmask8) 0xFF, so they simply reuse the _mask_ forms.
     Note that both epi32 variants take V1 as an __m128i.  */
  #define _mm256_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
    __builtin_ia32_scatterdiv8si ((void *) (ADDR), (__mmask8) (MASK), \
        (__v4di)(__m256i) (INDEX), (__v4si)(__m128i) (V1), (int) (SCALE))
  #define _mm256_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
    _mm256_mask_i64scatter_epi32 ((ADDR), (__mmask8) 0xFF, (INDEX), (V1), \
                                  (SCALE))

  #define _mm_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
    __builtin_ia32_scatterdiv4si ((void *) (ADDR), (__mmask8) (MASK), \
        (__v2di)(__m128i) (INDEX), (__v4si)(__m128i) (V1), (int) (SCALE))
  #define _mm_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
    _mm_mask_i64scatter_epi32 ((ADDR), (__mmask8) 0xFF, (INDEX), (V1), (SCALE))

  #define _mm256_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
    __builtin_ia32_scatterdiv4di ((void *) (ADDR), (__mmask8) (MASK), \
        (__v4di)(__m256i) (INDEX), (__v4di)(__m256i) (V1), (int) (SCALE))
  #define _mm256_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
    _mm256_mask_i64scatter_epi64 ((ADDR), (__mmask8) 0xFF, (INDEX), (V1), \
                                  (SCALE))

  #define _mm_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
    __builtin_ia32_scatterdiv2di ((void *) (ADDR), (__mmask8) (MASK), \
        (__v2di)(__m128i) (INDEX), (__v2di)(__m128i) (V1), (int) (SCALE))
  #define _mm_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
    _mm_mask_i64scatter_epi64 ((ADDR), (__mmask8) 0xFF, (INDEX), (V1), (SCALE))
  /* Masked shuffles of packed 32-bit integers, implemented on
     __builtin_ia32_pshufd{256,128}_mask.  X is the source vector and C the
     shuffle control; the _mask_ forms pass W as the builtin's third operand,
     the _maskz_ forms pass an all-zero vector instead.  U is the mask.  */
  #define _mm256_mask_shuffle_epi32(W, U, X, C) \
    ((__m256i) __builtin_ia32_pshufd256_mask ( \
        (__v8si)(__m256i) (X), \
        (int) (C), \
        (__v8si)(__m256i) (W), \
        (__mmask8) (U)))
  #define _mm256_maskz_shuffle_epi32(U, X, C) \
    ((__m256i) __builtin_ia32_pshufd256_mask ( \
        (__v8si)(__m256i) (X), \
        (int) (C), \
        (__v8si)(__m256i) _mm256_setzero_si256 (), \
        (__mmask8) (U)))
  #define _mm_mask_shuffle_epi32(W, U, X, C) \
    ((__m128i) __builtin_ia32_pshufd128_mask ( \
        (__v4si)(__m128i) (X), \
        (int) (C), \
        (__v4si)(__m128i) (W), \
        (__mmask8) (U)))
  #define _mm_maskz_shuffle_epi32(U, X, C) \
    ((__m128i) __builtin_ia32_pshufd128_mask ( \
        (__v4si)(__m128i) (X), \
        (int) (C), \
        (__v4si)(__m128i) _mm_setzero_si128 (), \
        (__mmask8) (U)))
  /* rol_epi64 family (rotate-left per the intrinsic naming), implemented on
     __builtin_ia32_prolq{256,128}_mask with A as the source and B as the
     count.  The _mask_ forms pass W as the builtin's third operand and U as
     the mask; the _maskz_ forms pass an all-zero vector instead of W.  The
     unmasked forms are the _maskz_ forms with the mask (__mmask8) -1.  */
  #define _mm256_mask_rol_epi64(W, U, A, B) \
    ((__m256i) __builtin_ia32_prolq256_mask ((__v4di)(__m256i) (A), \
        (int) (B), (__v4di)(__m256i) (W), (__mmask8) (U)))
  #define _mm256_maskz_rol_epi64(U, A, B) \
    ((__m256i) __builtin_ia32_prolq256_mask ((__v4di)(__m256i) (A), \
        (int) (B), (__v4di)(__m256i) _mm256_setzero_si256 (), \
        (__mmask8) (U)))
  #define _mm256_rol_epi64(A, B) \
    _mm256_maskz_rol_epi64 ((__mmask8) -1, (A), (B))

  #define _mm_mask_rol_epi64(W, U, A, B) \
    ((__m128i) __builtin_ia32_prolq128_mask ((__v2di)(__m128i) (A), \
        (int) (B), (__v2di)(__m128i) (W), (__mmask8) (U)))
  #define _mm_maskz_rol_epi64(U, A, B) \
    ((__m128i) __builtin_ia32_prolq128_mask ((__v2di)(__m128i) (A), \
        (int) (B), (__v2di)(__m128i) _mm_setzero_si128 (), \
        (__mmask8) (U)))
  #define _mm_rol_epi64(A, B) \
    _mm_maskz_rol_epi64 ((__mmask8) -1, (A), (B))
  /* ror_epi64 family (rotate-right per the intrinsic naming), implemented on
     __builtin_ia32_prorq{256,128}_mask with A as the source and B as the
     count.  The _mask_ forms pass W as the builtin's third operand and U as
     the mask; the _maskz_ forms pass an all-zero vector instead of W.  The
     unmasked forms are the _maskz_ forms with the mask (__mmask8) -1.  */
  #define _mm256_mask_ror_epi64(W, U, A, B) \
    ((__m256i) __builtin_ia32_prorq256_mask ((__v4di)(__m256i) (A), \
        (int) (B), (__v4di)(__m256i) (W), (__mmask8) (U)))
  #define _mm256_maskz_ror_epi64(U, A, B) \
    ((__m256i) __builtin_ia32_prorq256_mask ((__v4di)(__m256i) (A), \
        (int) (B), (__v4di)(__m256i) _mm256_setzero_si256 (), \
        (__mmask8) (U)))
  #define _mm256_ror_epi64(A, B) \
    _mm256_maskz_ror_epi64 ((__mmask8) -1, (A), (B))

  #define _mm_mask_ror_epi64(W, U, A, B) \
    ((__m128i) __builtin_ia32_prorq128_mask ((__v2di)(__m128i) (A), \
        (int) (B), (__v2di)(__m128i) (W), (__mmask8) (U)))
  #define _mm_maskz_ror_epi64(U, A, B) \
    ((__m128i) __builtin_ia32_prorq128_mask ((__v2di)(__m128i) (A), \
        (int) (B), (__v2di)(__m128i) _mm_setzero_si128 (), \
        (__mmask8) (U)))
  #define _mm_ror_epi64(A, B) \
    _mm_maskz_ror_epi64 ((__mmask8) -1, (A), (B))
  /* rol_epi32 family (rotate-left per the intrinsic naming), implemented on
     __builtin_ia32_prold{256,128}_mask with A as the source and B as the
     count.  The _mask_ forms pass W as the builtin's third operand and U as
     the mask; the _maskz_ forms pass an all-zero vector instead of W.  The
     unmasked forms are the _maskz_ forms with the mask (__mmask8) -1.  */
  #define _mm256_mask_rol_epi32(W, U, A, B) \
    ((__m256i) __builtin_ia32_prold256_mask ((__v8si)(__m256i) (A), \
        (int) (B), (__v8si)(__m256i) (W), (__mmask8) (U)))
  #define _mm256_maskz_rol_epi32(U, A, B) \
    ((__m256i) __builtin_ia32_prold256_mask ((__v8si)(__m256i) (A), \
        (int) (B), (__v8si)(__m256i) _mm256_setzero_si256 (), \
        (__mmask8) (U)))
  #define _mm256_rol_epi32(A, B) \
    _mm256_maskz_rol_epi32 ((__mmask8) -1, (A), (B))

  #define _mm_mask_rol_epi32(W, U, A, B) \
    ((__m128i) __builtin_ia32_prold128_mask ((__v4si)(__m128i) (A), \
        (int) (B), (__v4si)(__m128i) (W), (__mmask8) (U)))
  #define _mm_maskz_rol_epi32(U, A, B) \
    ((__m128i) __builtin_ia32_prold128_mask ((__v4si)(__m128i) (A), \
        (int) (B), (__v4si)(__m128i) _mm_setzero_si128 (), \
        (__mmask8) (U)))
  #define _mm_rol_epi32(A, B) \
    _mm_maskz_rol_epi32 ((__mmask8) -1, (A), (B))
  /* ror_epi32 family (rotate-right per the intrinsic naming), implemented on
     __builtin_ia32_prord{256,128}_mask with A as the source and B as the
     count.  The _mask_ forms pass W as the builtin's third operand and U as
     the mask; the _maskz_ forms pass an all-zero vector instead of W.  The
     unmasked forms are the _maskz_ forms with the mask (__mmask8) -1.  */
  #define _mm256_mask_ror_epi32(W, U, A, B) \
    ((__m256i) __builtin_ia32_prord256_mask ((__v8si)(__m256i) (A), \
        (int) (B), (__v8si)(__m256i) (W), (__mmask8) (U)))
  #define _mm256_maskz_ror_epi32(U, A, B) \
    ((__m256i) __builtin_ia32_prord256_mask ((__v8si)(__m256i) (A), \
        (int) (B), (__v8si)(__m256i) _mm256_setzero_si256 (), \
        (__mmask8) (U)))
  #define _mm256_ror_epi32(A, B) \
    _mm256_maskz_ror_epi32 ((__mmask8) -1, (A), (B))

  #define _mm_mask_ror_epi32(W, U, A, B) \
    ((__m128i) __builtin_ia32_prord128_mask ((__v4si)(__m128i) (A), \
        (int) (B), (__v4si)(__m128i) (W), (__mmask8) (U)))
  #define _mm_maskz_ror_epi32(U, A, B) \
    ((__m128i) __builtin_ia32_prord128_mask ((__v4si)(__m128i) (A), \
        (int) (B), (__v4si)(__m128i) _mm_setzero_si128 (), \
        (__mmask8) (U)))
  #define _mm_ror_epi32(A, B) \
    _mm_maskz_ror_epi32 ((__mmask8) -1, (A), (B))
  /* 256-bit alignr_epi32 family, implemented on
     __builtin_ia32_alignd256_mask (X, Y, C, pass-through, mask).
     The _mask_ form passes W and U, the _maskz_ form passes an all-zero
     vector and U.  The unmasked form uses the mask (__mmask8) -1; with every
     mask bit set the fourth operand should not contribute to the result, so
     an all-zero vector is passed there rather than a second copy of X.
     That keeps X expanded exactly once, so an argument with side effects is
     not evaluated twice.  */
  #define _mm256_alignr_epi32(X, Y, C) \
    ((__m256i) __builtin_ia32_alignd256_mask ( \
        (__v8si)(__m256i) (X), \
        (__v8si)(__m256i) (Y), \
        (int) (C), \
        (__v8si)(__m256i) _mm256_setzero_si256 (), \
        (__mmask8) -1))
  #define _mm256_mask_alignr_epi32(W, U, X, Y, C) \
    ((__m256i) __builtin_ia32_alignd256_mask ( \
        (__v8si)(__m256i) (X), \
        (__v8si)(__m256i) (Y), \
        (int) (C), \
        (__v8si)(__m256i) (W), \
        (__mmask8) (U)))
  #define _mm256_maskz_alignr_epi32(U, X, Y, C) \
    ((__m256i) __builtin_ia32_alignd256_mask ( \
        (__v8si)(__m256i) (X), \
        (__v8si)(__m256i) (Y), \
        (int) (C), \
        (__v8si)(__m256i) _mm256_setzero_si256 (), \
        (__mmask8) (U)))
  /* 256-bit alignr_epi64 family, implemented on
     __builtin_ia32_alignq256_mask (X, Y, C, pass-through, mask).
     The _mask_ form passes W and U, the _maskz_ form passes an all-zero
     vector and U.  The unmasked form uses the mask (__mmask8) -1; with every
     mask bit set the fourth operand should not contribute to the result, so
     an all-zero vector is passed there rather than a second copy of X.
     That keeps X expanded exactly once, so an argument with side effects is
     not evaluated twice.  */
  #define _mm256_alignr_epi64(X, Y, C) \
    ((__m256i) __builtin_ia32_alignq256_mask ( \
        (__v4di)(__m256i) (X), \
        (__v4di)(__m256i) (Y), \
        (int) (C), \
        (__v4di)(__m256i) _mm256_setzero_si256 (), \
        (__mmask8) -1))
  #define _mm256_mask_alignr_epi64(W, U, X, Y, C) \
    ((__m256i) __builtin_ia32_alignq256_mask ( \
        (__v4di)(__m256i) (X), \
        (__v4di)(__m256i) (Y), \
        (int) (C), \
        (__v4di)(__m256i) (W), \
        (__mmask8) (U)))
  #define _mm256_maskz_alignr_epi64(U, X, Y, C) \
    ((__m256i) __builtin_ia32_alignq256_mask ( \
        (__v4di)(__m256i) (X), \
        (__v4di)(__m256i) (Y), \
        (int) (C), \
        (__v4di)(__m256i) _mm256_setzero_si256 (), \
        (__mmask8) (U)))
  /* 128-bit alignr_epi32 family, implemented on
     __builtin_ia32_alignd128_mask (X, Y, C, pass-through, mask).
     The _mask_ form passes W and U, the _maskz_ form passes an all-zero
     vector and U.  The unmasked form uses the mask (__mmask8) -1; with every
     mask bit set the fourth operand should not contribute to the result, so
     an all-zero vector is passed there rather than a second copy of X.
     That keeps X expanded exactly once, so an argument with side effects is
     not evaluated twice.  */
  #define _mm_alignr_epi32(X, Y, C) \
    ((__m128i) __builtin_ia32_alignd128_mask ( \
        (__v4si)(__m128i) (X), \
        (__v4si)(__m128i) (Y), \
        (int) (C), \
        (__v4si)(__m128i) _mm_setzero_si128 (), \
        (__mmask8) -1))
  #define _mm_mask_alignr_epi32(W, U, X, Y, C) \
    ((__m128i) __builtin_ia32_alignd128_mask ( \
        (__v4si)(__m128i) (X), \
        (__v4si)(__m128i) (Y), \
        (int) (C), \
        (__v4si)(__m128i) (W), \
        (__mmask8) (U)))
  #define _mm_maskz_alignr_epi32(U, X, Y, C) \
    ((__m128i) __builtin_ia32_alignd128_mask ( \
        (__v4si)(__m128i) (X), \
        (__v4si)(__m128i) (Y), \
        (int) (C), \
        (__v4si)(__m128i) _mm_setzero_si128 (), \
        (__mmask8) (U)))
  /* 128-bit alignr_epi64 family, implemented on
     __builtin_ia32_alignq128_mask (X, Y, C, pass-through, mask).
     The _mask_ form passes W as the pass-through operand and U as the mask,
     matching every other _mask_alignr_ macro; it previously passed X and
     (__mmask8) -1, which ignored both W and U.  The _maskz_ form passes an
     all-zero vector and U.  The unmasked form uses the mask (__mmask8) -1;
     with every mask bit set the fourth operand should not contribute to the
     result, so an all-zero vector is passed there rather than a second copy
     of X, which keeps X expanded exactly once.  */
  #define _mm_alignr_epi64(X, Y, C) \
    ((__m128i) __builtin_ia32_alignq128_mask ( \
        (__v2di)(__m128i) (X), \
        (__v2di)(__m128i) (Y), \
        (int) (C), \
        (__v2di)(__m128i) _mm_setzero_si128 (), \
        (__mmask8) -1))
  #define _mm_mask_alignr_epi64(W, U, X, Y, C) \
    ((__m128i) __builtin_ia32_alignq128_mask ( \
        (__v2di)(__m128i) (X), \
        (__v2di)(__m128i) (Y), \
        (int) (C), \
        (__v2di)(__m128i) (W), \
        (__mmask8) (U)))
  #define _mm_maskz_alignr_epi64(U, X, Y, C) \
    ((__m128i) __builtin_ia32_alignq128_mask ( \
        (__v2di)(__m128i) (X), \
        (__v2di)(__m128i) (Y), \
        (int) (C), \
        (__v2di)(__m128i) _mm_setzero_si128 (), \
        (__mmask8) (U)))
  /* Masked cvtps_ph macros, implemented on __builtin_ia32_vcvtps2ph_mask
     (128-bit source) and __builtin_ia32_vcvtps2ph256_mask (256-bit source).
     A is the float source and I an integer control forwarded as (int).
     Both widths yield an __m128i; the _mask_ forms pass W (viewed as
     __v8hi) as the third operand, the _maskz_ forms pass an all-zero
     vector.  U is the mask.  */
  #define _mm_mask_cvtps_ph(W, U, A, I) \
    ((__m128i) __builtin_ia32_vcvtps2ph_mask ( \
        (__v4sf)(__m128) (A), \
        (int) (I), \
        (__v8hi)(__m128i) (W), \
        (__mmask8) (U)))
  #define _mm_maskz_cvtps_ph(U, A, I) \
    ((__m128i) __builtin_ia32_vcvtps2ph_mask ( \
        (__v4sf)(__m128) (A), \
        (int) (I), \
        (__v8hi)(__m128i) _mm_setzero_si128 (), \
        (__mmask8) (U)))
  #define _mm256_mask_cvtps_ph(W, U, A, I) \
    ((__m128i) __builtin_ia32_vcvtps2ph256_mask ( \
        (__v8sf)(__m256) (A), \
        (int) (I), \
        (__v8hi)(__m128i) (W), \
        (__mmask8) (U)))
  #define _mm256_maskz_cvtps_ph(U, A, I) \
    ((__m128i) __builtin_ia32_vcvtps2ph256_mask ( \
        (__v8sf)(__m256) (A), \
        (int) (I), \
        (__v8hi)(__m128i) _mm_setzero_si128 (), \
        (__mmask8) (U)))
  /* Masked srai_epi32 macros, implemented on
     __builtin_ia32_psradi{256,128}_mask.  A is the source and B the count
     forwarded as (int).  The _mask_ forms pass W as the third operand, the
     _maskz_ forms pass an all-zero vector.  U is the mask.  */
  #define _mm256_mask_srai_epi32(W, U, A, B) \
    ((__m256i) __builtin_ia32_psradi256_mask ( \
        (__v8si)(__m256i) (A), \
        (int) (B), \
        (__v8si)(__m256i) (W), \
        (__mmask8) (U)))
  #define _mm256_maskz_srai_epi32(U, A, B) \
    ((__m256i) __builtin_ia32_psradi256_mask ( \
        (__v8si)(__m256i) (A), \
        (int) (B), \
        (__v8si) _mm256_setzero_si256 (), \
        (__mmask8) (U)))
  #define _mm_mask_srai_epi32(W, U, A, B) \
    ((__m128i) __builtin_ia32_psradi128_mask ( \
        (__v4si)(__m128i) (A), \
        (int) (B), \
        (__v4si)(__m128i) (W), \
        (__mmask8) (U)))
  #define _mm_maskz_srai_epi32(U, A, B) \
    ((__m128i) __builtin_ia32_psradi128_mask ( \
        (__v4si)(__m128i) (A), \
        (int) (B), \
        (__v4si) _mm_setzero_si128 (), \
        (__mmask8) (U)))
  /* srai_epi64 family, implemented on __builtin_ia32_psraqi{256,128}_mask.
     A is the source and B the count forwarded as (int).  The _mask_ forms
     pass W as the third operand and U as the mask; the _maskz_ forms pass an
     all-zero vector instead of W.  The unmasked forms are the _maskz_ forms
     with the mask (__mmask8) -1.  */
  #define _mm256_mask_srai_epi64(W, U, A, B) \
    ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i) (A), \
        (int) (B), (__v4di)(__m256i) (W), (__mmask8) (U)))
  #define _mm256_maskz_srai_epi64(U, A, B) \
    ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i) (A), \
        (int) (B), (__v4di) _mm256_setzero_si256 (), (__mmask8) (U)))
  #define _mm256_srai_epi64(A, B) \
    _mm256_maskz_srai_epi64 ((__mmask8) -1, (A), (B))

  #define _mm_mask_srai_epi64(W, U, A, B) \
    ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i) (A), \
        (int) (B), (__v2di)(__m128i) (W), (__mmask8) (U)))
  #define _mm_maskz_srai_epi64(U, A, B) \
    ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i) (A), \
        (int) (B), (__v2di) _mm_setzero_si128 (), (__mmask8) (U)))
  #define _mm_srai_epi64(A, B) \
    _mm_maskz_srai_epi64 ((__mmask8) -1, (A), (B))
  /* 256-bit masked permutes.  Each macro casts its source vector, forwards
     the control as (int), and passes either W (_mask_ forms) or an all-zero
     vector (_maskz_ forms) as the third operand together with the mask U.  */

  /* permutex_pd: __builtin_ia32_permdf256_mask.  */
  #define _mm256_mask_permutex_pd(W, U, A, B) \
    ((__m256d) __builtin_ia32_permdf256_mask ( \
        (__v4df)(__m256d) (A), \
        (int) (B), \
        (__v4df)(__m256d) (W), \
        (__mmask8) (U)))
  #define _mm256_maskz_permutex_pd(U, A, B) \
    ((__m256d) __builtin_ia32_permdf256_mask ( \
        (__v4df)(__m256d) (A), \
        (int) (B), \
        (__v4df)(__m256d) _mm256_setzero_pd (), \
        (__mmask8) (U)))

  /* permute_pd: __builtin_ia32_vpermilpd256_mask.  */
  #define _mm256_mask_permute_pd(W, U, X, C) \
    ((__m256d) __builtin_ia32_vpermilpd256_mask ( \
        (__v4df)(__m256d) (X), \
        (int) (C), \
        (__v4df)(__m256d) (W), \
        (__mmask8) (U)))
  #define _mm256_maskz_permute_pd(U, X, C) \
    ((__m256d) __builtin_ia32_vpermilpd256_mask ( \
        (__v4df)(__m256d) (X), \
        (int) (C), \
        (__v4df)(__m256d) _mm256_setzero_pd (), \
        (__mmask8) (U)))

  /* permute_ps: __builtin_ia32_vpermilps256_mask.  */
  #define _mm256_mask_permute_ps(W, U, X, C) \
    ((__m256) __builtin_ia32_vpermilps256_mask ( \
        (__v8sf)(__m256) (X), \
        (int) (C), \
        (__v8sf)(__m256) (W), \
        (__mmask8) (U)))
  #define _mm256_maskz_permute_ps(U, X, C) \
    ((__m256) __builtin_ia32_vpermilps256_mask ( \
        (__v8sf)(__m256) (X), \
        (int) (C), \
        (__v8sf)(__m256) _mm256_setzero_ps (), \
        (__mmask8) (U)))
  /* 128-bit masked permutes.  Each macro casts its source vector X, forwards
     the control C as (int), and passes either W (_mask_ forms) or an
     all-zero vector (_maskz_ forms) as the third operand together with the
     mask U.  */

  /* permute_pd: __builtin_ia32_vpermilpd_mask.  */
  #define _mm_mask_permute_pd(W, U, X, C) \
    ((__m128d) __builtin_ia32_vpermilpd_mask ( \
        (__v2df)(__m128d) (X), \
        (int) (C), \
        (__v2df)(__m128d) (W), \
        (__mmask8) (U)))
  #define _mm_maskz_permute_pd(U, X, C) \
    ((__m128d) __builtin_ia32_vpermilpd_mask ( \
        (__v2df)(__m128d) (X), \
        (int) (C), \
        (__v2df)(__m128d) _mm_setzero_pd (), \
        (__mmask8) (U)))

  /* permute_ps: __builtin_ia32_vpermilps_mask.  */
  #define _mm_mask_permute_ps(W, U, X, C) \
    ((__m128) __builtin_ia32_vpermilps_mask ( \
        (__v4sf)(__m128) (X), \
        (int) (C), \
        (__v4sf)(__m128) (W), \
        (__mmask8) (U)))
  #define _mm_maskz_permute_ps(U, X, C) \
    ((__m128) __builtin_ia32_vpermilps_mask ( \
        (__v4sf)(__m128) (X), \
        (int) (C), \
        (__v4sf)(__m128) _mm_setzero_ps (), \
        (__mmask8) (U)))
  /* 256-bit mask blends.  Each macro forwards (__A, __W, __U) -- in that
     order -- to the matching __builtin_ia32_blendm*_256_mask builtin,
     casting the two vectors to the builtin's element type and the selector
     to __mmask8.  */
  #define _mm256_mask_blend_pd(__U, __A, __W) \
    ((__m256d) __builtin_ia32_blendmpd_256_mask ( \
        (__v4df) (__A), (__v4df) (__W), (__mmask8) (__U)))
  #define _mm256_mask_blend_ps(__U, __A, __W) \
    ((__m256) __builtin_ia32_blendmps_256_mask ( \
        (__v8sf) (__A), (__v8sf) (__W), (__mmask8) (__U)))
  #define _mm256_mask_blend_epi64(__U, __A, __W) \
    ((__m256i) __builtin_ia32_blendmq_256_mask ( \
        (__v4di) (__A), (__v4di) (__W), (__mmask8) (__U)))
  #define _mm256_mask_blend_epi32(__U, __A, __W) \
    ((__m256i) __builtin_ia32_blendmd_256_mask ( \
        (__v8si) (__A), (__v8si) (__W), (__mmask8) (__U)))
  /* 128-bit mask blends.  Each macro forwards (__A, __W, __U) -- in that
     order -- to the matching __builtin_ia32_blendm*_128_mask builtin,
     casting the two vectors to the builtin's element type and the selector
     to __mmask8.  */
  #define _mm_mask_blend_pd(__U, __A, __W) \
    ((__m128d) __builtin_ia32_blendmpd_128_mask ( \
        (__v2df) (__A), (__v2df) (__W), (__mmask8) (__U)))
  #define _mm_mask_blend_ps(__U, __A, __W) \
    ((__m128) __builtin_ia32_blendmps_128_mask ( \
        (__v4sf) (__A), (__v4sf) (__W), (__mmask8) (__U)))
  #define _mm_mask_blend_epi64(__U, __A, __W) \
    ((__m128i) __builtin_ia32_blendmq_128_mask ( \
        (__v2di) (__A), (__v2di) (__W), (__mmask8) (__U)))
  #define _mm_mask_blend_epi32(__U, __A, __W) \
    ((__m128i) __builtin_ia32_blendmd_128_mask ( \
        (__v4si) (__A), (__v4si) (__W), (__mmask8) (__U)))
  /* 256-bit compare-into-mask macros.  The _mask_cmp_ forms forward
     (X, Y, P, M) to the matching __builtin_ia32_*cmp*256_mask builtin, with
     P passed as (int) and M as the __mmask8 input mask, and yield an
     __mmask8.  The plain _cmp_ forms are the same comparison with the input
     mask (__mmask8) -1, so they reuse the _mask_cmp_ forms.  */
  #define _mm256_mask_cmp_epi64_mask(M, X, Y, P) \
    ((__mmask8) __builtin_ia32_cmpq256_mask ((__v4di)(__m256i) (X), \
        (__v4di)(__m256i) (Y), (int) (P), (__mmask8) (M)))
  #define _mm256_mask_cmp_epi32_mask(M, X, Y, P) \
    ((__mmask8) __builtin_ia32_cmpd256_mask ((__v8si)(__m256i) (X), \
        (__v8si)(__m256i) (Y), (int) (P), (__mmask8) (M)))
  #define _mm256_mask_cmp_epu64_mask(M, X, Y, P) \
    ((__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di)(__m256i) (X), \
        (__v4di)(__m256i) (Y), (int) (P), (__mmask8) (M)))
  #define _mm256_mask_cmp_epu32_mask(M, X, Y, P) \
    ((__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si)(__m256i) (X), \
        (__v8si)(__m256i) (Y), (int) (P), (__mmask8) (M)))
  #define _mm256_mask_cmp_pd_mask(M, X, Y, P) \
    ((__mmask8) __builtin_ia32_cmppd256_mask ((__v4df)(__m256d) (X), \
        (__v4df)(__m256d) (Y), (int) (P), (__mmask8) (M)))
  #define _mm256_mask_cmp_ps_mask(M, X, Y, P) \
    ((__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf)(__m256) (X), \
        (__v8sf)(__m256) (Y), (int) (P), (__mmask8) (M)))

  #define _mm256_cmp_epu32_mask(X, Y, P) \
    _mm256_mask_cmp_epu32_mask ((__mmask8) -1, (X), (Y), (P))
  #define _mm256_cmp_epi64_mask(X, Y, P) \
    _mm256_mask_cmp_epi64_mask ((__mmask8) -1, (X), (Y), (P))
  #define _mm256_cmp_epi32_mask(X, Y, P) \
    _mm256_mask_cmp_epi32_mask ((__mmask8) -1, (X), (Y), (P))
  #define _mm256_cmp_epu64_mask(X, Y, P) \
    _mm256_mask_cmp_epu64_mask ((__mmask8) -1, (X), (Y), (P))
  #define _mm256_cmp_pd_mask(X, Y, P) \
    _mm256_mask_cmp_pd_mask ((__mmask8) -1, (X), (Y), (P))
  #define _mm256_cmp_ps_mask(X, Y, P) \
    _mm256_mask_cmp_ps_mask ((__mmask8) -1, (X), (Y), (P))
  /* 128-bit compare-into-mask macros.  The _mask_cmp_ forms forward
     (X, Y, P, M) to the matching __builtin_ia32_*cmp*128_mask builtin, with
     P passed as (int) and M as the __mmask8 input mask, and yield an
     __mmask8.  The plain _cmp_ forms are the same comparison with the input
     mask (__mmask8) -1, so they reuse the _mask_cmp_ forms.  */
  #define _mm_mask_cmp_epi64_mask(M, X, Y, P) \
    ((__mmask8) __builtin_ia32_cmpq128_mask ((__v2di)(__m128i) (X), \
        (__v2di)(__m128i) (Y), (int) (P), (__mmask8) (M)))
  #define _mm_mask_cmp_epi32_mask(M, X, Y, P) \
    ((__mmask8) __builtin_ia32_cmpd128_mask ((__v4si)(__m128i) (X), \
        (__v4si)(__m128i) (Y), (int) (P), (__mmask8) (M)))
  #define _mm_mask_cmp_epu64_mask(M, X, Y, P) \
    ((__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di)(__m128i) (X), \
        (__v2di)(__m128i) (Y), (int) (P), (__mmask8) (M)))
  #define _mm_mask_cmp_epu32_mask(M, X, Y, P) \
    ((__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si)(__m128i) (X), \
        (__v4si)(__m128i) (Y), (int) (P), (__mmask8) (M)))
  #define _mm_mask_cmp_pd_mask(M, X, Y, P) \
    ((__mmask8) __builtin_ia32_cmppd128_mask ((__v2df)(__m128d) (X), \
        (__v2df)(__m128d) (Y), (int) (P), (__mmask8) (M)))
  #define _mm_mask_cmp_ps_mask(M, X, Y, P) \
    ((__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf)(__m128) (X), \
        (__v4sf)(__m128) (Y), (int) (P), (__mmask8) (M)))

  #define _mm_cmp_epi64_mask(X, Y, P) \
    _mm_mask_cmp_epi64_mask ((__mmask8) -1, (X), (Y), (P))
  #define _mm_cmp_epi32_mask(X, Y, P) \
    _mm_mask_cmp_epi32_mask ((__mmask8) -1, (X), (Y), (P))
  #define _mm_cmp_epu64_mask(X, Y, P) \
    _mm_mask_cmp_epu64_mask ((__mmask8) -1, (X), (Y), (P))
  #define _mm_cmp_epu32_mask(X, Y, P) \
    _mm_mask_cmp_epu32_mask ((__mmask8) -1, (X), (Y), (P))
  #define _mm_cmp_pd_mask(X, Y, P) \
    _mm_mask_cmp_pd_mask ((__mmask8) -1, (X), (Y), (P))
  #define _mm_cmp_ps_mask(X, Y, P) \
    _mm_mask_cmp_ps_mask ((__mmask8) -1, (X), (Y), (P))
  12377. #endif
  /* Forwarding aliases.  */

  /* Forwards to _mm256_permutevar8x32_ps with the two operands swapped.  */
  #define _mm256_permutexvar_ps(A, B) \
    _mm256_permutevar8x32_ps ((B), (A))

  /* The cvt_roundps_ph names pass every argument through unchanged to the
     cvtps_ph macro of the same width and masking kind.  */
  #define _mm256_mask_cvt_roundps_ph(A, B, C, D) \
    _mm256_mask_cvtps_ph ((A), (B), (C), (D))
  #define _mm256_maskz_cvt_roundps_ph(A, B, C) \
    _mm256_maskz_cvtps_ph ((A), (B), (C))
  #define _mm_mask_cvt_roundps_ph(A, B, C, D) \
    _mm_mask_cvtps_ph ((A), (B), (C), (D))
  #define _mm_maskz_cvt_roundps_ph(A, B, C) \
    _mm_maskz_cvtps_ph ((A), (B), (C))
  12386. #ifdef __DISABLE_AVX512VL__
  12387. #undef __DISABLE_AVX512VL__
  12388. #pragma GCC pop_options
  12389. #endif /* __DISABLE_AVX512VL__ */
  12390. #endif /* _AVX512VLINTRIN_H_INCLUDED */