avx512vlintrin.h 415 KB


  1. /* Copyright (C) 2014-2019 Free Software Foundation, Inc.
  2. This file is part of GCC.
  3. GCC is free software; you can redistribute it and/or modify
  4. it under the terms of the GNU General Public License as published by
  5. the Free Software Foundation; either version 3, or (at your option)
  6. any later version.
  7. GCC is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU General Public License for more details.
  11. Under Section 7 of GPL version 3, you are granted additional
  12. permissions described in the GCC Runtime Library Exception, version
  13. 3.1, as published by the Free Software Foundation.
  14. You should have received a copy of the GNU General Public License and
  15. a copy of the GCC Runtime Library Exception along with this program;
  16. see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
  17. <http://www.gnu.org/licenses/>. */
  18. #ifndef _IMMINTRIN_H_INCLUDED
  19. #error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
  20. #endif
  21. #ifndef _AVX512VLINTRIN_H_INCLUDED
  22. #define _AVX512VLINTRIN_H_INCLUDED
  23. #ifndef __AVX512VL__
  24. #pragma GCC push_options
  25. #pragma GCC target("avx512vl")
  26. #define __DISABLE_AVX512VL__
  27. #endif /* __AVX512VL__ */
  28. /* Internal data types for implementing the intrinsics.  __mmask32 is declared here as a plain unsigned int; nothing in the visible part of this header uses it. */
  29. typedef unsigned int __mmask32;
  30. extern __inline __m256d
  31. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  32. _mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
  33. {
  34. return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
  35. (__v4df) __W,
  36. (__mmask8) __U);
  37. }
  38. extern __inline __m256d
  39. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  40. _mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
  41. {
  42. return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
  43. (__v4df)
  44. _mm256_setzero_pd (),
  45. (__mmask8) __U);
  46. }
  47. extern __inline __m128d
  48. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  49. _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
  50. {
  51. return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
  52. (__v2df) __W,
  53. (__mmask8) __U);
  54. }
  55. extern __inline __m128d
  56. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  57. _mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
  58. {
  59. return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
  60. (__v2df)
  61. _mm_setzero_pd (),
  62. (__mmask8) __U);
  63. }
  64. extern __inline __m256d
  65. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  66. _mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
  67. {
  68. return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
  69. (__v4df) __W,
  70. (__mmask8) __U);
  71. }
  72. extern __inline __m256d
  73. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  74. _mm256_maskz_load_pd (__mmask8 __U, void const *__P)
  75. {
  76. return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
  77. (__v4df)
  78. _mm256_setzero_pd (),
  79. (__mmask8) __U);
  80. }
  81. extern __inline __m128d
  82. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  83. _mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
  84. {
  85. return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
  86. (__v2df) __W,
  87. (__mmask8) __U);
  88. }
  89. extern __inline __m128d
  90. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  91. _mm_maskz_load_pd (__mmask8 __U, void const *__P)
  92. {
  93. return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
  94. (__v2df)
  95. _mm_setzero_pd (),
  96. (__mmask8) __U);
  97. }
  98. extern __inline void
  99. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  100. _mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
  101. {
  102. __builtin_ia32_storeapd256_mask ((__v4df *) __P,
  103. (__v4df) __A,
  104. (__mmask8) __U);
  105. }
  106. extern __inline void
  107. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  108. _mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
  109. {
  110. __builtin_ia32_storeapd128_mask ((__v2df *) __P,
  111. (__v2df) __A,
  112. (__mmask8) __U);
  113. }
  114. extern __inline __m256
  115. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  116. _mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
  117. {
  118. return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
  119. (__v8sf) __W,
  120. (__mmask8) __U);
  121. }
  122. extern __inline __m256
  123. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  124. _mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
  125. {
  126. return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
  127. (__v8sf)
  128. _mm256_setzero_ps (),
  129. (__mmask8) __U);
  130. }
  131. extern __inline __m128
  132. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  133. _mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
  134. {
  135. return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
  136. (__v4sf) __W,
  137. (__mmask8) __U);
  138. }
  139. extern __inline __m128
  140. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  141. _mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
  142. {
  143. return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
  144. (__v4sf)
  145. _mm_setzero_ps (),
  146. (__mmask8) __U);
  147. }
  148. extern __inline __m256
  149. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  150. _mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
  151. {
  152. return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
  153. (__v8sf) __W,
  154. (__mmask8) __U);
  155. }
  156. extern __inline __m256
  157. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  158. _mm256_maskz_load_ps (__mmask8 __U, void const *__P)
  159. {
  160. return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
  161. (__v8sf)
  162. _mm256_setzero_ps (),
  163. (__mmask8) __U);
  164. }
  165. extern __inline __m128
  166. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  167. _mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
  168. {
  169. return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
  170. (__v4sf) __W,
  171. (__mmask8) __U);
  172. }
  173. extern __inline __m128
  174. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  175. _mm_maskz_load_ps (__mmask8 __U, void const *__P)
  176. {
  177. return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
  178. (__v4sf)
  179. _mm_setzero_ps (),
  180. (__mmask8) __U);
  181. }
  182. extern __inline void
  183. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  184. _mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
  185. {
  186. __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
  187. (__v8sf) __A,
  188. (__mmask8) __U);
  189. }
  190. extern __inline void
  191. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  192. _mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
  193. {
  194. __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
  195. (__v4sf) __A,
  196. (__mmask8) __U);
  197. }
  198. extern __inline __m256i
  199. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  200. _mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
  201. {
  202. return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
  203. (__v4di) __W,
  204. (__mmask8) __U);
  205. }
  206. extern __inline __m256i
  207. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  208. _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
  209. {
  210. return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
  211. (__v4di)
  212. _mm256_setzero_si256 (),
  213. (__mmask8) __U);
  214. }
  215. extern __inline __m128i
  216. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  217. _mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
  218. {
  219. return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
  220. (__v2di) __W,
  221. (__mmask8) __U);
  222. }
  223. extern __inline __m128i
  224. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  225. _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
  226. {
  227. return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
  228. (__v2di)
  229. _mm_setzero_si128 (),
  230. (__mmask8) __U);
  231. }
  232. extern __inline __m256i
  233. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  234. _mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
  235. {
  236. return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
  237. (__v4di) __W,
  238. (__mmask8)
  239. __U);
  240. }
  241. extern __inline __m256i
  242. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  243. _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
  244. {
  245. return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
  246. (__v4di)
  247. _mm256_setzero_si256 (),
  248. (__mmask8)
  249. __U);
  250. }
  251. extern __inline __m128i
  252. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  253. _mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
  254. {
  255. return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
  256. (__v2di) __W,
  257. (__mmask8)
  258. __U);
  259. }
  260. extern __inline __m128i
  261. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  262. _mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
  263. {
  264. return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
  265. (__v2di)
  266. _mm_setzero_si128 (),
  267. (__mmask8)
  268. __U);
  269. }
  270. extern __inline void
  271. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  272. _mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
  273. {
  274. __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
  275. (__v4di) __A,
  276. (__mmask8) __U);
  277. }
  278. extern __inline void
  279. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  280. _mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
  281. {
  282. __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
  283. (__v2di) __A,
  284. (__mmask8) __U);
  285. }
  286. extern __inline __m256i
  287. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  288. _mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
  289. {
  290. return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
  291. (__v8si) __W,
  292. (__mmask8) __U);
  293. }
  294. extern __inline __m256i
  295. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  296. _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
  297. {
  298. return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
  299. (__v8si)
  300. _mm256_setzero_si256 (),
  301. (__mmask8) __U);
  302. }
  303. extern __inline __m128i
  304. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  305. _mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
  306. {
  307. return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
  308. (__v4si) __W,
  309. (__mmask8) __U);
  310. }
  311. extern __inline __m128i
  312. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  313. _mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
  314. {
  315. return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
  316. (__v4si)
  317. _mm_setzero_si128 (),
  318. (__mmask8) __U);
  319. }
  320. extern __inline __m256i
  321. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  322. _mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
  323. {
  324. return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
  325. (__v8si) __W,
  326. (__mmask8)
  327. __U);
  328. }
  329. extern __inline __m256i
  330. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  331. _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
  332. {
  333. return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
  334. (__v8si)
  335. _mm256_setzero_si256 (),
  336. (__mmask8)
  337. __U);
  338. }
  339. extern __inline __m128i
  340. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  341. _mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
  342. {
  343. return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
  344. (__v4si) __W,
  345. (__mmask8)
  346. __U);
  347. }
  348. extern __inline __m128i
  349. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  350. _mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
  351. {
  352. return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
  353. (__v4si)
  354. _mm_setzero_si128 (),
  355. (__mmask8)
  356. __U);
  357. }
  358. extern __inline void
  359. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  360. _mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
  361. {
  362. __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
  363. (__v8si) __A,
  364. (__mmask8) __U);
  365. }
  366. extern __inline void
  367. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  368. _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
  369. {
  370. __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
  371. (__v4si) __A,
  372. (__mmask8) __U);
  373. }
  374. extern __inline __m128d
  375. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  376. _mm_mask_add_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
  377. {
  378. return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
  379. (__v2df) __B,
  380. (__v2df) __W,
  381. (__mmask8) __U);
  382. }
  383. extern __inline __m128d
  384. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  385. _mm_maskz_add_pd (__mmask8 __U, __m128d __A, __m128d __B)
  386. {
  387. return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
  388. (__v2df) __B,
  389. (__v2df)
  390. _mm_setzero_pd (),
  391. (__mmask8) __U);
  392. }
  393. extern __inline __m256d
  394. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  395. _mm256_mask_add_pd (__m256d __W, __mmask8 __U, __m256d __A,
  396. __m256d __B)
  397. {
  398. return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
  399. (__v4df) __B,
  400. (__v4df) __W,
  401. (__mmask8) __U);
  402. }
  403. extern __inline __m256d
  404. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  405. _mm256_maskz_add_pd (__mmask8 __U, __m256d __A, __m256d __B)
  406. {
  407. return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
  408. (__v4df) __B,
  409. (__v4df)
  410. _mm256_setzero_pd (),
  411. (__mmask8) __U);
  412. }
  413. extern __inline __m128
  414. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  415. _mm_mask_add_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  416. {
  417. return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
  418. (__v4sf) __B,
  419. (__v4sf) __W,
  420. (__mmask8) __U);
  421. }
  422. extern __inline __m128
  423. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  424. _mm_maskz_add_ps (__mmask8 __U, __m128 __A, __m128 __B)
  425. {
  426. return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
  427. (__v4sf) __B,
  428. (__v4sf)
  429. _mm_setzero_ps (),
  430. (__mmask8) __U);
  431. }
  432. extern __inline __m256
  433. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  434. _mm256_mask_add_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
  435. {
  436. return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
  437. (__v8sf) __B,
  438. (__v8sf) __W,
  439. (__mmask8) __U);
  440. }
  441. extern __inline __m256
  442. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  443. _mm256_maskz_add_ps (__mmask8 __U, __m256 __A, __m256 __B)
  444. {
  445. return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
  446. (__v8sf) __B,
  447. (__v8sf)
  448. _mm256_setzero_ps (),
  449. (__mmask8) __U);
  450. }
  451. extern __inline __m128d
  452. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  453. _mm_mask_sub_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
  454. {
  455. return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
  456. (__v2df) __B,
  457. (__v2df) __W,
  458. (__mmask8) __U);
  459. }
  460. extern __inline __m128d
  461. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  462. _mm_maskz_sub_pd (__mmask8 __U, __m128d __A, __m128d __B)
  463. {
  464. return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
  465. (__v2df) __B,
  466. (__v2df)
  467. _mm_setzero_pd (),
  468. (__mmask8) __U);
  469. }
  470. extern __inline __m256d
  471. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  472. _mm256_mask_sub_pd (__m256d __W, __mmask8 __U, __m256d __A,
  473. __m256d __B)
  474. {
  475. return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
  476. (__v4df) __B,
  477. (__v4df) __W,
  478. (__mmask8) __U);
  479. }
  480. extern __inline __m256d
  481. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  482. _mm256_maskz_sub_pd (__mmask8 __U, __m256d __A, __m256d __B)
  483. {
  484. return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
  485. (__v4df) __B,
  486. (__v4df)
  487. _mm256_setzero_pd (),
  488. (__mmask8) __U);
  489. }
  490. extern __inline __m128
  491. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  492. _mm_mask_sub_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  493. {
  494. return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
  495. (__v4sf) __B,
  496. (__v4sf) __W,
  497. (__mmask8) __U);
  498. }
  499. extern __inline __m128
  500. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  501. _mm_maskz_sub_ps (__mmask8 __U, __m128 __A, __m128 __B)
  502. {
  503. return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
  504. (__v4sf) __B,
  505. (__v4sf)
  506. _mm_setzero_ps (),
  507. (__mmask8) __U);
  508. }
  509. extern __inline __m256
  510. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  511. _mm256_mask_sub_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
  512. {
  513. return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
  514. (__v8sf) __B,
  515. (__v8sf) __W,
  516. (__mmask8) __U);
  517. }
  518. extern __inline __m256
  519. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  520. _mm256_maskz_sub_ps (__mmask8 __U, __m256 __A, __m256 __B)
  521. {
  522. return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
  523. (__v8sf) __B,
  524. (__v8sf)
  525. _mm256_setzero_ps (),
  526. (__mmask8) __U);
  527. }
  528. extern __inline void
  529. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  530. _mm256_store_epi64 (void *__P, __m256i __A)
  531. {
  532. *(__m256i *) __P = __A;
  533. }
  534. extern __inline void
  535. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  536. _mm_store_epi64 (void *__P, __m128i __A)
  537. {
  538. *(__m128i *) __P = __A;
  539. }
  540. extern __inline __m256d
  541. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  542. _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
  543. {
  544. return (__m256d) __builtin_ia32_loadupd256_mask ((const double *) __P,
  545. (__v4df) __W,
  546. (__mmask8) __U);
  547. }
  548. extern __inline __m256d
  549. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  550. _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
  551. {
  552. return (__m256d) __builtin_ia32_loadupd256_mask ((const double *) __P,
  553. (__v4df)
  554. _mm256_setzero_pd (),
  555. (__mmask8) __U);
  556. }
  557. extern __inline __m128d
  558. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  559. _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
  560. {
  561. return (__m128d) __builtin_ia32_loadupd128_mask ((const double *) __P,
  562. (__v2df) __W,
  563. (__mmask8) __U);
  564. }
  565. extern __inline __m128d
  566. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  567. _mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
  568. {
  569. return (__m128d) __builtin_ia32_loadupd128_mask ((const double *) __P,
  570. (__v2df)
  571. _mm_setzero_pd (),
  572. (__mmask8) __U);
  573. }
  574. extern __inline void
  575. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  576. _mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
  577. {
  578. __builtin_ia32_storeupd256_mask ((double *) __P,
  579. (__v4df) __A,
  580. (__mmask8) __U);
  581. }
  582. extern __inline void
  583. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  584. _mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
  585. {
  586. __builtin_ia32_storeupd128_mask ((double *) __P,
  587. (__v2df) __A,
  588. (__mmask8) __U);
  589. }
  590. extern __inline __m256
  591. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  592. _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
  593. {
  594. return (__m256) __builtin_ia32_loadups256_mask ((const float *) __P,
  595. (__v8sf) __W,
  596. (__mmask8) __U);
  597. }
  598. extern __inline __m256
  599. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  600. _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
  601. {
  602. return (__m256) __builtin_ia32_loadups256_mask ((const float *) __P,
  603. (__v8sf)
  604. _mm256_setzero_ps (),
  605. (__mmask8) __U);
  606. }
  607. extern __inline __m128
  608. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  609. _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
  610. {
  611. return (__m128) __builtin_ia32_loadups128_mask ((const float *) __P,
  612. (__v4sf) __W,
  613. (__mmask8) __U);
  614. }
  615. extern __inline __m128
  616. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  617. _mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
  618. {
  619. return (__m128) __builtin_ia32_loadups128_mask ((const float *) __P,
  620. (__v4sf)
  621. _mm_setzero_ps (),
  622. (__mmask8) __U);
  623. }
  624. extern __inline void
  625. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  626. _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
  627. {
  628. __builtin_ia32_storeups256_mask ((float *) __P,
  629. (__v8sf) __A,
  630. (__mmask8) __U);
  631. }
  632. extern __inline void
  633. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  634. _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
  635. {
  636. __builtin_ia32_storeups128_mask ((float *) __P,
  637. (__v4sf) __A,
  638. (__mmask8) __U);
  639. }
  640. extern __inline __m256i
  641. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  642. _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
  643. {
  644. return (__m256i) __builtin_ia32_loaddqudi256_mask ((const long long *) __P,
  645. (__v4di) __W,
  646. (__mmask8) __U);
  647. }
  648. extern __inline __m256i
  649. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  650. _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
  651. {
  652. return (__m256i) __builtin_ia32_loaddqudi256_mask ((const long long *) __P,
  653. (__v4di)
  654. _mm256_setzero_si256 (),
  655. (__mmask8) __U);
  656. }
  657. extern __inline __m128i
  658. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  659. _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
  660. {
  661. return (__m128i) __builtin_ia32_loaddqudi128_mask ((const long long *) __P,
  662. (__v2di) __W,
  663. (__mmask8) __U);
  664. }
  665. extern __inline __m128i
  666. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  667. _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
  668. {
  669. return (__m128i) __builtin_ia32_loaddqudi128_mask ((const long long *) __P,
  670. (__v2di)
  671. _mm_setzero_si128 (),
  672. (__mmask8) __U);
  673. }
  674. extern __inline void
  675. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  676. _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
  677. {
  678. __builtin_ia32_storedqudi256_mask ((long long *) __P,
  679. (__v4di) __A,
  680. (__mmask8) __U);
  681. }
  682. extern __inline void
  683. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  684. _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
  685. {
  686. __builtin_ia32_storedqudi128_mask ((long long *) __P,
  687. (__v2di) __A,
  688. (__mmask8) __U);
  689. }
  690. extern __inline __m256i
  691. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  692. _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
  693. {
  694. return (__m256i) __builtin_ia32_loaddqusi256_mask ((const int *) __P,
  695. (__v8si) __W,
  696. (__mmask8) __U);
  697. }
  698. extern __inline __m256i
  699. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  700. _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
  701. {
  702. return (__m256i) __builtin_ia32_loaddqusi256_mask ((const int *) __P,
  703. (__v8si)
  704. _mm256_setzero_si256 (),
  705. (__mmask8) __U);
  706. }
  707. extern __inline __m128i
  708. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  709. _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
  710. {
  711. return (__m128i) __builtin_ia32_loaddqusi128_mask ((const int *) __P,
  712. (__v4si) __W,
  713. (__mmask8) __U);
  714. }
  715. extern __inline __m128i
  716. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  717. _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
  718. {
  719. return (__m128i) __builtin_ia32_loaddqusi128_mask ((const int *) __P,
  720. (__v4si)
  721. _mm_setzero_si128 (),
  722. (__mmask8) __U);
  723. }
  724. extern __inline void
  725. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  726. _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
  727. {
  728. __builtin_ia32_storedqusi256_mask ((int *) __P,
  729. (__v8si) __A,
  730. (__mmask8) __U);
  731. }
  732. extern __inline void
  733. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  734. _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
  735. {
  736. __builtin_ia32_storedqusi128_mask ((int *) __P,
  737. (__v4si) __A,
  738. (__mmask8) __U);
  739. }
  740. extern __inline __m256i
  741. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  742. _mm256_mask_abs_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
  743. {
  744. return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
  745. (__v8si) __W,
  746. (__mmask8) __U);
  747. }
  748. extern __inline __m256i
  749. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  750. _mm256_maskz_abs_epi32 (__mmask8 __U, __m256i __A)
  751. {
  752. return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
  753. (__v8si)
  754. _mm256_setzero_si256 (),
  755. (__mmask8) __U);
  756. }
  757. extern __inline __m128i
  758. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  759. _mm_mask_abs_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
  760. {
  761. return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
  762. (__v4si) __W,
  763. (__mmask8) __U);
  764. }
  765. extern __inline __m128i
  766. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  767. _mm_maskz_abs_epi32 (__mmask8 __U, __m128i __A)
  768. {
  769. return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
  770. (__v4si)
  771. _mm_setzero_si128 (),
  772. (__mmask8) __U);
  773. }
  774. extern __inline __m256i
  775. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  776. _mm256_abs_epi64 (__m256i __A)
  777. {
  778. return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
  779. (__v4di)
  780. _mm256_setzero_si256 (),
  781. (__mmask8) -1);
  782. }
  783. extern __inline __m256i
  784. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  785. _mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
  786. {
  787. return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
  788. (__v4di) __W,
  789. (__mmask8) __U);
  790. }
  791. extern __inline __m256i
  792. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  793. _mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A)
  794. {
  795. return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
  796. (__v4di)
  797. _mm256_setzero_si256 (),
  798. (__mmask8) __U);
  799. }
  800. extern __inline __m128i
  801. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  802. _mm_abs_epi64 (__m128i __A)
  803. {
  804. return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
  805. (__v2di)
  806. _mm_setzero_si128 (),
  807. (__mmask8) -1);
  808. }
  809. extern __inline __m128i
  810. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  811. _mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
  812. {
  813. return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
  814. (__v2di) __W,
  815. (__mmask8) __U);
  816. }
  817. extern __inline __m128i
  818. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  819. _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A)
  820. {
  821. return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
  822. (__v2di)
  823. _mm_setzero_si128 (),
  824. (__mmask8) __U);
  825. }
  826. extern __inline __m128i
  827. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  828. _mm256_cvtpd_epu32 (__m256d __A)
  829. {
  830. return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
  831. (__v4si)
  832. _mm_setzero_si128 (),
  833. (__mmask8) -1);
  834. }
  835. extern __inline __m128i
  836. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  837. _mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
  838. {
  839. return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
  840. (__v4si) __W,
  841. (__mmask8) __U);
  842. }
  843. extern __inline __m128i
  844. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  845. _mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A)
  846. {
  847. return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
  848. (__v4si)
  849. _mm_setzero_si128 (),
  850. (__mmask8) __U);
  851. }
  852. extern __inline __m128i
  853. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  854. _mm_cvtpd_epu32 (__m128d __A)
  855. {
  856. return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
  857. (__v4si)
  858. _mm_setzero_si128 (),
  859. (__mmask8) -1);
  860. }
  861. extern __inline __m128i
  862. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  863. _mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
  864. {
  865. return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
  866. (__v4si) __W,
  867. (__mmask8) __U);
  868. }
  869. extern __inline __m128i
  870. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  871. _mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A)
  872. {
  873. return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
  874. (__v4si)
  875. _mm_setzero_si128 (),
  876. (__mmask8) __U);
  877. }
  878. extern __inline __m256i
  879. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  880. _mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
  881. {
  882. return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
  883. (__v8si) __W,
  884. (__mmask8) __U);
  885. }
  886. extern __inline __m256i
  887. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  888. _mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A)
  889. {
  890. return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
  891. (__v8si)
  892. _mm256_setzero_si256 (),
  893. (__mmask8) __U);
  894. }
  895. extern __inline __m128i
  896. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  897. _mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
  898. {
  899. return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
  900. (__v4si) __W,
  901. (__mmask8) __U);
  902. }
  903. extern __inline __m128i
  904. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  905. _mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A)
  906. {
  907. return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
  908. (__v4si)
  909. _mm_setzero_si128 (),
  910. (__mmask8) __U);
  911. }
  912. extern __inline __m256i
  913. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  914. _mm256_cvttps_epu32 (__m256 __A)
  915. {
  916. return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
  917. (__v8si)
  918. _mm256_setzero_si256 (),
  919. (__mmask8) -1);
  920. }
  921. extern __inline __m256i
  922. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  923. _mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
  924. {
  925. return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
  926. (__v8si) __W,
  927. (__mmask8) __U);
  928. }
  929. extern __inline __m256i
  930. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  931. _mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A)
  932. {
  933. return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
  934. (__v8si)
  935. _mm256_setzero_si256 (),
  936. (__mmask8) __U);
  937. }
  938. extern __inline __m128i
  939. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  940. _mm_cvttps_epu32 (__m128 __A)
  941. {
  942. return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
  943. (__v4si)
  944. _mm_setzero_si128 (),
  945. (__mmask8) -1);
  946. }
  947. extern __inline __m128i
  948. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  949. _mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
  950. {
  951. return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
  952. (__v4si) __W,
  953. (__mmask8) __U);
  954. }
  955. extern __inline __m128i
  956. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  957. _mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A)
  958. {
  959. return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
  960. (__v4si)
  961. _mm_setzero_si128 (),
  962. (__mmask8) __U);
  963. }
  964. extern __inline __m128i
  965. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  966. _mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
  967. {
  968. return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
  969. (__v4si) __W,
  970. (__mmask8) __U);
  971. }
  972. extern __inline __m128i
  973. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  974. _mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A)
  975. {
  976. return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
  977. (__v4si)
  978. _mm_setzero_si128 (),
  979. (__mmask8) __U);
  980. }
  981. extern __inline __m128i
  982. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  983. _mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
  984. {
  985. return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
  986. (__v4si) __W,
  987. (__mmask8) __U);
  988. }
  989. extern __inline __m128i
  990. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  991. _mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A)
  992. {
  993. return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
  994. (__v4si)
  995. _mm_setzero_si128 (),
  996. (__mmask8) __U);
  997. }
  998. extern __inline __m128i
  999. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1000. _mm256_cvttpd_epu32 (__m256d __A)
  1001. {
  1002. return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
  1003. (__v4si)
  1004. _mm_setzero_si128 (),
  1005. (__mmask8) -1);
  1006. }
  1007. extern __inline __m128i
  1008. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1009. _mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
  1010. {
  1011. return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
  1012. (__v4si) __W,
  1013. (__mmask8) __U);
  1014. }
  1015. extern __inline __m128i
  1016. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1017. _mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A)
  1018. {
  1019. return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
  1020. (__v4si)
  1021. _mm_setzero_si128 (),
  1022. (__mmask8) __U);
  1023. }
  1024. extern __inline __m128i
  1025. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1026. _mm_cvttpd_epu32 (__m128d __A)
  1027. {
  1028. return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
  1029. (__v4si)
  1030. _mm_setzero_si128 (),
  1031. (__mmask8) -1);
  1032. }
  1033. extern __inline __m128i
  1034. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1035. _mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
  1036. {
  1037. return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
  1038. (__v4si) __W,
  1039. (__mmask8) __U);
  1040. }
  1041. extern __inline __m128i
  1042. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1043. _mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A)
  1044. {
  1045. return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
  1046. (__v4si)
  1047. _mm_setzero_si128 (),
  1048. (__mmask8) __U);
  1049. }
  1050. extern __inline __m128i
  1051. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1052. _mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
  1053. {
  1054. return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
  1055. (__v4si) __W,
  1056. (__mmask8) __U);
  1057. }
  1058. extern __inline __m128i
  1059. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1060. _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A)
  1061. {
  1062. return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
  1063. (__v4si)
  1064. _mm_setzero_si128 (),
  1065. (__mmask8) __U);
  1066. }
  1067. extern __inline __m128i
  1068. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1069. _mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
  1070. {
  1071. return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
  1072. (__v4si) __W,
  1073. (__mmask8) __U);
  1074. }
  1075. extern __inline __m128i
  1076. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1077. _mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A)
  1078. {
  1079. return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
  1080. (__v4si)
  1081. _mm_setzero_si128 (),
  1082. (__mmask8) __U);
  1083. }
  1084. extern __inline __m256d
  1085. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1086. _mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A)
  1087. {
  1088. return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
  1089. (__v4df) __W,
  1090. (__mmask8) __U);
  1091. }
  1092. extern __inline __m256d
  1093. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1094. _mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
  1095. {
  1096. return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
  1097. (__v4df)
  1098. _mm256_setzero_pd (),
  1099. (__mmask8) __U);
  1100. }
  1101. extern __inline __m128d
  1102. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1103. _mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A)
  1104. {
  1105. return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
  1106. (__v2df) __W,
  1107. (__mmask8) __U);
  1108. }
  1109. extern __inline __m128d
  1110. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1111. _mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
  1112. {
  1113. return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
  1114. (__v2df)
  1115. _mm_setzero_pd (),
  1116. (__mmask8) __U);
  1117. }
  1118. extern __inline __m256d
  1119. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1120. _mm256_cvtepu32_pd (__m128i __A)
  1121. {
  1122. return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
  1123. (__v4df)
  1124. _mm256_setzero_pd (),
  1125. (__mmask8) -1);
  1126. }
  1127. extern __inline __m256d
  1128. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1129. _mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A)
  1130. {
  1131. return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
  1132. (__v4df) __W,
  1133. (__mmask8) __U);
  1134. }
  1135. extern __inline __m256d
  1136. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1137. _mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
  1138. {
  1139. return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
  1140. (__v4df)
  1141. _mm256_setzero_pd (),
  1142. (__mmask8) __U);
  1143. }
  1144. extern __inline __m128d
  1145. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1146. _mm_cvtepu32_pd (__m128i __A)
  1147. {
  1148. return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
  1149. (__v2df)
  1150. _mm_setzero_pd (),
  1151. (__mmask8) -1);
  1152. }
  1153. extern __inline __m128d
  1154. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1155. _mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A)
  1156. {
  1157. return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
  1158. (__v2df) __W,
  1159. (__mmask8) __U);
  1160. }
  1161. extern __inline __m128d
  1162. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1163. _mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
  1164. {
  1165. return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
  1166. (__v2df)
  1167. _mm_setzero_pd (),
  1168. (__mmask8) __U);
  1169. }
  1170. extern __inline __m256
  1171. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1172. _mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A)
  1173. {
  1174. return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
  1175. (__v8sf) __W,
  1176. (__mmask8) __U);
  1177. }
  1178. extern __inline __m256
  1179. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1180. _mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A)
  1181. {
  1182. return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
  1183. (__v8sf)
  1184. _mm256_setzero_ps (),
  1185. (__mmask8) __U);
  1186. }
  1187. extern __inline __m128
  1188. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1189. _mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A)
  1190. {
  1191. return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
  1192. (__v4sf) __W,
  1193. (__mmask8) __U);
  1194. }
  1195. extern __inline __m128
  1196. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1197. _mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A)
  1198. {
  1199. return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
  1200. (__v4sf)
  1201. _mm_setzero_ps (),
  1202. (__mmask8) __U);
  1203. }
  1204. extern __inline __m256
  1205. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1206. _mm256_cvtepu32_ps (__m256i __A)
  1207. {
  1208. return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
  1209. (__v8sf)
  1210. _mm256_setzero_ps (),
  1211. (__mmask8) -1);
  1212. }
  1213. extern __inline __m256
  1214. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1215. _mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A)
  1216. {
  1217. return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
  1218. (__v8sf) __W,
  1219. (__mmask8) __U);
  1220. }
  1221. extern __inline __m256
  1222. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1223. _mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A)
  1224. {
  1225. return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
  1226. (__v8sf)
  1227. _mm256_setzero_ps (),
  1228. (__mmask8) __U);
  1229. }
  1230. extern __inline __m128
  1231. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1232. _mm_cvtepu32_ps (__m128i __A)
  1233. {
  1234. return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
  1235. (__v4sf)
  1236. _mm_setzero_ps (),
  1237. (__mmask8) -1);
  1238. }
  1239. extern __inline __m128
  1240. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1241. _mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A)
  1242. {
  1243. return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
  1244. (__v4sf) __W,
  1245. (__mmask8) __U);
  1246. }
  1247. extern __inline __m128
  1248. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1249. _mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A)
  1250. {
  1251. return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
  1252. (__v4sf)
  1253. _mm_setzero_ps (),
  1254. (__mmask8) __U);
  1255. }
  1256. extern __inline __m256d
  1257. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1258. _mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A)
  1259. {
  1260. return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
  1261. (__v4df) __W,
  1262. (__mmask8) __U);
  1263. }
  1264. extern __inline __m256d
  1265. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1266. _mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
  1267. {
  1268. return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
  1269. (__v4df)
  1270. _mm256_setzero_pd (),
  1271. (__mmask8) __U);
  1272. }
  1273. extern __inline __m128d
  1274. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1275. _mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A)
  1276. {
  1277. return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
  1278. (__v2df) __W,
  1279. (__mmask8) __U);
  1280. }
  1281. extern __inline __m128d
  1282. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1283. _mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
  1284. {
  1285. return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
  1286. (__v2df)
  1287. _mm_setzero_pd (),
  1288. (__mmask8) __U);
  1289. }
  1290. extern __inline __m128i
  1291. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1292. _mm_cvtepi32_epi8 (__m128i __A)
  1293. {
  1294. return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
  1295. (__v16qi)
  1296. _mm_undefined_si128 (),
  1297. (__mmask8) -1);
  1298. }
  1299. extern __inline void
  1300. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1301. _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
  1302. {
  1303. __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
  1304. }
  1305. extern __inline __m128i
  1306. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1307. _mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
  1308. {
  1309. return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
  1310. (__v16qi) __O, __M);
  1311. }
  1312. extern __inline __m128i
  1313. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1314. _mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
  1315. {
  1316. return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
  1317. (__v16qi)
  1318. _mm_setzero_si128 (),
  1319. __M);
  1320. }
  1321. extern __inline __m128i
  1322. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1323. _mm256_cvtepi32_epi8 (__m256i __A)
  1324. {
  1325. return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
  1326. (__v16qi)
  1327. _mm_undefined_si128 (),
  1328. (__mmask8) -1);
  1329. }
  1330. extern __inline __m128i
  1331. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1332. _mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
  1333. {
  1334. return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
  1335. (__v16qi) __O, __M);
  1336. }
  1337. extern __inline void
  1338. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1339. _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
  1340. {
  1341. __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
  1342. }
  1343. extern __inline __m128i
  1344. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1345. _mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
  1346. {
  1347. return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
  1348. (__v16qi)
  1349. _mm_setzero_si128 (),
  1350. __M);
  1351. }
  1352. extern __inline __m128i
  1353. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1354. _mm_cvtsepi32_epi8 (__m128i __A)
  1355. {
  1356. return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
  1357. (__v16qi)
  1358. _mm_undefined_si128 (),
  1359. (__mmask8) -1);
  1360. }
  1361. extern __inline void
  1362. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1363. _mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
  1364. {
  1365. __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
  1366. }
  1367. extern __inline __m128i
  1368. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1369. _mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
  1370. {
  1371. return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
  1372. (__v16qi) __O, __M);
  1373. }
  1374. extern __inline __m128i
  1375. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1376. _mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
  1377. {
  1378. return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
  1379. (__v16qi)
  1380. _mm_setzero_si128 (),
  1381. __M);
  1382. }
  1383. extern __inline __m128i
  1384. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1385. _mm256_cvtsepi32_epi8 (__m256i __A)
  1386. {
  1387. return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
  1388. (__v16qi)
  1389. _mm_undefined_si128 (),
  1390. (__mmask8) -1);
  1391. }
  1392. extern __inline void
  1393. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1394. _mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
  1395. {
  1396. __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
  1397. }
  1398. extern __inline __m128i
  1399. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1400. _mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
  1401. {
  1402. return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
  1403. (__v16qi) __O, __M);
  1404. }
  1405. extern __inline __m128i
  1406. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1407. _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
  1408. {
  1409. return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
  1410. (__v16qi)
  1411. _mm_setzero_si128 (),
  1412. __M);
  1413. }
  1414. extern __inline __m128i
  1415. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1416. _mm_cvtusepi32_epi8 (__m128i __A)
  1417. {
  1418. return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
  1419. (__v16qi)
  1420. _mm_undefined_si128 (),
  1421. (__mmask8) -1);
  1422. }
  1423. extern __inline void
  1424. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1425. _mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
  1426. {
  1427. __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
  1428. }
  1429. extern __inline __m128i
  1430. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1431. _mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
  1432. {
  1433. return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
  1434. (__v16qi) __O,
  1435. __M);
  1436. }
  1437. extern __inline __m128i
  1438. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1439. _mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
  1440. {
  1441. return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
  1442. (__v16qi)
  1443. _mm_setzero_si128 (),
  1444. __M);
  1445. }
  1446. extern __inline __m128i
  1447. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1448. _mm256_cvtusepi32_epi8 (__m256i __A)
  1449. {
  1450. return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
  1451. (__v16qi)
  1452. _mm_undefined_si128 (),
  1453. (__mmask8) -1);
  1454. }
  1455. extern __inline void
  1456. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1457. _mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
  1458. {
  1459. __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
  1460. }
  1461. extern __inline __m128i
  1462. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1463. _mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
  1464. {
  1465. return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
  1466. (__v16qi) __O,
  1467. __M);
  1468. }
  1469. extern __inline __m128i
  1470. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1471. _mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
  1472. {
  1473. return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
  1474. (__v16qi)
  1475. _mm_setzero_si128 (),
  1476. __M);
  1477. }
  1478. extern __inline __m128i
  1479. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1480. _mm_cvtepi32_epi16 (__m128i __A)
  1481. {
  1482. return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
  1483. (__v8hi)
  1484. _mm_setzero_si128 (),
  1485. (__mmask8) -1);
  1486. }
  1487. extern __inline void
  1488. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1489. _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
  1490. {
  1491. __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
  1492. }
  1493. extern __inline __m128i
  1494. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1495. _mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
  1496. {
  1497. return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
  1498. (__v8hi) __O, __M);
  1499. }
  1500. extern __inline __m128i
  1501. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1502. _mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
  1503. {
  1504. return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
  1505. (__v8hi)
  1506. _mm_setzero_si128 (),
  1507. __M);
  1508. }
  1509. extern __inline __m128i
  1510. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1511. _mm256_cvtepi32_epi16 (__m256i __A)
  1512. {
  1513. return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
  1514. (__v8hi)
  1515. _mm_setzero_si128 (),
  1516. (__mmask8) -1);
  1517. }
  1518. extern __inline void
  1519. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1520. _mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
  1521. {
  1522. __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
  1523. }
  1524. extern __inline __m128i
  1525. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1526. _mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
  1527. {
  1528. return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
  1529. (__v8hi) __O, __M);
  1530. }
  1531. extern __inline __m128i
  1532. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1533. _mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
  1534. {
  1535. return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
  1536. (__v8hi)
  1537. _mm_setzero_si128 (),
  1538. __M);
  1539. }
  1540. extern __inline __m128i
  1541. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1542. _mm_cvtsepi32_epi16 (__m128i __A)
  1543. {
  1544. return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
  1545. (__v8hi)
  1546. _mm_setzero_si128 (),
  1547. (__mmask8) -1);
  1548. }
  1549. extern __inline void
  1550. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1551. _mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
  1552. {
  1553. __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
  1554. }
  1555. extern __inline __m128i
  1556. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1557. _mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
  1558. {
  1559. return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
  1560. (__v8hi)__O,
  1561. __M);
  1562. }
  1563. extern __inline __m128i
  1564. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1565. _mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
  1566. {
  1567. return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
  1568. (__v8hi)
  1569. _mm_setzero_si128 (),
  1570. __M);
  1571. }
  1572. extern __inline __m128i
  1573. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1574. _mm256_cvtsepi32_epi16 (__m256i __A)
  1575. {
  1576. return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
  1577. (__v8hi)
  1578. _mm_undefined_si128 (),
  1579. (__mmask8) -1);
  1580. }
  1581. extern __inline void
  1582. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1583. _mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
  1584. {
  1585. __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
  1586. }
  1587. extern __inline __m128i
  1588. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1589. _mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
  1590. {
  1591. return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
  1592. (__v8hi) __O, __M);
  1593. }
  1594. extern __inline __m128i
  1595. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1596. _mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
  1597. {
  1598. return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
  1599. (__v8hi)
  1600. _mm_setzero_si128 (),
  1601. __M);
  1602. }
  1603. extern __inline __m128i
  1604. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1605. _mm_cvtusepi32_epi16 (__m128i __A)
  1606. {
  1607. return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
  1608. (__v8hi)
  1609. _mm_undefined_si128 (),
  1610. (__mmask8) -1);
  1611. }
  1612. extern __inline void
  1613. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1614. _mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
  1615. {
  1616. __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
  1617. }
  1618. extern __inline __m128i
  1619. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1620. _mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
  1621. {
  1622. return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
  1623. (__v8hi) __O, __M);
  1624. }
  1625. extern __inline __m128i
  1626. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1627. _mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
  1628. {
  1629. return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
  1630. (__v8hi)
  1631. _mm_setzero_si128 (),
  1632. __M);
  1633. }
  1634. extern __inline __m128i
  1635. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1636. _mm256_cvtusepi32_epi16 (__m256i __A)
  1637. {
  1638. return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
  1639. (__v8hi)
  1640. _mm_undefined_si128 (),
  1641. (__mmask8) -1);
  1642. }
  1643. extern __inline void
  1644. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1645. _mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
  1646. {
  1647. __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
  1648. }
  1649. extern __inline __m128i
  1650. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1651. _mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
  1652. {
  1653. return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
  1654. (__v8hi) __O, __M);
  1655. }
  1656. extern __inline __m128i
  1657. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1658. _mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
  1659. {
  1660. return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
  1661. (__v8hi)
  1662. _mm_setzero_si128 (),
  1663. __M);
  1664. }
  1665. extern __inline __m128i
  1666. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1667. _mm_cvtepi64_epi8 (__m128i __A)
  1668. {
  1669. return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
  1670. (__v16qi)
  1671. _mm_undefined_si128 (),
  1672. (__mmask8) -1);
  1673. }
  1674. extern __inline void
  1675. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1676. _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
  1677. {
  1678. __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
  1679. }
  1680. extern __inline __m128i
  1681. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1682. _mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
  1683. {
  1684. return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
  1685. (__v16qi) __O, __M);
  1686. }
  1687. extern __inline __m128i
  1688. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1689. _mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
  1690. {
  1691. return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
  1692. (__v16qi)
  1693. _mm_setzero_si128 (),
  1694. __M);
  1695. }
  1696. extern __inline __m128i
  1697. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1698. _mm256_cvtepi64_epi8 (__m256i __A)
  1699. {
  1700. return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
  1701. (__v16qi)
  1702. _mm_undefined_si128 (),
  1703. (__mmask8) -1);
  1704. }
  1705. extern __inline void
  1706. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1707. _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
  1708. {
  1709. __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
  1710. }
  1711. extern __inline __m128i
  1712. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1713. _mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
  1714. {
  1715. return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
  1716. (__v16qi) __O, __M);
  1717. }
  1718. extern __inline __m128i
  1719. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1720. _mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
  1721. {
  1722. return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
  1723. (__v16qi)
  1724. _mm_setzero_si128 (),
  1725. __M);
  1726. }
  1727. extern __inline __m128i
  1728. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1729. _mm_cvtsepi64_epi8 (__m128i __A)
  1730. {
  1731. return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
  1732. (__v16qi)
  1733. _mm_undefined_si128 (),
  1734. (__mmask8) -1);
  1735. }
  1736. extern __inline void
  1737. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1738. _mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
  1739. {
  1740. __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
  1741. }
  1742. extern __inline __m128i
  1743. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1744. _mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
  1745. {
  1746. return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
  1747. (__v16qi) __O, __M);
  1748. }
  1749. extern __inline __m128i
  1750. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1751. _mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
  1752. {
  1753. return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
  1754. (__v16qi)
  1755. _mm_setzero_si128 (),
  1756. __M);
  1757. }
  1758. extern __inline __m128i
  1759. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1760. _mm256_cvtsepi64_epi8 (__m256i __A)
  1761. {
  1762. return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
  1763. (__v16qi)
  1764. _mm_undefined_si128 (),
  1765. (__mmask8) -1);
  1766. }
  1767. extern __inline void
  1768. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1769. _mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
  1770. {
  1771. __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
  1772. }
  1773. extern __inline __m128i
  1774. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1775. _mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
  1776. {
  1777. return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
  1778. (__v16qi) __O, __M);
  1779. }
  1780. extern __inline __m128i
  1781. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1782. _mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
  1783. {
  1784. return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
  1785. (__v16qi)
  1786. _mm_setzero_si128 (),
  1787. __M);
  1788. }
  1789. extern __inline __m128i
  1790. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1791. _mm_cvtusepi64_epi8 (__m128i __A)
  1792. {
  1793. return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
  1794. (__v16qi)
  1795. _mm_undefined_si128 (),
  1796. (__mmask8) -1);
  1797. }
  1798. extern __inline void
  1799. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1800. _mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
  1801. {
  1802. __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
  1803. }
  1804. extern __inline __m128i
  1805. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1806. _mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
  1807. {
  1808. return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
  1809. (__v16qi) __O,
  1810. __M);
  1811. }
  1812. extern __inline __m128i
  1813. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1814. _mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
  1815. {
  1816. return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
  1817. (__v16qi)
  1818. _mm_setzero_si128 (),
  1819. __M);
  1820. }
  1821. extern __inline __m128i
  1822. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1823. _mm256_cvtusepi64_epi8 (__m256i __A)
  1824. {
  1825. return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
  1826. (__v16qi)
  1827. _mm_undefined_si128 (),
  1828. (__mmask8) -1);
  1829. }
  1830. extern __inline void
  1831. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1832. _mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
  1833. {
  1834. __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
  1835. }
  1836. extern __inline __m128i
  1837. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1838. _mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
  1839. {
  1840. return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
  1841. (__v16qi) __O,
  1842. __M);
  1843. }
  1844. extern __inline __m128i
  1845. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1846. _mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
  1847. {
  1848. return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
  1849. (__v16qi)
  1850. _mm_setzero_si128 (),
  1851. __M);
  1852. }
  1853. extern __inline __m128i
  1854. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1855. _mm_cvtepi64_epi16 (__m128i __A)
  1856. {
  1857. return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
  1858. (__v8hi)
  1859. _mm_undefined_si128 (),
  1860. (__mmask8) -1);
  1861. }
  1862. extern __inline void
  1863. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1864. _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
  1865. {
  1866. __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
  1867. }
  1868. extern __inline __m128i
  1869. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1870. _mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
  1871. {
  1872. return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
  1873. (__v8hi)__O,
  1874. __M);
  1875. }
  1876. extern __inline __m128i
  1877. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1878. _mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
  1879. {
  1880. return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
  1881. (__v8hi)
  1882. _mm_setzero_si128 (),
  1883. __M);
  1884. }
  1885. extern __inline __m128i
  1886. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1887. _mm256_cvtepi64_epi16 (__m256i __A)
  1888. {
  1889. return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
  1890. (__v8hi)
  1891. _mm_undefined_si128 (),
  1892. (__mmask8) -1);
  1893. }
  1894. extern __inline void
  1895. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1896. _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
  1897. {
  1898. __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
  1899. }
  1900. extern __inline __m128i
  1901. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1902. _mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
  1903. {
  1904. return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
  1905. (__v8hi) __O, __M);
  1906. }
  1907. extern __inline __m128i
  1908. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1909. _mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
  1910. {
  1911. return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
  1912. (__v8hi)
  1913. _mm_setzero_si128 (),
  1914. __M);
  1915. }
  1916. extern __inline __m128i
  1917. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1918. _mm_cvtsepi64_epi16 (__m128i __A)
  1919. {
  1920. return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
  1921. (__v8hi)
  1922. _mm_undefined_si128 (),
  1923. (__mmask8) -1);
  1924. }
  1925. extern __inline void
  1926. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1927. _mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
  1928. {
  1929. __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
  1930. }
  1931. extern __inline __m128i
  1932. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1933. _mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
  1934. {
  1935. return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
  1936. (__v8hi) __O, __M);
  1937. }
  1938. extern __inline __m128i
  1939. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1940. _mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
  1941. {
  1942. return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
  1943. (__v8hi)
  1944. _mm_setzero_si128 (),
  1945. __M);
  1946. }
  1947. extern __inline __m128i
  1948. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1949. _mm256_cvtsepi64_epi16 (__m256i __A)
  1950. {
  1951. return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
  1952. (__v8hi)
  1953. _mm_undefined_si128 (),
  1954. (__mmask8) -1);
  1955. }
  1956. extern __inline void
  1957. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1958. _mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
  1959. {
  1960. __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
  1961. }
  1962. extern __inline __m128i
  1963. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1964. _mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
  1965. {
  1966. return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
  1967. (__v8hi) __O, __M);
  1968. }
  1969. extern __inline __m128i
  1970. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1971. _mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
  1972. {
  1973. return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
  1974. (__v8hi)
  1975. _mm_setzero_si128 (),
  1976. __M);
  1977. }
  1978. extern __inline __m128i
  1979. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1980. _mm_cvtusepi64_epi16 (__m128i __A)
  1981. {
  1982. return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
  1983. (__v8hi)
  1984. _mm_undefined_si128 (),
  1985. (__mmask8) -1);
  1986. }
  1987. extern __inline void
  1988. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1989. _mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
  1990. {
  1991. __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
  1992. }
  1993. extern __inline __m128i
  1994. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1995. _mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
  1996. {
  1997. return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
  1998. (__v8hi) __O, __M);
  1999. }
  2000. extern __inline __m128i
  2001. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2002. _mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
  2003. {
  2004. return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
  2005. (__v8hi)
  2006. _mm_setzero_si128 (),
  2007. __M);
  2008. }
  2009. extern __inline __m128i
  2010. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2011. _mm256_cvtusepi64_epi16 (__m256i __A)
  2012. {
  2013. return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
  2014. (__v8hi)
  2015. _mm_undefined_si128 (),
  2016. (__mmask8) -1);
  2017. }
  2018. extern __inline void
  2019. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2020. _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
  2021. {
  2022. __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
  2023. }
  2024. extern __inline __m128i
  2025. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2026. _mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
  2027. {
  2028. return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
  2029. (__v8hi) __O, __M);
  2030. }
  2031. extern __inline __m128i
  2032. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2033. _mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
  2034. {
  2035. return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
  2036. (__v8hi)
  2037. _mm_setzero_si128 (),
  2038. __M);
  2039. }
  2040. extern __inline __m128i
  2041. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2042. _mm_cvtepi64_epi32 (__m128i __A)
  2043. {
  2044. return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
  2045. (__v4si)
  2046. _mm_undefined_si128 (),
  2047. (__mmask8) -1);
  2048. }
  2049. extern __inline void
  2050. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2051. _mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
  2052. {
  2053. __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
  2054. }
  2055. extern __inline __m128i
  2056. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2057. _mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
  2058. {
  2059. return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
  2060. (__v4si) __O, __M);
  2061. }
  2062. extern __inline __m128i
  2063. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2064. _mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
  2065. {
  2066. return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
  2067. (__v4si)
  2068. _mm_setzero_si128 (),
  2069. __M);
  2070. }
  2071. extern __inline __m128i
  2072. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2073. _mm256_cvtepi64_epi32 (__m256i __A)
  2074. {
  2075. return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
  2076. (__v4si)
  2077. _mm_undefined_si128 (),
  2078. (__mmask8) -1);
  2079. }
  2080. extern __inline void
  2081. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2082. _mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
  2083. {
  2084. __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
  2085. }
  2086. extern __inline __m128i
  2087. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2088. _mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
  2089. {
  2090. return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
  2091. (__v4si) __O, __M);
  2092. }
  2093. extern __inline __m128i
  2094. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2095. _mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
  2096. {
  2097. return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
  2098. (__v4si)
  2099. _mm_setzero_si128 (),
  2100. __M);
  2101. }
  2102. extern __inline __m128i
  2103. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2104. _mm_cvtsepi64_epi32 (__m128i __A)
  2105. {
  2106. return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
  2107. (__v4si)
  2108. _mm_undefined_si128 (),
  2109. (__mmask8) -1);
  2110. }
  2111. extern __inline void
  2112. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2113. _mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
  2114. {
  2115. __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
  2116. }
  2117. extern __inline __m128i
  2118. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2119. _mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
  2120. {
  2121. return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
  2122. (__v4si) __O, __M);
  2123. }
  2124. extern __inline __m128i
  2125. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2126. _mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
  2127. {
  2128. return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
  2129. (__v4si)
  2130. _mm_setzero_si128 (),
  2131. __M);
  2132. }
  2133. extern __inline __m128i
  2134. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2135. _mm256_cvtsepi64_epi32 (__m256i __A)
  2136. {
  2137. return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
  2138. (__v4si)
  2139. _mm_undefined_si128 (),
  2140. (__mmask8) -1);
  2141. }
  2142. extern __inline void
  2143. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2144. _mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
  2145. {
  2146. __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
  2147. }
  2148. extern __inline __m128i
  2149. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2150. _mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
  2151. {
  2152. return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
  2153. (__v4si)__O,
  2154. __M);
  2155. }
  2156. extern __inline __m128i
  2157. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2158. _mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
  2159. {
  2160. return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
  2161. (__v4si)
  2162. _mm_setzero_si128 (),
  2163. __M);
  2164. }
  2165. extern __inline __m128i
  2166. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2167. _mm_cvtusepi64_epi32 (__m128i __A)
  2168. {
  2169. return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
  2170. (__v4si)
  2171. _mm_undefined_si128 (),
  2172. (__mmask8) -1);
  2173. }
  2174. extern __inline void
  2175. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2176. _mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
  2177. {
  2178. __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
  2179. }
  2180. extern __inline __m128i
  2181. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2182. _mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
  2183. {
  2184. return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
  2185. (__v4si) __O, __M);
  2186. }
  2187. extern __inline __m128i
  2188. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2189. _mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
  2190. {
  2191. return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
  2192. (__v4si)
  2193. _mm_setzero_si128 (),
  2194. __M);
  2195. }
  2196. extern __inline __m128i
  2197. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2198. _mm256_cvtusepi64_epi32 (__m256i __A)
  2199. {
  2200. return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
  2201. (__v4si)
  2202. _mm_undefined_si128 (),
  2203. (__mmask8) -1);
  2204. }
  2205. extern __inline void
  2206. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2207. _mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
  2208. {
  2209. __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
  2210. }
  2211. extern __inline __m128i
  2212. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2213. _mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
  2214. {
  2215. return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
  2216. (__v4si) __O, __M);
  2217. }
  2218. extern __inline __m128i
  2219. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2220. _mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
  2221. {
  2222. return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
  2223. (__v4si)
  2224. _mm_setzero_si128 (),
  2225. __M);
  2226. }
  2227. extern __inline __m256
  2228. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2229. _mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
  2230. {
  2231. return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
  2232. (__v8sf) __O,
  2233. __M);
  2234. }
  2235. extern __inline __m256
  2236. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2237. _mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
  2238. {
  2239. return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
  2240. (__v8sf)
  2241. _mm256_setzero_ps (),
  2242. __M);
  2243. }
  2244. extern __inline __m128
  2245. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2246. _mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
  2247. {
  2248. return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
  2249. (__v4sf) __O,
  2250. __M);
  2251. }
  2252. extern __inline __m128
  2253. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2254. _mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
  2255. {
  2256. return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
  2257. (__v4sf)
  2258. _mm_setzero_ps (),
  2259. __M);
  2260. }
  2261. extern __inline __m256d
  2262. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2263. _mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
  2264. {
  2265. return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
  2266. (__v4df) __O,
  2267. __M);
  2268. }
  2269. extern __inline __m256d
  2270. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2271. _mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
  2272. {
  2273. return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
  2274. (__v4df)
  2275. _mm256_setzero_pd (),
  2276. __M);
  2277. }
  2278. extern __inline __m256i
  2279. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2280. _mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
  2281. {
  2282. return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
  2283. (__v8si) __O,
  2284. __M);
  2285. }
  2286. extern __inline __m256i
  2287. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2288. _mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
  2289. {
  2290. return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
  2291. (__v8si)
  2292. _mm256_setzero_si256 (),
  2293. __M);
  2294. }
  2295. extern __inline __m256i
  2296. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2297. _mm256_mask_set1_epi32 (__m256i __O, __mmask8 __M, int __A)
  2298. {
  2299. return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A, (__v8si) __O,
  2300. __M);
  2301. }
  2302. extern __inline __m256i
  2303. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2304. _mm256_maskz_set1_epi32 (__mmask8 __M, int __A)
  2305. {
  2306. return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A,
  2307. (__v8si)
  2308. _mm256_setzero_si256 (),
  2309. __M);
  2310. }
  2311. extern __inline __m128i
  2312. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2313. _mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
  2314. {
  2315. return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
  2316. (__v4si) __O,
  2317. __M);
  2318. }
  2319. extern __inline __m128i
  2320. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2321. _mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
  2322. {
  2323. return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
  2324. (__v4si)
  2325. _mm_setzero_si128 (),
  2326. __M);
  2327. }
  2328. extern __inline __m128i
  2329. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2330. _mm_mask_set1_epi32 (__m128i __O, __mmask8 __M, int __A)
  2331. {
  2332. return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A, (__v4si) __O,
  2333. __M);
  2334. }
  2335. extern __inline __m128i
  2336. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2337. _mm_maskz_set1_epi32 (__mmask8 __M, int __A)
  2338. {
  2339. return (__m128i)
  2340. __builtin_ia32_pbroadcastd128_gpr_mask (__A,
  2341. (__v4si) _mm_setzero_si128 (),
  2342. __M);
  2343. }
  2344. extern __inline __m256i
  2345. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2346. _mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
  2347. {
  2348. return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
  2349. (__v4di) __O,
  2350. __M);
  2351. }
  2352. extern __inline __m256i
  2353. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2354. _mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
  2355. {
  2356. return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
  2357. (__v4di)
  2358. _mm256_setzero_si256 (),
  2359. __M);
  2360. }
  2361. extern __inline __m256i
  2362. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2363. _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
  2364. {
  2365. return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) __O,
  2366. __M);
  2367. }
  2368. extern __inline __m256i
  2369. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2370. _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
  2371. {
  2372. return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A,
  2373. (__v4di)
  2374. _mm256_setzero_si256 (),
  2375. __M);
  2376. }
  2377. extern __inline __m128i
  2378. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2379. _mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
  2380. {
  2381. return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
  2382. (__v2di) __O,
  2383. __M);
  2384. }
  2385. extern __inline __m128i
  2386. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2387. _mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
  2388. {
  2389. return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
  2390. (__v2di)
  2391. _mm_setzero_si128 (),
  2392. __M);
  2393. }
  2394. extern __inline __m128i
  2395. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2396. _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
  2397. {
  2398. return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) __O,
  2399. __M);
  2400. }
  2401. extern __inline __m128i
  2402. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2403. _mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
  2404. {
  2405. return (__m128i)
  2406. __builtin_ia32_pbroadcastq128_gpr_mask (__A,
  2407. (__v2di) _mm_setzero_si128 (),
  2408. __M);
  2409. }
  2410. extern __inline __m256
  2411. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2412. _mm256_broadcast_f32x4 (__m128 __A)
  2413. {
  2414. return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
  2415. (__v8sf)_mm256_undefined_pd (),
  2416. (__mmask8) -1);
  2417. }
  2418. extern __inline __m256
  2419. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2420. _mm256_mask_broadcast_f32x4 (__m256 __O, __mmask8 __M, __m128 __A)
  2421. {
  2422. return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
  2423. (__v8sf) __O,
  2424. __M);
  2425. }
  2426. extern __inline __m256
  2427. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2428. _mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
  2429. {
  2430. return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
  2431. (__v8sf)
  2432. _mm256_setzero_ps (),
  2433. __M);
  2434. }
  2435. extern __inline __m256i
  2436. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2437. _mm256_broadcast_i32x4 (__m128i __A)
  2438. {
  2439. return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
  2440. __A,
  2441. (__v8si)_mm256_undefined_si256 (),
  2442. (__mmask8) -1);
  2443. }
  2444. extern __inline __m256i
  2445. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2446. _mm256_mask_broadcast_i32x4 (__m256i __O, __mmask8 __M, __m128i __A)
  2447. {
  2448. return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
  2449. __A,
  2450. (__v8si)
  2451. __O, __M);
  2452. }
  2453. extern __inline __m256i
  2454. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2455. _mm256_maskz_broadcast_i32x4 (__mmask8 __M, __m128i __A)
  2456. {
  2457. return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
  2458. __A,
  2459. (__v8si)
  2460. _mm256_setzero_si256 (),
  2461. __M);
  2462. }
  2463. extern __inline __m256i
  2464. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2465. _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
  2466. {
  2467. return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
  2468. (__v8si) __W,
  2469. (__mmask8) __U);
  2470. }
  2471. extern __inline __m256i
  2472. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2473. _mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
  2474. {
  2475. return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
  2476. (__v8si)
  2477. _mm256_setzero_si256 (),
  2478. (__mmask8) __U);
  2479. }
  2480. extern __inline __m128i
  2481. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2482. _mm_mask_cvtepi8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
  2483. {
  2484. return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
  2485. (__v4si) __W,
  2486. (__mmask8) __U);
  2487. }
  2488. extern __inline __m128i
  2489. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2490. _mm_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
  2491. {
  2492. return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
  2493. (__v4si)
  2494. _mm_setzero_si128 (),
  2495. (__mmask8) __U);
  2496. }
  2497. extern __inline __m256i
  2498. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2499. _mm256_mask_cvtepi8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
  2500. {
  2501. return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
  2502. (__v4di) __W,
  2503. (__mmask8) __U);
  2504. }
  2505. extern __inline __m256i
  2506. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2507. _mm256_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
  2508. {
  2509. return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
  2510. (__v4di)
  2511. _mm256_setzero_si256 (),
  2512. (__mmask8) __U);
  2513. }
  2514. extern __inline __m128i
  2515. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2516. _mm_mask_cvtepi8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
  2517. {
  2518. return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
  2519. (__v2di) __W,
  2520. (__mmask8) __U);
  2521. }
  2522. extern __inline __m128i
  2523. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2524. _mm_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
  2525. {
  2526. return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
  2527. (__v2di)
  2528. _mm_setzero_si128 (),
  2529. (__mmask8) __U);
  2530. }
  2531. extern __inline __m256i
  2532. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2533. _mm256_mask_cvtepi16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
  2534. {
  2535. return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
  2536. (__v8si) __W,
  2537. (__mmask8) __U);
  2538. }
  2539. extern __inline __m256i
  2540. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2541. _mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
  2542. {
  2543. return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
  2544. (__v8si)
  2545. _mm256_setzero_si256 (),
  2546. (__mmask8) __U);
  2547. }
  2548. extern __inline __m128i
  2549. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2550. _mm_mask_cvtepi16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
  2551. {
  2552. return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
  2553. (__v4si) __W,
  2554. (__mmask8) __U);
  2555. }
  2556. extern __inline __m128i
  2557. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2558. _mm_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
  2559. {
  2560. return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
  2561. (__v4si)
  2562. _mm_setzero_si128 (),
  2563. (__mmask8) __U);
  2564. }
  2565. extern __inline __m256i
  2566. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2567. _mm256_mask_cvtepi16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
  2568. {
  2569. return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
  2570. (__v4di) __W,
  2571. (__mmask8) __U);
  2572. }
  2573. extern __inline __m256i
  2574. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2575. _mm256_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
  2576. {
  2577. return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
  2578. (__v4di)
  2579. _mm256_setzero_si256 (),
  2580. (__mmask8) __U);
  2581. }
  2582. extern __inline __m128i
  2583. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2584. _mm_mask_cvtepi16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
  2585. {
  2586. return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
  2587. (__v2di) __W,
  2588. (__mmask8) __U);
  2589. }
  2590. extern __inline __m128i
  2591. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2592. _mm_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
  2593. {
  2594. return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
  2595. (__v2di)
  2596. _mm_setzero_si128 (),
  2597. (__mmask8) __U);
  2598. }
  2599. extern __inline __m256i
  2600. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2601. _mm256_mask_cvtepi32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
  2602. {
  2603. return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
  2604. (__v4di) __W,
  2605. (__mmask8) __U);
  2606. }
  2607. extern __inline __m256i
  2608. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2609. _mm256_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
  2610. {
  2611. return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
  2612. (__v4di)
  2613. _mm256_setzero_si256 (),
  2614. (__mmask8) __U);
  2615. }
  2616. extern __inline __m128i
  2617. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2618. _mm_mask_cvtepi32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
  2619. {
  2620. return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
  2621. (__v2di) __W,
  2622. (__mmask8) __U);
  2623. }
  2624. extern __inline __m128i
  2625. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2626. _mm_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
  2627. {
  2628. return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
  2629. (__v2di)
  2630. _mm_setzero_si128 (),
  2631. (__mmask8) __U);
  2632. }
  2633. extern __inline __m256i
  2634. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2635. _mm256_mask_cvtepu8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
  2636. {
  2637. return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
  2638. (__v8si) __W,
  2639. (__mmask8) __U);
  2640. }
  2641. extern __inline __m256i
  2642. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2643. _mm256_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
  2644. {
  2645. return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
  2646. (__v8si)
  2647. _mm256_setzero_si256 (),
  2648. (__mmask8) __U);
  2649. }
  2650. extern __inline __m128i
  2651. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2652. _mm_mask_cvtepu8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
  2653. {
  2654. return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
  2655. (__v4si) __W,
  2656. (__mmask8) __U);
  2657. }
  2658. extern __inline __m128i
  2659. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2660. _mm_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
  2661. {
  2662. return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
  2663. (__v4si)
  2664. _mm_setzero_si128 (),
  2665. (__mmask8) __U);
  2666. }
  2667. extern __inline __m256i
  2668. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2669. _mm256_mask_cvtepu8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
  2670. {
  2671. return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
  2672. (__v4di) __W,
  2673. (__mmask8) __U);
  2674. }
  2675. extern __inline __m256i
  2676. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2677. _mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
  2678. {
  2679. return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
  2680. (__v4di)
  2681. _mm256_setzero_si256 (),
  2682. (__mmask8) __U);
  2683. }
  2684. extern __inline __m128i
  2685. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2686. _mm_mask_cvtepu8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
  2687. {
  2688. return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
  2689. (__v2di) __W,
  2690. (__mmask8) __U);
  2691. }
  2692. extern __inline __m128i
  2693. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2694. _mm_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
  2695. {
  2696. return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
  2697. (__v2di)
  2698. _mm_setzero_si128 (),
  2699. (__mmask8) __U);
  2700. }
  2701. extern __inline __m256i
  2702. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2703. _mm256_mask_cvtepu16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
  2704. {
  2705. return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
  2706. (__v8si) __W,
  2707. (__mmask8) __U);
  2708. }
  2709. extern __inline __m256i
  2710. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2711. _mm256_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
  2712. {
  2713. return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
  2714. (__v8si)
  2715. _mm256_setzero_si256 (),
  2716. (__mmask8) __U);
  2717. }
  2718. extern __inline __m128i
  2719. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2720. _mm_mask_cvtepu16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
  2721. {
  2722. return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
  2723. (__v4si) __W,
  2724. (__mmask8) __U);
  2725. }
  2726. extern __inline __m128i
  2727. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2728. _mm_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
  2729. {
  2730. return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
  2731. (__v4si)
  2732. _mm_setzero_si128 (),
  2733. (__mmask8) __U);
  2734. }
  2735. extern __inline __m256i
  2736. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2737. _mm256_mask_cvtepu16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
  2738. {
  2739. return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
  2740. (__v4di) __W,
  2741. (__mmask8) __U);
  2742. }
  2743. extern __inline __m256i
  2744. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2745. _mm256_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
  2746. {
  2747. return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
  2748. (__v4di)
  2749. _mm256_setzero_si256 (),
  2750. (__mmask8) __U);
  2751. }
  2752. extern __inline __m128i
  2753. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2754. _mm_mask_cvtepu16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
  2755. {
  2756. return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
  2757. (__v2di) __W,
  2758. (__mmask8) __U);
  2759. }
  2760. extern __inline __m128i
  2761. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2762. _mm_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
  2763. {
  2764. return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
  2765. (__v2di)
  2766. _mm_setzero_si128 (),
  2767. (__mmask8) __U);
  2768. }
  2769. extern __inline __m256i
  2770. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2771. _mm256_mask_cvtepu32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
  2772. {
  2773. return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
  2774. (__v4di) __W,
  2775. (__mmask8) __U);
  2776. }
  2777. extern __inline __m256i
  2778. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2779. _mm256_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
  2780. {
  2781. return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
  2782. (__v4di)
  2783. _mm256_setzero_si256 (),
  2784. (__mmask8) __U);
  2785. }
  2786. extern __inline __m128i
  2787. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2788. _mm_mask_cvtepu32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
  2789. {
  2790. return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
  2791. (__v2di) __W,
  2792. (__mmask8) __U);
  2793. }
  2794. extern __inline __m128i
  2795. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2796. _mm_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
  2797. {
  2798. return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
  2799. (__v2di)
  2800. _mm_setzero_si128 (),
  2801. (__mmask8) __U);
  2802. }
  2803. extern __inline __m256d
  2804. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2805. _mm256_rcp14_pd (__m256d __A)
  2806. {
  2807. return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
  2808. (__v4df)
  2809. _mm256_setzero_pd (),
  2810. (__mmask8) -1);
  2811. }
  2812. extern __inline __m256d
  2813. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2814. _mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
  2815. {
  2816. return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
  2817. (__v4df) __W,
  2818. (__mmask8) __U);
  2819. }
  2820. extern __inline __m256d
  2821. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2822. _mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
  2823. {
  2824. return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
  2825. (__v4df)
  2826. _mm256_setzero_pd (),
  2827. (__mmask8) __U);
  2828. }
  2829. extern __inline __m128d
  2830. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2831. _mm_rcp14_pd (__m128d __A)
  2832. {
  2833. return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
  2834. (__v2df)
  2835. _mm_setzero_pd (),
  2836. (__mmask8) -1);
  2837. }
  2838. extern __inline __m128d
  2839. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2840. _mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
  2841. {
  2842. return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
  2843. (__v2df) __W,
  2844. (__mmask8) __U);
  2845. }
  2846. extern __inline __m128d
  2847. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2848. _mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
  2849. {
  2850. return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
  2851. (__v2df)
  2852. _mm_setzero_pd (),
  2853. (__mmask8) __U);
  2854. }
  2855. extern __inline __m256
  2856. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2857. _mm256_rcp14_ps (__m256 __A)
  2858. {
  2859. return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
  2860. (__v8sf)
  2861. _mm256_setzero_ps (),
  2862. (__mmask8) -1);
  2863. }
  2864. extern __inline __m256
  2865. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2866. _mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
  2867. {
  2868. return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
  2869. (__v8sf) __W,
  2870. (__mmask8) __U);
  2871. }
  2872. extern __inline __m256
  2873. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2874. _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
  2875. {
  2876. return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
  2877. (__v8sf)
  2878. _mm256_setzero_ps (),
  2879. (__mmask8) __U);
  2880. }
  2881. extern __inline __m128
  2882. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2883. _mm_rcp14_ps (__m128 __A)
  2884. {
  2885. return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
  2886. (__v4sf)
  2887. _mm_setzero_ps (),
  2888. (__mmask8) -1);
  2889. }
  2890. extern __inline __m128
  2891. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2892. _mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
  2893. {
  2894. return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
  2895. (__v4sf) __W,
  2896. (__mmask8) __U);
  2897. }
  2898. extern __inline __m128
  2899. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2900. _mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
  2901. {
  2902. return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
  2903. (__v4sf)
  2904. _mm_setzero_ps (),
  2905. (__mmask8) __U);
  2906. }
  2907. extern __inline __m256d
  2908. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2909. _mm256_rsqrt14_pd (__m256d __A)
  2910. {
  2911. return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
  2912. (__v4df)
  2913. _mm256_setzero_pd (),
  2914. (__mmask8) -1);
  2915. }
  2916. extern __inline __m256d
  2917. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2918. _mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
  2919. {
  2920. return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
  2921. (__v4df) __W,
  2922. (__mmask8) __U);
  2923. }
  2924. extern __inline __m256d
  2925. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2926. _mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
  2927. {
  2928. return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
  2929. (__v4df)
  2930. _mm256_setzero_pd (),
  2931. (__mmask8) __U);
  2932. }
  2933. extern __inline __m128d
  2934. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2935. _mm_rsqrt14_pd (__m128d __A)
  2936. {
  2937. return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
  2938. (__v2df)
  2939. _mm_setzero_pd (),
  2940. (__mmask8) -1);
  2941. }
  2942. extern __inline __m128d
  2943. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2944. _mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
  2945. {
  2946. return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
  2947. (__v2df) __W,
  2948. (__mmask8) __U);
  2949. }
  2950. extern __inline __m128d
  2951. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2952. _mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
  2953. {
  2954. return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
  2955. (__v2df)
  2956. _mm_setzero_pd (),
  2957. (__mmask8) __U);
  2958. }
  2959. extern __inline __m256
  2960. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2961. _mm256_rsqrt14_ps (__m256 __A)
  2962. {
  2963. return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
  2964. (__v8sf)
  2965. _mm256_setzero_ps (),
  2966. (__mmask8) -1);
  2967. }
  2968. extern __inline __m256
  2969. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2970. _mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
  2971. {
  2972. return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
  2973. (__v8sf) __W,
  2974. (__mmask8) __U);
  2975. }
  2976. extern __inline __m256
  2977. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2978. _mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
  2979. {
  2980. return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
  2981. (__v8sf)
  2982. _mm256_setzero_ps (),
  2983. (__mmask8) __U);
  2984. }
  2985. extern __inline __m128
  2986. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2987. _mm_rsqrt14_ps (__m128 __A)
  2988. {
  2989. return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
  2990. (__v4sf)
  2991. _mm_setzero_ps (),
  2992. (__mmask8) -1);
  2993. }
  2994. extern __inline __m128
  2995. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2996. _mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
  2997. {
  2998. return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
  2999. (__v4sf) __W,
  3000. (__mmask8) __U);
  3001. }
  3002. extern __inline __m128
  3003. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3004. _mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
  3005. {
  3006. return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
  3007. (__v4sf)
  3008. _mm_setzero_ps (),
  3009. (__mmask8) __U);
  3010. }
  3011. extern __inline __m256d
  3012. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3013. _mm256_mask_sqrt_pd (__m256d __W, __mmask8 __U, __m256d __A)
  3014. {
  3015. return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
  3016. (__v4df) __W,
  3017. (__mmask8) __U);
  3018. }
  3019. extern __inline __m256d
  3020. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3021. _mm256_maskz_sqrt_pd (__mmask8 __U, __m256d __A)
  3022. {
  3023. return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
  3024. (__v4df)
  3025. _mm256_setzero_pd (),
  3026. (__mmask8) __U);
  3027. }
  3028. extern __inline __m128d
  3029. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3030. _mm_mask_sqrt_pd (__m128d __W, __mmask8 __U, __m128d __A)
  3031. {
  3032. return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
  3033. (__v2df) __W,
  3034. (__mmask8) __U);
  3035. }
  3036. extern __inline __m128d
  3037. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3038. _mm_maskz_sqrt_pd (__mmask8 __U, __m128d __A)
  3039. {
  3040. return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
  3041. (__v2df)
  3042. _mm_setzero_pd (),
  3043. (__mmask8) __U);
  3044. }
  3045. extern __inline __m256
  3046. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3047. _mm256_mask_sqrt_ps (__m256 __W, __mmask8 __U, __m256 __A)
  3048. {
  3049. return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
  3050. (__v8sf) __W,
  3051. (__mmask8) __U);
  3052. }
  3053. extern __inline __m256
  3054. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3055. _mm256_maskz_sqrt_ps (__mmask8 __U, __m256 __A)
  3056. {
  3057. return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
  3058. (__v8sf)
  3059. _mm256_setzero_ps (),
  3060. (__mmask8) __U);
  3061. }
  3062. extern __inline __m128
  3063. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3064. _mm_mask_sqrt_ps (__m128 __W, __mmask8 __U, __m128 __A)
  3065. {
  3066. return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
  3067. (__v4sf) __W,
  3068. (__mmask8) __U);
  3069. }
  3070. extern __inline __m128
  3071. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3072. _mm_maskz_sqrt_ps (__mmask8 __U, __m128 __A)
  3073. {
  3074. return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
  3075. (__v4sf)
  3076. _mm_setzero_ps (),
  3077. (__mmask8) __U);
  3078. }
  3079. extern __inline __m256i
  3080. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3081. _mm256_mask_add_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  3082. __m256i __B)
  3083. {
  3084. return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
  3085. (__v8si) __B,
  3086. (__v8si) __W,
  3087. (__mmask8) __U);
  3088. }
  3089. extern __inline __m256i
  3090. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3091. _mm256_maskz_add_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
  3092. {
  3093. return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
  3094. (__v8si) __B,
  3095. (__v8si)
  3096. _mm256_setzero_si256 (),
  3097. (__mmask8) __U);
  3098. }
  3099. extern __inline __m256i
  3100. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3101. _mm256_mask_add_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  3102. __m256i __B)
  3103. {
  3104. return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
  3105. (__v4di) __B,
  3106. (__v4di) __W,
  3107. (__mmask8) __U);
  3108. }
  3109. extern __inline __m256i
  3110. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3111. _mm256_maskz_add_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
  3112. {
  3113. return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
  3114. (__v4di) __B,
  3115. (__v4di)
  3116. _mm256_setzero_si256 (),
  3117. (__mmask8) __U);
  3118. }
  3119. extern __inline __m256i
  3120. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3121. _mm256_mask_sub_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  3122. __m256i __B)
  3123. {
  3124. return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
  3125. (__v8si) __B,
  3126. (__v8si) __W,
  3127. (__mmask8) __U);
  3128. }
  3129. extern __inline __m256i
  3130. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3131. _mm256_maskz_sub_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
  3132. {
  3133. return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
  3134. (__v8si) __B,
  3135. (__v8si)
  3136. _mm256_setzero_si256 (),
  3137. (__mmask8) __U);
  3138. }
  3139. extern __inline __m256i
  3140. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3141. _mm256_mask_sub_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  3142. __m256i __B)
  3143. {
  3144. return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
  3145. (__v4di) __B,
  3146. (__v4di) __W,
  3147. (__mmask8) __U);
  3148. }
  3149. extern __inline __m256i
  3150. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3151. _mm256_maskz_sub_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
  3152. {
  3153. return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
  3154. (__v4di) __B,
  3155. (__v4di)
  3156. _mm256_setzero_si256 (),
  3157. (__mmask8) __U);
  3158. }
  3159. extern __inline __m128i
  3160. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3161. _mm_mask_add_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  3162. __m128i __B)
  3163. {
  3164. return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
  3165. (__v4si) __B,
  3166. (__v4si) __W,
  3167. (__mmask8) __U);
  3168. }
  3169. extern __inline __m128i
  3170. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3171. _mm_maskz_add_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
  3172. {
  3173. return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
  3174. (__v4si) __B,
  3175. (__v4si)
  3176. _mm_setzero_si128 (),
  3177. (__mmask8) __U);
  3178. }
  3179. extern __inline __m128i
  3180. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3181. _mm_mask_add_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
  3182. __m128i __B)
  3183. {
  3184. return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
  3185. (__v2di) __B,
  3186. (__v2di) __W,
  3187. (__mmask8) __U);
  3188. }
  3189. extern __inline __m128i
  3190. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3191. _mm_maskz_add_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
  3192. {
  3193. return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
  3194. (__v2di) __B,
  3195. (__v2di)
  3196. _mm_setzero_si128 (),
  3197. (__mmask8) __U);
  3198. }
  3199. extern __inline __m128i
  3200. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3201. _mm_mask_sub_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  3202. __m128i __B)
  3203. {
  3204. return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
  3205. (__v4si) __B,
  3206. (__v4si) __W,
  3207. (__mmask8) __U);
  3208. }
  3209. extern __inline __m128i
  3210. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3211. _mm_maskz_sub_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
  3212. {
  3213. return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
  3214. (__v4si) __B,
  3215. (__v4si)
  3216. _mm_setzero_si128 (),
  3217. (__mmask8) __U);
  3218. }
  3219. extern __inline __m128i
  3220. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3221. _mm_mask_sub_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
  3222. __m128i __B)
  3223. {
  3224. return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
  3225. (__v2di) __B,
  3226. (__v2di) __W,
  3227. (__mmask8) __U);
  3228. }
  3229. extern __inline __m128i
  3230. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3231. _mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
  3232. {
  3233. return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
  3234. (__v2di) __B,
  3235. (__v2di)
  3236. _mm_setzero_si128 (),
  3237. (__mmask8) __U);
  3238. }
  3239. extern __inline __m256
  3240. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3241. _mm256_getexp_ps (__m256 __A)
  3242. {
  3243. return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
  3244. (__v8sf)
  3245. _mm256_setzero_ps (),
  3246. (__mmask8) -1);
  3247. }
  3248. extern __inline __m256
  3249. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3250. _mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A)
  3251. {
  3252. return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
  3253. (__v8sf) __W,
  3254. (__mmask8) __U);
  3255. }
  3256. extern __inline __m256
  3257. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3258. _mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A)
  3259. {
  3260. return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
  3261. (__v8sf)
  3262. _mm256_setzero_ps (),
  3263. (__mmask8) __U);
  3264. }
  3265. extern __inline __m256d
  3266. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3267. _mm256_getexp_pd (__m256d __A)
  3268. {
  3269. return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
  3270. (__v4df)
  3271. _mm256_setzero_pd (),
  3272. (__mmask8) -1);
  3273. }
  3274. extern __inline __m256d
  3275. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3276. _mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A)
  3277. {
  3278. return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
  3279. (__v4df) __W,
  3280. (__mmask8) __U);
  3281. }
  3282. extern __inline __m256d
  3283. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3284. _mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A)
  3285. {
  3286. return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
  3287. (__v4df)
  3288. _mm256_setzero_pd (),
  3289. (__mmask8) __U);
  3290. }
  3291. extern __inline __m128
  3292. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3293. _mm_getexp_ps (__m128 __A)
  3294. {
  3295. return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
  3296. (__v4sf)
  3297. _mm_setzero_ps (),
  3298. (__mmask8) -1);
  3299. }
  3300. extern __inline __m128
  3301. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3302. _mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A)
  3303. {
  3304. return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
  3305. (__v4sf) __W,
  3306. (__mmask8) __U);
  3307. }
  3308. extern __inline __m128
  3309. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3310. _mm_maskz_getexp_ps (__mmask8 __U, __m128 __A)
  3311. {
  3312. return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
  3313. (__v4sf)
  3314. _mm_setzero_ps (),
  3315. (__mmask8) __U);
  3316. }
  3317. extern __inline __m128d
  3318. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3319. _mm_getexp_pd (__m128d __A)
  3320. {
  3321. return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
  3322. (__v2df)
  3323. _mm_setzero_pd (),
  3324. (__mmask8) -1);
  3325. }
  3326. extern __inline __m128d
  3327. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3328. _mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A)
  3329. {
  3330. return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
  3331. (__v2df) __W,
  3332. (__mmask8) __U);
  3333. }
  3334. extern __inline __m128d
  3335. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3336. _mm_maskz_getexp_pd (__mmask8 __U, __m128d __A)
  3337. {
  3338. return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
  3339. (__v2df)
  3340. _mm_setzero_pd (),
  3341. (__mmask8) __U);
  3342. }
  3343. extern __inline __m256i
  3344. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3345. _mm256_mask_srl_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  3346. __m128i __B)
  3347. {
  3348. return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
  3349. (__v4si) __B,
  3350. (__v8si) __W,
  3351. (__mmask8) __U);
  3352. }
  3353. extern __inline __m256i
  3354. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3355. _mm256_maskz_srl_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
  3356. {
  3357. return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
  3358. (__v4si) __B,
  3359. (__v8si)
  3360. _mm256_setzero_si256 (),
  3361. (__mmask8) __U);
  3362. }
  3363. extern __inline __m128i
  3364. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3365. _mm_mask_srl_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  3366. __m128i __B)
  3367. {
  3368. return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
  3369. (__v4si) __B,
  3370. (__v4si) __W,
  3371. (__mmask8) __U);
  3372. }
  3373. extern __inline __m128i
  3374. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3375. _mm_maskz_srl_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
  3376. {
  3377. return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
  3378. (__v4si) __B,
  3379. (__v4si)
  3380. _mm_setzero_si128 (),
  3381. (__mmask8) __U);
  3382. }
  3383. extern __inline __m256i
  3384. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3385. _mm256_mask_srl_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  3386. __m128i __B)
  3387. {
  3388. return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
  3389. (__v2di) __B,
  3390. (__v4di) __W,
  3391. (__mmask8) __U);
  3392. }
  3393. extern __inline __m256i
  3394. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3395. _mm256_maskz_srl_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
  3396. {
  3397. return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
  3398. (__v2di) __B,
  3399. (__v4di)
  3400. _mm256_setzero_si256 (),
  3401. (__mmask8) __U);
  3402. }
  3403. extern __inline __m128i
  3404. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3405. _mm_mask_srl_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
  3406. __m128i __B)
  3407. {
  3408. return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
  3409. (__v2di) __B,
  3410. (__v2di) __W,
  3411. (__mmask8) __U);
  3412. }
  3413. extern __inline __m128i
  3414. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3415. _mm_maskz_srl_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
  3416. {
  3417. return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
  3418. (__v2di) __B,
  3419. (__v2di)
  3420. _mm_setzero_si128 (),
  3421. (__mmask8) __U);
  3422. }
  3423. extern __inline __m256i
  3424. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3425. _mm256_mask_and_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  3426. __m256i __B)
  3427. {
  3428. return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
  3429. (__v8si) __B,
  3430. (__v8si) __W,
  3431. (__mmask8) __U);
  3432. }
  3433. extern __inline __m256i
  3434. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3435. _mm256_maskz_and_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
  3436. {
  3437. return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
  3438. (__v8si) __B,
  3439. (__v8si)
  3440. _mm256_setzero_si256 (),
  3441. (__mmask8) __U);
  3442. }
  3443. extern __inline __m256d
  3444. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3445. _mm256_scalef_pd (__m256d __A, __m256d __B)
  3446. {
  3447. return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
  3448. (__v4df) __B,
  3449. (__v4df)
  3450. _mm256_setzero_pd (),
  3451. (__mmask8) -1);
  3452. }
  3453. extern __inline __m256d
  3454. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3455. _mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
  3456. __m256d __B)
  3457. {
  3458. return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
  3459. (__v4df) __B,
  3460. (__v4df) __W,
  3461. (__mmask8) __U);
  3462. }
  3463. extern __inline __m256d
  3464. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3465. _mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B)
  3466. {
  3467. return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
  3468. (__v4df) __B,
  3469. (__v4df)
  3470. _mm256_setzero_pd (),
  3471. (__mmask8) __U);
  3472. }
  3473. extern __inline __m256
  3474. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3475. _mm256_scalef_ps (__m256 __A, __m256 __B)
  3476. {
  3477. return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
  3478. (__v8sf) __B,
  3479. (__v8sf)
  3480. _mm256_setzero_ps (),
  3481. (__mmask8) -1);
  3482. }
  3483. extern __inline __m256
  3484. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3485. _mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
  3486. __m256 __B)
  3487. {
  3488. return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
  3489. (__v8sf) __B,
  3490. (__v8sf) __W,
  3491. (__mmask8) __U);
  3492. }
  3493. extern __inline __m256
  3494. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3495. _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B)
  3496. {
  3497. return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
  3498. (__v8sf) __B,
  3499. (__v8sf)
  3500. _mm256_setzero_ps (),
  3501. (__mmask8) __U);
  3502. }
  3503. extern __inline __m128d
  3504. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3505. _mm_scalef_pd (__m128d __A, __m128d __B)
  3506. {
  3507. return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
  3508. (__v2df) __B,
  3509. (__v2df)
  3510. _mm_setzero_pd (),
  3511. (__mmask8) -1);
  3512. }
  3513. extern __inline __m128d
  3514. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3515. _mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
  3516. __m128d __B)
  3517. {
  3518. return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
  3519. (__v2df) __B,
  3520. (__v2df) __W,
  3521. (__mmask8) __U);
  3522. }
  3523. extern __inline __m128d
  3524. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3525. _mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B)
  3526. {
  3527. return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
  3528. (__v2df) __B,
  3529. (__v2df)
  3530. _mm_setzero_pd (),
  3531. (__mmask8) __U);
  3532. }
  3533. extern __inline __m128
  3534. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3535. _mm_scalef_ps (__m128 __A, __m128 __B)
  3536. {
  3537. return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
  3538. (__v4sf) __B,
  3539. (__v4sf)
  3540. _mm_setzero_ps (),
  3541. (__mmask8) -1);
  3542. }
  3543. extern __inline __m128
  3544. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3545. _mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  3546. {
  3547. return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
  3548. (__v4sf) __B,
  3549. (__v4sf) __W,
  3550. (__mmask8) __U);
  3551. }
  3552. extern __inline __m128
  3553. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3554. _mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B)
  3555. {
  3556. return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
  3557. (__v4sf) __B,
  3558. (__v4sf)
  3559. _mm_setzero_ps (),
  3560. (__mmask8) __U);
  3561. }
  3562. extern __inline __m256d
  3563. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3564. _mm256_mask_fmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
  3565. __m256d __C)
  3566. {
  3567. return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
  3568. (__v4df) __B,
  3569. (__v4df) __C,
  3570. (__mmask8) __U);
  3571. }
  3572. extern __inline __m256d
  3573. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3574. _mm256_mask3_fmadd_pd (__m256d __A, __m256d __B, __m256d __C,
  3575. __mmask8 __U)
  3576. {
  3577. return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A,
  3578. (__v4df) __B,
  3579. (__v4df) __C,
  3580. (__mmask8) __U);
  3581. }
  3582. extern __inline __m256d
  3583. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3584. _mm256_maskz_fmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
  3585. __m256d __C)
  3586. {
  3587. return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
  3588. (__v4df) __B,
  3589. (__v4df) __C,
  3590. (__mmask8) __U);
  3591. }
  3592. extern __inline __m128d
  3593. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3594. _mm_mask_fmadd_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
  3595. {
  3596. return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
  3597. (__v2df) __B,
  3598. (__v2df) __C,
  3599. (__mmask8) __U);
  3600. }
  3601. extern __inline __m128d
  3602. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3603. _mm_mask3_fmadd_pd (__m128d __A, __m128d __B, __m128d __C,
  3604. __mmask8 __U)
  3605. {
  3606. return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A,
  3607. (__v2df) __B,
  3608. (__v2df) __C,
  3609. (__mmask8) __U);
  3610. }
  3611. extern __inline __m128d
  3612. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3613. _mm_maskz_fmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
  3614. __m128d __C)
  3615. {
  3616. return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
  3617. (__v2df) __B,
  3618. (__v2df) __C,
  3619. (__mmask8) __U);
  3620. }
  3621. extern __inline __m256
  3622. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3623. _mm256_mask_fmadd_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
  3624. {
  3625. return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
  3626. (__v8sf) __B,
  3627. (__v8sf) __C,
  3628. (__mmask8) __U);
  3629. }
  3630. extern __inline __m256
  3631. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3632. _mm256_mask3_fmadd_ps (__m256 __A, __m256 __B, __m256 __C,
  3633. __mmask8 __U)
  3634. {
  3635. return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A,
  3636. (__v8sf) __B,
  3637. (__v8sf) __C,
  3638. (__mmask8) __U);
  3639. }
  3640. extern __inline __m256
  3641. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3642. _mm256_maskz_fmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
  3643. __m256 __C)
  3644. {
  3645. return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
  3646. (__v8sf) __B,
  3647. (__v8sf) __C,
  3648. (__mmask8) __U);
  3649. }
  3650. extern __inline __m128
  3651. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3652. _mm_mask_fmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
  3653. {
  3654. return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
  3655. (__v4sf) __B,
  3656. (__v4sf) __C,
  3657. (__mmask8) __U);
  3658. }
  3659. extern __inline __m128
  3660. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3661. _mm_mask3_fmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
  3662. {
  3663. return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A,
  3664. (__v4sf) __B,
  3665. (__v4sf) __C,
  3666. (__mmask8) __U);
  3667. }
  3668. extern __inline __m128
  3669. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3670. _mm_maskz_fmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
  3671. {
  3672. return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
  3673. (__v4sf) __B,
  3674. (__v4sf) __C,
  3675. (__mmask8) __U);
  3676. }
  3677. extern __inline __m256d
  3678. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3679. _mm256_mask_fmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
  3680. __m256d __C)
  3681. {
  3682. return (__m256d) __builtin_ia32_vfmsubpd256_mask ((__v4df) __A,
  3683. (__v4df) __B,
  3684. (__v4df) __C,
  3685. (__mmask8) __U);
  3686. }
  3687. extern __inline __m256d
  3688. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3689. _mm256_mask3_fmsub_pd (__m256d __A, __m256d __B, __m256d __C,
  3690. __mmask8 __U)
  3691. {
  3692. return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A,
  3693. (__v4df) __B,
  3694. (__v4df) __C,
  3695. (__mmask8) __U);
  3696. }
  3697. extern __inline __m256d
  3698. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3699. _mm256_maskz_fmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
  3700. __m256d __C)
  3701. {
  3702. return (__m256d) __builtin_ia32_vfmsubpd256_maskz ((__v4df) __A,
  3703. (__v4df) __B,
  3704. (__v4df) __C,
  3705. (__mmask8) __U);
  3706. }
  3707. extern __inline __m128d
  3708. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3709. _mm_mask_fmsub_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
  3710. {
  3711. return (__m128d) __builtin_ia32_vfmsubpd128_mask ((__v2df) __A,
  3712. (__v2df) __B,
  3713. (__v2df) __C,
  3714. (__mmask8) __U);
  3715. }
  3716. extern __inline __m128d
  3717. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3718. _mm_mask3_fmsub_pd (__m128d __A, __m128d __B, __m128d __C,
  3719. __mmask8 __U)
  3720. {
  3721. return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A,
  3722. (__v2df) __B,
  3723. (__v2df) __C,
  3724. (__mmask8) __U);
  3725. }
  3726. extern __inline __m128d
  3727. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3728. _mm_maskz_fmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
  3729. __m128d __C)
  3730. {
  3731. return (__m128d) __builtin_ia32_vfmsubpd128_maskz ((__v2df) __A,
  3732. (__v2df) __B,
  3733. (__v2df) __C,
  3734. (__mmask8) __U);
  3735. }
  3736. extern __inline __m256
  3737. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3738. _mm256_mask_fmsub_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
  3739. {
  3740. return (__m256) __builtin_ia32_vfmsubps256_mask ((__v8sf) __A,
  3741. (__v8sf) __B,
  3742. (__v8sf) __C,
  3743. (__mmask8) __U);
  3744. }
  3745. extern __inline __m256
  3746. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3747. _mm256_mask3_fmsub_ps (__m256 __A, __m256 __B, __m256 __C,
  3748. __mmask8 __U)
  3749. {
  3750. return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A,
  3751. (__v8sf) __B,
  3752. (__v8sf) __C,
  3753. (__mmask8) __U);
  3754. }
  3755. extern __inline __m256
  3756. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3757. _mm256_maskz_fmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
  3758. __m256 __C)
  3759. {
  3760. return (__m256) __builtin_ia32_vfmsubps256_maskz ((__v8sf) __A,
  3761. (__v8sf) __B,
  3762. (__v8sf) __C,
  3763. (__mmask8) __U);
  3764. }
  3765. extern __inline __m128
  3766. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3767. _mm_mask_fmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
  3768. {
  3769. return (__m128) __builtin_ia32_vfmsubps128_mask ((__v4sf) __A,
  3770. (__v4sf) __B,
  3771. (__v4sf) __C,
  3772. (__mmask8) __U);
  3773. }
  3774. extern __inline __m128
  3775. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3776. _mm_mask3_fmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
  3777. {
  3778. return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A,
  3779. (__v4sf) __B,
  3780. (__v4sf) __C,
  3781. (__mmask8) __U);
  3782. }
  3783. extern __inline __m128
  3784. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3785. _mm_maskz_fmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
  3786. {
  3787. return (__m128) __builtin_ia32_vfmsubps128_maskz ((__v4sf) __A,
  3788. (__v4sf) __B,
  3789. (__v4sf) __C,
  3790. (__mmask8) __U);
  3791. }
  3792. extern __inline __m256d
  3793. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3794. _mm256_mask_fmaddsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
  3795. __m256d __C)
  3796. {
  3797. return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
  3798. (__v4df) __B,
  3799. (__v4df) __C,
  3800. (__mmask8) __U);
  3801. }
  3802. extern __inline __m256d
  3803. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3804. _mm256_mask3_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C,
  3805. __mmask8 __U)
  3806. {
  3807. return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A,
  3808. (__v4df) __B,
  3809. (__v4df) __C,
  3810. (__mmask8)
  3811. __U);
  3812. }
  3813. extern __inline __m256d
  3814. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3815. _mm256_maskz_fmaddsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
  3816. __m256d __C)
  3817. {
  3818. return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
  3819. (__v4df) __B,
  3820. (__v4df) __C,
  3821. (__mmask8)
  3822. __U);
  3823. }
  3824. extern __inline __m128d
  3825. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3826. _mm_mask_fmaddsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
  3827. __m128d __C)
  3828. {
  3829. return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
  3830. (__v2df) __B,
  3831. (__v2df) __C,
  3832. (__mmask8) __U);
  3833. }
  3834. extern __inline __m128d
  3835. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3836. _mm_mask3_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C,
  3837. __mmask8 __U)
  3838. {
  3839. return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A,
  3840. (__v2df) __B,
  3841. (__v2df) __C,
  3842. (__mmask8)
  3843. __U);
  3844. }
  3845. extern __inline __m128d
  3846. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3847. _mm_maskz_fmaddsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
  3848. __m128d __C)
  3849. {
  3850. return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
  3851. (__v2df) __B,
  3852. (__v2df) __C,
  3853. (__mmask8)
  3854. __U);
  3855. }
  3856. extern __inline __m256
  3857. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3858. _mm256_mask_fmaddsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
  3859. __m256 __C)
  3860. {
  3861. return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
  3862. (__v8sf) __B,
  3863. (__v8sf) __C,
  3864. (__mmask8) __U);
  3865. }
  3866. extern __inline __m256
  3867. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3868. _mm256_mask3_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C,
  3869. __mmask8 __U)
  3870. {
  3871. return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A,
  3872. (__v8sf) __B,
  3873. (__v8sf) __C,
  3874. (__mmask8) __U);
  3875. }
  3876. extern __inline __m256
  3877. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3878. _mm256_maskz_fmaddsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
  3879. __m256 __C)
  3880. {
  3881. return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
  3882. (__v8sf) __B,
  3883. (__v8sf) __C,
  3884. (__mmask8) __U);
  3885. }
  3886. extern __inline __m128
  3887. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3888. _mm_mask_fmaddsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
  3889. {
  3890. return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
  3891. (__v4sf) __B,
  3892. (__v4sf) __C,
  3893. (__mmask8) __U);
  3894. }
  3895. extern __inline __m128
  3896. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3897. _mm_mask3_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C,
  3898. __mmask8 __U)
  3899. {
  3900. return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A,
  3901. (__v4sf) __B,
  3902. (__v4sf) __C,
  3903. (__mmask8) __U);
  3904. }
  3905. extern __inline __m128
  3906. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3907. _mm_maskz_fmaddsub_ps (__mmask8 __U, __m128 __A, __m128 __B,
  3908. __m128 __C)
  3909. {
  3910. return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
  3911. (__v4sf) __B,
  3912. (__v4sf) __C,
  3913. (__mmask8) __U);
  3914. }
  3915. extern __inline __m256d
  3916. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3917. _mm256_mask_fmsubadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
  3918. __m256d __C)
  3919. {
  3920. return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
  3921. (__v4df) __B,
  3922. -(__v4df) __C,
  3923. (__mmask8) __U);
  3924. }
  3925. extern __inline __m256d
  3926. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3927. _mm256_mask3_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C,
  3928. __mmask8 __U)
  3929. {
  3930. return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A,
  3931. (__v4df) __B,
  3932. (__v4df) __C,
  3933. (__mmask8)
  3934. __U);
  3935. }
  3936. extern __inline __m256d
  3937. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3938. _mm256_maskz_fmsubadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
  3939. __m256d __C)
  3940. {
  3941. return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
  3942. (__v4df) __B,
  3943. -(__v4df) __C,
  3944. (__mmask8)
  3945. __U);
  3946. }
  3947. extern __inline __m128d
  3948. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3949. _mm_mask_fmsubadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
  3950. __m128d __C)
  3951. {
  3952. return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
  3953. (__v2df) __B,
  3954. -(__v2df) __C,
  3955. (__mmask8) __U);
  3956. }
  3957. extern __inline __m128d
  3958. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3959. _mm_mask3_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C,
  3960. __mmask8 __U)
  3961. {
  3962. return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A,
  3963. (__v2df) __B,
  3964. (__v2df) __C,
  3965. (__mmask8)
  3966. __U);
  3967. }
  3968. extern __inline __m128d
  3969. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3970. _mm_maskz_fmsubadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
  3971. __m128d __C)
  3972. {
  3973. return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
  3974. (__v2df) __B,
  3975. -(__v2df) __C,
  3976. (__mmask8)
  3977. __U);
  3978. }
  3979. extern __inline __m256
  3980. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3981. _mm256_mask_fmsubadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
  3982. __m256 __C)
  3983. {
  3984. return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
  3985. (__v8sf) __B,
  3986. -(__v8sf) __C,
  3987. (__mmask8) __U);
  3988. }
  3989. extern __inline __m256
  3990. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3991. _mm256_mask3_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C,
  3992. __mmask8 __U)
  3993. {
  3994. return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A,
  3995. (__v8sf) __B,
  3996. (__v8sf) __C,
  3997. (__mmask8) __U);
  3998. }
  3999. extern __inline __m256
  4000. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4001. _mm256_maskz_fmsubadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
  4002. __m256 __C)
  4003. {
  4004. return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
  4005. (__v8sf) __B,
  4006. -(__v8sf) __C,
  4007. (__mmask8) __U);
  4008. }
  4009. extern __inline __m128
  4010. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4011. _mm_mask_fmsubadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
  4012. {
  4013. return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
  4014. (__v4sf) __B,
  4015. -(__v4sf) __C,
  4016. (__mmask8) __U);
  4017. }
  4018. extern __inline __m128
  4019. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4020. _mm_mask3_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C,
  4021. __mmask8 __U)
  4022. {
  4023. return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A,
  4024. (__v4sf) __B,
  4025. (__v4sf) __C,
  4026. (__mmask8) __U);
  4027. }
  4028. extern __inline __m128
  4029. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4030. _mm_maskz_fmsubadd_ps (__mmask8 __U, __m128 __A, __m128 __B,
  4031. __m128 __C)
  4032. {
  4033. return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
  4034. (__v4sf) __B,
  4035. -(__v4sf) __C,
  4036. (__mmask8) __U);
  4037. }
  4038. extern __inline __m256d
  4039. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4040. _mm256_mask_fnmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
  4041. __m256d __C)
  4042. {
  4043. return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A,
  4044. (__v4df) __B,
  4045. (__v4df) __C,
  4046. (__mmask8) __U);
  4047. }
  4048. extern __inline __m256d
  4049. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4050. _mm256_mask3_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C,
  4051. __mmask8 __U)
  4052. {
  4053. return (__m256d) __builtin_ia32_vfnmaddpd256_mask3 ((__v4df) __A,
  4054. (__v4df) __B,
  4055. (__v4df) __C,
  4056. (__mmask8) __U);
  4057. }
  4058. extern __inline __m256d
  4059. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4060. _mm256_maskz_fnmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
  4061. __m256d __C)
  4062. {
  4063. return (__m256d) __builtin_ia32_vfnmaddpd256_maskz ((__v4df) __A,
  4064. (__v4df) __B,
  4065. (__v4df) __C,
  4066. (__mmask8) __U);
  4067. }
  4068. extern __inline __m128d
  4069. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4070. _mm_mask_fnmadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
  4071. __m128d __C)
  4072. {
  4073. return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A,
  4074. (__v2df) __B,
  4075. (__v2df) __C,
  4076. (__mmask8) __U);
  4077. }
  4078. extern __inline __m128d
  4079. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4080. _mm_mask3_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C,
  4081. __mmask8 __U)
  4082. {
  4083. return (__m128d) __builtin_ia32_vfnmaddpd128_mask3 ((__v2df) __A,
  4084. (__v2df) __B,
  4085. (__v2df) __C,
  4086. (__mmask8) __U);
  4087. }
  4088. extern __inline __m128d
  4089. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4090. _mm_maskz_fnmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
  4091. __m128d __C)
  4092. {
  4093. return (__m128d) __builtin_ia32_vfnmaddpd128_maskz ((__v2df) __A,
  4094. (__v2df) __B,
  4095. (__v2df) __C,
  4096. (__mmask8) __U);
  4097. }
  4098. extern __inline __m256
  4099. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4100. _mm256_mask_fnmadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
  4101. __m256 __C)
  4102. {
  4103. return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A,
  4104. (__v8sf) __B,
  4105. (__v8sf) __C,
  4106. (__mmask8) __U);
  4107. }
  4108. extern __inline __m256
  4109. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4110. _mm256_mask3_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C,
  4111. __mmask8 __U)
  4112. {
  4113. return (__m256) __builtin_ia32_vfnmaddps256_mask3 ((__v8sf) __A,
  4114. (__v8sf) __B,
  4115. (__v8sf) __C,
  4116. (__mmask8) __U);
  4117. }
  4118. extern __inline __m256
  4119. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4120. _mm256_maskz_fnmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
  4121. __m256 __C)
  4122. {
  4123. return (__m256) __builtin_ia32_vfnmaddps256_maskz ((__v8sf) __A,
  4124. (__v8sf) __B,
  4125. (__v8sf) __C,
  4126. (__mmask8) __U);
  4127. }
  4128. extern __inline __m128
  4129. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4130. _mm_mask_fnmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
  4131. {
  4132. return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A,
  4133. (__v4sf) __B,
  4134. (__v4sf) __C,
  4135. (__mmask8) __U);
  4136. }
  4137. extern __inline __m128
  4138. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4139. _mm_mask3_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
  4140. {
  4141. return (__m128) __builtin_ia32_vfnmaddps128_mask3 ((__v4sf) __A,
  4142. (__v4sf) __B,
  4143. (__v4sf) __C,
  4144. (__mmask8) __U);
  4145. }
  4146. extern __inline __m128
  4147. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4148. _mm_maskz_fnmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
  4149. {
  4150. return (__m128) __builtin_ia32_vfnmaddps128_maskz ((__v4sf) __A,
  4151. (__v4sf) __B,
  4152. (__v4sf) __C,
  4153. (__mmask8) __U);
  4154. }
  4155. extern __inline __m256d
  4156. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4157. _mm256_mask_fnmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
  4158. __m256d __C)
  4159. {
  4160. return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A,
  4161. (__v4df) __B,
  4162. (__v4df) __C,
  4163. (__mmask8) __U);
  4164. }
  4165. extern __inline __m256d
  4166. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4167. _mm256_mask3_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C,
  4168. __mmask8 __U)
  4169. {
  4170. return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A,
  4171. (__v4df) __B,
  4172. (__v4df) __C,
  4173. (__mmask8) __U);
  4174. }
  4175. extern __inline __m256d
  4176. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4177. _mm256_maskz_fnmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
  4178. __m256d __C)
  4179. {
  4180. return (__m256d) __builtin_ia32_vfnmsubpd256_maskz ((__v4df) __A,
  4181. (__v4df) __B,
  4182. (__v4df) __C,
  4183. (__mmask8) __U);
  4184. }
  4185. extern __inline __m128d
  4186. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4187. _mm_mask_fnmsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
  4188. __m128d __C)
  4189. {
  4190. return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A,
  4191. (__v2df) __B,
  4192. (__v2df) __C,
  4193. (__mmask8) __U);
  4194. }
  4195. extern __inline __m128d
  4196. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4197. _mm_mask3_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C,
  4198. __mmask8 __U)
  4199. {
  4200. return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A,
  4201. (__v2df) __B,
  4202. (__v2df) __C,
  4203. (__mmask8) __U);
  4204. }
  4205. extern __inline __m128d
  4206. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4207. _mm_maskz_fnmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
  4208. __m128d __C)
  4209. {
  4210. return (__m128d) __builtin_ia32_vfnmsubpd128_maskz ((__v2df) __A,
  4211. (__v2df) __B,
  4212. (__v2df) __C,
  4213. (__mmask8) __U);
  4214. }
  4215. extern __inline __m256
  4216. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4217. _mm256_mask_fnmsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
  4218. __m256 __C)
  4219. {
  4220. return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A,
  4221. (__v8sf) __B,
  4222. (__v8sf) __C,
  4223. (__mmask8) __U);
  4224. }
  4225. extern __inline __m256
  4226. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4227. _mm256_mask3_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C,
  4228. __mmask8 __U)
  4229. {
  4230. return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A,
  4231. (__v8sf) __B,
  4232. (__v8sf) __C,
  4233. (__mmask8) __U);
  4234. }
  4235. extern __inline __m256
  4236. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4237. _mm256_maskz_fnmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
  4238. __m256 __C)
  4239. {
  4240. return (__m256) __builtin_ia32_vfnmsubps256_maskz ((__v8sf) __A,
  4241. (__v8sf) __B,
  4242. (__v8sf) __C,
  4243. (__mmask8) __U);
  4244. }
  4245. extern __inline __m128
  4246. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4247. _mm_mask_fnmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
  4248. {
  4249. return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A,
  4250. (__v4sf) __B,
  4251. (__v4sf) __C,
  4252. (__mmask8) __U);
  4253. }
  4254. extern __inline __m128
  4255. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4256. _mm_mask3_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
  4257. {
  4258. return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A,
  4259. (__v4sf) __B,
  4260. (__v4sf) __C,
  4261. (__mmask8) __U);
  4262. }
  4263. extern __inline __m128
  4264. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4265. _mm_maskz_fnmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
  4266. {
  4267. return (__m128) __builtin_ia32_vfnmsubps128_maskz ((__v4sf) __A,
  4268. (__v4sf) __B,
  4269. (__v4sf) __C,
  4270. (__mmask8) __U);
  4271. }
  4272. extern __inline __m128i
  4273. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4274. _mm_mask_and_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  4275. __m128i __B)
  4276. {
  4277. return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
  4278. (__v4si) __B,
  4279. (__v4si) __W,
  4280. (__mmask8) __U);
  4281. }
  4282. extern __inline __m128i
  4283. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4284. _mm_maskz_and_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
  4285. {
  4286. return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
  4287. (__v4si) __B,
  4288. (__v4si)
  4289. _mm_setzero_si128 (),
  4290. (__mmask8) __U);
  4291. }
  4292. extern __inline __m256i
  4293. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4294. _mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  4295. __m256i __B)
  4296. {
  4297. return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
  4298. (__v8si) __B,
  4299. (__v8si) __W,
  4300. (__mmask8) __U);
  4301. }
  4302. extern __inline __m256i
  4303. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4304. _mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
  4305. {
  4306. return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
  4307. (__v8si) __B,
  4308. (__v8si)
  4309. _mm256_setzero_si256 (),
  4310. (__mmask8) __U);
  4311. }
  4312. extern __inline __m128i
  4313. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4314. _mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  4315. __m128i __B)
  4316. {
  4317. return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
  4318. (__v4si) __B,
  4319. (__v4si) __W,
  4320. (__mmask8) __U);
  4321. }
  4322. extern __inline __m128i
  4323. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4324. _mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
  4325. {
  4326. return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
  4327. (__v4si) __B,
  4328. (__v4si)
  4329. _mm_setzero_si128 (),
  4330. (__mmask8) __U);
  4331. }
  4332. extern __inline __m256i
  4333. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4334. _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  4335. __m256i __B)
  4336. {
  4337. return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
  4338. (__v8si) __B,
  4339. (__v8si) __W,
  4340. (__mmask8) __U);
  4341. }
  4342. extern __inline __m256i
  4343. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4344. _mm256_maskz_or_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
  4345. {
  4346. return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
  4347. (__v8si) __B,
  4348. (__v8si)
  4349. _mm256_setzero_si256 (),
  4350. (__mmask8) __U);
  4351. }
  4352. extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  4353. _mm256_or_epi32 (__m256i __A, __m256i __B)
  4354. {
  4355. return (__m256i) ((__v8su)__A | (__v8su)__B);
  4356. }
  4357. extern __inline __m128i
  4358. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4359. _mm_mask_or_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
  4360. {
  4361. return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
  4362. (__v4si) __B,
  4363. (__v4si) __W,
  4364. (__mmask8) __U);
  4365. }
  4366. extern __inline __m128i
  4367. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4368. _mm_maskz_or_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
  4369. {
  4370. return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
  4371. (__v4si) __B,
  4372. (__v4si)
  4373. _mm_setzero_si128 (),
  4374. (__mmask8) __U);
  4375. }
  4376. extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  4377. _mm_or_epi32 (__m128i __A, __m128i __B)
  4378. {
  4379. return (__m128i) ((__v4su)__A | (__v4su)__B);
  4380. }
  4381. extern __inline __m256i
  4382. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4383. _mm256_mask_xor_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  4384. __m256i __B)
  4385. {
  4386. return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
  4387. (__v8si) __B,
  4388. (__v8si) __W,
  4389. (__mmask8) __U);
  4390. }
  4391. extern __inline __m256i
  4392. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4393. _mm256_maskz_xor_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
  4394. {
  4395. return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
  4396. (__v8si) __B,
  4397. (__v8si)
  4398. _mm256_setzero_si256 (),
  4399. (__mmask8) __U);
  4400. }
  4401. extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  4402. _mm256_xor_epi32 (__m256i __A, __m256i __B)
  4403. {
  4404. return (__m256i) ((__v8su)__A ^ (__v8su)__B);
  4405. }
  4406. extern __inline __m128i
  4407. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4408. _mm_mask_xor_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  4409. __m128i __B)
  4410. {
  4411. return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
  4412. (__v4si) __B,
  4413. (__v4si) __W,
  4414. (__mmask8) __U);
  4415. }
  4416. extern __inline __m128i
  4417. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4418. _mm_maskz_xor_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
  4419. {
  4420. return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
  4421. (__v4si) __B,
  4422. (__v4si)
  4423. _mm_setzero_si128 (),
  4424. (__mmask8) __U);
  4425. }
  4426. extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  4427. _mm_xor_epi32 (__m128i __A, __m128i __B)
  4428. {
  4429. return (__m128i) ((__v4su)__A ^ (__v4su)__B);
  4430. }
  4431. extern __inline __m128
  4432. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4433. _mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A)
  4434. {
  4435. return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
  4436. (__v4sf) __W,
  4437. (__mmask8) __U);
  4438. }
  4439. extern __inline __m128
  4440. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4441. _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A)
  4442. {
  4443. return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
  4444. (__v4sf)
  4445. _mm_setzero_ps (),
  4446. (__mmask8) __U);
  4447. }
  4448. extern __inline __m128
  4449. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4450. _mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A)
  4451. {
  4452. return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
  4453. (__v4sf) __W,
  4454. (__mmask8) __U);
  4455. }
  4456. extern __inline __m128
  4457. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4458. _mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A)
  4459. {
  4460. return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
  4461. (__v4sf)
  4462. _mm_setzero_ps (),
  4463. (__mmask8) __U);
  4464. }
  4465. extern __inline __m256i
  4466. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4467. _mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
  4468. {
  4469. return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
  4470. (__v8si) __W,
  4471. (__mmask8) __U);
  4472. }
  4473. extern __inline __m256i
  4474. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4475. _mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A)
  4476. {
  4477. return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
  4478. (__v8si)
  4479. _mm256_setzero_si256 (),
  4480. (__mmask8) __U);
  4481. }
  4482. extern __inline __m128i
  4483. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4484. _mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
  4485. {
  4486. return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
  4487. (__v4si) __W,
  4488. (__mmask8) __U);
  4489. }
  4490. extern __inline __m128i
  4491. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4492. _mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A)
  4493. {
  4494. return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
  4495. (__v4si)
  4496. _mm_setzero_si128 (),
  4497. (__mmask8) __U);
  4498. }
  4499. extern __inline __m256i
  4500. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4501. _mm256_cvtps_epu32 (__m256 __A)
  4502. {
  4503. return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
  4504. (__v8si)
  4505. _mm256_setzero_si256 (),
  4506. (__mmask8) -1);
  4507. }
  4508. extern __inline __m256i
  4509. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4510. _mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
  4511. {
  4512. return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
  4513. (__v8si) __W,
  4514. (__mmask8) __U);
  4515. }
  4516. extern __inline __m256i
  4517. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4518. _mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A)
  4519. {
  4520. return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
  4521. (__v8si)
  4522. _mm256_setzero_si256 (),
  4523. (__mmask8) __U);
  4524. }
  4525. extern __inline __m128i
  4526. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4527. _mm_cvtps_epu32 (__m128 __A)
  4528. {
  4529. return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
  4530. (__v4si)
  4531. _mm_setzero_si128 (),
  4532. (__mmask8) -1);
  4533. }
  4534. extern __inline __m128i
  4535. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4536. _mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
  4537. {
  4538. return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
  4539. (__v4si) __W,
  4540. (__mmask8) __U);
  4541. }
  4542. extern __inline __m128i
  4543. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4544. _mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A)
  4545. {
  4546. return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
  4547. (__v4si)
  4548. _mm_setzero_si128 (),
  4549. (__mmask8) __U);
  4550. }
  4551. extern __inline __m256d
  4552. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4553. _mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
  4554. {
  4555. return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
  4556. (__v4df) __W,
  4557. (__mmask8) __U);
  4558. }
  4559. extern __inline __m256d
  4560. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4561. _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
  4562. {
  4563. return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
  4564. (__v4df)
  4565. _mm256_setzero_pd (),
  4566. (__mmask8) __U);
  4567. }
  4568. extern __inline __m128d
  4569. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4570. _mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
  4571. {
  4572. return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
  4573. (__v2df) __W,
  4574. (__mmask8) __U);
  4575. }
  4576. extern __inline __m128d
  4577. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4578. _mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
  4579. {
  4580. return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
  4581. (__v2df)
  4582. _mm_setzero_pd (),
  4583. (__mmask8) __U);
  4584. }
  4585. extern __inline __m256
  4586. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4587. _mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
  4588. {
  4589. return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
  4590. (__v8sf) __W,
  4591. (__mmask8) __U);
  4592. }
  4593. extern __inline __m256
  4594. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4595. _mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
  4596. {
  4597. return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
  4598. (__v8sf)
  4599. _mm256_setzero_ps (),
  4600. (__mmask8) __U);
  4601. }
  4602. extern __inline __m128
  4603. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4604. _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
  4605. {
  4606. return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
  4607. (__v4sf) __W,
  4608. (__mmask8) __U);
  4609. }
  4610. extern __inline __m128
  4611. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4612. _mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
  4613. {
  4614. return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
  4615. (__v4sf)
  4616. _mm_setzero_ps (),
  4617. (__mmask8) __U);
  4618. }
  4619. extern __inline __m256
  4620. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4621. _mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
  4622. {
  4623. return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
  4624. (__v8sf) __W,
  4625. (__mmask8) __U);
  4626. }
  4627. extern __inline __m256
  4628. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4629. _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
  4630. {
  4631. return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
  4632. (__v8sf)
  4633. _mm256_setzero_ps (),
  4634. (__mmask8) __U);
  4635. }
  4636. extern __inline __m128
  4637. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4638. _mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
  4639. {
  4640. return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
  4641. (__v4sf) __W,
  4642. (__mmask8) __U);
  4643. }
  4644. extern __inline __m128
  4645. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4646. _mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
  4647. {
  4648. return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
  4649. (__v4sf)
  4650. _mm_setzero_ps (),
  4651. (__mmask8) __U);
  4652. }
  4653. extern __inline __m128i
  4654. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4655. _mm_mask_unpackhi_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  4656. __m128i __B)
  4657. {
  4658. return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
  4659. (__v4si) __B,
  4660. (__v4si) __W,
  4661. (__mmask8) __U);
  4662. }
  4663. extern __inline __m128i
  4664. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4665. _mm_maskz_unpackhi_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
  4666. {
  4667. return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
  4668. (__v4si) __B,
  4669. (__v4si)
  4670. _mm_setzero_si128 (),
  4671. (__mmask8) __U);
  4672. }
  4673. extern __inline __m256i
  4674. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4675. _mm256_mask_unpackhi_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  4676. __m256i __B)
  4677. {
  4678. return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
  4679. (__v8si) __B,
  4680. (__v8si) __W,
  4681. (__mmask8) __U);
  4682. }
  4683. extern __inline __m256i
  4684. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4685. _mm256_maskz_unpackhi_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
  4686. {
  4687. return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
  4688. (__v8si) __B,
  4689. (__v8si)
  4690. _mm256_setzero_si256 (),
  4691. (__mmask8) __U);
  4692. }
  4693. extern __inline __m128i
  4694. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4695. _mm_mask_unpackhi_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
  4696. __m128i __B)
  4697. {
  4698. return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
  4699. (__v2di) __B,
  4700. (__v2di) __W,
  4701. (__mmask8) __U);
  4702. }
  4703. extern __inline __m128i
  4704. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4705. _mm_maskz_unpackhi_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
  4706. {
  4707. return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
  4708. (__v2di) __B,
  4709. (__v2di)
  4710. _mm_setzero_si128 (),
  4711. (__mmask8) __U);
  4712. }
  4713. extern __inline __m256i
  4714. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4715. _mm256_mask_unpackhi_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  4716. __m256i __B)
  4717. {
  4718. return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
  4719. (__v4di) __B,
  4720. (__v4di) __W,
  4721. (__mmask8) __U);
  4722. }
  4723. extern __inline __m256i
  4724. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4725. _mm256_maskz_unpackhi_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
  4726. {
  4727. return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
  4728. (__v4di) __B,
  4729. (__v4di)
  4730. _mm256_setzero_si256 (),
  4731. (__mmask8) __U);
  4732. }
  4733. extern __inline __m128i
  4734. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4735. _mm_mask_unpacklo_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  4736. __m128i __B)
  4737. {
  4738. return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
  4739. (__v4si) __B,
  4740. (__v4si) __W,
  4741. (__mmask8) __U);
  4742. }
  4743. extern __inline __m128i
  4744. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4745. _mm_maskz_unpacklo_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
  4746. {
  4747. return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
  4748. (__v4si) __B,
  4749. (__v4si)
  4750. _mm_setzero_si128 (),
  4751. (__mmask8) __U);
  4752. }
  4753. extern __inline __m256i
  4754. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4755. _mm256_mask_unpacklo_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  4756. __m256i __B)
  4757. {
  4758. return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
  4759. (__v8si) __B,
  4760. (__v8si) __W,
  4761. (__mmask8) __U);
  4762. }
  4763. extern __inline __m256i
  4764. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4765. _mm256_maskz_unpacklo_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
  4766. {
  4767. return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
  4768. (__v8si) __B,
  4769. (__v8si)
  4770. _mm256_setzero_si256 (),
  4771. (__mmask8) __U);
  4772. }
  4773. extern __inline __m128i
  4774. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4775. _mm_mask_unpacklo_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
  4776. __m128i __B)
  4777. {
  4778. return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
  4779. (__v2di) __B,
  4780. (__v2di) __W,
  4781. (__mmask8) __U);
  4782. }
  4783. extern __inline __m128i
  4784. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4785. _mm_maskz_unpacklo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
  4786. {
  4787. return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
  4788. (__v2di) __B,
  4789. (__v2di)
  4790. _mm_setzero_si128 (),
  4791. (__mmask8) __U);
  4792. }
  4793. extern __inline __m256i
  4794. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4795. _mm256_mask_unpacklo_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  4796. __m256i __B)
  4797. {
  4798. return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
  4799. (__v4di) __B,
  4800. (__v4di) __W,
  4801. (__mmask8) __U);
  4802. }
  4803. extern __inline __m256i
  4804. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4805. _mm256_maskz_unpacklo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
  4806. {
  4807. return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
  4808. (__v4di) __B,
  4809. (__v4di)
  4810. _mm256_setzero_si256 (),
  4811. (__mmask8) __U);
  4812. }
  4813. extern __inline __mmask8
  4814. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4815. _mm_cmpeq_epu32_mask (__m128i __A, __m128i __B)
  4816. {
  4817. return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
  4818. (__v4si) __B, 0,
  4819. (__mmask8) -1);
  4820. }
  4821. extern __inline __mmask8
  4822. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4823. _mm_cmpeq_epi32_mask (__m128i __A, __m128i __B)
  4824. {
  4825. return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
  4826. (__v4si) __B,
  4827. (__mmask8) -1);
  4828. }
  4829. extern __inline __mmask8
  4830. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4831. _mm_mask_cmpeq_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B)
  4832. {
  4833. return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
  4834. (__v4si) __B, 0, __U);
  4835. }
  4836. extern __inline __mmask8
  4837. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4838. _mm_mask_cmpeq_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
  4839. {
  4840. return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
  4841. (__v4si) __B, __U);
  4842. }
  4843. extern __inline __mmask8
  4844. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4845. _mm256_cmpeq_epu32_mask (__m256i __A, __m256i __B)
  4846. {
  4847. return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
  4848. (__v8si) __B, 0,
  4849. (__mmask8) -1);
  4850. }
  4851. extern __inline __mmask8
  4852. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4853. _mm256_cmpeq_epi32_mask (__m256i __A, __m256i __B)
  4854. {
  4855. return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
  4856. (__v8si) __B,
  4857. (__mmask8) -1);
  4858. }
  4859. extern __inline __mmask8
  4860. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4861. _mm256_mask_cmpeq_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B)
  4862. {
  4863. return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
  4864. (__v8si) __B, 0, __U);
  4865. }
  4866. extern __inline __mmask8
  4867. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4868. _mm256_mask_cmpeq_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
  4869. {
  4870. return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
  4871. (__v8si) __B, __U);
  4872. }
  4873. extern __inline __mmask8
  4874. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4875. _mm_cmpeq_epu64_mask (__m128i __A, __m128i __B)
  4876. {
  4877. return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
  4878. (__v2di) __B, 0,
  4879. (__mmask8) -1);
  4880. }
  4881. extern __inline __mmask8
  4882. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4883. _mm_cmpeq_epi64_mask (__m128i __A, __m128i __B)
  4884. {
  4885. return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
  4886. (__v2di) __B,
  4887. (__mmask8) -1);
  4888. }
  4889. extern __inline __mmask8
  4890. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4891. _mm_mask_cmpeq_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B)
  4892. {
  4893. return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
  4894. (__v2di) __B, 0, __U);
  4895. }
  4896. extern __inline __mmask8
  4897. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4898. _mm_mask_cmpeq_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
  4899. {
  4900. return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
  4901. (__v2di) __B, __U);
  4902. }
  4903. extern __inline __mmask8
  4904. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4905. _mm256_cmpeq_epu64_mask (__m256i __A, __m256i __B)
  4906. {
  4907. return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
  4908. (__v4di) __B, 0,
  4909. (__mmask8) -1);
  4910. }
  4911. extern __inline __mmask8
  4912. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4913. _mm256_cmpeq_epi64_mask (__m256i __A, __m256i __B)
  4914. {
  4915. return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
  4916. (__v4di) __B,
  4917. (__mmask8) -1);
  4918. }
  4919. extern __inline __mmask8
  4920. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4921. _mm256_mask_cmpeq_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B)
  4922. {
  4923. return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
  4924. (__v4di) __B, 0, __U);
  4925. }
  4926. extern __inline __mmask8
  4927. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4928. _mm256_mask_cmpeq_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
  4929. {
  4930. return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
  4931. (__v4di) __B, __U);
  4932. }
  4933. extern __inline __mmask8
  4934. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4935. _mm_cmpgt_epu32_mask (__m128i __A, __m128i __B)
  4936. {
  4937. return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
  4938. (__v4si) __B, 6,
  4939. (__mmask8) -1);
  4940. }
  4941. extern __inline __mmask8
  4942. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4943. _mm_cmpgt_epi32_mask (__m128i __A, __m128i __B)
  4944. {
  4945. return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
  4946. (__v4si) __B,
  4947. (__mmask8) -1);
  4948. }
  4949. extern __inline __mmask8
  4950. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4951. _mm_mask_cmpgt_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B)
  4952. {
  4953. return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
  4954. (__v4si) __B, 6, __U);
  4955. }
  4956. extern __inline __mmask8
  4957. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4958. _mm_mask_cmpgt_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
  4959. {
  4960. return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
  4961. (__v4si) __B, __U);
  4962. }
  4963. extern __inline __mmask8
  4964. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4965. _mm256_cmpgt_epu32_mask (__m256i __A, __m256i __B)
  4966. {
  4967. return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
  4968. (__v8si) __B, 6,
  4969. (__mmask8) -1);
  4970. }
  4971. extern __inline __mmask8
  4972. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4973. _mm256_cmpgt_epi32_mask (__m256i __A, __m256i __B)
  4974. {
  4975. return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
  4976. (__v8si) __B,
  4977. (__mmask8) -1);
  4978. }
  4979. extern __inline __mmask8
  4980. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4981. _mm256_mask_cmpgt_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B)
  4982. {
  4983. return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
  4984. (__v8si) __B, 6, __U);
  4985. }
  4986. extern __inline __mmask8
  4987. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4988. _mm256_mask_cmpgt_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
  4989. {
  4990. return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
  4991. (__v8si) __B, __U);
  4992. }
  4993. extern __inline __mmask8
  4994. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4995. _mm_cmpgt_epu64_mask (__m128i __A, __m128i __B)
  4996. {
  4997. return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
  4998. (__v2di) __B, 6,
  4999. (__mmask8) -1);
  5000. }
  5001. extern __inline __mmask8
  5002. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5003. _mm_cmpgt_epi64_mask (__m128i __A, __m128i __B)
  5004. {
  5005. return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
  5006. (__v2di) __B,
  5007. (__mmask8) -1);
  5008. }
  5009. extern __inline __mmask8
  5010. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5011. _mm_mask_cmpgt_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B)
  5012. {
  5013. return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
  5014. (__v2di) __B, 6, __U);
  5015. }
  5016. extern __inline __mmask8
  5017. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5018. _mm_mask_cmpgt_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
  5019. {
  5020. return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
  5021. (__v2di) __B, __U);
  5022. }
  5023. extern __inline __mmask8
  5024. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5025. _mm256_cmpgt_epu64_mask (__m256i __A, __m256i __B)
  5026. {
  5027. return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
  5028. (__v4di) __B, 6,
  5029. (__mmask8) -1);
  5030. }
  5031. extern __inline __mmask8
  5032. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5033. _mm256_cmpgt_epi64_mask (__m256i __A, __m256i __B)
  5034. {
  5035. return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
  5036. (__v4di) __B,
  5037. (__mmask8) -1);
  5038. }
  5039. extern __inline __mmask8
  5040. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5041. _mm256_mask_cmpgt_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B)
  5042. {
  5043. return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
  5044. (__v4di) __B, 6, __U);
  5045. }
  5046. extern __inline __mmask8
  5047. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5048. _mm256_mask_cmpgt_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
  5049. {
  5050. return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
  5051. (__v4di) __B, __U);
  5052. }
  5053. extern __inline __mmask8
  5054. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5055. _mm_test_epi32_mask (__m128i __A, __m128i __B)
  5056. {
  5057. return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
  5058. (__v4si) __B,
  5059. (__mmask8) -1);
  5060. }
  5061. extern __inline __mmask8
  5062. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5063. _mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
  5064. {
  5065. return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
  5066. (__v4si) __B, __U);
  5067. }
  5068. extern __inline __mmask8
  5069. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5070. _mm256_test_epi32_mask (__m256i __A, __m256i __B)
  5071. {
  5072. return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
  5073. (__v8si) __B,
  5074. (__mmask8) -1);
  5075. }
  5076. extern __inline __mmask8
  5077. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5078. _mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
  5079. {
  5080. return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
  5081. (__v8si) __B, __U);
  5082. }
  5083. extern __inline __mmask8
  5084. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5085. _mm_test_epi64_mask (__m128i __A, __m128i __B)
  5086. {
  5087. return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
  5088. (__v2di) __B,
  5089. (__mmask8) -1);
  5090. }
  5091. extern __inline __mmask8
  5092. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5093. _mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
  5094. {
  5095. return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
  5096. (__v2di) __B, __U);
  5097. }
  5098. extern __inline __mmask8
  5099. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5100. _mm256_test_epi64_mask (__m256i __A, __m256i __B)
  5101. {
  5102. return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
  5103. (__v4di) __B,
  5104. (__mmask8) -1);
  5105. }
  5106. extern __inline __mmask8
  5107. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5108. _mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
  5109. {
  5110. return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
  5111. (__v4di) __B, __U);
  5112. }
  5113. extern __inline __mmask8
  5114. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5115. _mm_testn_epi32_mask (__m128i __A, __m128i __B)
  5116. {
  5117. return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
  5118. (__v4si) __B,
  5119. (__mmask8) -1);
  5120. }
  5121. extern __inline __mmask8
  5122. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5123. _mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
  5124. {
  5125. return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
  5126. (__v4si) __B, __U);
  5127. }
  5128. extern __inline __mmask8
  5129. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5130. _mm256_testn_epi32_mask (__m256i __A, __m256i __B)
  5131. {
  5132. return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
  5133. (__v8si) __B,
  5134. (__mmask8) -1);
  5135. }
  5136. extern __inline __mmask8
  5137. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5138. _mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
  5139. {
  5140. return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
  5141. (__v8si) __B, __U);
  5142. }
  5143. extern __inline __mmask8
  5144. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5145. _mm_testn_epi64_mask (__m128i __A, __m128i __B)
  5146. {
  5147. return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
  5148. (__v2di) __B,
  5149. (__mmask8) -1);
  5150. }
  5151. extern __inline __mmask8
  5152. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5153. _mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
  5154. {
  5155. return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
  5156. (__v2di) __B, __U);
  5157. }
  5158. extern __inline __mmask8
  5159. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5160. _mm256_testn_epi64_mask (__m256i __A, __m256i __B)
  5161. {
  5162. return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
  5163. (__v4di) __B,
  5164. (__mmask8) -1);
  5165. }
  5166. extern __inline __mmask8
  5167. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5168. _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
  5169. {
  5170. return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
  5171. (__v4di) __B, __U);
  5172. }
  5173. extern __inline __m256d
  5174. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5175. _mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A)
  5176. {
  5177. return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
  5178. (__v4df) __W,
  5179. (__mmask8) __U);
  5180. }
  5181. extern __inline __m256d
  5182. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5183. _mm256_maskz_compress_pd (__mmask8 __U, __m256d __A)
  5184. {
  5185. return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
  5186. (__v4df)
  5187. _mm256_setzero_pd (),
  5188. (__mmask8) __U);
  5189. }
  5190. extern __inline void
  5191. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5192. _mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A)
  5193. {
  5194. __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
  5195. (__v4df) __A,
  5196. (__mmask8) __U);
  5197. }
  5198. extern __inline __m128d
  5199. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5200. _mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A)
  5201. {
  5202. return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
  5203. (__v2df) __W,
  5204. (__mmask8) __U);
  5205. }
  5206. extern __inline __m128d
  5207. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5208. _mm_maskz_compress_pd (__mmask8 __U, __m128d __A)
  5209. {
  5210. return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
  5211. (__v2df)
  5212. _mm_setzero_pd (),
  5213. (__mmask8) __U);
  5214. }
  5215. extern __inline void
  5216. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5217. _mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A)
  5218. {
  5219. __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
  5220. (__v2df) __A,
  5221. (__mmask8) __U);
  5222. }
  5223. extern __inline __m256
  5224. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5225. _mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A)
  5226. {
  5227. return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
  5228. (__v8sf) __W,
  5229. (__mmask8) __U);
  5230. }
  5231. extern __inline __m256
  5232. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5233. _mm256_maskz_compress_ps (__mmask8 __U, __m256 __A)
  5234. {
  5235. return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
  5236. (__v8sf)
  5237. _mm256_setzero_ps (),
  5238. (__mmask8) __U);
  5239. }
  5240. extern __inline void
  5241. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5242. _mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A)
  5243. {
  5244. __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
  5245. (__v8sf) __A,
  5246. (__mmask8) __U);
  5247. }
  5248. extern __inline __m128
  5249. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5250. _mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A)
  5251. {
  5252. return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
  5253. (__v4sf) __W,
  5254. (__mmask8) __U);
  5255. }
  5256. extern __inline __m128
  5257. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5258. _mm_maskz_compress_ps (__mmask8 __U, __m128 __A)
  5259. {
  5260. return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
  5261. (__v4sf)
  5262. _mm_setzero_ps (),
  5263. (__mmask8) __U);
  5264. }
  5265. extern __inline void
  5266. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5267. _mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A)
  5268. {
  5269. __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
  5270. (__v4sf) __A,
  5271. (__mmask8) __U);
  5272. }
  5273. extern __inline __m256i
  5274. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5275. _mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
  5276. {
  5277. return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
  5278. (__v4di) __W,
  5279. (__mmask8) __U);
  5280. }
  5281. extern __inline __m256i
  5282. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5283. _mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A)
  5284. {
  5285. return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
  5286. (__v4di)
  5287. _mm256_setzero_si256 (),
  5288. (__mmask8) __U);
  5289. }
  5290. extern __inline void
  5291. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5292. _mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A)
  5293. {
  5294. __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
  5295. (__v4di) __A,
  5296. (__mmask8) __U);
  5297. }
  5298. extern __inline __m128i
  5299. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5300. _mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
  5301. {
  5302. return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
  5303. (__v2di) __W,
  5304. (__mmask8) __U);
  5305. }
  5306. extern __inline __m128i
  5307. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5308. _mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A)
  5309. {
  5310. return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
  5311. (__v2di)
  5312. _mm_setzero_si128 (),
  5313. (__mmask8) __U);
  5314. }
  5315. extern __inline void
  5316. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5317. _mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A)
  5318. {
  5319. __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
  5320. (__v2di) __A,
  5321. (__mmask8) __U);
  5322. }
  5323. extern __inline __m256i
  5324. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5325. _mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
  5326. {
  5327. return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
  5328. (__v8si) __W,
  5329. (__mmask8) __U);
  5330. }
  5331. extern __inline __m256i
  5332. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5333. _mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A)
  5334. {
  5335. return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
  5336. (__v8si)
  5337. _mm256_setzero_si256 (),
  5338. (__mmask8) __U);
  5339. }
  5340. extern __inline void
  5341. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5342. _mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A)
  5343. {
  5344. __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
  5345. (__v8si) __A,
  5346. (__mmask8) __U);
  5347. }
  5348. extern __inline __m128i
  5349. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5350. _mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
  5351. {
  5352. return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
  5353. (__v4si) __W,
  5354. (__mmask8) __U);
  5355. }
  5356. extern __inline __m128i
  5357. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5358. _mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A)
  5359. {
  5360. return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
  5361. (__v4si)
  5362. _mm_setzero_si128 (),
  5363. (__mmask8) __U);
  5364. }
  5365. extern __inline void
  5366. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5367. _mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A)
  5368. {
  5369. __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
  5370. (__v4si) __A,
  5371. (__mmask8) __U);
  5372. }
  5373. extern __inline __m256d
  5374. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5375. _mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A)
  5376. {
  5377. return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
  5378. (__v4df) __W,
  5379. (__mmask8) __U);
  5380. }
  5381. extern __inline __m256d
  5382. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5383. _mm256_maskz_expand_pd (__mmask8 __U, __m256d __A)
  5384. {
  5385. return (__m256d) __builtin_ia32_expanddf256_maskz ((__v4df) __A,
  5386. (__v4df)
  5387. _mm256_setzero_pd (),
  5388. (__mmask8) __U);
  5389. }
  5390. extern __inline __m256d
  5391. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5392. _mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P)
  5393. {
  5394. return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
  5395. (__v4df) __W,
  5396. (__mmask8)
  5397. __U);
  5398. }
  5399. extern __inline __m256d
  5400. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5401. _mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
  5402. {
  5403. return (__m256d) __builtin_ia32_expandloaddf256_maskz ((__v4df *) __P,
  5404. (__v4df)
  5405. _mm256_setzero_pd (),
  5406. (__mmask8)
  5407. __U);
  5408. }
  5409. extern __inline __m128d
  5410. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5411. _mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A)
  5412. {
  5413. return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
  5414. (__v2df) __W,
  5415. (__mmask8) __U);
  5416. }
  5417. extern __inline __m128d
  5418. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5419. _mm_maskz_expand_pd (__mmask8 __U, __m128d __A)
  5420. {
  5421. return (__m128d) __builtin_ia32_expanddf128_maskz ((__v2df) __A,
  5422. (__v2df)
  5423. _mm_setzero_pd (),
  5424. (__mmask8) __U);
  5425. }
  5426. extern __inline __m128d
  5427. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5428. _mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P)
  5429. {
  5430. return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
  5431. (__v2df) __W,
  5432. (__mmask8)
  5433. __U);
  5434. }
  5435. extern __inline __m128d
  5436. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5437. _mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
  5438. {
  5439. return (__m128d) __builtin_ia32_expandloaddf128_maskz ((__v2df *) __P,
  5440. (__v2df)
  5441. _mm_setzero_pd (),
  5442. (__mmask8)
  5443. __U);
  5444. }
  5445. extern __inline __m256
  5446. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5447. _mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A)
  5448. {
  5449. return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
  5450. (__v8sf) __W,
  5451. (__mmask8) __U);
  5452. }
  5453. extern __inline __m256
  5454. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5455. _mm256_maskz_expand_ps (__mmask8 __U, __m256 __A)
  5456. {
  5457. return (__m256) __builtin_ia32_expandsf256_maskz ((__v8sf) __A,
  5458. (__v8sf)
  5459. _mm256_setzero_ps (),
  5460. (__mmask8) __U);
  5461. }
  5462. extern __inline __m256
  5463. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5464. _mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P)
  5465. {
  5466. return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
  5467. (__v8sf) __W,
  5468. (__mmask8) __U);
  5469. }
  5470. extern __inline __m256
  5471. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5472. _mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
  5473. {
  5474. return (__m256) __builtin_ia32_expandloadsf256_maskz ((__v8sf *) __P,
  5475. (__v8sf)
  5476. _mm256_setzero_ps (),
  5477. (__mmask8)
  5478. __U);
  5479. }
  5480. extern __inline __m128
  5481. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5482. _mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A)
  5483. {
  5484. return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
  5485. (__v4sf) __W,
  5486. (__mmask8) __U);
  5487. }
  5488. extern __inline __m128
  5489. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5490. _mm_maskz_expand_ps (__mmask8 __U, __m128 __A)
  5491. {
  5492. return (__m128) __builtin_ia32_expandsf128_maskz ((__v4sf) __A,
  5493. (__v4sf)
  5494. _mm_setzero_ps (),
  5495. (__mmask8) __U);
  5496. }
  5497. extern __inline __m128
  5498. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5499. _mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P)
  5500. {
  5501. return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
  5502. (__v4sf) __W,
  5503. (__mmask8) __U);
  5504. }
  5505. extern __inline __m128
  5506. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5507. _mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
  5508. {
  5509. return (__m128) __builtin_ia32_expandloadsf128_maskz ((__v4sf *) __P,
  5510. (__v4sf)
  5511. _mm_setzero_ps (),
  5512. (__mmask8)
  5513. __U);
  5514. }
  5515. extern __inline __m256i
  5516. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5517. _mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
  5518. {
  5519. return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
  5520. (__v4di) __W,
  5521. (__mmask8) __U);
  5522. }
  5523. extern __inline __m256i
  5524. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5525. _mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A)
  5526. {
  5527. return (__m256i) __builtin_ia32_expanddi256_maskz ((__v4di) __A,
  5528. (__v4di)
  5529. _mm256_setzero_si256 (),
  5530. (__mmask8) __U);
  5531. }
  5532. extern __inline __m256i
  5533. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5534. _mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
  5535. void const *__P)
  5536. {
  5537. return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
  5538. (__v4di) __W,
  5539. (__mmask8)
  5540. __U);
  5541. }
  5542. extern __inline __m256i
  5543. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5544. _mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
  5545. {
  5546. return (__m256i) __builtin_ia32_expandloaddi256_maskz ((__v4di *) __P,
  5547. (__v4di)
  5548. _mm256_setzero_si256 (),
  5549. (__mmask8)
  5550. __U);
  5551. }
  5552. extern __inline __m128i
  5553. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5554. _mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
  5555. {
  5556. return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
  5557. (__v2di) __W,
  5558. (__mmask8) __U);
  5559. }
  5560. extern __inline __m128i
  5561. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5562. _mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A)
  5563. {
  5564. return (__m128i) __builtin_ia32_expanddi128_maskz ((__v2di) __A,
  5565. (__v2di)
  5566. _mm_setzero_si128 (),
  5567. (__mmask8) __U);
  5568. }
  5569. extern __inline __m128i
  5570. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5571. _mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
  5572. {
  5573. return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
  5574. (__v2di) __W,
  5575. (__mmask8)
  5576. __U);
  5577. }
  5578. extern __inline __m128i
  5579. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5580. _mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
  5581. {
  5582. return (__m128i) __builtin_ia32_expandloaddi128_maskz ((__v2di *) __P,
  5583. (__v2di)
  5584. _mm_setzero_si128 (),
  5585. (__mmask8)
  5586. __U);
  5587. }
  5588. extern __inline __m256i
  5589. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5590. _mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
  5591. {
  5592. return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
  5593. (__v8si) __W,
  5594. (__mmask8) __U);
  5595. }
  5596. extern __inline __m256i
  5597. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5598. _mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A)
  5599. {
  5600. return (__m256i) __builtin_ia32_expandsi256_maskz ((__v8si) __A,
  5601. (__v8si)
  5602. _mm256_setzero_si256 (),
  5603. (__mmask8) __U);
  5604. }
  5605. extern __inline __m256i
  5606. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5607. _mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
  5608. void const *__P)
  5609. {
  5610. return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
  5611. (__v8si) __W,
  5612. (__mmask8)
  5613. __U);
  5614. }
  5615. extern __inline __m256i
  5616. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5617. _mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
  5618. {
  5619. return (__m256i) __builtin_ia32_expandloadsi256_maskz ((__v8si *) __P,
  5620. (__v8si)
  5621. _mm256_setzero_si256 (),
  5622. (__mmask8)
  5623. __U);
  5624. }
  5625. extern __inline __m128i
  5626. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5627. _mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
  5628. {
  5629. return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
  5630. (__v4si) __W,
  5631. (__mmask8) __U);
  5632. }
  5633. extern __inline __m128i
  5634. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5635. _mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A)
  5636. {
  5637. return (__m128i) __builtin_ia32_expandsi128_maskz ((__v4si) __A,
  5638. (__v4si)
  5639. _mm_setzero_si128 (),
  5640. (__mmask8) __U);
  5641. }
  5642. extern __inline __m128i
  5643. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5644. _mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
  5645. {
  5646. return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
  5647. (__v4si) __W,
  5648. (__mmask8)
  5649. __U);
  5650. }
  5651. extern __inline __m128i
  5652. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5653. _mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
  5654. {
  5655. return (__m128i) __builtin_ia32_expandloadsi128_maskz ((__v4si *) __P,
  5656. (__v4si)
  5657. _mm_setzero_si128 (),
  5658. (__mmask8)
  5659. __U);
  5660. }
  5661. extern __inline __m256d
  5662. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5663. _mm256_permutex2var_pd (__m256d __A, __m256i __I, __m256d __B)
  5664. {
  5665. return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
  5666. /* idx */ ,
  5667. (__v4df) __A,
  5668. (__v4df) __B,
  5669. (__mmask8) -1);
  5670. }
  5671. extern __inline __m256d
  5672. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5673. _mm256_mask_permutex2var_pd (__m256d __A, __mmask8 __U, __m256i __I,
  5674. __m256d __B)
  5675. {
  5676. return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
  5677. /* idx */ ,
  5678. (__v4df) __A,
  5679. (__v4df) __B,
  5680. (__mmask8)
  5681. __U);
  5682. }
  5683. extern __inline __m256d
  5684. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5685. _mm256_mask2_permutex2var_pd (__m256d __A, __m256i __I, __mmask8 __U,
  5686. __m256d __B)
  5687. {
  5688. return (__m256d) __builtin_ia32_vpermi2varpd256_mask ((__v4df) __A,
  5689. (__v4di) __I
  5690. /* idx */ ,
  5691. (__v4df) __B,
  5692. (__mmask8)
  5693. __U);
  5694. }
  5695. extern __inline __m256d
  5696. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5697. _mm256_maskz_permutex2var_pd (__mmask8 __U, __m256d __A, __m256i __I,
  5698. __m256d __B)
  5699. {
  5700. return (__m256d) __builtin_ia32_vpermt2varpd256_maskz ((__v4di) __I
  5701. /* idx */ ,
  5702. (__v4df) __A,
  5703. (__v4df) __B,
  5704. (__mmask8)
  5705. __U);
  5706. }
  5707. extern __inline __m256
  5708. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5709. _mm256_permutex2var_ps (__m256 __A, __m256i __I, __m256 __B)
  5710. {
  5711. return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
  5712. /* idx */ ,
  5713. (__v8sf) __A,
  5714. (__v8sf) __B,
  5715. (__mmask8) -1);
  5716. }
  5717. extern __inline __m256
  5718. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5719. _mm256_mask_permutex2var_ps (__m256 __A, __mmask8 __U, __m256i __I,
  5720. __m256 __B)
  5721. {
  5722. return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
  5723. /* idx */ ,
  5724. (__v8sf) __A,
  5725. (__v8sf) __B,
  5726. (__mmask8) __U);
  5727. }
  5728. extern __inline __m256
  5729. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5730. _mm256_mask2_permutex2var_ps (__m256 __A, __m256i __I, __mmask8 __U,
  5731. __m256 __B)
  5732. {
  5733. return (__m256) __builtin_ia32_vpermi2varps256_mask ((__v8sf) __A,
  5734. (__v8si) __I
  5735. /* idx */ ,
  5736. (__v8sf) __B,
  5737. (__mmask8) __U);
  5738. }
  5739. extern __inline __m256
  5740. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5741. _mm256_maskz_permutex2var_ps (__mmask8 __U, __m256 __A, __m256i __I,
  5742. __m256 __B)
  5743. {
  5744. return (__m256) __builtin_ia32_vpermt2varps256_maskz ((__v8si) __I
  5745. /* idx */ ,
  5746. (__v8sf) __A,
  5747. (__v8sf) __B,
  5748. (__mmask8)
  5749. __U);
  5750. }
  5751. extern __inline __m128i
  5752. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5753. _mm_permutex2var_epi64 (__m128i __A, __m128i __I, __m128i __B)
  5754. {
  5755. return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
  5756. /* idx */ ,
  5757. (__v2di) __A,
  5758. (__v2di) __B,
  5759. (__mmask8) -1);
  5760. }
  5761. extern __inline __m128i
  5762. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5763. _mm_mask_permutex2var_epi64 (__m128i __A, __mmask8 __U, __m128i __I,
  5764. __m128i __B)
  5765. {
  5766. return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
  5767. /* idx */ ,
  5768. (__v2di) __A,
  5769. (__v2di) __B,
  5770. (__mmask8) __U);
  5771. }
  5772. extern __inline __m128i
  5773. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5774. _mm_mask2_permutex2var_epi64 (__m128i __A, __m128i __I, __mmask8 __U,
  5775. __m128i __B)
  5776. {
  5777. return (__m128i) __builtin_ia32_vpermi2varq128_mask ((__v2di) __A,
  5778. (__v2di) __I
  5779. /* idx */ ,
  5780. (__v2di) __B,
  5781. (__mmask8) __U);
  5782. }
  5783. extern __inline __m128i
  5784. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5785. _mm_maskz_permutex2var_epi64 (__mmask8 __U, __m128i __A, __m128i __I,
  5786. __m128i __B)
  5787. {
  5788. return (__m128i) __builtin_ia32_vpermt2varq128_maskz ((__v2di) __I
  5789. /* idx */ ,
  5790. (__v2di) __A,
  5791. (__v2di) __B,
  5792. (__mmask8)
  5793. __U);
  5794. }
  5795. extern __inline __m128i
  5796. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5797. _mm_permutex2var_epi32 (__m128i __A, __m128i __I, __m128i __B)
  5798. {
  5799. return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
  5800. /* idx */ ,
  5801. (__v4si) __A,
  5802. (__v4si) __B,
  5803. (__mmask8) -1);
  5804. }
  5805. extern __inline __m128i
  5806. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5807. _mm_mask_permutex2var_epi32 (__m128i __A, __mmask8 __U, __m128i __I,
  5808. __m128i __B)
  5809. {
  5810. return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
  5811. /* idx */ ,
  5812. (__v4si) __A,
  5813. (__v4si) __B,
  5814. (__mmask8) __U);
  5815. }
  5816. extern __inline __m128i
  5817. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5818. _mm_mask2_permutex2var_epi32 (__m128i __A, __m128i __I, __mmask8 __U,
  5819. __m128i __B)
  5820. {
  5821. return (__m128i) __builtin_ia32_vpermi2vard128_mask ((__v4si) __A,
  5822. (__v4si) __I
  5823. /* idx */ ,
  5824. (__v4si) __B,
  5825. (__mmask8) __U);
  5826. }
  5827. extern __inline __m128i
  5828. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5829. _mm_maskz_permutex2var_epi32 (__mmask8 __U, __m128i __A, __m128i __I,
  5830. __m128i __B)
  5831. {
  5832. return (__m128i) __builtin_ia32_vpermt2vard128_maskz ((__v4si) __I
  5833. /* idx */ ,
  5834. (__v4si) __A,
  5835. (__v4si) __B,
  5836. (__mmask8)
  5837. __U);
  5838. }
  5839. extern __inline __m256i
  5840. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5841. _mm256_permutex2var_epi64 (__m256i __A, __m256i __I, __m256i __B)
  5842. {
  5843. return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
  5844. /* idx */ ,
  5845. (__v4di) __A,
  5846. (__v4di) __B,
  5847. (__mmask8) -1);
  5848. }
  5849. extern __inline __m256i
  5850. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5851. _mm256_mask_permutex2var_epi64 (__m256i __A, __mmask8 __U, __m256i __I,
  5852. __m256i __B)
  5853. {
  5854. return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
  5855. /* idx */ ,
  5856. (__v4di) __A,
  5857. (__v4di) __B,
  5858. (__mmask8) __U);
  5859. }
  5860. extern __inline __m256i
  5861. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5862. _mm256_mask2_permutex2var_epi64 (__m256i __A, __m256i __I,
  5863. __mmask8 __U, __m256i __B)
  5864. {
  5865. return (__m256i) __builtin_ia32_vpermi2varq256_mask ((__v4di) __A,
  5866. (__v4di) __I
  5867. /* idx */ ,
  5868. (__v4di) __B,
  5869. (__mmask8) __U);
  5870. }
  5871. extern __inline __m256i
  5872. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5873. _mm256_maskz_permutex2var_epi64 (__mmask8 __U, __m256i __A,
  5874. __m256i __I, __m256i __B)
  5875. {
  5876. return (__m256i) __builtin_ia32_vpermt2varq256_maskz ((__v4di) __I
  5877. /* idx */ ,
  5878. (__v4di) __A,
  5879. (__v4di) __B,
  5880. (__mmask8)
  5881. __U);
  5882. }
  5883. extern __inline __m256i
  5884. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5885. _mm256_permutex2var_epi32 (__m256i __A, __m256i __I, __m256i __B)
  5886. {
  5887. return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
  5888. /* idx */ ,
  5889. (__v8si) __A,
  5890. (__v8si) __B,
  5891. (__mmask8) -1);
  5892. }
  5893. extern __inline __m256i
  5894. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5895. _mm256_mask_permutex2var_epi32 (__m256i __A, __mmask8 __U, __m256i __I,
  5896. __m256i __B)
  5897. {
  5898. return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
  5899. /* idx */ ,
  5900. (__v8si) __A,
  5901. (__v8si) __B,
  5902. (__mmask8) __U);
  5903. }
  5904. extern __inline __m256i
  5905. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5906. _mm256_mask2_permutex2var_epi32 (__m256i __A, __m256i __I,
  5907. __mmask8 __U, __m256i __B)
  5908. {
  5909. return (__m256i) __builtin_ia32_vpermi2vard256_mask ((__v8si) __A,
  5910. (__v8si) __I
  5911. /* idx */ ,
  5912. (__v8si) __B,
  5913. (__mmask8) __U);
  5914. }
  5915. extern __inline __m256i
  5916. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5917. _mm256_maskz_permutex2var_epi32 (__mmask8 __U, __m256i __A,
  5918. __m256i __I, __m256i __B)
  5919. {
  5920. return (__m256i) __builtin_ia32_vpermt2vard256_maskz ((__v8si) __I
  5921. /* idx */ ,
  5922. (__v8si) __A,
  5923. (__v8si) __B,
  5924. (__mmask8)
  5925. __U);
  5926. }
  5927. extern __inline __m128d
  5928. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5929. _mm_permutex2var_pd (__m128d __A, __m128i __I, __m128d __B)
  5930. {
  5931. return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
  5932. /* idx */ ,
  5933. (__v2df) __A,
  5934. (__v2df) __B,
  5935. (__mmask8) -1);
  5936. }
  5937. extern __inline __m128d
  5938. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5939. _mm_mask_permutex2var_pd (__m128d __A, __mmask8 __U, __m128i __I,
  5940. __m128d __B)
  5941. {
  5942. return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
  5943. /* idx */ ,
  5944. (__v2df) __A,
  5945. (__v2df) __B,
  5946. (__mmask8)
  5947. __U);
  5948. }
  5949. extern __inline __m128d
  5950. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5951. _mm_mask2_permutex2var_pd (__m128d __A, __m128i __I, __mmask8 __U,
  5952. __m128d __B)
  5953. {
  5954. return (__m128d) __builtin_ia32_vpermi2varpd128_mask ((__v2df) __A,
  5955. (__v2di) __I
  5956. /* idx */ ,
  5957. (__v2df) __B,
  5958. (__mmask8)
  5959. __U);
  5960. }
  5961. extern __inline __m128d
  5962. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5963. _mm_maskz_permutex2var_pd (__mmask8 __U, __m128d __A, __m128i __I,
  5964. __m128d __B)
  5965. {
  5966. return (__m128d) __builtin_ia32_vpermt2varpd128_maskz ((__v2di) __I
  5967. /* idx */ ,
  5968. (__v2df) __A,
  5969. (__v2df) __B,
  5970. (__mmask8)
  5971. __U);
  5972. }
  5973. extern __inline __m128
  5974. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5975. _mm_permutex2var_ps (__m128 __A, __m128i __I, __m128 __B)
  5976. {
  5977. return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
  5978. /* idx */ ,
  5979. (__v4sf) __A,
  5980. (__v4sf) __B,
  5981. (__mmask8) -1);
  5982. }
  5983. extern __inline __m128
  5984. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5985. _mm_mask_permutex2var_ps (__m128 __A, __mmask8 __U, __m128i __I,
  5986. __m128 __B)
  5987. {
  5988. return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
  5989. /* idx */ ,
  5990. (__v4sf) __A,
  5991. (__v4sf) __B,
  5992. (__mmask8) __U);
  5993. }
  5994. extern __inline __m128
  5995. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5996. _mm_mask2_permutex2var_ps (__m128 __A, __m128i __I, __mmask8 __U,
  5997. __m128 __B)
  5998. {
  5999. return (__m128) __builtin_ia32_vpermi2varps128_mask ((__v4sf) __A,
  6000. (__v4si) __I
  6001. /* idx */ ,
  6002. (__v4sf) __B,
  6003. (__mmask8) __U);
  6004. }
  6005. extern __inline __m128
  6006. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6007. _mm_maskz_permutex2var_ps (__mmask8 __U, __m128 __A, __m128i __I,
  6008. __m128 __B)
  6009. {
  6010. return (__m128) __builtin_ia32_vpermt2varps128_maskz ((__v4si) __I
  6011. /* idx */ ,
  6012. (__v4sf) __A,
  6013. (__v4sf) __B,
  6014. (__mmask8)
  6015. __U);
  6016. }
  6017. extern __inline __m128i
  6018. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6019. _mm_srav_epi64 (__m128i __X, __m128i __Y)
  6020. {
  6021. return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
  6022. (__v2di) __Y,
  6023. (__v2di)
  6024. _mm_setzero_si128 (),
  6025. (__mmask8) -1);
  6026. }
  6027. extern __inline __m128i
  6028. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6029. _mm_mask_srav_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
  6030. __m128i __Y)
  6031. {
  6032. return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
  6033. (__v2di) __Y,
  6034. (__v2di) __W,
  6035. (__mmask8) __U);
  6036. }
  6037. extern __inline __m128i
  6038. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6039. _mm_maskz_srav_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
  6040. {
  6041. return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
  6042. (__v2di) __Y,
  6043. (__v2di)
  6044. _mm_setzero_si128 (),
  6045. (__mmask8) __U);
  6046. }
  6047. extern __inline __m256i
  6048. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6049. _mm256_mask_sllv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
  6050. __m256i __Y)
  6051. {
  6052. return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
  6053. (__v8si) __Y,
  6054. (__v8si) __W,
  6055. (__mmask8) __U);
  6056. }
  6057. extern __inline __m256i
  6058. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6059. _mm256_maskz_sllv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
  6060. {
  6061. return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
  6062. (__v8si) __Y,
  6063. (__v8si)
  6064. _mm256_setzero_si256 (),
  6065. (__mmask8) __U);
  6066. }
  6067. extern __inline __m128i
  6068. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6069. _mm_mask_sllv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
  6070. __m128i __Y)
  6071. {
  6072. return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
  6073. (__v4si) __Y,
  6074. (__v4si) __W,
  6075. (__mmask8) __U);
  6076. }
  6077. extern __inline __m128i
  6078. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6079. _mm_maskz_sllv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
  6080. {
  6081. return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
  6082. (__v4si) __Y,
  6083. (__v4si)
  6084. _mm_setzero_si128 (),
  6085. (__mmask8) __U);
  6086. }
  6087. extern __inline __m256i
  6088. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6089. _mm256_mask_sllv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
  6090. __m256i __Y)
  6091. {
  6092. return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
  6093. (__v4di) __Y,
  6094. (__v4di) __W,
  6095. (__mmask8) __U);
  6096. }
  6097. extern __inline __m256i
  6098. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6099. _mm256_maskz_sllv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
  6100. {
  6101. return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
  6102. (__v4di) __Y,
  6103. (__v4di)
  6104. _mm256_setzero_si256 (),
  6105. (__mmask8) __U);
  6106. }
  6107. extern __inline __m128i
  6108. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6109. _mm_mask_sllv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
  6110. __m128i __Y)
  6111. {
  6112. return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
  6113. (__v2di) __Y,
  6114. (__v2di) __W,
  6115. (__mmask8) __U);
  6116. }
  6117. extern __inline __m128i
  6118. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6119. _mm_maskz_sllv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
  6120. {
  6121. return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
  6122. (__v2di) __Y,
  6123. (__v2di)
  6124. _mm_setzero_si128 (),
  6125. (__mmask8) __U);
  6126. }
  6127. extern __inline __m256i
  6128. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6129. _mm256_mask_srav_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
  6130. __m256i __Y)
  6131. {
  6132. return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
  6133. (__v8si) __Y,
  6134. (__v8si) __W,
  6135. (__mmask8) __U);
  6136. }
  6137. extern __inline __m256i
  6138. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6139. _mm256_maskz_srav_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
  6140. {
  6141. return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
  6142. (__v8si) __Y,
  6143. (__v8si)
  6144. _mm256_setzero_si256 (),
  6145. (__mmask8) __U);
  6146. }
  6147. extern __inline __m128i
  6148. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6149. _mm_mask_srav_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
  6150. __m128i __Y)
  6151. {
  6152. return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
  6153. (__v4si) __Y,
  6154. (__v4si) __W,
  6155. (__mmask8) __U);
  6156. }
  6157. extern __inline __m128i
  6158. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6159. _mm_maskz_srav_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
  6160. {
  6161. return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
  6162. (__v4si) __Y,
  6163. (__v4si)
  6164. _mm_setzero_si128 (),
  6165. (__mmask8) __U);
  6166. }
  6167. extern __inline __m256i
  6168. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6169. _mm256_mask_srlv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
  6170. __m256i __Y)
  6171. {
  6172. return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
  6173. (__v8si) __Y,
  6174. (__v8si) __W,
  6175. (__mmask8) __U);
  6176. }
  6177. extern __inline __m256i
  6178. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6179. _mm256_maskz_srlv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
  6180. {
  6181. return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
  6182. (__v8si) __Y,
  6183. (__v8si)
  6184. _mm256_setzero_si256 (),
  6185. (__mmask8) __U);
  6186. }
  6187. extern __inline __m128i
  6188. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6189. _mm_mask_srlv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
  6190. __m128i __Y)
  6191. {
  6192. return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
  6193. (__v4si) __Y,
  6194. (__v4si) __W,
  6195. (__mmask8) __U);
  6196. }
  6197. extern __inline __m128i
  6198. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6199. _mm_maskz_srlv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
  6200. {
  6201. return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
  6202. (__v4si) __Y,
  6203. (__v4si)
  6204. _mm_setzero_si128 (),
  6205. (__mmask8) __U);
  6206. }
  6207. extern __inline __m256i
  6208. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6209. _mm256_mask_srlv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
  6210. __m256i __Y)
  6211. {
  6212. return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
  6213. (__v4di) __Y,
  6214. (__v4di) __W,
  6215. (__mmask8) __U);
  6216. }
  6217. extern __inline __m256i
  6218. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6219. _mm256_maskz_srlv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
  6220. {
  6221. return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
  6222. (__v4di) __Y,
  6223. (__v4di)
  6224. _mm256_setzero_si256 (),
  6225. (__mmask8) __U);
  6226. }
  6227. extern __inline __m128i
  6228. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6229. _mm_mask_srlv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
  6230. __m128i __Y)
  6231. {
  6232. return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
  6233. (__v2di) __Y,
  6234. (__v2di) __W,
  6235. (__mmask8) __U);
  6236. }
  6237. extern __inline __m128i
  6238. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6239. _mm_maskz_srlv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
  6240. {
  6241. return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
  6242. (__v2di) __Y,
  6243. (__v2di)
  6244. _mm_setzero_si128 (),
  6245. (__mmask8) __U);
  6246. }
  6247. extern __inline __m256i
  6248. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6249. _mm256_rolv_epi32 (__m256i __A, __m256i __B)
  6250. {
  6251. return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
  6252. (__v8si) __B,
  6253. (__v8si)
  6254. _mm256_setzero_si256 (),
  6255. (__mmask8) -1);
  6256. }
  6257. extern __inline __m256i
  6258. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6259. _mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  6260. __m256i __B)
  6261. {
  6262. return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
  6263. (__v8si) __B,
  6264. (__v8si) __W,
  6265. (__mmask8) __U);
  6266. }
  6267. extern __inline __m256i
  6268. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6269. _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
  6270. {
  6271. return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
  6272. (__v8si) __B,
  6273. (__v8si)
  6274. _mm256_setzero_si256 (),
  6275. (__mmask8) __U);
  6276. }
  6277. extern __inline __m128i
  6278. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6279. _mm_rolv_epi32 (__m128i __A, __m128i __B)
  6280. {
  6281. return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
  6282. (__v4si) __B,
  6283. (__v4si)
  6284. _mm_setzero_si128 (),
  6285. (__mmask8) -1);
  6286. }
  6287. extern __inline __m128i
  6288. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6289. _mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  6290. __m128i __B)
  6291. {
  6292. return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
  6293. (__v4si) __B,
  6294. (__v4si) __W,
  6295. (__mmask8) __U);
  6296. }
  6297. extern __inline __m128i
  6298. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6299. _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
  6300. {
  6301. return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
  6302. (__v4si) __B,
  6303. (__v4si)
  6304. _mm_setzero_si128 (),
  6305. (__mmask8) __U);
  6306. }
  6307. extern __inline __m256i
  6308. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6309. _mm256_rorv_epi32 (__m256i __A, __m256i __B)
  6310. {
  6311. return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
  6312. (__v8si) __B,
  6313. (__v8si)
  6314. _mm256_setzero_si256 (),
  6315. (__mmask8) -1);
  6316. }
  6317. extern __inline __m256i
  6318. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6319. _mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  6320. __m256i __B)
  6321. {
  6322. return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
  6323. (__v8si) __B,
  6324. (__v8si) __W,
  6325. (__mmask8) __U);
  6326. }
  6327. extern __inline __m256i
  6328. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6329. _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
  6330. {
  6331. return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
  6332. (__v8si) __B,
  6333. (__v8si)
  6334. _mm256_setzero_si256 (),
  6335. (__mmask8) __U);
  6336. }
  6337. extern __inline __m128i
  6338. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6339. _mm_rorv_epi32 (__m128i __A, __m128i __B)
  6340. {
  6341. return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
  6342. (__v4si) __B,
  6343. (__v4si)
  6344. _mm_setzero_si128 (),
  6345. (__mmask8) -1);
  6346. }
  6347. extern __inline __m128i
  6348. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6349. _mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  6350. __m128i __B)
  6351. {
  6352. return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
  6353. (__v4si) __B,
  6354. (__v4si) __W,
  6355. (__mmask8) __U);
  6356. }
  6357. extern __inline __m128i
  6358. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6359. _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
  6360. {
  6361. return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
  6362. (__v4si) __B,
  6363. (__v4si)
  6364. _mm_setzero_si128 (),
  6365. (__mmask8) __U);
  6366. }
  6367. extern __inline __m256i
  6368. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6369. _mm256_rolv_epi64 (__m256i __A, __m256i __B)
  6370. {
  6371. return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
  6372. (__v4di) __B,
  6373. (__v4di)
  6374. _mm256_setzero_si256 (),
  6375. (__mmask8) -1);
  6376. }
  6377. extern __inline __m256i
  6378. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6379. _mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  6380. __m256i __B)
  6381. {
  6382. return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
  6383. (__v4di) __B,
  6384. (__v4di) __W,
  6385. (__mmask8) __U);
  6386. }
  6387. extern __inline __m256i
  6388. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6389. _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
  6390. {
  6391. return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
  6392. (__v4di) __B,
  6393. (__v4di)
  6394. _mm256_setzero_si256 (),
  6395. (__mmask8) __U);
  6396. }
  6397. extern __inline __m128i
  6398. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6399. _mm_rolv_epi64 (__m128i __A, __m128i __B)
  6400. {
  6401. return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
  6402. (__v2di) __B,
  6403. (__v2di)
  6404. _mm_setzero_si128 (),
  6405. (__mmask8) -1);
  6406. }
  6407. extern __inline __m128i
  6408. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6409. _mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
  6410. __m128i __B)
  6411. {
  6412. return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
  6413. (__v2di) __B,
  6414. (__v2di) __W,
  6415. (__mmask8) __U);
  6416. }
  6417. extern __inline __m128i
  6418. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6419. _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
  6420. {
  6421. return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
  6422. (__v2di) __B,
  6423. (__v2di)
  6424. _mm_setzero_si128 (),
  6425. (__mmask8) __U);
  6426. }
  6427. extern __inline __m256i
  6428. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6429. _mm256_rorv_epi64 (__m256i __A, __m256i __B)
  6430. {
  6431. return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
  6432. (__v4di) __B,
  6433. (__v4di)
  6434. _mm256_setzero_si256 (),
  6435. (__mmask8) -1);
  6436. }
  6437. extern __inline __m256i
  6438. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6439. _mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  6440. __m256i __B)
  6441. {
  6442. return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
  6443. (__v4di) __B,
  6444. (__v4di) __W,
  6445. (__mmask8) __U);
  6446. }
  6447. extern __inline __m256i
  6448. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6449. _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
  6450. {
  6451. return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
  6452. (__v4di) __B,
  6453. (__v4di)
  6454. _mm256_setzero_si256 (),
  6455. (__mmask8) __U);
  6456. }
  6457. extern __inline __m128i
  6458. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6459. _mm_rorv_epi64 (__m128i __A, __m128i __B)
  6460. {
  6461. return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
  6462. (__v2di) __B,
  6463. (__v2di)
  6464. _mm_setzero_si128 (),
  6465. (__mmask8) -1);
  6466. }
  6467. extern __inline __m128i
  6468. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6469. _mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
  6470. __m128i __B)
  6471. {
  6472. return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
  6473. (__v2di) __B,
  6474. (__v2di) __W,
  6475. (__mmask8) __U);
  6476. }
  6477. extern __inline __m128i
  6478. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6479. _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
  6480. {
  6481. return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
  6482. (__v2di) __B,
  6483. (__v2di)
  6484. _mm_setzero_si128 (),
  6485. (__mmask8) __U);
  6486. }
  6487. extern __inline __m256i
  6488. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6489. _mm256_srav_epi64 (__m256i __X, __m256i __Y)
  6490. {
  6491. return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
  6492. (__v4di) __Y,
  6493. (__v4di)
  6494. _mm256_setzero_si256 (),
  6495. (__mmask8) -1);
  6496. }
  6497. extern __inline __m256i
  6498. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6499. _mm256_mask_srav_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
  6500. __m256i __Y)
  6501. {
  6502. return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
  6503. (__v4di) __Y,
  6504. (__v4di) __W,
  6505. (__mmask8) __U);
  6506. }
  6507. extern __inline __m256i
  6508. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6509. _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
  6510. {
  6511. return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
  6512. (__v4di) __Y,
  6513. (__v4di)
  6514. _mm256_setzero_si256 (),
  6515. (__mmask8) __U);
  6516. }
  6517. extern __inline __m256i
  6518. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6519. _mm256_mask_and_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  6520. __m256i __B)
  6521. {
  6522. return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
  6523. (__v4di) __B,
  6524. (__v4di) __W, __U);
  6525. }
  6526. extern __inline __m256i
  6527. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6528. _mm256_maskz_and_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
  6529. {
  6530. return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
  6531. (__v4di) __B,
  6532. (__v4di)
  6533. _mm256_setzero_pd (),
  6534. __U);
  6535. }
  6536. extern __inline __m128i
  6537. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6538. _mm_mask_and_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
  6539. __m128i __B)
  6540. {
  6541. return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
  6542. (__v2di) __B,
  6543. (__v2di) __W, __U);
  6544. }
  6545. extern __inline __m128i
  6546. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6547. _mm_maskz_and_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
  6548. {
  6549. return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
  6550. (__v2di) __B,
  6551. (__v2di)
  6552. _mm_setzero_pd (),
  6553. __U);
  6554. }
  6555. extern __inline __m256i
  6556. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6557. _mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  6558. __m256i __B)
  6559. {
  6560. return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
  6561. (__v4di) __B,
  6562. (__v4di) __W, __U);
  6563. }
  6564. extern __inline __m256i
  6565. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6566. _mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
  6567. {
  6568. return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
  6569. (__v4di) __B,
  6570. (__v4di)
  6571. _mm256_setzero_pd (),
  6572. __U);
  6573. }
  6574. extern __inline __m128i
  6575. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6576. _mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
  6577. __m128i __B)
  6578. {
  6579. return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
  6580. (__v2di) __B,
  6581. (__v2di) __W, __U);
  6582. }
  6583. extern __inline __m128i
  6584. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6585. _mm_maskz_andnot_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
  6586. {
  6587. return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
  6588. (__v2di) __B,
  6589. (__v2di)
  6590. _mm_setzero_pd (),
  6591. __U);
  6592. }
  6593. extern __inline __m256i
  6594. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6595. _mm256_mask_or_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  6596. __m256i __B)
  6597. {
  6598. return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
  6599. (__v4di) __B,
  6600. (__v4di) __W,
  6601. (__mmask8) __U);
  6602. }
  6603. extern __inline __m256i
  6604. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6605. _mm256_maskz_or_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
  6606. {
  6607. return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
  6608. (__v4di) __B,
  6609. (__v4di)
  6610. _mm256_setzero_si256 (),
  6611. (__mmask8) __U);
  6612. }
  6613. extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  6614. _mm256_or_epi64 (__m256i __A, __m256i __B)
  6615. {
  6616. return (__m256i) ((__v4du)__A | (__v4du)__B);
  6617. }
  6618. extern __inline __m128i
  6619. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6620. _mm_mask_or_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
  6621. {
  6622. return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
  6623. (__v2di) __B,
  6624. (__v2di) __W,
  6625. (__mmask8) __U);
  6626. }
  6627. extern __inline __m128i
  6628. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6629. _mm_maskz_or_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
  6630. {
  6631. return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
  6632. (__v2di) __B,
  6633. (__v2di)
  6634. _mm_setzero_si128 (),
  6635. (__mmask8) __U);
  6636. }
  6637. extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  6638. _mm_or_epi64 (__m128i __A, __m128i __B)
  6639. {
  6640. return (__m128i) ((__v2du)__A | (__v2du)__B);
  6641. }
  6642. extern __inline __m256i
  6643. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6644. _mm256_mask_xor_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  6645. __m256i __B)
  6646. {
  6647. return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
  6648. (__v4di) __B,
  6649. (__v4di) __W,
  6650. (__mmask8) __U);
  6651. }
  6652. extern __inline __m256i
  6653. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6654. _mm256_maskz_xor_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
  6655. {
  6656. return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
  6657. (__v4di) __B,
  6658. (__v4di)
  6659. _mm256_setzero_si256 (),
  6660. (__mmask8) __U);
  6661. }
  6662. extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  6663. _mm256_xor_epi64 (__m256i __A, __m256i __B)
  6664. {
  6665. return (__m256i) ((__v4du)__A ^ (__v4du)__B);
  6666. }
  6667. extern __inline __m128i
  6668. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6669. _mm_mask_xor_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
  6670. __m128i __B)
  6671. {
  6672. return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
  6673. (__v2di) __B,
  6674. (__v2di) __W,
  6675. (__mmask8) __U);
  6676. }
  6677. extern __inline __m128i
  6678. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6679. _mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
  6680. {
  6681. return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
  6682. (__v2di) __B,
  6683. (__v2di)
  6684. _mm_setzero_si128 (),
  6685. (__mmask8) __U);
  6686. }
  6687. extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  6688. _mm_xor_epi64 (__m128i __A, __m128i __B)
  6689. {
  6690. return (__m128i) ((__v2du)__A ^ (__v2du)__B);
  6691. }
  6692. extern __inline __m256d
  6693. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6694. _mm256_mask_max_pd (__m256d __W, __mmask8 __U, __m256d __A,
  6695. __m256d __B)
  6696. {
  6697. return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
  6698. (__v4df) __B,
  6699. (__v4df) __W,
  6700. (__mmask8) __U);
  6701. }
  6702. extern __inline __m256d
  6703. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6704. _mm256_maskz_max_pd (__mmask8 __U, __m256d __A, __m256d __B)
  6705. {
  6706. return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
  6707. (__v4df) __B,
  6708. (__v4df)
  6709. _mm256_setzero_pd (),
  6710. (__mmask8) __U);
  6711. }
  6712. extern __inline __m256
  6713. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6714. _mm256_mask_max_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
  6715. {
  6716. return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
  6717. (__v8sf) __B,
  6718. (__v8sf) __W,
  6719. (__mmask8) __U);
  6720. }
  6721. extern __inline __m256
  6722. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6723. _mm256_maskz_max_ps (__mmask8 __U, __m256 __A, __m256 __B)
  6724. {
  6725. return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
  6726. (__v8sf) __B,
  6727. (__v8sf)
  6728. _mm256_setzero_ps (),
  6729. (__mmask8) __U);
  6730. }
  6731. extern __inline __m128
  6732. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6733. _mm_mask_div_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  6734. {
  6735. return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
  6736. (__v4sf) __B,
  6737. (__v4sf) __W,
  6738. (__mmask8) __U);
  6739. }
  6740. extern __inline __m128
  6741. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6742. _mm_maskz_div_ps (__mmask8 __U, __m128 __A, __m128 __B)
  6743. {
  6744. return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
  6745. (__v4sf) __B,
  6746. (__v4sf)
  6747. _mm_setzero_ps (),
  6748. (__mmask8) __U);
  6749. }
  6750. extern __inline __m128d
  6751. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6752. _mm_mask_div_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
  6753. {
  6754. return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
  6755. (__v2df) __B,
  6756. (__v2df) __W,
  6757. (__mmask8) __U);
  6758. }
  6759. extern __inline __m128d
  6760. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6761. _mm_maskz_div_pd (__mmask8 __U, __m128d __A, __m128d __B)
  6762. {
  6763. return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
  6764. (__v2df) __B,
  6765. (__v2df)
  6766. _mm_setzero_pd (),
  6767. (__mmask8) __U);
  6768. }
  6769. extern __inline __m256d
  6770. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6771. _mm256_mask_min_pd (__m256d __W, __mmask8 __U, __m256d __A,
  6772. __m256d __B)
  6773. {
  6774. return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
  6775. (__v4df) __B,
  6776. (__v4df) __W,
  6777. (__mmask8) __U);
  6778. }
  6779. extern __inline __m256d
  6780. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6781. _mm256_mask_div_pd (__m256d __W, __mmask8 __U, __m256d __A,
  6782. __m256d __B)
  6783. {
  6784. return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
  6785. (__v4df) __B,
  6786. (__v4df) __W,
  6787. (__mmask8) __U);
  6788. }
  6789. extern __inline __m256d
  6790. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6791. _mm256_maskz_min_pd (__mmask8 __U, __m256d __A, __m256d __B)
  6792. {
  6793. return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
  6794. (__v4df) __B,
  6795. (__v4df)
  6796. _mm256_setzero_pd (),
  6797. (__mmask8) __U);
  6798. }
  6799. extern __inline __m256
  6800. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6801. _mm256_mask_min_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
  6802. {
  6803. return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
  6804. (__v8sf) __B,
  6805. (__v8sf) __W,
  6806. (__mmask8) __U);
  6807. }
  6808. extern __inline __m256d
  6809. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6810. _mm256_maskz_div_pd (__mmask8 __U, __m256d __A, __m256d __B)
  6811. {
  6812. return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
  6813. (__v4df) __B,
  6814. (__v4df)
  6815. _mm256_setzero_pd (),
  6816. (__mmask8) __U);
  6817. }
  6818. extern __inline __m256
  6819. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6820. _mm256_mask_div_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
  6821. {
  6822. return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
  6823. (__v8sf) __B,
  6824. (__v8sf) __W,
  6825. (__mmask8) __U);
  6826. }
  6827. extern __inline __m256
  6828. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6829. _mm256_maskz_min_ps (__mmask8 __U, __m256 __A, __m256 __B)
  6830. {
  6831. return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
  6832. (__v8sf) __B,
  6833. (__v8sf)
  6834. _mm256_setzero_ps (),
  6835. (__mmask8) __U);
  6836. }
  6837. extern __inline __m256
  6838. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6839. _mm256_maskz_div_ps (__mmask8 __U, __m256 __A, __m256 __B)
  6840. {
  6841. return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
  6842. (__v8sf) __B,
  6843. (__v8sf)
  6844. _mm256_setzero_ps (),
  6845. (__mmask8) __U);
  6846. }
  6847. extern __inline __m128
  6848. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6849. _mm_mask_min_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  6850. {
  6851. return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
  6852. (__v4sf) __B,
  6853. (__v4sf) __W,
  6854. (__mmask8) __U);
  6855. }
  6856. extern __inline __m128
  6857. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6858. _mm_mask_mul_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  6859. {
  6860. return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
  6861. (__v4sf) __B,
  6862. (__v4sf) __W,
  6863. (__mmask8) __U);
  6864. }
  6865. extern __inline __m128
  6866. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6867. _mm_maskz_min_ps (__mmask8 __U, __m128 __A, __m128 __B)
  6868. {
  6869. return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
  6870. (__v4sf) __B,
  6871. (__v4sf)
  6872. _mm_setzero_ps (),
  6873. (__mmask8) __U);
  6874. }
  6875. extern __inline __m128
  6876. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6877. _mm_maskz_mul_ps (__mmask8 __U, __m128 __A, __m128 __B)
  6878. {
  6879. return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
  6880. (__v4sf) __B,
  6881. (__v4sf)
  6882. _mm_setzero_ps (),
  6883. (__mmask8) __U);
  6884. }
  6885. extern __inline __m128
  6886. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6887. _mm_mask_max_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  6888. {
  6889. return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
  6890. (__v4sf) __B,
  6891. (__v4sf) __W,
  6892. (__mmask8) __U);
  6893. }
  6894. extern __inline __m128
  6895. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6896. _mm_maskz_max_ps (__mmask8 __U, __m128 __A, __m128 __B)
  6897. {
  6898. return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
  6899. (__v4sf) __B,
  6900. (__v4sf)
  6901. _mm_setzero_ps (),
  6902. (__mmask8) __U);
  6903. }
  6904. extern __inline __m128d
  6905. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6906. _mm_mask_min_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
  6907. {
  6908. return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
  6909. (__v2df) __B,
  6910. (__v2df) __W,
  6911. (__mmask8) __U);
  6912. }
  6913. extern __inline __m128d
  6914. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6915. _mm_maskz_min_pd (__mmask8 __U, __m128d __A, __m128d __B)
  6916. {
  6917. return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
  6918. (__v2df) __B,
  6919. (__v2df)
  6920. _mm_setzero_pd (),
  6921. (__mmask8) __U);
  6922. }
  6923. extern __inline __m128d
  6924. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6925. _mm_mask_max_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
  6926. {
  6927. return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
  6928. (__v2df) __B,
  6929. (__v2df) __W,
  6930. (__mmask8) __U);
  6931. }
  6932. extern __inline __m128d
  6933. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6934. _mm_maskz_max_pd (__mmask8 __U, __m128d __A, __m128d __B)
  6935. {
  6936. return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
  6937. (__v2df) __B,
  6938. (__v2df)
  6939. _mm_setzero_pd (),
  6940. (__mmask8) __U);
  6941. }
  6942. extern __inline __m128d
  6943. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6944. _mm_mask_mul_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
  6945. {
  6946. return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
  6947. (__v2df) __B,
  6948. (__v2df) __W,
  6949. (__mmask8) __U);
  6950. }
  6951. extern __inline __m128d
  6952. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6953. _mm_maskz_mul_pd (__mmask8 __U, __m128d __A, __m128d __B)
  6954. {
  6955. return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
  6956. (__v2df) __B,
  6957. (__v2df)
  6958. _mm_setzero_pd (),
  6959. (__mmask8) __U);
  6960. }
  6961. extern __inline __m256
  6962. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6963. _mm256_mask_mul_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
  6964. {
  6965. return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
  6966. (__v8sf) __B,
  6967. (__v8sf) __W,
  6968. (__mmask8) __U);
  6969. }
  6970. extern __inline __m256
  6971. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6972. _mm256_maskz_mul_ps (__mmask8 __U, __m256 __A, __m256 __B)
  6973. {
  6974. return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
  6975. (__v8sf) __B,
  6976. (__v8sf)
  6977. _mm256_setzero_ps (),
  6978. (__mmask8) __U);
  6979. }
  6980. extern __inline __m256d
  6981. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6982. _mm256_mask_mul_pd (__m256d __W, __mmask8 __U, __m256d __A,
  6983. __m256d __B)
  6984. {
  6985. return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
  6986. (__v4df) __B,
  6987. (__v4df) __W,
  6988. (__mmask8) __U);
  6989. }
  6990. extern __inline __m256d
  6991. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6992. _mm256_maskz_mul_pd (__mmask8 __U, __m256d __A, __m256d __B)
  6993. {
  6994. return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
  6995. (__v4df) __B,
  6996. (__v4df)
  6997. _mm256_setzero_pd (),
  6998. (__mmask8) __U);
  6999. }
  7000. extern __inline __m256i
  7001. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7002. _mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
  7003. {
  7004. return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
  7005. (__v4di) __B,
  7006. (__v4di)
  7007. _mm256_setzero_si256 (),
  7008. __M);
  7009. }
  7010. extern __inline __m256i
  7011. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7012. _mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
  7013. __m256i __B)
  7014. {
  7015. return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
  7016. (__v4di) __B,
  7017. (__v4di) __W, __M);
  7018. }
  7019. extern __inline __m256i
  7020. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7021. _mm256_min_epi64 (__m256i __A, __m256i __B)
  7022. {
  7023. return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
  7024. (__v4di) __B,
  7025. (__v4di)
  7026. _mm256_setzero_si256 (),
  7027. (__mmask8) -1);
  7028. }
  7029. extern __inline __m256i
  7030. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7031. _mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
  7032. __m256i __B)
  7033. {
  7034. return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
  7035. (__v4di) __B,
  7036. (__v4di) __W, __M);
  7037. }
  7038. extern __inline __m256i
  7039. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7040. _mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
  7041. {
  7042. return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
  7043. (__v4di) __B,
  7044. (__v4di)
  7045. _mm256_setzero_si256 (),
  7046. __M);
  7047. }
  7048. extern __inline __m256i
  7049. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7050. _mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
  7051. {
  7052. return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
  7053. (__v4di) __B,
  7054. (__v4di)
  7055. _mm256_setzero_si256 (),
  7056. __M);
  7057. }
  7058. extern __inline __m256i
  7059. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7060. _mm256_max_epi64 (__m256i __A, __m256i __B)
  7061. {
  7062. return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
  7063. (__v4di) __B,
  7064. (__v4di)
  7065. _mm256_setzero_si256 (),
  7066. (__mmask8) -1);
  7067. }
  7068. extern __inline __m256i
  7069. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7070. _mm256_max_epu64 (__m256i __A, __m256i __B)
  7071. {
  7072. return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
  7073. (__v4di) __B,
  7074. (__v4di)
  7075. _mm256_setzero_si256 (),
  7076. (__mmask8) -1);
  7077. }
  7078. extern __inline __m256i
  7079. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7080. _mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
  7081. __m256i __B)
  7082. {
  7083. return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
  7084. (__v4di) __B,
  7085. (__v4di) __W, __M);
  7086. }
  7087. extern __inline __m256i
  7088. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7089. _mm256_min_epu64 (__m256i __A, __m256i __B)
  7090. {
  7091. return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
  7092. (__v4di) __B,
  7093. (__v4di)
  7094. _mm256_setzero_si256 (),
  7095. (__mmask8) -1);
  7096. }
  7097. extern __inline __m256i
  7098. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7099. _mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
  7100. __m256i __B)
  7101. {
  7102. return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
  7103. (__v4di) __B,
  7104. (__v4di) __W, __M);
  7105. }
  7106. extern __inline __m256i
  7107. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7108. _mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
  7109. {
  7110. return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
  7111. (__v4di) __B,
  7112. (__v4di)
  7113. _mm256_setzero_si256 (),
  7114. __M);
  7115. }
  7116. extern __inline __m256i
  7117. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7118. _mm256_maskz_max_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
  7119. {
  7120. return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
  7121. (__v8si) __B,
  7122. (__v8si)
  7123. _mm256_setzero_si256 (),
  7124. __M);
  7125. }
  7126. extern __inline __m256i
  7127. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7128. _mm256_mask_max_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
  7129. __m256i __B)
  7130. {
  7131. return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
  7132. (__v8si) __B,
  7133. (__v8si) __W, __M);
  7134. }
  7135. extern __inline __m256i
  7136. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7137. _mm256_maskz_min_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
  7138. {
  7139. return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
  7140. (__v8si) __B,
  7141. (__v8si)
  7142. _mm256_setzero_si256 (),
  7143. __M);
  7144. }
  7145. extern __inline __m256i
  7146. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7147. _mm256_mask_min_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
  7148. __m256i __B)
  7149. {
  7150. return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
  7151. (__v8si) __B,
  7152. (__v8si) __W, __M);
  7153. }
  7154. extern __inline __m256i
  7155. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7156. _mm256_maskz_max_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
  7157. {
  7158. return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
  7159. (__v8si) __B,
  7160. (__v8si)
  7161. _mm256_setzero_si256 (),
  7162. __M);
  7163. }
  7164. extern __inline __m256i
  7165. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7166. _mm256_mask_max_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
  7167. __m256i __B)
  7168. {
  7169. return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
  7170. (__v8si) __B,
  7171. (__v8si) __W, __M);
  7172. }
  7173. extern __inline __m256i
  7174. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7175. _mm256_maskz_min_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
  7176. {
  7177. return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
  7178. (__v8si) __B,
  7179. (__v8si)
  7180. _mm256_setzero_si256 (),
  7181. __M);
  7182. }
  7183. extern __inline __m256i
  7184. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7185. _mm256_mask_min_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
  7186. __m256i __B)
  7187. {
  7188. return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
  7189. (__v8si) __B,
  7190. (__v8si) __W, __M);
  7191. }
  7192. extern __inline __m128i
  7193. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7194. _mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
  7195. {
  7196. return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
  7197. (__v2di) __B,
  7198. (__v2di)
  7199. _mm_setzero_si128 (),
  7200. __M);
  7201. }
  7202. extern __inline __m128i
  7203. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7204. _mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
  7205. __m128i __B)
  7206. {
  7207. return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
  7208. (__v2di) __B,
  7209. (__v2di) __W, __M);
  7210. }
  7211. extern __inline __m128i
  7212. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7213. _mm_min_epi64 (__m128i __A, __m128i __B)
  7214. {
  7215. return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
  7216. (__v2di) __B,
  7217. (__v2di)
  7218. _mm_setzero_si128 (),
  7219. (__mmask8) -1);
  7220. }
  7221. extern __inline __m128i
  7222. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7223. _mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
  7224. __m128i __B)
  7225. {
  7226. return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
  7227. (__v2di) __B,
  7228. (__v2di) __W, __M);
  7229. }
  7230. extern __inline __m128i
  7231. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7232. _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
  7233. {
  7234. return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
  7235. (__v2di) __B,
  7236. (__v2di)
  7237. _mm_setzero_si128 (),
  7238. __M);
  7239. }
  7240. extern __inline __m128i
  7241. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7242. _mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
  7243. {
  7244. return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
  7245. (__v2di) __B,
  7246. (__v2di)
  7247. _mm_setzero_si128 (),
  7248. __M);
  7249. }
  7250. extern __inline __m128i
  7251. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7252. _mm_max_epi64 (__m128i __A, __m128i __B)
  7253. {
  7254. return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
  7255. (__v2di) __B,
  7256. (__v2di)
  7257. _mm_setzero_si128 (),
  7258. (__mmask8) -1);
  7259. }
  7260. extern __inline __m128i
  7261. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7262. _mm_max_epu64 (__m128i __A, __m128i __B)
  7263. {
  7264. return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
  7265. (__v2di) __B,
  7266. (__v2di)
  7267. _mm_setzero_si128 (),
  7268. (__mmask8) -1);
  7269. }
  7270. extern __inline __m128i
  7271. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7272. _mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
  7273. __m128i __B)
  7274. {
  7275. return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
  7276. (__v2di) __B,
  7277. (__v2di) __W, __M);
  7278. }
  7279. extern __inline __m128i
  7280. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7281. _mm_min_epu64 (__m128i __A, __m128i __B)
  7282. {
  7283. return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
  7284. (__v2di) __B,
  7285. (__v2di)
  7286. _mm_setzero_si128 (),
  7287. (__mmask8) -1);
  7288. }
  7289. extern __inline __m128i
  7290. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7291. _mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
  7292. __m128i __B)
  7293. {
  7294. return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
  7295. (__v2di) __B,
  7296. (__v2di) __W, __M);
  7297. }
  7298. extern __inline __m128i
  7299. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7300. _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
  7301. {
  7302. return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
  7303. (__v2di) __B,
  7304. (__v2di)
  7305. _mm_setzero_si128 (),
  7306. __M);
  7307. }
  7308. extern __inline __m128i
  7309. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7310. _mm_maskz_max_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
  7311. {
  7312. return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
  7313. (__v4si) __B,
  7314. (__v4si)
  7315. _mm_setzero_si128 (),
  7316. __M);
  7317. }
  7318. extern __inline __m128i
  7319. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7320. _mm_mask_max_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
  7321. __m128i __B)
  7322. {
  7323. return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
  7324. (__v4si) __B,
  7325. (__v4si) __W, __M);
  7326. }
  7327. extern __inline __m128i
  7328. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7329. _mm_maskz_min_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
  7330. {
  7331. return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
  7332. (__v4si) __B,
  7333. (__v4si)
  7334. _mm_setzero_si128 (),
  7335. __M);
  7336. }
  7337. extern __inline __m128i
  7338. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7339. _mm_mask_min_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
  7340. __m128i __B)
  7341. {
  7342. return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
  7343. (__v4si) __B,
  7344. (__v4si) __W, __M);
  7345. }
  7346. extern __inline __m128i
  7347. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7348. _mm_maskz_max_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
  7349. {
  7350. return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
  7351. (__v4si) __B,
  7352. (__v4si)
  7353. _mm_setzero_si128 (),
  7354. __M);
  7355. }
  7356. extern __inline __m128i
  7357. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7358. _mm_mask_max_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
  7359. __m128i __B)
  7360. {
  7361. return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
  7362. (__v4si) __B,
  7363. (__v4si) __W, __M);
  7364. }
  7365. extern __inline __m128i
  7366. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7367. _mm_maskz_min_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
  7368. {
  7369. return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
  7370. (__v4si) __B,
  7371. (__v4si)
  7372. _mm_setzero_si128 (),
  7373. __M);
  7374. }
  7375. extern __inline __m128i
  7376. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7377. _mm_mask_min_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
  7378. __m128i __B)
  7379. {
  7380. return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
  7381. (__v4si) __B,
  7382. (__v4si) __W, __M);
  7383. }
/* If __AVX512CD__ is not defined, save the current target options, select
   the "avx512vl,avx512cd" target for the code that follows, and define
   __DISABLE_AVX512VLCD__ to record that the options were pushed here.  */
#ifndef __AVX512CD__
#pragma GCC push_options
#pragma GCC target("avx512vl,avx512cd")
#define __DISABLE_AVX512VLCD__
#endif /* __AVX512CD__ */
  7389. extern __inline __m128i
  7390. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7391. _mm_broadcastmb_epi64 (__mmask8 __A)
  7392. {
  7393. return (__m128i) __builtin_ia32_broadcastmb128 (__A);
  7394. }
  7395. extern __inline __m256i
  7396. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7397. _mm256_broadcastmb_epi64 (__mmask8 __A)
  7398. {
  7399. return (__m256i) __builtin_ia32_broadcastmb256 (__A);
  7400. }
  7401. extern __inline __m128i
  7402. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7403. _mm_broadcastmw_epi32 (__mmask16 __A)
  7404. {
  7405. return (__m128i) __builtin_ia32_broadcastmw128 (__A);
  7406. }
  7407. extern __inline __m256i
  7408. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7409. _mm256_broadcastmw_epi32 (__mmask16 __A)
  7410. {
  7411. return (__m256i) __builtin_ia32_broadcastmw256 (__A);
  7412. }
  7413. extern __inline __m256i
  7414. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7415. _mm256_lzcnt_epi32 (__m256i __A)
  7416. {
  7417. return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
  7418. (__v8si)
  7419. _mm256_setzero_si256 (),
  7420. (__mmask8) -1);
  7421. }
  7422. extern __inline __m256i
  7423. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7424. _mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
  7425. {
  7426. return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
  7427. (__v8si) __W,
  7428. (__mmask8) __U);
  7429. }
  7430. extern __inline __m256i
  7431. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7432. _mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A)
  7433. {
  7434. return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
  7435. (__v8si)
  7436. _mm256_setzero_si256 (),
  7437. (__mmask8) __U);
  7438. }
  7439. extern __inline __m256i
  7440. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7441. _mm256_lzcnt_epi64 (__m256i __A)
  7442. {
  7443. return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
  7444. (__v4di)
  7445. _mm256_setzero_si256 (),
  7446. (__mmask8) -1);
  7447. }
  7448. extern __inline __m256i
  7449. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7450. _mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
  7451. {
  7452. return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
  7453. (__v4di) __W,
  7454. (__mmask8) __U);
  7455. }
  7456. extern __inline __m256i
  7457. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7458. _mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A)
  7459. {
  7460. return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
  7461. (__v4di)
  7462. _mm256_setzero_si256 (),
  7463. (__mmask8) __U);
  7464. }
  7465. extern __inline __m256i
  7466. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7467. _mm256_conflict_epi64 (__m256i __A)
  7468. {
  7469. return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
  7470. (__v4di)
  7471. _mm256_setzero_si256 (),
  7472. (__mmask8) -1);
  7473. }
  7474. extern __inline __m256i
  7475. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7476. _mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
  7477. {
  7478. return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
  7479. (__v4di) __W,
  7480. (__mmask8)
  7481. __U);
  7482. }
  7483. extern __inline __m256i
  7484. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7485. _mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
  7486. {
  7487. return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
  7488. (__v4di)
  7489. _mm256_setzero_si256 (),
  7490. (__mmask8)
  7491. __U);
  7492. }
  7493. extern __inline __m256i
  7494. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7495. _mm256_conflict_epi32 (__m256i __A)
  7496. {
  7497. return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
  7498. (__v8si)
  7499. _mm256_setzero_si256 (),
  7500. (__mmask8) -1);
  7501. }
  7502. extern __inline __m256i
  7503. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7504. _mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
  7505. {
  7506. return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
  7507. (__v8si) __W,
  7508. (__mmask8)
  7509. __U);
  7510. }
  7511. extern __inline __m256i
  7512. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7513. _mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
  7514. {
  7515. return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
  7516. (__v8si)
  7517. _mm256_setzero_si256 (),
  7518. (__mmask8)
  7519. __U);
  7520. }
  7521. extern __inline __m128i
  7522. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7523. _mm_lzcnt_epi32 (__m128i __A)
  7524. {
  7525. return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
  7526. (__v4si)
  7527. _mm_setzero_si128 (),
  7528. (__mmask8) -1);
  7529. }
  7530. extern __inline __m128i
  7531. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7532. _mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
  7533. {
  7534. return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
  7535. (__v4si) __W,
  7536. (__mmask8) __U);
  7537. }
  7538. extern __inline __m128i
  7539. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7540. _mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A)
  7541. {
  7542. return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
  7543. (__v4si)
  7544. _mm_setzero_si128 (),
  7545. (__mmask8) __U);
  7546. }
  7547. extern __inline __m128i
  7548. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7549. _mm_lzcnt_epi64 (__m128i __A)
  7550. {
  7551. return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
  7552. (__v2di)
  7553. _mm_setzero_si128 (),
  7554. (__mmask8) -1);
  7555. }
  7556. extern __inline __m128i
  7557. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7558. _mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
  7559. {
  7560. return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
  7561. (__v2di) __W,
  7562. (__mmask8) __U);
  7563. }
  7564. extern __inline __m128i
  7565. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7566. _mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A)
  7567. {
  7568. return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
  7569. (__v2di)
  7570. _mm_setzero_si128 (),
  7571. (__mmask8) __U);
  7572. }
  7573. extern __inline __m128i
  7574. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7575. _mm_conflict_epi64 (__m128i __A)
  7576. {
  7577. return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
  7578. (__v2di)
  7579. _mm_setzero_si128 (),
  7580. (__mmask8) -1);
  7581. }
  7582. extern __inline __m128i
  7583. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7584. _mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
  7585. {
  7586. return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
  7587. (__v2di) __W,
  7588. (__mmask8)
  7589. __U);
  7590. }
  7591. extern __inline __m128i
  7592. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7593. _mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A)
  7594. {
  7595. return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
  7596. (__v2di)
  7597. _mm_setzero_si128 (),
  7598. (__mmask8)
  7599. __U);
  7600. }
  7601. extern __inline __m128i
  7602. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7603. _mm_conflict_epi32 (__m128i __A)
  7604. {
  7605. return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
  7606. (__v4si)
  7607. _mm_setzero_si128 (),
  7608. (__mmask8) -1);
  7609. }
  7610. extern __inline __m128i
  7611. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7612. _mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
  7613. {
  7614. return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
  7615. (__v4si) __W,
  7616. (__mmask8)
  7617. __U);
  7618. }
  7619. extern __inline __m128i
  7620. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7621. _mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
  7622. {
  7623. return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
  7624. (__v4si)
  7625. _mm_setzero_si128 (),
  7626. (__mmask8)
  7627. __U);
  7628. }
/* Pop the target options only if __DISABLE_AVX512VLCD__ is defined, i.e.
   only if the matching push_options was issued.  */
#ifdef __DISABLE_AVX512VLCD__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512VLCD__ */
  7632. extern __inline __m256d
  7633. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7634. _mm256_mask_unpacklo_pd (__m256d __W, __mmask8 __U, __m256d __A,
  7635. __m256d __B)
  7636. {
  7637. return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
  7638. (__v4df) __B,
  7639. (__v4df) __W,
  7640. (__mmask8) __U);
  7641. }
  7642. extern __inline __m256d
  7643. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7644. _mm256_maskz_unpacklo_pd (__mmask8 __U, __m256d __A, __m256d __B)
  7645. {
  7646. return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
  7647. (__v4df) __B,
  7648. (__v4df)
  7649. _mm256_setzero_pd (),
  7650. (__mmask8) __U);
  7651. }
  7652. extern __inline __m128d
  7653. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7654. _mm_mask_unpacklo_pd (__m128d __W, __mmask8 __U, __m128d __A,
  7655. __m128d __B)
  7656. {
  7657. return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
  7658. (__v2df) __B,
  7659. (__v2df) __W,
  7660. (__mmask8) __U);
  7661. }
  7662. extern __inline __m128d
  7663. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7664. _mm_maskz_unpacklo_pd (__mmask8 __U, __m128d __A, __m128d __B)
  7665. {
  7666. return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
  7667. (__v2df) __B,
  7668. (__v2df)
  7669. _mm_setzero_pd (),
  7670. (__mmask8) __U);
  7671. }
  7672. extern __inline __m256
  7673. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7674. _mm256_mask_unpacklo_ps (__m256 __W, __mmask8 __U, __m256 __A,
  7675. __m256 __B)
  7676. {
  7677. return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
  7678. (__v8sf) __B,
  7679. (__v8sf) __W,
  7680. (__mmask8) __U);
  7681. }
  7682. extern __inline __m256d
  7683. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7684. _mm256_mask_unpackhi_pd (__m256d __W, __mmask8 __U, __m256d __A,
  7685. __m256d __B)
  7686. {
  7687. return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
  7688. (__v4df) __B,
  7689. (__v4df) __W,
  7690. (__mmask8) __U);
  7691. }
  7692. extern __inline __m256d
  7693. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7694. _mm256_maskz_unpackhi_pd (__mmask8 __U, __m256d __A, __m256d __B)
  7695. {
  7696. return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
  7697. (__v4df) __B,
  7698. (__v4df)
  7699. _mm256_setzero_pd (),
  7700. (__mmask8) __U);
  7701. }
  7702. extern __inline __m128d
  7703. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7704. _mm_mask_unpackhi_pd (__m128d __W, __mmask8 __U, __m128d __A,
  7705. __m128d __B)
  7706. {
  7707. return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
  7708. (__v2df) __B,
  7709. (__v2df) __W,
  7710. (__mmask8) __U);
  7711. }
  7712. extern __inline __m128d
  7713. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7714. _mm_maskz_unpackhi_pd (__mmask8 __U, __m128d __A, __m128d __B)
  7715. {
  7716. return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
  7717. (__v2df) __B,
  7718. (__v2df)
  7719. _mm_setzero_pd (),
  7720. (__mmask8) __U);
  7721. }
  7722. extern __inline __m256
  7723. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7724. _mm256_mask_unpackhi_ps (__m256 __W, __mmask8 __U, __m256 __A,
  7725. __m256 __B)
  7726. {
  7727. return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
  7728. (__v8sf) __B,
  7729. (__v8sf) __W,
  7730. (__mmask8) __U);
  7731. }
  7732. extern __inline __m256
  7733. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7734. _mm256_maskz_unpackhi_ps (__mmask8 __U, __m256 __A, __m256 __B)
  7735. {
  7736. return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
  7737. (__v8sf) __B,
  7738. (__v8sf)
  7739. _mm256_setzero_ps (),
  7740. (__mmask8) __U);
  7741. }
  7742. extern __inline __m128
  7743. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7744. _mm_mask_unpackhi_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  7745. {
  7746. return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
  7747. (__v4sf) __B,
  7748. (__v4sf) __W,
  7749. (__mmask8) __U);
  7750. }
  7751. extern __inline __m128
  7752. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7753. _mm_maskz_unpackhi_ps (__mmask8 __U, __m128 __A, __m128 __B)
  7754. {
  7755. return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
  7756. (__v4sf) __B,
  7757. (__v4sf)
  7758. _mm_setzero_ps (),
  7759. (__mmask8) __U);
  7760. }
  7761. extern __inline __m128
  7762. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7763. _mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
  7764. {
  7765. return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
  7766. (__v4sf) __W,
  7767. (__mmask8) __U);
  7768. }
  7769. extern __inline __m128
  7770. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7771. _mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
  7772. {
  7773. return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
  7774. (__v4sf)
  7775. _mm_setzero_ps (),
  7776. (__mmask8) __U);
  7777. }
  7778. extern __inline __m256
  7779. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7780. _mm256_maskz_unpacklo_ps (__mmask8 __U, __m256 __A, __m256 __B)
  7781. {
  7782. return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
  7783. (__v8sf) __B,
  7784. (__v8sf)
  7785. _mm256_setzero_ps (),
  7786. (__mmask8) __U);
  7787. }
  7788. extern __inline __m256
  7789. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7790. _mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
  7791. {
  7792. return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
  7793. (__v8sf) __W,
  7794. (__mmask8) __U);
  7795. }
  7796. extern __inline __m256
  7797. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7798. _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
  7799. {
  7800. return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
  7801. (__v8sf)
  7802. _mm256_setzero_ps (),
  7803. (__mmask8) __U);
  7804. }
  7805. extern __inline __m128
  7806. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7807. _mm_mask_unpacklo_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  7808. {
  7809. return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
  7810. (__v4sf) __B,
  7811. (__v4sf) __W,
  7812. (__mmask8) __U);
  7813. }
  7814. extern __inline __m128
  7815. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7816. _mm_maskz_unpacklo_ps (__mmask8 __U, __m128 __A, __m128 __B)
  7817. {
  7818. return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
  7819. (__v4sf) __B,
  7820. (__v4sf)
  7821. _mm_setzero_ps (),
  7822. (__mmask8) __U);
  7823. }
  7824. extern __inline __m256i
  7825. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7826. _mm256_mask_sra_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  7827. __m128i __B)
  7828. {
  7829. return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
  7830. (__v4si) __B,
  7831. (__v8si) __W,
  7832. (__mmask8) __U);
  7833. }
  7834. extern __inline __m256i
  7835. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7836. _mm256_maskz_sra_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
  7837. {
  7838. return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
  7839. (__v4si) __B,
  7840. (__v8si)
  7841. _mm256_setzero_si256 (),
  7842. (__mmask8) __U);
  7843. }
  7844. extern __inline __m128i
  7845. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7846. _mm_mask_sra_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  7847. __m128i __B)
  7848. {
  7849. return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
  7850. (__v4si) __B,
  7851. (__v4si) __W,
  7852. (__mmask8) __U);
  7853. }
  7854. extern __inline __m128i
  7855. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7856. _mm_maskz_sra_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
  7857. {
  7858. return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
  7859. (__v4si) __B,
  7860. (__v4si)
  7861. _mm_setzero_si128 (),
  7862. (__mmask8) __U);
  7863. }
  7864. extern __inline __m256i
  7865. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7866. _mm256_sra_epi64 (__m256i __A, __m128i __B)
  7867. {
  7868. return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
  7869. (__v2di) __B,
  7870. (__v4di)
  7871. _mm256_setzero_si256 (),
  7872. (__mmask8) -1);
  7873. }
  7874. extern __inline __m256i
  7875. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7876. _mm256_mask_sra_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  7877. __m128i __B)
  7878. {
  7879. return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
  7880. (__v2di) __B,
  7881. (__v4di) __W,
  7882. (__mmask8) __U);
  7883. }
  7884. extern __inline __m256i
  7885. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7886. _mm256_maskz_sra_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
  7887. {
  7888. return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
  7889. (__v2di) __B,
  7890. (__v4di)
  7891. _mm256_setzero_si256 (),
  7892. (__mmask8) __U);
  7893. }
  7894. extern __inline __m128i
  7895. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7896. _mm_sra_epi64 (__m128i __A, __m128i __B)
  7897. {
  7898. return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
  7899. (__v2di) __B,
  7900. (__v2di)
  7901. _mm_setzero_si128 (),
  7902. (__mmask8) -1);
  7903. }
  7904. extern __inline __m128i
  7905. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7906. _mm_mask_sra_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
  7907. __m128i __B)
  7908. {
  7909. return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
  7910. (__v2di) __B,
  7911. (__v2di) __W,
  7912. (__mmask8) __U);
  7913. }
  7914. extern __inline __m128i
  7915. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7916. _mm_maskz_sra_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
  7917. {
  7918. return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
  7919. (__v2di) __B,
  7920. (__v2di)
  7921. _mm_setzero_si128 (),
  7922. (__mmask8) __U);
  7923. }
  7924. extern __inline __m128i
  7925. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7926. _mm_mask_sll_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  7927. __m128i __B)
  7928. {
  7929. return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
  7930. (__v4si) __B,
  7931. (__v4si) __W,
  7932. (__mmask8) __U);
  7933. }
  7934. extern __inline __m128i
  7935. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7936. _mm_maskz_sll_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
  7937. {
  7938. return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
  7939. (__v4si) __B,
  7940. (__v4si)
  7941. _mm_setzero_si128 (),
  7942. (__mmask8) __U);
  7943. }
  7944. extern __inline __m128i
  7945. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7946. _mm_mask_sll_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
  7947. __m128i __B)
  7948. {
  7949. return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
  7950. (__v2di) __B,
  7951. (__v2di) __W,
  7952. (__mmask8) __U);
  7953. }
  7954. extern __inline __m128i
  7955. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7956. _mm_maskz_sll_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
  7957. {
  7958. return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
  7959. (__v2di) __B,
  7960. (__v2di)
  7961. _mm_setzero_si128 (),
  7962. (__mmask8) __U);
  7963. }
  7964. extern __inline __m256i
  7965. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7966. _mm256_mask_sll_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  7967. __m128i __B)
  7968. {
  7969. return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
  7970. (__v4si) __B,
  7971. (__v8si) __W,
  7972. (__mmask8) __U);
  7973. }
  7974. extern __inline __m256i
  7975. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7976. _mm256_maskz_sll_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
  7977. {
  7978. return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
  7979. (__v4si) __B,
  7980. (__v8si)
  7981. _mm256_setzero_si256 (),
  7982. (__mmask8) __U);
  7983. }
  7984. extern __inline __m256i
  7985. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7986. _mm256_mask_sll_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  7987. __m128i __B)
  7988. {
  7989. return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
  7990. (__v2di) __B,
  7991. (__v4di) __W,
  7992. (__mmask8) __U);
  7993. }
  7994. extern __inline __m256i
  7995. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7996. _mm256_maskz_sll_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
  7997. {
  7998. return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
  7999. (__v2di) __B,
  8000. (__v4di)
  8001. _mm256_setzero_si256 (),
  8002. (__mmask8) __U);
  8003. }
  8004. extern __inline __m256
  8005. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8006. _mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X,
  8007. __m256 __Y)
  8008. {
  8009. return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
  8010. (__v8si) __X,
  8011. (__v8sf) __W,
  8012. (__mmask8) __U);
  8013. }
  8014. extern __inline __m256
  8015. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8016. _mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y)
  8017. {
  8018. return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
  8019. (__v8si) __X,
  8020. (__v8sf)
  8021. _mm256_setzero_ps (),
  8022. (__mmask8) __U);
  8023. }
  8024. extern __inline __m256d
  8025. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8026. _mm256_permutexvar_pd (__m256i __X, __m256d __Y)
  8027. {
  8028. return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
  8029. (__v4di) __X,
  8030. (__v4df)
  8031. _mm256_setzero_pd (),
  8032. (__mmask8) -1);
  8033. }
  8034. extern __inline __m256d
  8035. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8036. _mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
  8037. __m256d __Y)
  8038. {
  8039. return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
  8040. (__v4di) __X,
  8041. (__v4df) __W,
  8042. (__mmask8) __U);
  8043. }
  8044. extern __inline __m256d
  8045. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8046. _mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
  8047. {
  8048. return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
  8049. (__v4di) __X,
  8050. (__v4df)
  8051. _mm256_setzero_pd (),
  8052. (__mmask8) __U);
  8053. }
  8054. extern __inline __m256d
  8055. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8056. _mm256_mask_permutevar_pd (__m256d __W, __mmask8 __U, __m256d __A,
  8057. __m256i __C)
  8058. {
  8059. return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
  8060. (__v4di) __C,
  8061. (__v4df) __W,
  8062. (__mmask8)
  8063. __U);
  8064. }
  8065. extern __inline __m256d
  8066. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8067. _mm256_maskz_permutevar_pd (__mmask8 __U, __m256d __A, __m256i __C)
  8068. {
  8069. return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
  8070. (__v4di) __C,
  8071. (__v4df)
  8072. _mm256_setzero_pd (),
  8073. (__mmask8)
  8074. __U);
  8075. }
  8076. extern __inline __m256
  8077. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8078. _mm256_mask_permutevar_ps (__m256 __W, __mmask8 __U, __m256 __A,
  8079. __m256i __C)
  8080. {
  8081. return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
  8082. (__v8si) __C,
  8083. (__v8sf) __W,
  8084. (__mmask8) __U);
  8085. }
  8086. extern __inline __m256
  8087. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8088. _mm256_maskz_permutevar_ps (__mmask8 __U, __m256 __A, __m256i __C)
  8089. {
  8090. return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
  8091. (__v8si) __C,
  8092. (__v8sf)
  8093. _mm256_setzero_ps (),
  8094. (__mmask8) __U);
  8095. }
  8096. extern __inline __m128d
  8097. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8098. _mm_mask_permutevar_pd (__m128d __W, __mmask8 __U, __m128d __A,
  8099. __m128i __C)
  8100. {
  8101. return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
  8102. (__v2di) __C,
  8103. (__v2df) __W,
  8104. (__mmask8) __U);
  8105. }
  8106. extern __inline __m128d
  8107. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8108. _mm_maskz_permutevar_pd (__mmask8 __U, __m128d __A, __m128i __C)
  8109. {
  8110. return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
  8111. (__v2di) __C,
  8112. (__v2df)
  8113. _mm_setzero_pd (),
  8114. (__mmask8) __U);
  8115. }
  8116. extern __inline __m128
  8117. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8118. _mm_mask_permutevar_ps (__m128 __W, __mmask8 __U, __m128 __A,
  8119. __m128i __C)
  8120. {
  8121. return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
  8122. (__v4si) __C,
  8123. (__v4sf) __W,
  8124. (__mmask8) __U);
  8125. }
  8126. extern __inline __m128
  8127. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8128. _mm_maskz_permutevar_ps (__mmask8 __U, __m128 __A, __m128i __C)
  8129. {
  8130. return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
  8131. (__v4si) __C,
  8132. (__v4sf)
  8133. _mm_setzero_ps (),
  8134. (__mmask8) __U);
  8135. }
  8136. extern __inline __m256i
  8137. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8138. _mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
  8139. {
  8140. return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
  8141. (__v8si) __B,
  8142. (__v8si)
  8143. _mm256_setzero_si256 (),
  8144. __M);
  8145. }
  8146. extern __inline __m256i
  8147. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8148. _mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
  8149. {
  8150. return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
  8151. (__v4di) __X,
  8152. (__v4di)
  8153. _mm256_setzero_si256 (),
  8154. __M);
  8155. }
  8156. extern __inline __m256i
  8157. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8158. _mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
  8159. __m256i __B)
  8160. {
  8161. return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
  8162. (__v8si) __B,
  8163. (__v8si) __W, __M);
  8164. }
  8165. extern __inline __m128i
  8166. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8167. _mm_maskz_mullo_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
  8168. {
  8169. return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
  8170. (__v4si) __B,
  8171. (__v4si)
  8172. _mm_setzero_si128 (),
  8173. __M);
  8174. }
  8175. extern __inline __m128i
  8176. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8177. _mm_mask_mullo_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
  8178. __m128i __B)
  8179. {
  8180. return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
  8181. (__v4si) __B,
  8182. (__v4si) __W, __M);
  8183. }
  8184. extern __inline __m256i
  8185. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8186. _mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
  8187. __m256i __Y)
  8188. {
  8189. return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
  8190. (__v8si) __Y,
  8191. (__v4di) __W, __M);
  8192. }
  8193. extern __inline __m256i
  8194. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8195. _mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
  8196. {
  8197. return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
  8198. (__v8si) __Y,
  8199. (__v4di)
  8200. _mm256_setzero_si256 (),
  8201. __M);
  8202. }
  8203. extern __inline __m128i
  8204. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8205. _mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X,
  8206. __m128i __Y)
  8207. {
  8208. return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
  8209. (__v4si) __Y,
  8210. (__v2di) __W, __M);
  8211. }
  8212. extern __inline __m128i
  8213. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8214. _mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y)
  8215. {
  8216. return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
  8217. (__v4si) __Y,
  8218. (__v2di)
  8219. _mm_setzero_si128 (),
  8220. __M);
  8221. }
  8222. extern __inline __m256i
  8223. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8224. _mm256_permutexvar_epi64 (__m256i __X, __m256i __Y)
  8225. {
  8226. return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
  8227. (__v4di) __X,
  8228. (__v4di)
  8229. _mm256_setzero_si256 (),
  8230. (__mmask8) -1);
  8231. }
  8232. extern __inline __m256i
  8233. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8234. _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
  8235. __m256i __Y)
  8236. {
  8237. return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
  8238. (__v4di) __X,
  8239. (__v4di) __W,
  8240. __M);
  8241. }
  8242. extern __inline __m256i
  8243. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8244. _mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X,
  8245. __m256i __Y)
  8246. {
  8247. return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
  8248. (__v8si) __Y,
  8249. (__v4di) __W, __M);
  8250. }
  8251. extern __inline __m256i
  8252. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8253. _mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
  8254. {
  8255. return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
  8256. (__v8si) __X,
  8257. (__v8si)
  8258. _mm256_setzero_si256 (),
  8259. __M);
  8260. }
  8261. extern __inline __m256i
  8262. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8263. _mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y)
  8264. {
  8265. return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
  8266. (__v8si) __Y,
  8267. (__v4di)
  8268. _mm256_setzero_si256 (),
  8269. __M);
  8270. }
  8271. extern __inline __m128i
  8272. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8273. _mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X,
  8274. __m128i __Y)
  8275. {
  8276. return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
  8277. (__v4si) __Y,
  8278. (__v2di) __W, __M);
  8279. }
  8280. extern __inline __m128i
  8281. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8282. _mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y)
  8283. {
  8284. return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
  8285. (__v4si) __Y,
  8286. (__v2di)
  8287. _mm_setzero_si128 (),
  8288. __M);
  8289. }
  8290. extern __inline __m256i
  8291. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8292. _mm256_permutexvar_epi32 (__m256i __X, __m256i __Y)
  8293. {
  8294. return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
  8295. (__v8si) __X,
  8296. (__v8si)
  8297. _mm256_setzero_si256 (),
  8298. (__mmask8) -1);
  8299. }
  8300. extern __inline __m256i
  8301. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8302. _mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
  8303. __m256i __Y)
  8304. {
  8305. return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
  8306. (__v8si) __X,
  8307. (__v8si) __W,
  8308. __M);
  8309. }
  8310. extern __inline __mmask8
  8311. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8312. _mm256_mask_cmpneq_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
  8313. {
  8314. return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
  8315. (__v8si) __Y, 4,
  8316. (__mmask8) __M);
  8317. }
  8318. extern __inline __mmask8
  8319. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8320. _mm256_cmpneq_epu32_mask (__m256i __X, __m256i __Y)
  8321. {
  8322. return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
  8323. (__v8si) __Y, 4,
  8324. (__mmask8) -1);
  8325. }
  8326. extern __inline __mmask8
  8327. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8328. _mm256_mask_cmplt_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
  8329. {
  8330. return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
  8331. (__v8si) __Y, 1,
  8332. (__mmask8) __M);
  8333. }
  8334. extern __inline __mmask8
  8335. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8336. _mm256_cmplt_epu32_mask (__m256i __X, __m256i __Y)
  8337. {
  8338. return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
  8339. (__v8si) __Y, 1,
  8340. (__mmask8) -1);
  8341. }
  8342. extern __inline __mmask8
  8343. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8344. _mm256_mask_cmpge_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
  8345. {
  8346. return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
  8347. (__v8si) __Y, 5,
  8348. (__mmask8) __M);
  8349. }
  8350. extern __inline __mmask8
  8351. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8352. _mm256_cmpge_epu32_mask (__m256i __X, __m256i __Y)
  8353. {
  8354. return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
  8355. (__v8si) __Y, 5,
  8356. (__mmask8) -1);
  8357. }
  8358. extern __inline __mmask8
  8359. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8360. _mm256_mask_cmple_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
  8361. {
  8362. return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
  8363. (__v8si) __Y, 2,
  8364. (__mmask8) __M);
  8365. }
  8366. extern __inline __mmask8
  8367. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8368. _mm256_cmple_epu32_mask (__m256i __X, __m256i __Y)
  8369. {
  8370. return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
  8371. (__v8si) __Y, 2,
  8372. (__mmask8) -1);
  8373. }
  8374. extern __inline __mmask8
  8375. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8376. _mm256_mask_cmpneq_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
  8377. {
  8378. return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
  8379. (__v4di) __Y, 4,
  8380. (__mmask8) __M);
  8381. }
  8382. extern __inline __mmask8
  8383. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8384. _mm256_cmpneq_epu64_mask (__m256i __X, __m256i __Y)
  8385. {
  8386. return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
  8387. (__v4di) __Y, 4,
  8388. (__mmask8) -1);
  8389. }
  8390. extern __inline __mmask8
  8391. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8392. _mm256_mask_cmplt_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
  8393. {
  8394. return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
  8395. (__v4di) __Y, 1,
  8396. (__mmask8) __M);
  8397. }
  8398. extern __inline __mmask8
  8399. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8400. _mm256_cmplt_epu64_mask (__m256i __X, __m256i __Y)
  8401. {
  8402. return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
  8403. (__v4di) __Y, 1,
  8404. (__mmask8) -1);
  8405. }
  8406. extern __inline __mmask8
  8407. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8408. _mm256_mask_cmpge_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
  8409. {
  8410. return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
  8411. (__v4di) __Y, 5,
  8412. (__mmask8) __M);
  8413. }
  8414. extern __inline __mmask8
  8415. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8416. _mm256_cmpge_epu64_mask (__m256i __X, __m256i __Y)
  8417. {
  8418. return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
  8419. (__v4di) __Y, 5,
  8420. (__mmask8) -1);
  8421. }
  8422. extern __inline __mmask8
  8423. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8424. _mm256_mask_cmple_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
  8425. {
  8426. return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
  8427. (__v4di) __Y, 2,
  8428. (__mmask8) __M);
  8429. }
  8430. extern __inline __mmask8
  8431. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8432. _mm256_cmple_epu64_mask (__m256i __X, __m256i __Y)
  8433. {
  8434. return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
  8435. (__v4di) __Y, 2,
  8436. (__mmask8) -1);
  8437. }
  8438. extern __inline __mmask8
  8439. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8440. _mm256_mask_cmpneq_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
  8441. {
  8442. return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
  8443. (__v8si) __Y, 4,
  8444. (__mmask8) __M);
  8445. }
  8446. extern __inline __mmask8
  8447. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8448. _mm256_cmpneq_epi32_mask (__m256i __X, __m256i __Y)
  8449. {
  8450. return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
  8451. (__v8si) __Y, 4,
  8452. (__mmask8) -1);
  8453. }
  8454. extern __inline __mmask8
  8455. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8456. _mm256_mask_cmplt_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
  8457. {
  8458. return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
  8459. (__v8si) __Y, 1,
  8460. (__mmask8) __M);
  8461. }
  8462. extern __inline __mmask8
  8463. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8464. _mm256_cmplt_epi32_mask (__m256i __X, __m256i __Y)
  8465. {
  8466. return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
  8467. (__v8si) __Y, 1,
  8468. (__mmask8) -1);
  8469. }
  8470. extern __inline __mmask8
  8471. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8472. _mm256_mask_cmpge_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
  8473. {
  8474. return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
  8475. (__v8si) __Y, 5,
  8476. (__mmask8) __M);
  8477. }
  8478. extern __inline __mmask8
  8479. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8480. _mm256_cmpge_epi32_mask (__m256i __X, __m256i __Y)
  8481. {
  8482. return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
  8483. (__v8si) __Y, 5,
  8484. (__mmask8) -1);
  8485. }
  8486. extern __inline __mmask8
  8487. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8488. _mm256_mask_cmple_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
  8489. {
  8490. return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
  8491. (__v8si) __Y, 2,
  8492. (__mmask8) __M);
  8493. }
  8494. extern __inline __mmask8
  8495. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8496. _mm256_cmple_epi32_mask (__m256i __X, __m256i __Y)
  8497. {
  8498. return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
  8499. (__v8si) __Y, 2,
  8500. (__mmask8) -1);
  8501. }
  8502. extern __inline __mmask8
  8503. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8504. _mm256_mask_cmpneq_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
  8505. {
  8506. return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
  8507. (__v4di) __Y, 4,
  8508. (__mmask8) __M);
  8509. }
  8510. extern __inline __mmask8
  8511. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8512. _mm256_cmpneq_epi64_mask (__m256i __X, __m256i __Y)
  8513. {
  8514. return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
  8515. (__v4di) __Y, 4,
  8516. (__mmask8) -1);
  8517. }
  8518. extern __inline __mmask8
  8519. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8520. _mm256_mask_cmplt_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
  8521. {
  8522. return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
  8523. (__v4di) __Y, 1,
  8524. (__mmask8) __M);
  8525. }
  8526. extern __inline __mmask8
  8527. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8528. _mm256_cmplt_epi64_mask (__m256i __X, __m256i __Y)
  8529. {
  8530. return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
  8531. (__v4di) __Y, 1,
  8532. (__mmask8) -1);
  8533. }
  8534. extern __inline __mmask8
  8535. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8536. _mm256_mask_cmpge_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
  8537. {
  8538. return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
  8539. (__v4di) __Y, 5,
  8540. (__mmask8) __M);
  8541. }
  8542. extern __inline __mmask8
  8543. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8544. _mm256_cmpge_epi64_mask (__m256i __X, __m256i __Y)
  8545. {
  8546. return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
  8547. (__v4di) __Y, 5,
  8548. (__mmask8) -1);
  8549. }
  8550. extern __inline __mmask8
  8551. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8552. _mm256_mask_cmple_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
  8553. {
  8554. return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
  8555. (__v4di) __Y, 2,
  8556. (__mmask8) __M);
  8557. }
  8558. extern __inline __mmask8
  8559. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8560. _mm256_cmple_epi64_mask (__m256i __X, __m256i __Y)
  8561. {
  8562. return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
  8563. (__v4di) __Y, 2,
  8564. (__mmask8) -1);
  8565. }
  8566. extern __inline __mmask8
  8567. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8568. _mm_mask_cmpneq_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
  8569. {
  8570. return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
  8571. (__v4si) __Y, 4,
  8572. (__mmask8) __M);
  8573. }
  8574. extern __inline __mmask8
  8575. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8576. _mm_cmpneq_epu32_mask (__m128i __X, __m128i __Y)
  8577. {
  8578. return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
  8579. (__v4si) __Y, 4,
  8580. (__mmask8) -1);
  8581. }
  8582. extern __inline __mmask8
  8583. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8584. _mm_mask_cmplt_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
  8585. {
  8586. return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
  8587. (__v4si) __Y, 1,
  8588. (__mmask8) __M);
  8589. }
  8590. extern __inline __mmask8
  8591. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8592. _mm_cmplt_epu32_mask (__m128i __X, __m128i __Y)
  8593. {
  8594. return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
  8595. (__v4si) __Y, 1,
  8596. (__mmask8) -1);
  8597. }
  8598. extern __inline __mmask8
  8599. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8600. _mm_mask_cmpge_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
  8601. {
  8602. return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
  8603. (__v4si) __Y, 5,
  8604. (__mmask8) __M);
  8605. }
  8606. extern __inline __mmask8
  8607. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8608. _mm_cmpge_epu32_mask (__m128i __X, __m128i __Y)
  8609. {
  8610. return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
  8611. (__v4si) __Y, 5,
  8612. (__mmask8) -1);
  8613. }
  8614. extern __inline __mmask8
  8615. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8616. _mm_mask_cmple_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
  8617. {
  8618. return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
  8619. (__v4si) __Y, 2,
  8620. (__mmask8) __M);
  8621. }
  8622. extern __inline __mmask8
  8623. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8624. _mm_cmple_epu32_mask (__m128i __X, __m128i __Y)
  8625. {
  8626. return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
  8627. (__v4si) __Y, 2,
  8628. (__mmask8) -1);
  8629. }
  8630. extern __inline __mmask8
  8631. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8632. _mm_mask_cmpneq_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
  8633. {
  8634. return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
  8635. (__v2di) __Y, 4,
  8636. (__mmask8) __M);
  8637. }
  8638. extern __inline __mmask8
  8639. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8640. _mm_cmpneq_epu64_mask (__m128i __X, __m128i __Y)
  8641. {
  8642. return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
  8643. (__v2di) __Y, 4,
  8644. (__mmask8) -1);
  8645. }
  8646. extern __inline __mmask8
  8647. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8648. _mm_mask_cmplt_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
  8649. {
  8650. return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
  8651. (__v2di) __Y, 1,
  8652. (__mmask8) __M);
  8653. }
  8654. extern __inline __mmask8
  8655. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8656. _mm_cmplt_epu64_mask (__m128i __X, __m128i __Y)
  8657. {
  8658. return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
  8659. (__v2di) __Y, 1,
  8660. (__mmask8) -1);
  8661. }
  8662. extern __inline __mmask8
  8663. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8664. _mm_mask_cmpge_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
  8665. {
  8666. return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
  8667. (__v2di) __Y, 5,
  8668. (__mmask8) __M);
  8669. }
  8670. extern __inline __mmask8
  8671. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8672. _mm_cmpge_epu64_mask (__m128i __X, __m128i __Y)
  8673. {
  8674. return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
  8675. (__v2di) __Y, 5,
  8676. (__mmask8) -1);
  8677. }
  8678. extern __inline __mmask8
  8679. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8680. _mm_mask_cmple_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
  8681. {
  8682. return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
  8683. (__v2di) __Y, 2,
  8684. (__mmask8) __M);
  8685. }
  8686. extern __inline __mmask8
  8687. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8688. _mm_cmple_epu64_mask (__m128i __X, __m128i __Y)
  8689. {
  8690. return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
  8691. (__v2di) __Y, 2,
  8692. (__mmask8) -1);
  8693. }
  8694. extern __inline __mmask8
  8695. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8696. _mm_mask_cmpneq_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
  8697. {
  8698. return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
  8699. (__v4si) __Y, 4,
  8700. (__mmask8) __M);
  8701. }
  8702. extern __inline __mmask8
  8703. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8704. _mm_cmpneq_epi32_mask (__m128i __X, __m128i __Y)
  8705. {
  8706. return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
  8707. (__v4si) __Y, 4,
  8708. (__mmask8) -1);
  8709. }
  8710. extern __inline __mmask8
  8711. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8712. _mm_mask_cmplt_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
  8713. {
  8714. return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
  8715. (__v4si) __Y, 1,
  8716. (__mmask8) __M);
  8717. }
  8718. extern __inline __mmask8
  8719. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8720. _mm_cmplt_epi32_mask (__m128i __X, __m128i __Y)
  8721. {
  8722. return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
  8723. (__v4si) __Y, 1,
  8724. (__mmask8) -1);
  8725. }
  8726. extern __inline __mmask8
  8727. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8728. _mm_mask_cmpge_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
  8729. {
  8730. return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
  8731. (__v4si) __Y, 5,
  8732. (__mmask8) __M);
  8733. }
  8734. extern __inline __mmask8
  8735. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8736. _mm_cmpge_epi32_mask (__m128i __X, __m128i __Y)
  8737. {
  8738. return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
  8739. (__v4si) __Y, 5,
  8740. (__mmask8) -1);
  8741. }
  8742. extern __inline __mmask8
  8743. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8744. _mm_mask_cmple_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
  8745. {
  8746. return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
  8747. (__v4si) __Y, 2,
  8748. (__mmask8) __M);
  8749. }
  8750. extern __inline __mmask8
  8751. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8752. _mm_cmple_epi32_mask (__m128i __X, __m128i __Y)
  8753. {
  8754. return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
  8755. (__v4si) __Y, 2,
  8756. (__mmask8) -1);
  8757. }
  8758. extern __inline __mmask8
  8759. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8760. _mm_mask_cmpneq_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
  8761. {
  8762. return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
  8763. (__v2di) __Y, 4,
  8764. (__mmask8) __M);
  8765. }
  8766. extern __inline __mmask8
  8767. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8768. _mm_cmpneq_epi64_mask (__m128i __X, __m128i __Y)
  8769. {
  8770. return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
  8771. (__v2di) __Y, 4,
  8772. (__mmask8) -1);
  8773. }
  8774. extern __inline __mmask8
  8775. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8776. _mm_mask_cmplt_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
  8777. {
  8778. return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
  8779. (__v2di) __Y, 1,
  8780. (__mmask8) __M);
  8781. }
  8782. extern __inline __mmask8
  8783. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8784. _mm_cmplt_epi64_mask (__m128i __X, __m128i __Y)
  8785. {
  8786. return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
  8787. (__v2di) __Y, 1,
  8788. (__mmask8) -1);
  8789. }
  8790. extern __inline __mmask8
  8791. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8792. _mm_mask_cmpge_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
  8793. {
  8794. return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
  8795. (__v2di) __Y, 5,
  8796. (__mmask8) __M);
  8797. }
  8798. extern __inline __mmask8
  8799. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8800. _mm_cmpge_epi64_mask (__m128i __X, __m128i __Y)
  8801. {
  8802. return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
  8803. (__v2di) __Y, 5,
  8804. (__mmask8) -1);
  8805. }
  8806. extern __inline __mmask8
  8807. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8808. _mm_mask_cmple_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
  8809. {
  8810. return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
  8811. (__v2di) __Y, 2,
  8812. (__mmask8) __M);
  8813. }
  8814. extern __inline __mmask8
  8815. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8816. _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
  8817. {
  8818. return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
  8819. (__v2di) __Y, 2,
  8820. (__mmask8) -1);
  8821. }
  8822. #ifdef __OPTIMIZE__
  8823. extern __inline __m256i
  8824. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8825. _mm256_permutex_epi64 (__m256i __X, const int __I)
  8826. {
  8827. return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
  8828. __I,
  8829. (__v4di)
  8830. _mm256_setzero_si256(),
  8831. (__mmask8) -1);
  8832. }
  8833. extern __inline __m256i
  8834. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8835. _mm256_mask_permutex_epi64 (__m256i __W, __mmask8 __M,
  8836. __m256i __X, const int __I)
  8837. {
  8838. return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
  8839. __I,
  8840. (__v4di) __W,
  8841. (__mmask8) __M);
  8842. }
  8843. extern __inline __m256i
  8844. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8845. _mm256_maskz_permutex_epi64 (__mmask8 __M, __m256i __X, const int __I)
  8846. {
  8847. return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
  8848. __I,
  8849. (__v4di)
  8850. _mm256_setzero_si256 (),
  8851. (__mmask8) __M);
  8852. }
  8853. extern __inline __m256d
  8854. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8855. _mm256_mask_shuffle_pd (__m256d __W, __mmask8 __U, __m256d __A,
  8856. __m256d __B, const int __imm)
  8857. {
  8858. return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
  8859. (__v4df) __B, __imm,
  8860. (__v4df) __W,
  8861. (__mmask8) __U);
  8862. }
  8863. extern __inline __m256d
  8864. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8865. _mm256_maskz_shuffle_pd (__mmask8 __U, __m256d __A, __m256d __B,
  8866. const int __imm)
  8867. {
  8868. return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
  8869. (__v4df) __B, __imm,
  8870. (__v4df)
  8871. _mm256_setzero_pd (),
  8872. (__mmask8) __U);
  8873. }
  8874. extern __inline __m128d
  8875. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8876. _mm_mask_shuffle_pd (__m128d __W, __mmask8 __U, __m128d __A,
  8877. __m128d __B, const int __imm)
  8878. {
  8879. return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
  8880. (__v2df) __B, __imm,
  8881. (__v2df) __W,
  8882. (__mmask8) __U);
  8883. }
  8884. extern __inline __m128d
  8885. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8886. _mm_maskz_shuffle_pd (__mmask8 __U, __m128d __A, __m128d __B,
  8887. const int __imm)
  8888. {
  8889. return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
  8890. (__v2df) __B, __imm,
  8891. (__v2df)
  8892. _mm_setzero_pd (),
  8893. (__mmask8) __U);
  8894. }
  8895. extern __inline __m256
  8896. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8897. _mm256_mask_shuffle_ps (__m256 __W, __mmask8 __U, __m256 __A,
  8898. __m256 __B, const int __imm)
  8899. {
  8900. return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
  8901. (__v8sf) __B, __imm,
  8902. (__v8sf) __W,
  8903. (__mmask8) __U);
  8904. }
  8905. extern __inline __m256
  8906. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8907. _mm256_maskz_shuffle_ps (__mmask8 __U, __m256 __A, __m256 __B,
  8908. const int __imm)
  8909. {
  8910. return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
  8911. (__v8sf) __B, __imm,
  8912. (__v8sf)
  8913. _mm256_setzero_ps (),
  8914. (__mmask8) __U);
  8915. }
  8916. extern __inline __m128
  8917. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8918. _mm_mask_shuffle_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
  8919. const int __imm)
  8920. {
  8921. return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
  8922. (__v4sf) __B, __imm,
  8923. (__v4sf) __W,
  8924. (__mmask8) __U);
  8925. }
  8926. extern __inline __m128
  8927. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8928. _mm_maskz_shuffle_ps (__mmask8 __U, __m128 __A, __m128 __B,
  8929. const int __imm)
  8930. {
  8931. return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
  8932. (__v4sf) __B, __imm,
  8933. (__v4sf)
  8934. _mm_setzero_ps (),
  8935. (__mmask8) __U);
  8936. }
  8937. extern __inline __m256i
  8938. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8939. _mm256_inserti32x4 (__m256i __A, __m128i __B, const int __imm)
  8940. {
  8941. return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
  8942. (__v4si) __B,
  8943. __imm,
  8944. (__v8si)
  8945. _mm256_setzero_si256 (),
  8946. (__mmask8) -1);
  8947. }
  8948. extern __inline __m256i
  8949. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8950. _mm256_mask_inserti32x4 (__m256i __W, __mmask8 __U, __m256i __A,
  8951. __m128i __B, const int __imm)
  8952. {
  8953. return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
  8954. (__v4si) __B,
  8955. __imm,
  8956. (__v8si) __W,
  8957. (__mmask8)
  8958. __U);
  8959. }
  8960. extern __inline __m256i
  8961. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8962. _mm256_maskz_inserti32x4 (__mmask8 __U, __m256i __A, __m128i __B,
  8963. const int __imm)
  8964. {
  8965. return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
  8966. (__v4si) __B,
  8967. __imm,
  8968. (__v8si)
  8969. _mm256_setzero_si256 (),
  8970. (__mmask8)
  8971. __U);
  8972. }
  8973. extern __inline __m256
  8974. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8975. _mm256_insertf32x4 (__m256 __A, __m128 __B, const int __imm)
  8976. {
  8977. return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
  8978. (__v4sf) __B,
  8979. __imm,
  8980. (__v8sf)
  8981. _mm256_setzero_ps (),
  8982. (__mmask8) -1);
  8983. }
  8984. extern __inline __m256
  8985. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8986. _mm256_mask_insertf32x4 (__m256 __W, __mmask8 __U, __m256 __A,
  8987. __m128 __B, const int __imm)
  8988. {
  8989. return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
  8990. (__v4sf) __B,
  8991. __imm,
  8992. (__v8sf) __W,
  8993. (__mmask8) __U);
  8994. }
  8995. extern __inline __m256
  8996. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8997. _mm256_maskz_insertf32x4 (__mmask8 __U, __m256 __A, __m128 __B,
  8998. const int __imm)
  8999. {
  9000. return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
  9001. (__v4sf) __B,
  9002. __imm,
  9003. (__v8sf)
  9004. _mm256_setzero_ps (),
  9005. (__mmask8) __U);
  9006. }
  9007. extern __inline __m128i
  9008. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9009. _mm256_extracti32x4_epi32 (__m256i __A, const int __imm)
  9010. {
  9011. return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
  9012. __imm,
  9013. (__v4si)
  9014. _mm_setzero_si128 (),
  9015. (__mmask8) -1);
  9016. }
  9017. extern __inline __m128i
  9018. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9019. _mm256_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m256i __A,
  9020. const int __imm)
  9021. {
  9022. return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
  9023. __imm,
  9024. (__v4si) __W,
  9025. (__mmask8)
  9026. __U);
  9027. }
  9028. extern __inline __m128i
  9029. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9030. _mm256_maskz_extracti32x4_epi32 (__mmask8 __U, __m256i __A,
  9031. const int __imm)
  9032. {
  9033. return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
  9034. __imm,
  9035. (__v4si)
  9036. _mm_setzero_si128 (),
  9037. (__mmask8)
  9038. __U);
  9039. }
  9040. extern __inline __m128
  9041. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9042. _mm256_extractf32x4_ps (__m256 __A, const int __imm)
  9043. {
  9044. return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
  9045. __imm,
  9046. (__v4sf)
  9047. _mm_setzero_ps (),
  9048. (__mmask8) -1);
  9049. }
  9050. extern __inline __m128
  9051. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9052. _mm256_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m256 __A,
  9053. const int __imm)
  9054. {
  9055. return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
  9056. __imm,
  9057. (__v4sf) __W,
  9058. (__mmask8)
  9059. __U);
  9060. }
  9061. extern __inline __m128
  9062. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9063. _mm256_maskz_extractf32x4_ps (__mmask8 __U, __m256 __A,
  9064. const int __imm)
  9065. {
  9066. return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
  9067. __imm,
  9068. (__v4sf)
  9069. _mm_setzero_ps (),
  9070. (__mmask8)
  9071. __U);
  9072. }
  9073. extern __inline __m256i
  9074. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9075. _mm256_shuffle_i64x2 (__m256i __A, __m256i __B, const int __imm)
  9076. {
  9077. return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
  9078. (__v4di) __B,
  9079. __imm,
  9080. (__v4di)
  9081. _mm256_setzero_si256 (),
  9082. (__mmask8) -1);
  9083. }
  9084. extern __inline __m256i
  9085. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9086. _mm256_mask_shuffle_i64x2 (__m256i __W, __mmask8 __U, __m256i __A,
  9087. __m256i __B, const int __imm)
  9088. {
  9089. return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
  9090. (__v4di) __B,
  9091. __imm,
  9092. (__v4di) __W,
  9093. (__mmask8) __U);
  9094. }
  9095. extern __inline __m256i
  9096. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9097. _mm256_maskz_shuffle_i64x2 (__mmask8 __U, __m256i __A, __m256i __B,
  9098. const int __imm)
  9099. {
  9100. return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
  9101. (__v4di) __B,
  9102. __imm,
  9103. (__v4di)
  9104. _mm256_setzero_si256 (),
  9105. (__mmask8) __U);
  9106. }
  9107. extern __inline __m256i
  9108. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9109. _mm256_shuffle_i32x4 (__m256i __A, __m256i __B, const int __imm)
  9110. {
  9111. return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
  9112. (__v8si) __B,
  9113. __imm,
  9114. (__v8si)
  9115. _mm256_setzero_si256 (),
  9116. (__mmask8) -1);
  9117. }
  9118. extern __inline __m256i
  9119. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9120. _mm256_mask_shuffle_i32x4 (__m256i __W, __mmask8 __U, __m256i __A,
  9121. __m256i __B, const int __imm)
  9122. {
  9123. return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
  9124. (__v8si) __B,
  9125. __imm,
  9126. (__v8si) __W,
  9127. (__mmask8) __U);
  9128. }
  9129. extern __inline __m256i
  9130. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9131. _mm256_maskz_shuffle_i32x4 (__mmask8 __U, __m256i __A, __m256i __B,
  9132. const int __imm)
  9133. {
  9134. return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
  9135. (__v8si) __B,
  9136. __imm,
  9137. (__v8si)
  9138. _mm256_setzero_si256 (),
  9139. (__mmask8) __U);
  9140. }
  9141. extern __inline __m256d
  9142. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9143. _mm256_shuffle_f64x2 (__m256d __A, __m256d __B, const int __imm)
  9144. {
  9145. return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
  9146. (__v4df) __B,
  9147. __imm,
  9148. (__v4df)
  9149. _mm256_setzero_pd (),
  9150. (__mmask8) -1);
  9151. }
  9152. extern __inline __m256d
  9153. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9154. _mm256_mask_shuffle_f64x2 (__m256d __W, __mmask8 __U, __m256d __A,
  9155. __m256d __B, const int __imm)
  9156. {
  9157. return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
  9158. (__v4df) __B,
  9159. __imm,
  9160. (__v4df) __W,
  9161. (__mmask8) __U);
  9162. }
  9163. extern __inline __m256d
  9164. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9165. _mm256_maskz_shuffle_f64x2 (__mmask8 __U, __m256d __A, __m256d __B,
  9166. const int __imm)
  9167. {
  9168. return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
  9169. (__v4df) __B,
  9170. __imm,
  9171. (__v4df)
  9172. _mm256_setzero_pd (),
  9173. (__mmask8) __U);
  9174. }
  9175. extern __inline __m256
  9176. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9177. _mm256_shuffle_f32x4 (__m256 __A, __m256 __B, const int __imm)
  9178. {
  9179. return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
  9180. (__v8sf) __B,
  9181. __imm,
  9182. (__v8sf)
  9183. _mm256_setzero_ps (),
  9184. (__mmask8) -1);
  9185. }
  9186. extern __inline __m256
  9187. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9188. _mm256_mask_shuffle_f32x4 (__m256 __W, __mmask8 __U, __m256 __A,
  9189. __m256 __B, const int __imm)
  9190. {
  9191. return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
  9192. (__v8sf) __B,
  9193. __imm,
  9194. (__v8sf) __W,
  9195. (__mmask8) __U);
  9196. }
  9197. extern __inline __m256
  9198. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9199. _mm256_maskz_shuffle_f32x4 (__mmask8 __U, __m256 __A, __m256 __B,
  9200. const int __imm)
  9201. {
  9202. return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
  9203. (__v8sf) __B,
  9204. __imm,
  9205. (__v8sf)
  9206. _mm256_setzero_ps (),
  9207. (__mmask8) __U);
  9208. }
  9209. extern __inline __m256d
  9210. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9211. _mm256_fixupimm_pd (__m256d __A, __m256d __B, __m256i __C,
  9212. const int __imm)
  9213. {
  9214. return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A,
  9215. (__v4df) __B,
  9216. (__v4di) __C,
  9217. __imm,
  9218. (__mmask8) -1);
  9219. }
  9220. extern __inline __m256d
  9221. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9222. _mm256_mask_fixupimm_pd (__m256d __A, __mmask8 __U, __m256d __B,
  9223. __m256i __C, const int __imm)
  9224. {
  9225. return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A,
  9226. (__v4df) __B,
  9227. (__v4di) __C,
  9228. __imm,
  9229. (__mmask8) __U);
  9230. }
  9231. extern __inline __m256d
  9232. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9233. _mm256_maskz_fixupimm_pd (__mmask8 __U, __m256d __A, __m256d __B,
  9234. __m256i __C, const int __imm)
  9235. {
  9236. return (__m256d) __builtin_ia32_fixupimmpd256_maskz ((__v4df) __A,
  9237. (__v4df) __B,
  9238. (__v4di) __C,
  9239. __imm,
  9240. (__mmask8) __U);
  9241. }
  9242. extern __inline __m256
  9243. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9244. _mm256_fixupimm_ps (__m256 __A, __m256 __B, __m256i __C,
  9245. const int __imm)
  9246. {
  9247. return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A,
  9248. (__v8sf) __B,
  9249. (__v8si) __C,
  9250. __imm,
  9251. (__mmask8) -1);
  9252. }
  9253. extern __inline __m256
  9254. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9255. _mm256_mask_fixupimm_ps (__m256 __A, __mmask8 __U, __m256 __B,
  9256. __m256i __C, const int __imm)
  9257. {
  9258. return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A,
  9259. (__v8sf) __B,
  9260. (__v8si) __C,
  9261. __imm,
  9262. (__mmask8) __U);
  9263. }
  9264. extern __inline __m256
  9265. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9266. _mm256_maskz_fixupimm_ps (__mmask8 __U, __m256 __A, __m256 __B,
  9267. __m256i __C, const int __imm)
  9268. {
  9269. return (__m256) __builtin_ia32_fixupimmps256_maskz ((__v8sf) __A,
  9270. (__v8sf) __B,
  9271. (__v8si) __C,
  9272. __imm,
  9273. (__mmask8) __U);
  9274. }
  9275. extern __inline __m128d
  9276. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9277. _mm_fixupimm_pd (__m128d __A, __m128d __B, __m128i __C,
  9278. const int __imm)
  9279. {
  9280. return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A,
  9281. (__v2df) __B,
  9282. (__v2di) __C,
  9283. __imm,
  9284. (__mmask8) -1);
  9285. }
  9286. extern __inline __m128d
  9287. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9288. _mm_mask_fixupimm_pd (__m128d __A, __mmask8 __U, __m128d __B,
  9289. __m128i __C, const int __imm)
  9290. {
  9291. return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A,
  9292. (__v2df) __B,
  9293. (__v2di) __C,
  9294. __imm,
  9295. (__mmask8) __U);
  9296. }
  9297. extern __inline __m128d
  9298. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9299. _mm_maskz_fixupimm_pd (__mmask8 __U, __m128d __A, __m128d __B,
  9300. __m128i __C, const int __imm)
  9301. {
  9302. return (__m128d) __builtin_ia32_fixupimmpd128_maskz ((__v2df) __A,
  9303. (__v2df) __B,
  9304. (__v2di) __C,
  9305. __imm,
  9306. (__mmask8) __U);
  9307. }
  9308. extern __inline __m128
  9309. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9310. _mm_fixupimm_ps (__m128 __A, __m128 __B, __m128i __C, const int __imm)
  9311. {
  9312. return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A,
  9313. (__v4sf) __B,
  9314. (__v4si) __C,
  9315. __imm,
  9316. (__mmask8) -1);
  9317. }
  9318. extern __inline __m128
  9319. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9320. _mm_mask_fixupimm_ps (__m128 __A, __mmask8 __U, __m128 __B,
  9321. __m128i __C, const int __imm)
  9322. {
  9323. return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A,
  9324. (__v4sf) __B,
  9325. (__v4si) __C,
  9326. __imm,
  9327. (__mmask8) __U);
  9328. }
  9329. extern __inline __m128
  9330. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9331. _mm_maskz_fixupimm_ps (__mmask8 __U, __m128 __A, __m128 __B,
  9332. __m128i __C, const int __imm)
  9333. {
  9334. return (__m128) __builtin_ia32_fixupimmps128_maskz ((__v4sf) __A,
  9335. (__v4sf) __B,
  9336. (__v4si) __C,
  9337. __imm,
  9338. (__mmask8) __U);
  9339. }
  9340. extern __inline __m256i
  9341. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9342. _mm256_mask_srli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  9343. const int __imm)
  9344. {
  9345. return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
  9346. (__v8si) __W,
  9347. (__mmask8) __U);
  9348. }
  9349. extern __inline __m256i
  9350. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9351. _mm256_maskz_srli_epi32 (__mmask8 __U, __m256i __A, const int __imm)
  9352. {
  9353. return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
  9354. (__v8si)
  9355. _mm256_setzero_si256 (),
  9356. (__mmask8) __U);
  9357. }
  9358. extern __inline __m128i
  9359. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9360. _mm_mask_srli_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  9361. const int __imm)
  9362. {
  9363. return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
  9364. (__v4si) __W,
  9365. (__mmask8) __U);
  9366. }
  9367. extern __inline __m128i
  9368. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9369. _mm_maskz_srli_epi32 (__mmask8 __U, __m128i __A, const int __imm)
  9370. {
  9371. return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
  9372. (__v4si)
  9373. _mm_setzero_si128 (),
  9374. (__mmask8) __U);
  9375. }
  9376. extern __inline __m256i
  9377. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9378. _mm256_mask_srli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  9379. const int __imm)
  9380. {
  9381. return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
  9382. (__v4di) __W,
  9383. (__mmask8) __U);
  9384. }
  9385. extern __inline __m256i
  9386. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9387. _mm256_maskz_srli_epi64 (__mmask8 __U, __m256i __A, const int __imm)
  9388. {
  9389. return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
  9390. (__v4di)
  9391. _mm256_setzero_si256 (),
  9392. (__mmask8) __U);
  9393. }
  9394. extern __inline __m128i
  9395. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9396. _mm_mask_srli_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
  9397. const int __imm)
  9398. {
  9399. return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
  9400. (__v2di) __W,
  9401. (__mmask8) __U);
  9402. }
  9403. extern __inline __m128i
  9404. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9405. _mm_maskz_srli_epi64 (__mmask8 __U, __m128i __A, const int __imm)
  9406. {
  9407. return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
  9408. (__v2di)
  9409. _mm_setzero_si128 (),
  9410. (__mmask8) __U);
  9411. }
  9412. extern __inline __m256i
  9413. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9414. _mm256_ternarylogic_epi64 (__m256i __A, __m256i __B, __m256i __C,
  9415. const int __imm)
  9416. {
  9417. return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
  9418. (__v4di) __B,
  9419. (__v4di) __C, __imm,
  9420. (__mmask8) -1);
  9421. }
  9422. extern __inline __m256i
  9423. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9424. _mm256_mask_ternarylogic_epi64 (__m256i __A, __mmask8 __U,
  9425. __m256i __B, __m256i __C,
  9426. const int __imm)
  9427. {
  9428. return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
  9429. (__v4di) __B,
  9430. (__v4di) __C, __imm,
  9431. (__mmask8) __U);
  9432. }
  9433. extern __inline __m256i
  9434. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9435. _mm256_maskz_ternarylogic_epi64 (__mmask8 __U, __m256i __A,
  9436. __m256i __B, __m256i __C,
  9437. const int __imm)
  9438. {
  9439. return (__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di) __A,
  9440. (__v4di) __B,
  9441. (__v4di) __C,
  9442. __imm,
  9443. (__mmask8) __U);
  9444. }
  9445. extern __inline __m256i
  9446. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9447. _mm256_ternarylogic_epi32 (__m256i __A, __m256i __B, __m256i __C,
  9448. const int __imm)
  9449. {
  9450. return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
  9451. (__v8si) __B,
  9452. (__v8si) __C, __imm,
  9453. (__mmask8) -1);
  9454. }
  9455. extern __inline __m256i
  9456. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9457. _mm256_mask_ternarylogic_epi32 (__m256i __A, __mmask8 __U,
  9458. __m256i __B, __m256i __C,
  9459. const int __imm)
  9460. {
  9461. return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
  9462. (__v8si) __B,
  9463. (__v8si) __C, __imm,
  9464. (__mmask8) __U);
  9465. }
  9466. extern __inline __m256i
  9467. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9468. _mm256_maskz_ternarylogic_epi32 (__mmask8 __U, __m256i __A,
  9469. __m256i __B, __m256i __C,
  9470. const int __imm)
  9471. {
  9472. return (__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si) __A,
  9473. (__v8si) __B,
  9474. (__v8si) __C,
  9475. __imm,
  9476. (__mmask8) __U);
  9477. }
  9478. extern __inline __m128i
  9479. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9480. _mm_ternarylogic_epi64 (__m128i __A, __m128i __B, __m128i __C,
  9481. const int __imm)
  9482. {
  9483. return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
  9484. (__v2di) __B,
  9485. (__v2di) __C, __imm,
  9486. (__mmask8) -1);
  9487. }
  9488. extern __inline __m128i
  9489. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9490. _mm_mask_ternarylogic_epi64 (__m128i __A, __mmask8 __U,
  9491. __m128i __B, __m128i __C, const int __imm)
  9492. {
  9493. return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
  9494. (__v2di) __B,
  9495. (__v2di) __C, __imm,
  9496. (__mmask8) __U);
  9497. }
  9498. extern __inline __m128i
  9499. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9500. _mm_maskz_ternarylogic_epi64 (__mmask8 __U, __m128i __A,
  9501. __m128i __B, __m128i __C, const int __imm)
  9502. {
  9503. return (__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di) __A,
  9504. (__v2di) __B,
  9505. (__v2di) __C,
  9506. __imm,
  9507. (__mmask8) __U);
  9508. }
  9509. extern __inline __m128i
  9510. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9511. _mm_ternarylogic_epi32 (__m128i __A, __m128i __B, __m128i __C,
  9512. const int __imm)
  9513. {
  9514. return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
  9515. (__v4si) __B,
  9516. (__v4si) __C, __imm,
  9517. (__mmask8) -1);
  9518. }
  9519. extern __inline __m128i
  9520. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9521. _mm_mask_ternarylogic_epi32 (__m128i __A, __mmask8 __U,
  9522. __m128i __B, __m128i __C, const int __imm)
  9523. {
  9524. return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
  9525. (__v4si) __B,
  9526. (__v4si) __C, __imm,
  9527. (__mmask8) __U);
  9528. }
  9529. extern __inline __m128i
  9530. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9531. _mm_maskz_ternarylogic_epi32 (__mmask8 __U, __m128i __A,
  9532. __m128i __B, __m128i __C, const int __imm)
  9533. {
  9534. return (__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si) __A,
  9535. (__v4si) __B,
  9536. (__v4si) __C,
  9537. __imm,
  9538. (__mmask8) __U);
  9539. }
  9540. extern __inline __m256
  9541. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9542. _mm256_roundscale_ps (__m256 __A, const int __imm)
  9543. {
  9544. return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
  9545. __imm,
  9546. (__v8sf)
  9547. _mm256_setzero_ps (),
  9548. (__mmask8) -1);
  9549. }
  9550. extern __inline __m256
  9551. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9552. _mm256_mask_roundscale_ps (__m256 __W, __mmask8 __U, __m256 __A,
  9553. const int __imm)
  9554. {
  9555. return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
  9556. __imm,
  9557. (__v8sf) __W,
  9558. (__mmask8) __U);
  9559. }
  9560. extern __inline __m256
  9561. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9562. _mm256_maskz_roundscale_ps (__mmask8 __U, __m256 __A, const int __imm)
  9563. {
  9564. return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
  9565. __imm,
  9566. (__v8sf)
  9567. _mm256_setzero_ps (),
  9568. (__mmask8) __U);
  9569. }
  9570. extern __inline __m256d
  9571. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9572. _mm256_roundscale_pd (__m256d __A, const int __imm)
  9573. {
  9574. return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
  9575. __imm,
  9576. (__v4df)
  9577. _mm256_setzero_pd (),
  9578. (__mmask8) -1);
  9579. }
  9580. extern __inline __m256d
  9581. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9582. _mm256_mask_roundscale_pd (__m256d __W, __mmask8 __U, __m256d __A,
  9583. const int __imm)
  9584. {
  9585. return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
  9586. __imm,
  9587. (__v4df) __W,
  9588. (__mmask8) __U);
  9589. }
  9590. extern __inline __m256d
  9591. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9592. _mm256_maskz_roundscale_pd (__mmask8 __U, __m256d __A, const int __imm)
  9593. {
  9594. return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
  9595. __imm,
  9596. (__v4df)
  9597. _mm256_setzero_pd (),
  9598. (__mmask8) __U);
  9599. }
  9600. extern __inline __m128
  9601. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9602. _mm_roundscale_ps (__m128 __A, const int __imm)
  9603. {
  9604. return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
  9605. __imm,
  9606. (__v4sf)
  9607. _mm_setzero_ps (),
  9608. (__mmask8) -1);
  9609. }
  9610. extern __inline __m128
  9611. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9612. _mm_mask_roundscale_ps (__m128 __W, __mmask8 __U, __m128 __A,
  9613. const int __imm)
  9614. {
  9615. return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
  9616. __imm,
  9617. (__v4sf) __W,
  9618. (__mmask8) __U);
  9619. }
  9620. extern __inline __m128
  9621. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9622. _mm_maskz_roundscale_ps (__mmask8 __U, __m128 __A, const int __imm)
  9623. {
  9624. return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
  9625. __imm,
  9626. (__v4sf)
  9627. _mm_setzero_ps (),
  9628. (__mmask8) __U);
  9629. }
  9630. extern __inline __m128d
  9631. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9632. _mm_roundscale_pd (__m128d __A, const int __imm)
  9633. {
  9634. return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
  9635. __imm,
  9636. (__v2df)
  9637. _mm_setzero_pd (),
  9638. (__mmask8) -1);
  9639. }
  9640. extern __inline __m128d
  9641. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9642. _mm_mask_roundscale_pd (__m128d __W, __mmask8 __U, __m128d __A,
  9643. const int __imm)
  9644. {
  9645. return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
  9646. __imm,
  9647. (__v2df) __W,
  9648. (__mmask8) __U);
  9649. }
  9650. extern __inline __m128d
  9651. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9652. _mm_maskz_roundscale_pd (__mmask8 __U, __m128d __A, const int __imm)
  9653. {
  9654. return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
  9655. __imm,
  9656. (__v2df)
  9657. _mm_setzero_pd (),
  9658. (__mmask8) __U);
  9659. }
  9660. extern __inline __m256
  9661. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9662. _mm256_getmant_ps (__m256 __A, _MM_MANTISSA_NORM_ENUM __B,
  9663. _MM_MANTISSA_SIGN_ENUM __C)
  9664. {
  9665. return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
  9666. (__C << 2) | __B,
  9667. (__v8sf)
  9668. _mm256_setzero_ps (),
  9669. (__mmask8) -1);
  9670. }
  9671. extern __inline __m256
  9672. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9673. _mm256_mask_getmant_ps (__m256 __W, __mmask8 __U, __m256 __A,
  9674. _MM_MANTISSA_NORM_ENUM __B,
  9675. _MM_MANTISSA_SIGN_ENUM __C)
  9676. {
  9677. return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
  9678. (__C << 2) | __B,
  9679. (__v8sf) __W,
  9680. (__mmask8) __U);
  9681. }
  9682. extern __inline __m256
  9683. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9684. _mm256_maskz_getmant_ps (__mmask8 __U, __m256 __A,
  9685. _MM_MANTISSA_NORM_ENUM __B,
  9686. _MM_MANTISSA_SIGN_ENUM __C)
  9687. {
  9688. return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
  9689. (__C << 2) | __B,
  9690. (__v8sf)
  9691. _mm256_setzero_ps (),
  9692. (__mmask8) __U);
  9693. }
  9694. extern __inline __m128
  9695. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9696. _mm_getmant_ps (__m128 __A, _MM_MANTISSA_NORM_ENUM __B,
  9697. _MM_MANTISSA_SIGN_ENUM __C)
  9698. {
  9699. return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
  9700. (__C << 2) | __B,
  9701. (__v4sf)
  9702. _mm_setzero_ps (),
  9703. (__mmask8) -1);
  9704. }
  9705. extern __inline __m128
  9706. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9707. _mm_mask_getmant_ps (__m128 __W, __mmask8 __U, __m128 __A,
  9708. _MM_MANTISSA_NORM_ENUM __B,
  9709. _MM_MANTISSA_SIGN_ENUM __C)
  9710. {
  9711. return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
  9712. (__C << 2) | __B,
  9713. (__v4sf) __W,
  9714. (__mmask8) __U);
  9715. }
  9716. extern __inline __m128
  9717. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9718. _mm_maskz_getmant_ps (__mmask8 __U, __m128 __A,
  9719. _MM_MANTISSA_NORM_ENUM __B,
  9720. _MM_MANTISSA_SIGN_ENUM __C)
  9721. {
  9722. return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
  9723. (__C << 2) | __B,
  9724. (__v4sf)
  9725. _mm_setzero_ps (),
  9726. (__mmask8) __U);
  9727. }
  9728. extern __inline __m256d
  9729. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9730. _mm256_getmant_pd (__m256d __A, _MM_MANTISSA_NORM_ENUM __B,
  9731. _MM_MANTISSA_SIGN_ENUM __C)
  9732. {
  9733. return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
  9734. (__C << 2) | __B,
  9735. (__v4df)
  9736. _mm256_setzero_pd (),
  9737. (__mmask8) -1);
  9738. }
  9739. extern __inline __m256d
  9740. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9741. _mm256_mask_getmant_pd (__m256d __W, __mmask8 __U, __m256d __A,
  9742. _MM_MANTISSA_NORM_ENUM __B,
  9743. _MM_MANTISSA_SIGN_ENUM __C)
  9744. {
  9745. return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
  9746. (__C << 2) | __B,
  9747. (__v4df) __W,
  9748. (__mmask8) __U);
  9749. }
  9750. extern __inline __m256d
  9751. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9752. _mm256_maskz_getmant_pd (__mmask8 __U, __m256d __A,
  9753. _MM_MANTISSA_NORM_ENUM __B,
  9754. _MM_MANTISSA_SIGN_ENUM __C)
  9755. {
  9756. return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
  9757. (__C << 2) | __B,
  9758. (__v4df)
  9759. _mm256_setzero_pd (),
  9760. (__mmask8) __U);
  9761. }
  9762. extern __inline __m128d
  9763. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9764. _mm_getmant_pd (__m128d __A, _MM_MANTISSA_NORM_ENUM __B,
  9765. _MM_MANTISSA_SIGN_ENUM __C)
  9766. {
  9767. return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
  9768. (__C << 2) | __B,
  9769. (__v2df)
  9770. _mm_setzero_pd (),
  9771. (__mmask8) -1);
  9772. }
  9773. extern __inline __m128d
  9774. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9775. _mm_mask_getmant_pd (__m128d __W, __mmask8 __U, __m128d __A,
  9776. _MM_MANTISSA_NORM_ENUM __B,
  9777. _MM_MANTISSA_SIGN_ENUM __C)
  9778. {
  9779. return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
  9780. (__C << 2) | __B,
  9781. (__v2df) __W,
  9782. (__mmask8) __U);
  9783. }
  9784. extern __inline __m128d
  9785. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9786. _mm_maskz_getmant_pd (__mmask8 __U, __m128d __A,
  9787. _MM_MANTISSA_NORM_ENUM __B,
  9788. _MM_MANTISSA_SIGN_ENUM __C)
  9789. {
  9790. return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
  9791. (__C << 2) | __B,
  9792. (__v2df)
  9793. _mm_setzero_pd (),
  9794. (__mmask8) __U);
  9795. }
  9796. extern __inline __m256
  9797. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9798. _mm256_mmask_i32gather_ps (__m256 __v1_old, __mmask8 __mask,
  9799. __m256i __index, void const *__addr,
  9800. int __scale)
  9801. {
  9802. return (__m256) __builtin_ia32_gather3siv8sf ((__v8sf) __v1_old,
  9803. __addr,
  9804. (__v8si) __index,
  9805. __mask, __scale);
  9806. }
  9807. extern __inline __m128
  9808. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9809. _mm_mmask_i32gather_ps (__m128 __v1_old, __mmask8 __mask,
  9810. __m128i __index, void const *__addr,
  9811. int __scale)
  9812. {
  9813. return (__m128) __builtin_ia32_gather3siv4sf ((__v4sf) __v1_old,
  9814. __addr,
  9815. (__v4si) __index,
  9816. __mask, __scale);
  9817. }
  9818. extern __inline __m256d
  9819. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9820. _mm256_mmask_i32gather_pd (__m256d __v1_old, __mmask8 __mask,
  9821. __m128i __index, void const *__addr,
  9822. int __scale)
  9823. {
  9824. return (__m256d) __builtin_ia32_gather3siv4df ((__v4df) __v1_old,
  9825. __addr,
  9826. (__v4si) __index,
  9827. __mask, __scale);
  9828. }
  9829. extern __inline __m128d
  9830. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9831. _mm_mmask_i32gather_pd (__m128d __v1_old, __mmask8 __mask,
  9832. __m128i __index, void const *__addr,
  9833. int __scale)
  9834. {
  9835. return (__m128d) __builtin_ia32_gather3siv2df ((__v2df) __v1_old,
  9836. __addr,
  9837. (__v4si) __index,
  9838. __mask, __scale);
  9839. }
  9840. extern __inline __m128
  9841. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9842. _mm256_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
  9843. __m256i __index, void const *__addr,
  9844. int __scale)
  9845. {
  9846. return (__m128) __builtin_ia32_gather3div8sf ((__v4sf) __v1_old,
  9847. __addr,
  9848. (__v4di) __index,
  9849. __mask, __scale);
  9850. }
  9851. extern __inline __m128
  9852. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9853. _mm_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
  9854. __m128i __index, void const *__addr,
  9855. int __scale)
  9856. {
  9857. return (__m128) __builtin_ia32_gather3div4sf ((__v4sf) __v1_old,
  9858. __addr,
  9859. (__v2di) __index,
  9860. __mask, __scale);
  9861. }
  9862. extern __inline __m256d
  9863. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9864. _mm256_mmask_i64gather_pd (__m256d __v1_old, __mmask8 __mask,
  9865. __m256i __index, void const *__addr,
  9866. int __scale)
  9867. {
  9868. return (__m256d) __builtin_ia32_gather3div4df ((__v4df) __v1_old,
  9869. __addr,
  9870. (__v4di) __index,
  9871. __mask, __scale);
  9872. }
  9873. extern __inline __m128d
  9874. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9875. _mm_mmask_i64gather_pd (__m128d __v1_old, __mmask8 __mask,
  9876. __m128i __index, void const *__addr,
  9877. int __scale)
  9878. {
  9879. return (__m128d) __builtin_ia32_gather3div2df ((__v2df) __v1_old,
  9880. __addr,
  9881. (__v2di) __index,
  9882. __mask, __scale);
  9883. }
  9884. extern __inline __m256i
  9885. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9886. _mm256_mmask_i32gather_epi32 (__m256i __v1_old, __mmask8 __mask,
  9887. __m256i __index, void const *__addr,
  9888. int __scale)
  9889. {
  9890. return (__m256i) __builtin_ia32_gather3siv8si ((__v8si) __v1_old,
  9891. __addr,
  9892. (__v8si) __index,
  9893. __mask, __scale);
  9894. }
  9895. extern __inline __m128i
  9896. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9897. _mm_mmask_i32gather_epi32 (__m128i __v1_old, __mmask8 __mask,
  9898. __m128i __index, void const *__addr,
  9899. int __scale)
  9900. {
  9901. return (__m128i) __builtin_ia32_gather3siv4si ((__v4si) __v1_old,
  9902. __addr,
  9903. (__v4si) __index,
  9904. __mask, __scale);
  9905. }
  9906. extern __inline __m256i
  9907. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9908. _mm256_mmask_i32gather_epi64 (__m256i __v1_old, __mmask8 __mask,
  9909. __m128i __index, void const *__addr,
  9910. int __scale)
  9911. {
  9912. return (__m256i) __builtin_ia32_gather3siv4di ((__v4di) __v1_old,
  9913. __addr,
  9914. (__v4si) __index,
  9915. __mask, __scale);
  9916. }
  9917. extern __inline __m128i
  9918. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9919. _mm_mmask_i32gather_epi64 (__m128i __v1_old, __mmask8 __mask,
  9920. __m128i __index, void const *__addr,
  9921. int __scale)
  9922. {
  9923. return (__m128i) __builtin_ia32_gather3siv2di ((__v2di) __v1_old,
  9924. __addr,
  9925. (__v4si) __index,
  9926. __mask, __scale);
  9927. }
  9928. extern __inline __m128i
  9929. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9930. _mm256_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
  9931. __m256i __index, void const *__addr,
  9932. int __scale)
  9933. {
  9934. return (__m128i) __builtin_ia32_gather3div8si ((__v4si) __v1_old,
  9935. __addr,
  9936. (__v4di) __index,
  9937. __mask, __scale);
  9938. }
  9939. extern __inline __m128i
  9940. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9941. _mm_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
  9942. __m128i __index, void const *__addr,
  9943. int __scale)
  9944. {
  9945. return (__m128i) __builtin_ia32_gather3div4si ((__v4si) __v1_old,
  9946. __addr,
  9947. (__v2di) __index,
  9948. __mask, __scale);
  9949. }
  9950. extern __inline __m256i
  9951. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9952. _mm256_mmask_i64gather_epi64 (__m256i __v1_old, __mmask8 __mask,
  9953. __m256i __index, void const *__addr,
  9954. int __scale)
  9955. {
  9956. return (__m256i) __builtin_ia32_gather3div4di ((__v4di) __v1_old,
  9957. __addr,
  9958. (__v4di) __index,
  9959. __mask, __scale);
  9960. }
  9961. extern __inline __m128i
  9962. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9963. _mm_mmask_i64gather_epi64 (__m128i __v1_old, __mmask8 __mask,
  9964. __m128i __index, void const *__addr,
  9965. int __scale)
  9966. {
  9967. return (__m128i) __builtin_ia32_gather3div2di ((__v2di) __v1_old,
  9968. __addr,
  9969. (__v2di) __index,
  9970. __mask, __scale);
  9971. }
  9972. extern __inline void
  9973. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9974. _mm256_i32scatter_ps (void *__addr, __m256i __index,
  9975. __m256 __v1, const int __scale)
  9976. {
  9977. __builtin_ia32_scattersiv8sf (__addr, (__mmask8) 0xFF,
  9978. (__v8si) __index, (__v8sf) __v1,
  9979. __scale);
  9980. }
  9981. extern __inline void
  9982. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9983. _mm256_mask_i32scatter_ps (void *__addr, __mmask8 __mask,
  9984. __m256i __index, __m256 __v1,
  9985. const int __scale)
  9986. {
  9987. __builtin_ia32_scattersiv8sf (__addr, __mask, (__v8si) __index,
  9988. (__v8sf) __v1, __scale);
  9989. }
  9990. extern __inline void
  9991. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9992. _mm_i32scatter_ps (void *__addr, __m128i __index, __m128 __v1,
  9993. const int __scale)
  9994. {
  9995. __builtin_ia32_scattersiv4sf (__addr, (__mmask8) 0xFF,
  9996. (__v4si) __index, (__v4sf) __v1,
  9997. __scale);
  9998. }
  9999. extern __inline void
  10000. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10001. _mm_mask_i32scatter_ps (void *__addr, __mmask8 __mask,
  10002. __m128i __index, __m128 __v1,
  10003. const int __scale)
  10004. {
  10005. __builtin_ia32_scattersiv4sf (__addr, __mask, (__v4si) __index,
  10006. (__v4sf) __v1, __scale);
  10007. }
  10008. extern __inline void
  10009. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10010. _mm256_i32scatter_pd (void *__addr, __m128i __index,
  10011. __m256d __v1, const int __scale)
  10012. {
  10013. __builtin_ia32_scattersiv4df (__addr, (__mmask8) 0xFF,
  10014. (__v4si) __index, (__v4df) __v1,
  10015. __scale);
  10016. }
  10017. extern __inline void
  10018. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10019. _mm256_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
  10020. __m128i __index, __m256d __v1,
  10021. const int __scale)
  10022. {
  10023. __builtin_ia32_scattersiv4df (__addr, __mask, (__v4si) __index,
  10024. (__v4df) __v1, __scale);
  10025. }
  10026. extern __inline void
  10027. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10028. _mm_i32scatter_pd (void *__addr, __m128i __index,
  10029. __m128d __v1, const int __scale)
  10030. {
  10031. __builtin_ia32_scattersiv2df (__addr, (__mmask8) 0xFF,
  10032. (__v4si) __index, (__v2df) __v1,
  10033. __scale);
  10034. }
  10035. extern __inline void
  10036. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10037. _mm_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
  10038. __m128i __index, __m128d __v1,
  10039. const int __scale)
  10040. {
  10041. __builtin_ia32_scattersiv2df (__addr, __mask, (__v4si) __index,
  10042. (__v2df) __v1, __scale);
  10043. }
  10044. extern __inline void
  10045. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10046. _mm256_i64scatter_ps (void *__addr, __m256i __index,
  10047. __m128 __v1, const int __scale)
  10048. {
  10049. __builtin_ia32_scatterdiv8sf (__addr, (__mmask8) 0xFF,
  10050. (__v4di) __index, (__v4sf) __v1,
  10051. __scale);
  10052. }
  10053. extern __inline void
  10054. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10055. _mm256_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
  10056. __m256i __index, __m128 __v1,
  10057. const int __scale)
  10058. {
  10059. __builtin_ia32_scatterdiv8sf (__addr, __mask, (__v4di) __index,
  10060. (__v4sf) __v1, __scale);
  10061. }
  10062. extern __inline void
  10063. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10064. _mm_i64scatter_ps (void *__addr, __m128i __index, __m128 __v1,
  10065. const int __scale)
  10066. {
  10067. __builtin_ia32_scatterdiv4sf (__addr, (__mmask8) 0xFF,
  10068. (__v2di) __index, (__v4sf) __v1,
  10069. __scale);
  10070. }
  10071. extern __inline void
  10072. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10073. _mm_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
  10074. __m128i __index, __m128 __v1,
  10075. const int __scale)
  10076. {
  10077. __builtin_ia32_scatterdiv4sf (__addr, __mask, (__v2di) __index,
  10078. (__v4sf) __v1, __scale);
  10079. }
  10080. extern __inline void
  10081. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10082. _mm256_i64scatter_pd (void *__addr, __m256i __index,
  10083. __m256d __v1, const int __scale)
  10084. {
  10085. __builtin_ia32_scatterdiv4df (__addr, (__mmask8) 0xFF,
  10086. (__v4di) __index, (__v4df) __v1,
  10087. __scale);
  10088. }
  10089. extern __inline void
  10090. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10091. _mm256_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
  10092. __m256i __index, __m256d __v1,
  10093. const int __scale)
  10094. {
  10095. __builtin_ia32_scatterdiv4df (__addr, __mask, (__v4di) __index,
  10096. (__v4df) __v1, __scale);
  10097. }
  10098. extern __inline void
  10099. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10100. _mm_i64scatter_pd (void *__addr, __m128i __index,
  10101. __m128d __v1, const int __scale)
  10102. {
  10103. __builtin_ia32_scatterdiv2df (__addr, (__mmask8) 0xFF,
  10104. (__v2di) __index, (__v2df) __v1,
  10105. __scale);
  10106. }
  10107. extern __inline void
  10108. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10109. _mm_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
  10110. __m128i __index, __m128d __v1,
  10111. const int __scale)
  10112. {
  10113. __builtin_ia32_scatterdiv2df (__addr, __mask, (__v2di) __index,
  10114. (__v2df) __v1, __scale);
  10115. }
  10116. extern __inline void
  10117. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10118. _mm256_i32scatter_epi32 (void *__addr, __m256i __index,
  10119. __m256i __v1, const int __scale)
  10120. {
  10121. __builtin_ia32_scattersiv8si (__addr, (__mmask8) 0xFF,
  10122. (__v8si) __index, (__v8si) __v1,
  10123. __scale);
  10124. }
  10125. extern __inline void
  10126. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10127. _mm256_mask_i32scatter_epi32 (void *__addr, __mmask8 __mask,
  10128. __m256i __index, __m256i __v1,
  10129. const int __scale)
  10130. {
  10131. __builtin_ia32_scattersiv8si (__addr, __mask, (__v8si) __index,
  10132. (__v8si) __v1, __scale);
  10133. }
  10134. extern __inline void
  10135. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10136. _mm_i32scatter_epi32 (void *__addr, __m128i __index,
  10137. __m128i __v1, const int __scale)
  10138. {
  10139. __builtin_ia32_scattersiv4si (__addr, (__mmask8) 0xFF,
  10140. (__v4si) __index, (__v4si) __v1,
  10141. __scale);
  10142. }
  10143. extern __inline void
  10144. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10145. _mm_mask_i32scatter_epi32 (void *__addr, __mmask8 __mask,
  10146. __m128i __index, __m128i __v1,
  10147. const int __scale)
  10148. {
  10149. __builtin_ia32_scattersiv4si (__addr, __mask, (__v4si) __index,
  10150. (__v4si) __v1, __scale);
  10151. }
  10152. extern __inline void
  10153. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10154. _mm256_i32scatter_epi64 (void *__addr, __m128i __index,
  10155. __m256i __v1, const int __scale)
  10156. {
  10157. __builtin_ia32_scattersiv4di (__addr, (__mmask8) 0xFF,
  10158. (__v4si) __index, (__v4di) __v1,
  10159. __scale);
  10160. }
  10161. extern __inline void
  10162. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10163. _mm256_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
  10164. __m128i __index, __m256i __v1,
  10165. const int __scale)
  10166. {
  10167. __builtin_ia32_scattersiv4di (__addr, __mask, (__v4si) __index,
  10168. (__v4di) __v1, __scale);
  10169. }
  10170. extern __inline void
  10171. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10172. _mm_i32scatter_epi64 (void *__addr, __m128i __index,
  10173. __m128i __v1, const int __scale)
  10174. {
  10175. __builtin_ia32_scattersiv2di (__addr, (__mmask8) 0xFF,
  10176. (__v4si) __index, (__v2di) __v1,
  10177. __scale);
  10178. }
  10179. extern __inline void
  10180. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10181. _mm_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
  10182. __m128i __index, __m128i __v1,
  10183. const int __scale)
  10184. {
  10185. __builtin_ia32_scattersiv2di (__addr, __mask, (__v4si) __index,
  10186. (__v2di) __v1, __scale);
  10187. }
  10188. extern __inline void
  10189. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10190. _mm256_i64scatter_epi32 (void *__addr, __m256i __index,
  10191. __m128i __v1, const int __scale)
  10192. {
  10193. __builtin_ia32_scatterdiv8si (__addr, (__mmask8) 0xFF,
  10194. (__v4di) __index, (__v4si) __v1,
  10195. __scale);
  10196. }
  10197. extern __inline void
  10198. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10199. _mm256_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
  10200. __m256i __index, __m128i __v1,
  10201. const int __scale)
  10202. {
  10203. __builtin_ia32_scatterdiv8si (__addr, __mask, (__v4di) __index,
  10204. (__v4si) __v1, __scale);
  10205. }
  10206. extern __inline void
  10207. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10208. _mm_i64scatter_epi32 (void *__addr, __m128i __index,
  10209. __m128i __v1, const int __scale)
  10210. {
  10211. __builtin_ia32_scatterdiv4si (__addr, (__mmask8) 0xFF,
  10212. (__v2di) __index, (__v4si) __v1,
  10213. __scale);
  10214. }
  10215. extern __inline void
  10216. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10217. _mm_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
  10218. __m128i __index, __m128i __v1,
  10219. const int __scale)
  10220. {
  10221. __builtin_ia32_scatterdiv4si (__addr, __mask, (__v2di) __index,
  10222. (__v4si) __v1, __scale);
  10223. }
  10224. extern __inline void
  10225. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10226. _mm256_i64scatter_epi64 (void *__addr, __m256i __index,
  10227. __m256i __v1, const int __scale)
  10228. {
  10229. __builtin_ia32_scatterdiv4di (__addr, (__mmask8) 0xFF,
  10230. (__v4di) __index, (__v4di) __v1,
  10231. __scale);
  10232. }
  10233. extern __inline void
  10234. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10235. _mm256_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
  10236. __m256i __index, __m256i __v1,
  10237. const int __scale)
  10238. {
  10239. __builtin_ia32_scatterdiv4di (__addr, __mask, (__v4di) __index,
  10240. (__v4di) __v1, __scale);
  10241. }
  10242. extern __inline void
  10243. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10244. _mm_i64scatter_epi64 (void *__addr, __m128i __index,
  10245. __m128i __v1, const int __scale)
  10246. {
  10247. __builtin_ia32_scatterdiv2di (__addr, (__mmask8) 0xFF,
  10248. (__v2di) __index, (__v2di) __v1,
  10249. __scale);
  10250. }
  10251. extern __inline void
  10252. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10253. _mm_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
  10254. __m128i __index, __m128i __v1,
  10255. const int __scale)
  10256. {
  10257. __builtin_ia32_scatterdiv2di (__addr, __mask, (__v2di) __index,
  10258. (__v2di) __v1, __scale);
  10259. }
  10260. extern __inline __m256i
  10261. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10262. _mm256_mask_shuffle_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  10263. _MM_PERM_ENUM __mask)
  10264. {
  10265. return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
  10266. (__v8si) __W,
  10267. (__mmask8) __U);
  10268. }
  10269. extern __inline __m256i
  10270. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10271. _mm256_maskz_shuffle_epi32 (__mmask8 __U, __m256i __A,
  10272. _MM_PERM_ENUM __mask)
  10273. {
  10274. return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
  10275. (__v8si)
  10276. _mm256_setzero_si256 (),
  10277. (__mmask8) __U);
  10278. }
  10279. extern __inline __m128i
  10280. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10281. _mm_mask_shuffle_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  10282. _MM_PERM_ENUM __mask)
  10283. {
  10284. return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
  10285. (__v4si) __W,
  10286. (__mmask8) __U);
  10287. }
  10288. extern __inline __m128i
  10289. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10290. _mm_maskz_shuffle_epi32 (__mmask8 __U, __m128i __A,
  10291. _MM_PERM_ENUM __mask)
  10292. {
  10293. return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
  10294. (__v4si)
  10295. _mm_setzero_si128 (),
  10296. (__mmask8) __U);
  10297. }
  10298. extern __inline __m256i
  10299. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10300. _mm256_rol_epi32 (__m256i __A, const int __B)
  10301. {
  10302. return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
  10303. (__v8si)
  10304. _mm256_setzero_si256 (),
  10305. (__mmask8) -1);
  10306. }
  10307. extern __inline __m256i
  10308. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10309. _mm256_mask_rol_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  10310. const int __B)
  10311. {
  10312. return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
  10313. (__v8si) __W,
  10314. (__mmask8) __U);
  10315. }
  10316. extern __inline __m256i
  10317. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10318. _mm256_maskz_rol_epi32 (__mmask8 __U, __m256i __A, const int __B)
  10319. {
  10320. return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
  10321. (__v8si)
  10322. _mm256_setzero_si256 (),
  10323. (__mmask8) __U);
  10324. }
  10325. extern __inline __m128i
  10326. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10327. _mm_rol_epi32 (__m128i __A, const int __B)
  10328. {
  10329. return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
  10330. (__v4si)
  10331. _mm_setzero_si128 (),
  10332. (__mmask8) -1);
  10333. }
  10334. extern __inline __m128i
  10335. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10336. _mm_mask_rol_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  10337. const int __B)
  10338. {
  10339. return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
  10340. (__v4si) __W,
  10341. (__mmask8) __U);
  10342. }
  10343. extern __inline __m128i
  10344. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10345. _mm_maskz_rol_epi32 (__mmask8 __U, __m128i __A, const int __B)
  10346. {
  10347. return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
  10348. (__v4si)
  10349. _mm_setzero_si128 (),
  10350. (__mmask8) __U);
  10351. }
  10352. extern __inline __m256i
  10353. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10354. _mm256_ror_epi32 (__m256i __A, const int __B)
  10355. {
  10356. return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
  10357. (__v8si)
  10358. _mm256_setzero_si256 (),
  10359. (__mmask8) -1);
  10360. }
  10361. extern __inline __m256i
  10362. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10363. _mm256_mask_ror_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  10364. const int __B)
  10365. {
  10366. return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
  10367. (__v8si) __W,
  10368. (__mmask8) __U);
  10369. }
  10370. extern __inline __m256i
  10371. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10372. _mm256_maskz_ror_epi32 (__mmask8 __U, __m256i __A, const int __B)
  10373. {
  10374. return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
  10375. (__v8si)
  10376. _mm256_setzero_si256 (),
  10377. (__mmask8) __U);
  10378. }
  10379. extern __inline __m128i
  10380. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10381. _mm_ror_epi32 (__m128i __A, const int __B)
  10382. {
  10383. return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
  10384. (__v4si)
  10385. _mm_setzero_si128 (),
  10386. (__mmask8) -1);
  10387. }
  10388. extern __inline __m128i
  10389. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10390. _mm_mask_ror_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  10391. const int __B)
  10392. {
  10393. return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
  10394. (__v4si) __W,
  10395. (__mmask8) __U);
  10396. }
  10397. extern __inline __m128i
  10398. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10399. _mm_maskz_ror_epi32 (__mmask8 __U, __m128i __A, const int __B)
  10400. {
  10401. return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
  10402. (__v4si)
  10403. _mm_setzero_si128 (),
  10404. (__mmask8) __U);
  10405. }
  10406. extern __inline __m256i
  10407. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10408. _mm256_rol_epi64 (__m256i __A, const int __B)
  10409. {
  10410. return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
  10411. (__v4di)
  10412. _mm256_setzero_si256 (),
  10413. (__mmask8) -1);
  10414. }
  10415. extern __inline __m256i
  10416. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10417. _mm256_mask_rol_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  10418. const int __B)
  10419. {
  10420. return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
  10421. (__v4di) __W,
  10422. (__mmask8) __U);
  10423. }
  10424. extern __inline __m256i
  10425. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10426. _mm256_maskz_rol_epi64 (__mmask8 __U, __m256i __A, const int __B)
  10427. {
  10428. return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
  10429. (__v4di)
  10430. _mm256_setzero_si256 (),
  10431. (__mmask8) __U);
  10432. }
  10433. extern __inline __m128i
  10434. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10435. _mm_rol_epi64 (__m128i __A, const int __B)
  10436. {
  10437. return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
  10438. (__v2di)
  10439. _mm_setzero_si128 (),
  10440. (__mmask8) -1);
  10441. }
  10442. extern __inline __m128i
  10443. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10444. _mm_mask_rol_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
  10445. const int __B)
  10446. {
  10447. return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
  10448. (__v2di) __W,
  10449. (__mmask8) __U);
  10450. }
  10451. extern __inline __m128i
  10452. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10453. _mm_maskz_rol_epi64 (__mmask8 __U, __m128i __A, const int __B)
  10454. {
  10455. return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
  10456. (__v2di)
  10457. _mm_setzero_si128 (),
  10458. (__mmask8) __U);
  10459. }
  10460. extern __inline __m256i
  10461. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10462. _mm256_ror_epi64 (__m256i __A, const int __B)
  10463. {
  10464. return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
  10465. (__v4di)
  10466. _mm256_setzero_si256 (),
  10467. (__mmask8) -1);
  10468. }
  10469. extern __inline __m256i
  10470. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10471. _mm256_mask_ror_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  10472. const int __B)
  10473. {
  10474. return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
  10475. (__v4di) __W,
  10476. (__mmask8) __U);
  10477. }
  10478. extern __inline __m256i
  10479. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10480. _mm256_maskz_ror_epi64 (__mmask8 __U, __m256i __A, const int __B)
  10481. {
  10482. return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
  10483. (__v4di)
  10484. _mm256_setzero_si256 (),
  10485. (__mmask8) __U);
  10486. }
  10487. extern __inline __m128i
  10488. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10489. _mm_ror_epi64 (__m128i __A, const int __B)
  10490. {
  10491. return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
  10492. (__v2di)
  10493. _mm_setzero_si128 (),
  10494. (__mmask8) -1);
  10495. }
  10496. extern __inline __m128i
  10497. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10498. _mm_mask_ror_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
  10499. const int __B)
  10500. {
  10501. return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
  10502. (__v2di) __W,
  10503. (__mmask8) __U);
  10504. }
  10505. extern __inline __m128i
  10506. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10507. _mm_maskz_ror_epi64 (__mmask8 __U, __m128i __A, const int __B)
  10508. {
  10509. return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
  10510. (__v2di)
  10511. _mm_setzero_si128 (),
  10512. (__mmask8) __U);
  10513. }
  10514. extern __inline __m128i
  10515. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10516. _mm_alignr_epi32 (__m128i __A, __m128i __B, const int __imm)
  10517. {
  10518. return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
  10519. (__v4si) __B, __imm,
  10520. (__v4si)
  10521. _mm_setzero_si128 (),
  10522. (__mmask8) -1);
  10523. }
  10524. extern __inline __m128i
  10525. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10526. _mm_mask_alignr_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  10527. __m128i __B, const int __imm)
  10528. {
  10529. return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
  10530. (__v4si) __B, __imm,
  10531. (__v4si) __W,
  10532. (__mmask8) __U);
  10533. }
  10534. extern __inline __m128i
  10535. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10536. _mm_maskz_alignr_epi32 (__mmask8 __U, __m128i __A, __m128i __B,
  10537. const int __imm)
  10538. {
  10539. return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
  10540. (__v4si) __B, __imm,
  10541. (__v4si)
  10542. _mm_setzero_si128 (),
  10543. (__mmask8) __U);
  10544. }
  10545. extern __inline __m128i
  10546. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10547. _mm_alignr_epi64 (__m128i __A, __m128i __B, const int __imm)
  10548. {
  10549. return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
  10550. (__v2di) __B, __imm,
  10551. (__v2di)
  10552. _mm_setzero_si128 (),
  10553. (__mmask8) -1);
  10554. }
  10555. extern __inline __m128i
  10556. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10557. _mm_mask_alignr_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
  10558. __m128i __B, const int __imm)
  10559. {
  10560. return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
  10561. (__v2di) __B, __imm,
  10562. (__v2di) __W,
  10563. (__mmask8) __U);
  10564. }
  10565. extern __inline __m128i
  10566. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10567. _mm_maskz_alignr_epi64 (__mmask8 __U, __m128i __A, __m128i __B,
  10568. const int __imm)
  10569. {
  10570. return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
  10571. (__v2di) __B, __imm,
  10572. (__v2di)
  10573. _mm_setzero_si128 (),
  10574. (__mmask8) __U);
  10575. }
  10576. extern __inline __m256i
  10577. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10578. _mm256_alignr_epi32 (__m256i __A, __m256i __B, const int __imm)
  10579. {
  10580. return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
  10581. (__v8si) __B, __imm,
  10582. (__v8si)
  10583. _mm256_setzero_si256 (),
  10584. (__mmask8) -1);
  10585. }
  10586. extern __inline __m256i
  10587. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10588. _mm256_mask_alignr_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  10589. __m256i __B, const int __imm)
  10590. {
  10591. return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
  10592. (__v8si) __B, __imm,
  10593. (__v8si) __W,
  10594. (__mmask8) __U);
  10595. }
  10596. extern __inline __m256i
  10597. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10598. _mm256_maskz_alignr_epi32 (__mmask8 __U, __m256i __A, __m256i __B,
  10599. const int __imm)
  10600. {
  10601. return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
  10602. (__v8si) __B, __imm,
  10603. (__v8si)
  10604. _mm256_setzero_si256 (),
  10605. (__mmask8) __U);
  10606. }
  10607. extern __inline __m256i
  10608. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10609. _mm256_alignr_epi64 (__m256i __A, __m256i __B, const int __imm)
  10610. {
  10611. return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
  10612. (__v4di) __B, __imm,
  10613. (__v4di)
  10614. _mm256_setzero_si256 (),
  10615. (__mmask8) -1);
  10616. }
  10617. extern __inline __m256i
  10618. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10619. _mm256_mask_alignr_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  10620. __m256i __B, const int __imm)
  10621. {
  10622. return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
  10623. (__v4di) __B, __imm,
  10624. (__v4di) __W,
  10625. (__mmask8) __U);
  10626. }
  10627. extern __inline __m256i
  10628. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10629. _mm256_maskz_alignr_epi64 (__mmask8 __U, __m256i __A, __m256i __B,
  10630. const int __imm)
  10631. {
  10632. return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
  10633. (__v4di) __B, __imm,
  10634. (__v4di)
  10635. _mm256_setzero_si256 (),
  10636. (__mmask8) __U);
  10637. }
  10638. extern __inline __m128i
  10639. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10640. _mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A,
  10641. const int __I)
  10642. {
  10643. return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
  10644. (__v8hi) __W,
  10645. (__mmask8) __U);
  10646. }
  10647. extern __inline __m128i
  10648. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10649. _mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A, const int __I)
  10650. {
  10651. return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
  10652. (__v8hi)
  10653. _mm_setzero_si128 (),
  10654. (__mmask8) __U);
  10655. }
  10656. extern __inline __m128i
  10657. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10658. _mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A,
  10659. const int __I)
  10660. {
  10661. return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
  10662. (__v8hi) __W,
  10663. (__mmask8) __U);
  10664. }
  10665. extern __inline __m128i
  10666. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10667. _mm256_maskz_cvtps_ph (__mmask8 __U, __m256 __A, const int __I)
  10668. {
  10669. return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
  10670. (__v8hi)
  10671. _mm_setzero_si128 (),
  10672. (__mmask8) __U);
  10673. }
  10674. extern __inline __m256i
  10675. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10676. _mm256_mask_srai_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  10677. const int __imm)
  10678. {
  10679. return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
  10680. (__v8si) __W,
  10681. (__mmask8) __U);
  10682. }
  10683. extern __inline __m256i
  10684. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10685. _mm256_maskz_srai_epi32 (__mmask8 __U, __m256i __A, const int __imm)
  10686. {
  10687. return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
  10688. (__v8si)
  10689. _mm256_setzero_si256 (),
  10690. (__mmask8) __U);
  10691. }
  10692. extern __inline __m128i
  10693. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10694. _mm_mask_srai_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  10695. const int __imm)
  10696. {
  10697. return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
  10698. (__v4si) __W,
  10699. (__mmask8) __U);
  10700. }
  10701. extern __inline __m128i
  10702. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10703. _mm_maskz_srai_epi32 (__mmask8 __U, __m128i __A, const int __imm)
  10704. {
  10705. return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
  10706. (__v4si)
  10707. _mm_setzero_si128 (),
  10708. (__mmask8) __U);
  10709. }
  10710. extern __inline __m256i
  10711. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10712. _mm256_srai_epi64 (__m256i __A, const int __imm)
  10713. {
  10714. return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
  10715. (__v4di)
  10716. _mm256_setzero_si256 (),
  10717. (__mmask8) -1);
  10718. }
  10719. extern __inline __m256i
  10720. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10721. _mm256_mask_srai_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  10722. const int __imm)
  10723. {
  10724. return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
  10725. (__v4di) __W,
  10726. (__mmask8) __U);
  10727. }
  10728. extern __inline __m256i
  10729. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10730. _mm256_maskz_srai_epi64 (__mmask8 __U, __m256i __A, const int __imm)
  10731. {
  10732. return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
  10733. (__v4di)
  10734. _mm256_setzero_si256 (),
  10735. (__mmask8) __U);
  10736. }
  10737. extern __inline __m128i
  10738. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10739. _mm_srai_epi64 (__m128i __A, const int __imm)
  10740. {
  10741. return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
  10742. (__v2di)
  10743. _mm_setzero_si128 (),
  10744. (__mmask8) -1);
  10745. }
  10746. extern __inline __m128i
  10747. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10748. _mm_mask_srai_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
  10749. const int __imm)
  10750. {
  10751. return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
  10752. (__v2di) __W,
  10753. (__mmask8) __U);
  10754. }
  10755. extern __inline __m128i
  10756. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10757. _mm_maskz_srai_epi64 (__mmask8 __U, __m128i __A, const int __imm)
  10758. {
  10759. return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
  10760. (__v2di)
  10761. _mm_setzero_si128 (),
  10762. (__mmask8) __U);
  10763. }
  10764. extern __inline __m128i
  10765. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10766. _mm_mask_slli_epi32 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
  10767. {
  10768. return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
  10769. (__v4si) __W,
  10770. (__mmask8) __U);
  10771. }
  10772. extern __inline __m128i
  10773. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10774. _mm_maskz_slli_epi32 (__mmask8 __U, __m128i __A, int __B)
  10775. {
  10776. return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
  10777. (__v4si)
  10778. _mm_setzero_si128 (),
  10779. (__mmask8) __U);
  10780. }
  10781. extern __inline __m128i
  10782. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10783. _mm_mask_slli_epi64 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
  10784. {
  10785. return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
  10786. (__v2di) __W,
  10787. (__mmask8) __U);
  10788. }
  10789. extern __inline __m128i
  10790. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10791. _mm_maskz_slli_epi64 (__mmask8 __U, __m128i __A, int __B)
  10792. {
  10793. return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
  10794. (__v2di)
  10795. _mm_setzero_si128 (),
  10796. (__mmask8) __U);
  10797. }
  10798. extern __inline __m256i
  10799. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10800. _mm256_mask_slli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  10801. int __B)
  10802. {
  10803. return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
  10804. (__v8si) __W,
  10805. (__mmask8) __U);
  10806. }
  10807. extern __inline __m256i
  10808. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10809. _mm256_maskz_slli_epi32 (__mmask8 __U, __m256i __A, int __B)
  10810. {
  10811. return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
  10812. (__v8si)
  10813. _mm256_setzero_si256 (),
  10814. (__mmask8) __U);
  10815. }
  10816. extern __inline __m256i
  10817. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10818. _mm256_mask_slli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  10819. int __B)
  10820. {
  10821. return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
  10822. (__v4di) __W,
  10823. (__mmask8) __U);
  10824. }
  10825. extern __inline __m256i
  10826. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10827. _mm256_maskz_slli_epi64 (__mmask8 __U, __m256i __A, int __B)
  10828. {
  10829. return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
  10830. (__v4di)
  10831. _mm256_setzero_si256 (),
  10832. (__mmask8) __U);
  10833. }
  10834. extern __inline __m256d
  10835. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10836. _mm256_mask_permutex_pd (__m256d __W, __mmask8 __U, __m256d __X,
  10837. const int __imm)
  10838. {
  10839. return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
  10840. (__v4df) __W,
  10841. (__mmask8) __U);
  10842. }
  10843. extern __inline __m256d
  10844. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10845. _mm256_maskz_permutex_pd (__mmask8 __U, __m256d __X, const int __imm)
  10846. {
  10847. return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
  10848. (__v4df)
  10849. _mm256_setzero_pd (),
  10850. (__mmask8) __U);
  10851. }
  10852. extern __inline __m256d
  10853. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10854. _mm256_mask_permute_pd (__m256d __W, __mmask8 __U, __m256d __X,
  10855. const int __C)
  10856. {
  10857. return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
  10858. (__v4df) __W,
  10859. (__mmask8) __U);
  10860. }
  10861. extern __inline __m256d
  10862. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10863. _mm256_maskz_permute_pd (__mmask8 __U, __m256d __X, const int __C)
  10864. {
  10865. return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
  10866. (__v4df)
  10867. _mm256_setzero_pd (),
  10868. (__mmask8) __U);
  10869. }
  10870. extern __inline __m128d
  10871. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10872. _mm_mask_permute_pd (__m128d __W, __mmask8 __U, __m128d __X,
  10873. const int __C)
  10874. {
  10875. return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
  10876. (__v2df) __W,
  10877. (__mmask8) __U);
  10878. }
  10879. extern __inline __m128d
  10880. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10881. _mm_maskz_permute_pd (__mmask8 __U, __m128d __X, const int __C)
  10882. {
  10883. return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
  10884. (__v2df)
  10885. _mm_setzero_pd (),
  10886. (__mmask8) __U);
  10887. }
  10888. extern __inline __m256
  10889. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10890. _mm256_mask_permute_ps (__m256 __W, __mmask8 __U, __m256 __X,
  10891. const int __C)
  10892. {
  10893. return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
  10894. (__v8sf) __W,
  10895. (__mmask8) __U);
  10896. }
  10897. extern __inline __m256
  10898. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10899. _mm256_maskz_permute_ps (__mmask8 __U, __m256 __X, const int __C)
  10900. {
  10901. return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
  10902. (__v8sf)
  10903. _mm256_setzero_ps (),
  10904. (__mmask8) __U);
  10905. }
  10906. extern __inline __m128
  10907. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10908. _mm_mask_permute_ps (__m128 __W, __mmask8 __U, __m128 __X,
  10909. const int __C)
  10910. {
  10911. return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
  10912. (__v4sf) __W,
  10913. (__mmask8) __U);
  10914. }
  10915. extern __inline __m128
  10916. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10917. _mm_maskz_permute_ps (__mmask8 __U, __m128 __X, const int __C)
  10918. {
  10919. return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
  10920. (__v4sf)
  10921. _mm_setzero_ps (),
  10922. (__mmask8) __U);
  10923. }
  10924. extern __inline __m256d
  10925. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10926. _mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W)
  10927. {
  10928. return (__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) __A,
  10929. (__v4df) __W,
  10930. (__mmask8) __U);
  10931. }
  10932. extern __inline __m256
  10933. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10934. _mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W)
  10935. {
  10936. return (__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) __A,
  10937. (__v8sf) __W,
  10938. (__mmask8) __U);
  10939. }
  10940. extern __inline __m256i
  10941. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10942. _mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W)
  10943. {
  10944. return (__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) __A,
  10945. (__v4di) __W,
  10946. (__mmask8) __U);
  10947. }
  10948. extern __inline __m256i
  10949. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10950. _mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W)
  10951. {
  10952. return (__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) __A,
  10953. (__v8si) __W,
  10954. (__mmask8) __U);
  10955. }
  10956. extern __inline __m128d
  10957. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10958. _mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W)
  10959. {
  10960. return (__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) __A,
  10961. (__v2df) __W,
  10962. (__mmask8) __U);
  10963. }
  10964. extern __inline __m128
  10965. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10966. _mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W)
  10967. {
  10968. return (__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) __A,
  10969. (__v4sf) __W,
  10970. (__mmask8) __U);
  10971. }
  10972. extern __inline __m128i
  10973. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10974. _mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W)
  10975. {
  10976. return (__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) __A,
  10977. (__v2di) __W,
  10978. (__mmask8) __U);
  10979. }
  10980. extern __inline __m128i
  10981. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10982. _mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W)
  10983. {
  10984. return (__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) __A,
  10985. (__v4si) __W,
  10986. (__mmask8) __U);
  10987. }
  10988. extern __inline __mmask8
  10989. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10990. _mm256_cmp_epi64_mask (__m256i __X, __m256i __Y, const int __P)
  10991. {
  10992. return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
  10993. (__v4di) __Y, __P,
  10994. (__mmask8) -1);
  10995. }
  10996. extern __inline __mmask8
  10997. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10998. _mm256_cmp_epi32_mask (__m256i __X, __m256i __Y, const int __P)
  10999. {
  11000. return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
  11001. (__v8si) __Y, __P,
  11002. (__mmask8) -1);
  11003. }
  11004. extern __inline __mmask8
  11005. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11006. _mm256_cmp_epu64_mask (__m256i __X, __m256i __Y, const int __P)
  11007. {
  11008. return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
  11009. (__v4di) __Y, __P,
  11010. (__mmask8) -1);
  11011. }
  11012. extern __inline __mmask8
  11013. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11014. _mm256_cmp_epu32_mask (__m256i __X, __m256i __Y, const int __P)
  11015. {
  11016. return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
  11017. (__v8si) __Y, __P,
  11018. (__mmask8) -1);
  11019. }
  11020. extern __inline __mmask8
  11021. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11022. _mm256_cmp_pd_mask (__m256d __X, __m256d __Y, const int __P)
  11023. {
  11024. return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
  11025. (__v4df) __Y, __P,
  11026. (__mmask8) -1);
  11027. }
  11028. extern __inline __mmask8
  11029. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11030. _mm256_cmp_ps_mask (__m256 __X, __m256 __Y, const int __P)
  11031. {
  11032. return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
  11033. (__v8sf) __Y, __P,
  11034. (__mmask8) -1);
  11035. }
  11036. extern __inline __mmask8
  11037. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11038. _mm256_mask_cmp_epi64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
  11039. const int __P)
  11040. {
  11041. return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
  11042. (__v4di) __Y, __P,
  11043. (__mmask8) __U);
  11044. }
  11045. extern __inline __mmask8
  11046. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11047. _mm256_mask_cmp_epi32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
  11048. const int __P)
  11049. {
  11050. return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
  11051. (__v8si) __Y, __P,
  11052. (__mmask8) __U);
  11053. }
  11054. extern __inline __mmask8
  11055. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11056. _mm256_mask_cmp_epu64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
  11057. const int __P)
  11058. {
  11059. return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
  11060. (__v4di) __Y, __P,
  11061. (__mmask8) __U);
  11062. }
  11063. extern __inline __mmask8
  11064. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11065. _mm256_mask_cmp_epu32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
  11066. const int __P)
  11067. {
  11068. return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
  11069. (__v8si) __Y, __P,
  11070. (__mmask8) __U);
  11071. }
  11072. extern __inline __mmask8
  11073. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11074. _mm256_mask_cmp_pd_mask (__mmask8 __U, __m256d __X, __m256d __Y,
  11075. const int __P)
  11076. {
  11077. return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
  11078. (__v4df) __Y, __P,
  11079. (__mmask8) __U);
  11080. }
  11081. extern __inline __mmask8
  11082. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11083. _mm256_mask_cmp_ps_mask (__mmask8 __U, __m256 __X, __m256 __Y,
  11084. const int __P)
  11085. {
  11086. return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
  11087. (__v8sf) __Y, __P,
  11088. (__mmask8) __U);
  11089. }
  11090. extern __inline __mmask8
  11091. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11092. _mm_cmp_epi64_mask (__m128i __X, __m128i __Y, const int __P)
  11093. {
  11094. return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
  11095. (__v2di) __Y, __P,
  11096. (__mmask8) -1);
  11097. }
  11098. extern __inline __mmask8
  11099. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11100. _mm_cmp_epi32_mask (__m128i __X, __m128i __Y, const int __P)
  11101. {
  11102. return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
  11103. (__v4si) __Y, __P,
  11104. (__mmask8) -1);
  11105. }
  11106. extern __inline __mmask8
  11107. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11108. _mm_cmp_epu64_mask (__m128i __X, __m128i __Y, const int __P)
  11109. {
  11110. return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
  11111. (__v2di) __Y, __P,
  11112. (__mmask8) -1);
  11113. }
  11114. extern __inline __mmask8
  11115. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11116. _mm_cmp_epu32_mask (__m128i __X, __m128i __Y, const int __P)
  11117. {
  11118. return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
  11119. (__v4si) __Y, __P,
  11120. (__mmask8) -1);
  11121. }
  11122. extern __inline __mmask8
  11123. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11124. _mm_cmp_pd_mask (__m128d __X, __m128d __Y, const int __P)
  11125. {
  11126. return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
  11127. (__v2df) __Y, __P,
  11128. (__mmask8) -1);
  11129. }
  /* Unmasked compare wrapper: forwards __X and __Y (cast to __v4sf) and the
     predicate __P to __builtin_ia32_cmpps128_mask with an all-ones mask
     operand, (__mmask8) -1, and returns the builtin's result as __mmask8.
     NOTE(review): the meaning of the individual __P values is defined by
     the builtin, not visible here.  */
  11130. extern __inline __mmask8
  11131. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11132. _mm_cmp_ps_mask (__m128 __X, __m128 __Y, const int __P)
  11133. {
  11134. return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
  11135. (__v4sf) __Y, __P,
  11136. (__mmask8) -1);
  11137. }
  /* Masked compare wrapper: forwards __X and __Y (cast to __v2di) and the
     predicate __P to __builtin_ia32_cmpq128_mask, passing the caller's __U
     as the mask operand, and returns the builtin's result as __mmask8.
     NOTE(review): presumably result bits whose __U bit is clear come back
     as zero -- confirm against the builtin's documentation.  */
  11138. extern __inline __mmask8
  11139. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11140. _mm_mask_cmp_epi64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
  11141. const int __P)
  11142. {
  11143. return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
  11144. (__v2di) __Y, __P,
  11145. (__mmask8) __U);
  11146. }
  /* Masked compare wrapper: forwards __X and __Y (cast to __v4si) and the
     predicate __P to __builtin_ia32_cmpd128_mask, passing the caller's __U
     as the mask operand, and returns the builtin's result as __mmask8.
     NOTE(review): presumably result bits whose __U bit is clear come back
     as zero -- confirm against the builtin's documentation.  */
  11147. extern __inline __mmask8
  11148. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11149. _mm_mask_cmp_epi32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
  11150. const int __P)
  11151. {
  11152. return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
  11153. (__v4si) __Y, __P,
  11154. (__mmask8) __U);
  11155. }
  /* Masked compare wrapper: forwards __X and __Y (cast to __v2di) and the
     predicate __P to __builtin_ia32_ucmpq128_mask, passing the caller's __U
     as the mask operand, and returns the builtin's result as __mmask8.
     NOTE(review): the "ucmp" builtin presumably performs the unsigned
     comparison -- confirm against the builtin's documentation.  */
  11156. extern __inline __mmask8
  11157. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11158. _mm_mask_cmp_epu64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
  11159. const int __P)
  11160. {
  11161. return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
  11162. (__v2di) __Y, __P,
  11163. (__mmask8) __U);
  11164. }
  /* Masked compare wrapper: forwards __X and __Y (cast to __v4si) and the
     predicate __P to __builtin_ia32_ucmpd128_mask, passing the caller's __U
     as the mask operand, and returns the builtin's result as __mmask8.
     NOTE(review): the "ucmp" builtin presumably performs the unsigned
     comparison -- confirm against the builtin's documentation.  */
  11165. extern __inline __mmask8
  11166. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11167. _mm_mask_cmp_epu32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
  11168. const int __P)
  11169. {
  11170. return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
  11171. (__v4si) __Y, __P,
  11172. (__mmask8) __U);
  11173. }
  /* Masked compare wrapper: forwards __X and __Y (cast to __v2df) and the
     predicate __P to __builtin_ia32_cmppd128_mask, passing the caller's __U
     as the mask operand, and returns the builtin's result as __mmask8.  */
  11174. extern __inline __mmask8
  11175. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11176. _mm_mask_cmp_pd_mask (__mmask8 __U, __m128d __X, __m128d __Y,
  11177. const int __P)
  11178. {
  11179. return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
  11180. (__v2df) __Y, __P,
  11181. (__mmask8) __U);
  11182. }
  /* Masked compare wrapper: forwards __X and __Y (cast to __v4sf) and the
     predicate __P to __builtin_ia32_cmpps128_mask, passing the caller's __U
     as the mask operand, and returns the builtin's result as __mmask8.  */
  11183. extern __inline __mmask8
  11184. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11185. _mm_mask_cmp_ps_mask (__mmask8 __U, __m128 __X, __m128 __Y,
  11186. const int __P)
  11187. {
  11188. return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
  11189. (__v4sf) __Y, __P,
  11190. (__mmask8) __U);
  11191. }
  /* Inline-function form of _mm256_permutex_pd: calls
     __builtin_ia32_permdf256_mask on __X (cast to __v4df) with the
     immediate __M, _mm256_undefined_pd () as the third operand and an
     all-ones mask, (__mmask8) -1.  The #else branch that follows defines a
     macro of the same name with the same expansion.
     NOTE(review): the third operand is presumably never observed because
     the mask is all ones -- confirm against the builtin's semantics.  */
  11192. extern __inline __m256d
  11193. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11194. _mm256_permutex_pd (__m256d __X, const int __M)
  11195. {
  11196. return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __M,
  11197. (__v4df)
  11198. _mm256_undefined_pd (),
  11199. (__mmask8) -1);
  11200. }
  11201. #else
  /* Macro form of _mm256_permutex_pd, used in the #else branch: expands to
     one __builtin_ia32_permdf256_mask call on X (as __v4df) with the int
     immediate M, _mm256_undefined_pd () as the third operand and an
     all-ones mask.  X and M are each evaluated once.  */
  11202. #define _mm256_permutex_pd(X, M) \
  11203. ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(X), (int)(M), \
  11204. (__v4df)(__m256d) \
  11205. _mm256_undefined_pd (), \
  11206. (__mmask8)-1))
  /* _mm256_permutex_epi64 family.  Each macro expands to one
     __builtin_ia32_permdi256_mask call on X (as __v4di) with the int
     immediate I.  Third operand: _mm256_setzero_si256 () for the unmasked
     and maskz forms, W for the mask form.  Mask operand: -1 for the
     unmasked form, M otherwise.  */
  11207. #define _mm256_permutex_epi64(X, I) \
  11208. ((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), \
  11209. (int)(I), \
  11210. (__v4di)(__m256i) \
  11211. (_mm256_setzero_si256 ()),\
  11212. (__mmask8) -1))
  11213. #define _mm256_maskz_permutex_epi64(M, X, I) \
  11214. ((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), \
  11215. (int)(I), \
  11216. (__v4di)(__m256i) \
  11217. (_mm256_setzero_si256 ()),\
  11218. (__mmask8)(M)))
  11219. #define _mm256_mask_permutex_epi64(W, M, X, I) \
  11220. ((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), \
  11221. (int)(I), \
  11222. (__v4di)(__m256i)(W), \
  11223. (__mmask8)(M)))
  /* _mm256_insertf32x4 family.  Each macro expands to one
     __builtin_ia32_insertf32x4_256_mask call taking X (as __v8sf), Y (as
     __v4sf) and the int immediate C.  Fourth operand:
     _mm256_setzero_ps () for the unmasked and maskz forms, W for the mask
     form.  Mask operand: -1 for the unmasked form, U otherwise.  */
  11224. #define _mm256_insertf32x4(X, Y, C) \
  11225. ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \
  11226. (__v4sf)(__m128) (Y), (int) (C), \
  11227. (__v8sf)(__m256)_mm256_setzero_ps (), \
  11228. (__mmask8)-1))
  11229. #define _mm256_mask_insertf32x4(W, U, X, Y, C) \
  11230. ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \
  11231. (__v4sf)(__m128) (Y), (int) (C), \
  11232. (__v8sf)(__m256)(W), \
  11233. (__mmask8)(U)))
  11234. #define _mm256_maskz_insertf32x4(U, X, Y, C) \
  11235. ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \
  11236. (__v4sf)(__m128) (Y), (int) (C), \
  11237. (__v8sf)(__m256)_mm256_setzero_ps (), \
  11238. (__mmask8)(U)))
  /* _mm256_inserti32x4 family.  Each macro expands to one
     __builtin_ia32_inserti32x4_256_mask call taking X (as __v8si), Y (as
     __v4si) and the int immediate C.  Fourth operand:
     _mm256_setzero_si256 () for the unmasked and maskz forms, W for the
     mask form.  Mask operand: -1 for the unmasked form, U otherwise.  */
  11239. #define _mm256_inserti32x4(X, Y, C) \
  11240. ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
  11241. (__v4si)(__m128i) (Y), (int) (C), \
  11242. (__v8si)(__m256i)_mm256_setzero_si256 (), \
  11243. (__mmask8)-1))
  11244. #define _mm256_mask_inserti32x4(W, U, X, Y, C) \
  11245. ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
  11246. (__v4si)(__m128i) (Y), (int) (C), \
  11247. (__v8si)(__m256i)(W), \
  11248. (__mmask8)(U)))
  11249. #define _mm256_maskz_inserti32x4(U, X, Y, C) \
  11250. ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
  11251. (__v4si)(__m128i) (Y), (int) (C), \
  11252. (__v8si)(__m256i)_mm256_setzero_si256 (), \
  11253. (__mmask8)(U)))
  /* _mm256_extractf32x4_ps family.  Each macro expands to one
     __builtin_ia32_extractf32x4_256_mask call on X (as __v8sf) with the
     int immediate C and yields an __m128.  Third operand:
     _mm_setzero_ps () for the unmasked and maskz forms, W for the mask
     form.  Mask operand: -1 for the unmasked form, U otherwise.  */
  11254. #define _mm256_extractf32x4_ps(X, C) \
  11255. ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
  11256. (int) (C), \
  11257. (__v4sf)(__m128)_mm_setzero_ps (), \
  11258. (__mmask8)-1))
  11259. #define _mm256_mask_extractf32x4_ps(W, U, X, C) \
  11260. ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
  11261. (int) (C), \
  11262. (__v4sf)(__m128)(W), \
  11263. (__mmask8)(U)))
  11264. #define _mm256_maskz_extractf32x4_ps(U, X, C) \
  11265. ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
  11266. (int) (C), \
  11267. (__v4sf)(__m128)_mm_setzero_ps (), \
  11268. (__mmask8)(U)))
  /* _mm256_extracti32x4_epi32 family.  Each macro expands to one
     __builtin_ia32_extracti32x4_256_mask call on X (as __v8si) with the
     int immediate C and yields an __m128i.  Third operand:
     _mm_setzero_si128 () for the unmasked and maskz forms, W for the mask
     form.  Mask operand: -1 for the unmasked form, U otherwise.  */
  11269. #define _mm256_extracti32x4_epi32(X, C) \
  11270. ((__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X),\
  11271. (int) (C), (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)-1))
  11272. #define _mm256_mask_extracti32x4_epi32(W, U, X, C) \
  11273. ((__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X),\
  11274. (int) (C), (__v4si)(__m128i)(W), (__mmask8)(U)))
  11275. #define _mm256_maskz_extracti32x4_epi32(U, X, C) \
  11276. ((__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X),\
  11277. (int) (C), (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)(U)))
  /* _mm256_shuffle_i64x2 family.  Each macro expands to one
     __builtin_ia32_shuf_i64x2_256_mask call on X and Y (as __v4di) with
     the int immediate C.  Fourth operand: _mm256_setzero_si256 () for the
     unmasked and maskz forms, W for the mask form.  Mask operand: -1 for
     the unmasked form, U otherwise.  */
  11278. #define _mm256_shuffle_i64x2(X, Y, C) \
  11279. ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i)(X), \
  11280. (__v4di)(__m256i)(Y), (int)(C), \
  11281. (__v4di)(__m256i)_mm256_setzero_si256 (), \
  11282. (__mmask8)-1))
  11283. #define _mm256_mask_shuffle_i64x2(W, U, X, Y, C) \
  11284. ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i)(X), \
  11285. (__v4di)(__m256i)(Y), (int)(C), \
  11286. (__v4di)(__m256i)(W),\
  11287. (__mmask8)(U)))
  11288. #define _mm256_maskz_shuffle_i64x2(U, X, Y, C) \
  11289. ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i)(X), \
  11290. (__v4di)(__m256i)(Y), (int)(C), \
  11291. (__v4di)(__m256i)_mm256_setzero_si256 (), \
  11292. (__mmask8)(U)))
  /* _mm256_shuffle_i32x4 family.  Each macro expands to one
     __builtin_ia32_shuf_i32x4_256_mask call on X and Y (as __v8si) with
     the int immediate C.  Fourth operand: _mm256_setzero_si256 () for the
     unmasked and maskz forms, W for the mask form.  Mask operand: -1 for
     the unmasked form, U otherwise.  */
  11293. #define _mm256_shuffle_i32x4(X, Y, C) \
  11294. ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X), \
  11295. (__v8si)(__m256i)(Y), (int)(C), \
  11296. (__v8si)(__m256i) \
  11297. _mm256_setzero_si256 (), \
  11298. (__mmask8)-1))
  11299. #define _mm256_mask_shuffle_i32x4(W, U, X, Y, C) \
  11300. ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X), \
  11301. (__v8si)(__m256i)(Y), (int)(C), \
  11302. (__v8si)(__m256i)(W), \
  11303. (__mmask8)(U)))
  11304. #define _mm256_maskz_shuffle_i32x4(U, X, Y, C) \
  11305. ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X), \
  11306. (__v8si)(__m256i)(Y), (int)(C), \
  11307. (__v8si)(__m256i) \
  11308. _mm256_setzero_si256 (), \
  11309. (__mmask8)(U)))
  /* _mm256_shuffle_f64x2 family.  Each macro expands to one
     __builtin_ia32_shuf_f64x2_256_mask call on X and Y (as __v4df) with
     the int immediate C.  Fourth operand: _mm256_setzero_pd () for the
     unmasked and maskz forms, W for the mask form.  Mask operand: -1 for
     the unmasked form, U otherwise.  */
  11310. #define _mm256_shuffle_f64x2(X, Y, C) \
  11311. ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X), \
  11312. (__v4df)(__m256d)(Y), (int)(C), \
  11313. (__v4df)(__m256d)_mm256_setzero_pd (),\
  11314. (__mmask8)-1))
  11315. #define _mm256_mask_shuffle_f64x2(W, U, X, Y, C) \
  11316. ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X), \
  11317. (__v4df)(__m256d)(Y), (int)(C), \
  11318. (__v4df)(__m256d)(W), \
  11319. (__mmask8)(U)))
  11320. #define _mm256_maskz_shuffle_f64x2(U, X, Y, C) \
  11321. ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X), \
  11322. (__v4df)(__m256d)(Y), (int)(C), \
  11323. (__v4df)(__m256d)_mm256_setzero_pd( ),\
  11324. (__mmask8)(U)))
  /* _mm256_shuffle_f32x4 family.  Each macro expands to one
     __builtin_ia32_shuf_f32x4_256_mask call on X and Y (as __v8sf) with
     the int immediate C.  Fourth operand: _mm256_setzero_ps () for the
     unmasked and maskz forms, W for the mask form.  Mask operand: -1 for
     the unmasked form, U otherwise.  */
  11325. #define _mm256_shuffle_f32x4(X, Y, C) \
  11326. ((__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X), \
  11327. (__v8sf)(__m256)(Y), (int)(C), \
  11328. (__v8sf)(__m256)_mm256_setzero_ps (), \
  11329. (__mmask8)-1))
  11330. #define _mm256_mask_shuffle_f32x4(W, U, X, Y, C) \
  11331. ((__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X), \
  11332. (__v8sf)(__m256)(Y), (int)(C), \
  11333. (__v8sf)(__m256)(W), \
  11334. (__mmask8)(U)))
  11335. #define _mm256_maskz_shuffle_f32x4(U, X, Y, C) \
  11336. ((__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X), \
  11337. (__v8sf)(__m256)(Y), (int)(C), \
  11338. (__v8sf)(__m256)_mm256_setzero_ps (), \
  11339. (__mmask8)(U)))
  /* Masked 256-bit shuffle_pd macros.  Both expand to one
     __builtin_ia32_shufpd256_mask call on A and B (as __v4df) with the int
     immediate C and mask U.  The mask form passes W as the fourth operand;
     the maskz form passes _mm256_setzero_pd ().  */
  11340. #define _mm256_mask_shuffle_pd(W, U, A, B, C) \
  11341. ((__m256d)__builtin_ia32_shufpd256_mask ((__v4df)(__m256d)(A), \
  11342. (__v4df)(__m256d)(B), (int)(C), \
  11343. (__v4df)(__m256d)(W), \
  11344. (__mmask8)(U)))
  11345. #define _mm256_maskz_shuffle_pd(U, A, B, C) \
  11346. ((__m256d)__builtin_ia32_shufpd256_mask ((__v4df)(__m256d)(A), \
  11347. (__v4df)(__m256d)(B), (int)(C), \
  11348. (__v4df)(__m256d) \
  11349. _mm256_setzero_pd (), \
  11350. (__mmask8)(U)))
  /* Masked 128-bit shuffle_pd macros.  Both expand to one
     __builtin_ia32_shufpd128_mask call on A and B (as __v2df) with the int
     immediate C and mask U.  The mask form passes W as the fourth operand;
     the maskz form passes _mm_setzero_pd ().  */
  11351. #define _mm_mask_shuffle_pd(W, U, A, B, C) \
  11352. ((__m128d)__builtin_ia32_shufpd128_mask ((__v2df)(__m128d)(A), \
  11353. (__v2df)(__m128d)(B), (int)(C), \
  11354. (__v2df)(__m128d)(W), \
  11355. (__mmask8)(U)))
  11356. #define _mm_maskz_shuffle_pd(U, A, B, C) \
  11357. ((__m128d)__builtin_ia32_shufpd128_mask ((__v2df)(__m128d)(A), \
  11358. (__v2df)(__m128d)(B), (int)(C), \
  11359. (__v2df)(__m128d)_mm_setzero_pd (), \
  11360. (__mmask8)(U)))
  /* Masked 256-bit shuffle_ps macros.  Both expand to one
     __builtin_ia32_shufps256_mask call on A and B (as __v8sf) with the int
     immediate C and mask U.  The mask form passes W as the fourth operand;
     the maskz form passes _mm256_setzero_ps ().  */
  11361. #define _mm256_mask_shuffle_ps(W, U, A, B, C) \
  11362. ((__m256) __builtin_ia32_shufps256_mask ((__v8sf)(__m256)(A), \
  11363. (__v8sf)(__m256)(B), (int)(C), \
  11364. (__v8sf)(__m256)(W), \
  11365. (__mmask8)(U)))
  11366. #define _mm256_maskz_shuffle_ps(U, A, B, C) \
  11367. ((__m256) __builtin_ia32_shufps256_mask ((__v8sf)(__m256)(A), \
  11368. (__v8sf)(__m256)(B), (int)(C), \
  11369. (__v8sf)(__m256)_mm256_setzero_ps (),\
  11370. (__mmask8)(U)))
  /* Masked 128-bit shuffle_ps macros.  Both expand to one
     __builtin_ia32_shufps128_mask call on A and B (as __v4sf) with the int
     immediate C and mask U.  The mask form passes W as the fourth operand;
     the maskz form passes _mm_setzero_ps ().  */
  11371. #define _mm_mask_shuffle_ps(W, U, A, B, C) \
  11372. ((__m128) __builtin_ia32_shufps128_mask ((__v4sf)(__m128)(A), \
  11373. (__v4sf)(__m128)(B), (int)(C), \
  11374. (__v4sf)(__m128)(W), \
  11375. (__mmask8)(U)))
  11376. #define _mm_maskz_shuffle_ps(U, A, B, C) \
  11377. ((__m128) __builtin_ia32_shufps128_mask ((__v4sf)(__m128)(A), \
  11378. (__v4sf)(__m128)(B), (int)(C), \
  11379. (__v4sf)(__m128)_mm_setzero_ps (), \
  11380. (__mmask8)(U)))
  /* _mm256_fixupimm_pd family.  Operands are X and Y (as __v4df), Z (as
     __v4di) and the int immediate C.  The unmasked and mask forms call
     __builtin_ia32_fixupimmpd256_mask (mask -1 and U respectively); the
     maskz form calls the separate __builtin_ia32_fixupimmpd256_maskz.
     Note the parameter order: the mask form is (X, U, Y, Z, C) while the
     maskz form is (U, X, Y, Z, C).  */
  11381. #define _mm256_fixupimm_pd(X, Y, Z, C) \
  11382. ((__m256d)__builtin_ia32_fixupimmpd256_mask ((__v4df)(__m256d)(X), \
  11383. (__v4df)(__m256d)(Y), \
  11384. (__v4di)(__m256i)(Z), (int)(C), \
  11385. (__mmask8)(-1)))
  11386. #define _mm256_mask_fixupimm_pd(X, U, Y, Z, C) \
  11387. ((__m256d)__builtin_ia32_fixupimmpd256_mask ((__v4df)(__m256d)(X), \
  11388. (__v4df)(__m256d)(Y), \
  11389. (__v4di)(__m256i)(Z), (int)(C), \
  11390. (__mmask8)(U)))
  11391. #define _mm256_maskz_fixupimm_pd(U, X, Y, Z, C) \
  11392. ((__m256d)__builtin_ia32_fixupimmpd256_maskz ((__v4df)(__m256d)(X), \
  11393. (__v4df)(__m256d)(Y), \
  11394. (__v4di)(__m256i)(Z), (int)(C),\
  11395. (__mmask8)(U)))
  /* _mm256_fixupimm_ps family.  Operands are X and Y (as __v8sf), Z (as
     __v8si) and the int immediate C.  The unmasked and mask forms call
     __builtin_ia32_fixupimmps256_mask (mask -1 and U respectively); the
     maskz form calls the separate __builtin_ia32_fixupimmps256_maskz.
     Note the parameter order: the mask form is (X, U, Y, Z, C) while the
     maskz form is (U, X, Y, Z, C).  */
  11396. #define _mm256_fixupimm_ps(X, Y, Z, C) \
  11397. ((__m256)__builtin_ia32_fixupimmps256_mask ((__v8sf)(__m256)(X), \
  11398. (__v8sf)(__m256)(Y), \
  11399. (__v8si)(__m256i)(Z), (int)(C), \
  11400. (__mmask8)(-1)))
  11401. #define _mm256_mask_fixupimm_ps(X, U, Y, Z, C) \
  11402. ((__m256)__builtin_ia32_fixupimmps256_mask ((__v8sf)(__m256)(X), \
  11403. (__v8sf)(__m256)(Y), \
  11404. (__v8si)(__m256i)(Z), (int)(C), \
  11405. (__mmask8)(U)))
  11406. #define _mm256_maskz_fixupimm_ps(U, X, Y, Z, C) \
  11407. ((__m256)__builtin_ia32_fixupimmps256_maskz ((__v8sf)(__m256)(X), \
  11408. (__v8sf)(__m256)(Y), \
  11409. (__v8si)(__m256i)(Z), (int)(C),\
  11410. (__mmask8)(U)))
  /* _mm_fixupimm_pd family.  Operands are X and Y (as __v2df), Z (as
     __v2di) and the int immediate C.  The unmasked and mask forms call
     __builtin_ia32_fixupimmpd128_mask (mask -1 and U respectively); the
     maskz form calls the separate __builtin_ia32_fixupimmpd128_maskz.
     Note the parameter order: the mask form is (X, U, Y, Z, C) while the
     maskz form is (U, X, Y, Z, C).  */
  11411. #define _mm_fixupimm_pd(X, Y, Z, C) \
  11412. ((__m128d)__builtin_ia32_fixupimmpd128_mask ((__v2df)(__m128d)(X), \
  11413. (__v2df)(__m128d)(Y), \
  11414. (__v2di)(__m128i)(Z), (int)(C), \
  11415. (__mmask8)(-1)))
  11416. #define _mm_mask_fixupimm_pd(X, U, Y, Z, C) \
  11417. ((__m128d)__builtin_ia32_fixupimmpd128_mask ((__v2df)(__m128d)(X), \
  11418. (__v2df)(__m128d)(Y), \
  11419. (__v2di)(__m128i)(Z), (int)(C), \
  11420. (__mmask8)(U)))
  11421. #define _mm_maskz_fixupimm_pd(U, X, Y, Z, C) \
  11422. ((__m128d)__builtin_ia32_fixupimmpd128_maskz ((__v2df)(__m128d)(X), \
  11423. (__v2df)(__m128d)(Y), \
  11424. (__v2di)(__m128i)(Z), (int)(C),\
  11425. (__mmask8)(U)))
  /* _mm_fixupimm_ps family.  Operands are X and Y (as __v4sf), Z (as
     __v4si) and the int immediate C.  The unmasked and mask forms call
     __builtin_ia32_fixupimmps128_mask (mask -1 and U respectively); the
     maskz form calls the separate __builtin_ia32_fixupimmps128_maskz.
     Note the parameter order: the mask form is (X, U, Y, Z, C) while the
     maskz form is (U, X, Y, Z, C).  */
  11426. #define _mm_fixupimm_ps(X, Y, Z, C) \
  11427. ((__m128)__builtin_ia32_fixupimmps128_mask ((__v4sf)(__m128)(X), \
  11428. (__v4sf)(__m128)(Y), \
  11429. (__v4si)(__m128i)(Z), (int)(C), \
  11430. (__mmask8)(-1)))
  11431. #define _mm_mask_fixupimm_ps(X, U, Y, Z, C) \
  11432. ((__m128)__builtin_ia32_fixupimmps128_mask ((__v4sf)(__m128)(X), \
  11433. (__v4sf)(__m128)(Y), \
  11434. (__v4si)(__m128i)(Z), (int)(C),\
  11435. (__mmask8)(U)))
  11436. #define _mm_maskz_fixupimm_ps(U, X, Y, Z, C) \
  11437. ((__m128)__builtin_ia32_fixupimmps128_maskz ((__v4sf)(__m128)(X), \
  11438. (__v4sf)(__m128)(Y), \
  11439. (__v4si)(__m128i)(Z), (int)(C),\
  11440. (__mmask8)(U)))
  /* Masked srli_epi32 macros, 256-bit then 128-bit.  Each expands to one
     __builtin_ia32_psrldi256_mask / __builtin_ia32_psrldi128_mask call on A
     with the int count B and mask U.  The mask forms pass W as the third
     operand; the maskz forms pass a zero vector
     (_mm256_setzero_si256 () / _mm_setzero_si128 ()).  */
  11441. #define _mm256_mask_srli_epi32(W, U, A, B) \
  11442. ((__m256i) __builtin_ia32_psrldi256_mask ((__v8si)(__m256i)(A), \
  11443. (int)(B), (__v8si)(__m256i)(W), (__mmask8)(U)))
  11444. #define _mm256_maskz_srli_epi32(U, A, B) \
  11445. ((__m256i) __builtin_ia32_psrldi256_mask ((__v8si)(__m256i)(A), \
  11446. (int)(B), (__v8si)_mm256_setzero_si256 (), (__mmask8)(U)))
  11447. #define _mm_mask_srli_epi32(W, U, A, B) \
  11448. ((__m128i) __builtin_ia32_psrldi128_mask ((__v4si)(__m128i)(A), \
  11449. (int)(B), (__v4si)(__m128i)(W), (__mmask8)(U)))
  11450. #define _mm_maskz_srli_epi32(U, A, B) \
  11451. ((__m128i) __builtin_ia32_psrldi128_mask ((__v4si)(__m128i)(A), \
  11452. (int)(B), (__v4si)_mm_setzero_si128 (), (__mmask8)(U)))
  /* Masked srli_epi64 macros, 256-bit then 128-bit.  Each expands to one
     __builtin_ia32_psrlqi256_mask / __builtin_ia32_psrlqi128_mask call on A
     with the int count B and mask U.  The mask forms pass W as the third
     operand; the maskz forms pass a zero vector
     (_mm256_setzero_si256 () / _mm_setzero_si128 ()).  */
  11453. #define _mm256_mask_srli_epi64(W, U, A, B) \
  11454. ((__m256i) __builtin_ia32_psrlqi256_mask ((__v4di)(__m256i)(A), \
  11455. (int)(B), (__v4di)(__m256i)(W), (__mmask8)(U)))
  11456. #define _mm256_maskz_srli_epi64(U, A, B) \
  11457. ((__m256i) __builtin_ia32_psrlqi256_mask ((__v4di)(__m256i)(A), \
  11458. (int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)(U)))
  11459. #define _mm_mask_srli_epi64(W, U, A, B) \
  11460. ((__m128i) __builtin_ia32_psrlqi128_mask ((__v2di)(__m128i)(A), \
  11461. (int)(B), (__v2di)(__m128i)(W), (__mmask8)(U)))
  11462. #define _mm_maskz_srli_epi64(U, A, B) \
  11463. ((__m128i) __builtin_ia32_psrlqi128_mask ((__v2di)(__m128i)(A), \
  11464. (int)(B), (__v2di)_mm_setzero_si128 (), (__mmask8)(U)))
  /* Masked 256-bit slli macros, epi32 then epi64.  Each expands to one
     __builtin_ia32_pslldi256_mask / __builtin_ia32_psllqi256_mask call on X
     with the int count C and mask U.  The mask forms pass W as the third
     operand; the maskz forms pass _mm256_setzero_si256 ().  */
  11465. #define _mm256_mask_slli_epi32(W, U, X, C) \
  11466. ((__m256i)__builtin_ia32_pslldi256_mask ((__v8si)(__m256i)(X), (int)(C),\
  11467. (__v8si)(__m256i)(W), \
  11468. (__mmask8)(U)))
  11469. #define _mm256_maskz_slli_epi32(U, X, C) \
  11470. ((__m256i)__builtin_ia32_pslldi256_mask ((__v8si)(__m256i)(X), (int)(C),\
  11471. (__v8si)(__m256i)_mm256_setzero_si256 (), \
  11472. (__mmask8)(U)))
  11473. #define _mm256_mask_slli_epi64(W, U, X, C) \
  11474. ((__m256i)__builtin_ia32_psllqi256_mask ((__v4di)(__m256i)(X), (int)(C),\
  11475. (__v4di)(__m256i)(W), \
  11476. (__mmask8)(U)))
  11477. #define _mm256_maskz_slli_epi64(U, X, C) \
  11478. ((__m256i)__builtin_ia32_psllqi256_mask ((__v4di)(__m256i)(X), (int)(C),\
  11479. (__v4di)(__m256i)_mm256_setzero_si256 (), \
  11480. (__mmask8)(U)))
  /* Masked 128-bit slli macros, epi32 then epi64.  Each expands to one
     __builtin_ia32_pslldi128_mask / __builtin_ia32_psllqi128_mask call on X
     with the int count C and mask U.  The mask forms pass W as the third
     operand; the maskz forms pass _mm_setzero_si128 ().  */
  11481. #define _mm_mask_slli_epi32(W, U, X, C) \
  11482. ((__m128i)__builtin_ia32_pslldi128_mask ((__v4si)(__m128i)(X), (int)(C),\
  11483. (__v4si)(__m128i)(W),\
  11484. (__mmask8)(U)))
  11485. #define _mm_maskz_slli_epi32(U, X, C) \
  11486. ((__m128i)__builtin_ia32_pslldi128_mask ((__v4si)(__m128i)(X), (int)(C),\
  11487. (__v4si)(__m128i)_mm_setzero_si128 (),\
  11488. (__mmask8)(U)))
  11489. #define _mm_mask_slli_epi64(W, U, X, C) \
  11490. ((__m128i)__builtin_ia32_psllqi128_mask ((__v2di)(__m128i)(X), (int)(C),\
  11491. (__v2di)(__m128i)(W),\
  11492. (__mmask8)(U)))
  11493. #define _mm_maskz_slli_epi64(U, X, C) \
  11494. ((__m128i)__builtin_ia32_psllqi128_mask ((__v2di)(__m128i)(X), (int)(C),\
  11495. (__v2di)(__m128i)_mm_setzero_si128 (),\
  11496. (__mmask8)(U)))
  /* _mm256_ternarylogic_epi64 family.  Operands are A, B and C (as __v4di)
     and the int immediate I.  The unmasked and mask forms call
     __builtin_ia32_pternlogq256_mask (mask -1 and U respectively); the
     maskz form calls __builtin_ia32_pternlogq256_maskz.  Parameter order:
     mask form (A, U, B, C, I), maskz form (U, A, B, C, I).  */
  11497. #define _mm256_ternarylogic_epi64(A, B, C, I) \
  11498. ((__m256i) __builtin_ia32_pternlogq256_mask ((__v4di)(__m256i)(A), \
  11499. (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), (int)(I), (__mmask8)-1))
  11500. #define _mm256_mask_ternarylogic_epi64(A, U, B, C, I) \
  11501. ((__m256i) __builtin_ia32_pternlogq256_mask ((__v4di)(__m256i)(A), \
  11502. (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), (int)(I), (__mmask8)(U)))
  11503. #define _mm256_maskz_ternarylogic_epi64(U, A, B, C, I) \
  11504. ((__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di)(__m256i)(A), \
  11505. (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), (int)(I), (__mmask8)(U)))
  /* _mm256_ternarylogic_epi32 family.  Operands are A, B and C (as __v8si)
     and the int immediate I.  The unmasked and mask forms call
     __builtin_ia32_pternlogd256_mask (mask -1 and U respectively); the
     maskz form calls __builtin_ia32_pternlogd256_maskz.  Parameter order:
     mask form (A, U, B, C, I), maskz form (U, A, B, C, I).  */
  11506. #define _mm256_ternarylogic_epi32(A, B, C, I) \
  11507. ((__m256i) __builtin_ia32_pternlogd256_mask ((__v8si)(__m256i)(A), \
  11508. (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), (int)(I), (__mmask8)-1))
  11509. #define _mm256_mask_ternarylogic_epi32(A, U, B, C, I) \
  11510. ((__m256i) __builtin_ia32_pternlogd256_mask ((__v8si)(__m256i)(A), \
  11511. (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), (int)(I), (__mmask8)(U)))
  11512. #define _mm256_maskz_ternarylogic_epi32(U, A, B, C, I) \
  11513. ((__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si)(__m256i)(A), \
  11514. (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), (int)(I), (__mmask8)(U)))
  /* _mm_ternarylogic_epi64 family.  Operands are A, B and C (as __v2di)
     and the int immediate I.  The unmasked and mask forms call
     __builtin_ia32_pternlogq128_mask (mask -1 and U respectively); the
     maskz form calls __builtin_ia32_pternlogq128_maskz.  Parameter order:
     mask form (A, U, B, C, I), maskz form (U, A, B, C, I).  */
  11515. #define _mm_ternarylogic_epi64(A, B, C, I) \
  11516. ((__m128i) __builtin_ia32_pternlogq128_mask ((__v2di)(__m128i)(A), \
  11517. (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), (int)(I), (__mmask8)-1))
  11518. #define _mm_mask_ternarylogic_epi64(A, U, B, C, I) \
  11519. ((__m128i) __builtin_ia32_pternlogq128_mask ((__v2di)(__m128i)(A), \
  11520. (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), (int)(I), (__mmask8)(U)))
  11521. #define _mm_maskz_ternarylogic_epi64(U, A, B, C, I) \
  11522. ((__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di)(__m128i)(A), \
  11523. (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), (int)(I), (__mmask8)(U)))
  /* _mm_ternarylogic_epi32 family.  Operands are A, B and C (as __v4si)
     and the int immediate I.  The unmasked and mask forms call
     __builtin_ia32_pternlogd128_mask (mask -1 and U respectively); the
     maskz form calls __builtin_ia32_pternlogd128_maskz.  Parameter order:
     mask form (A, U, B, C, I), maskz form (U, A, B, C, I).  */
  11524. #define _mm_ternarylogic_epi32(A, B, C, I) \
  11525. ((__m128i) __builtin_ia32_pternlogd128_mask ((__v4si)(__m128i)(A), \
  11526. (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), (int)(I), (__mmask8)-1))
  11527. #define _mm_mask_ternarylogic_epi32(A, U, B, C, I) \
  11528. ((__m128i) __builtin_ia32_pternlogd128_mask ((__v4si)(__m128i)(A), \
  11529. (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), (int)(I), (__mmask8)(U)))
  11530. #define _mm_maskz_ternarylogic_epi32(U, A, B, C, I) \
  11531. ((__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si)(__m128i)(A), \
  11532. (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), (int)(I), (__mmask8)(U)))
  /* _mm256_roundscale_ps family.  Each macro expands to one
     __builtin_ia32_rndscaleps_256_mask call on A (as __v8sf) with the int
     immediate B.  Third operand: _mm256_setzero_ps () for the unmasked and
     maskz forms, W for the mask form.  Mask operand: -1 for the unmasked
     form, U otherwise.  */
  11533. #define _mm256_roundscale_ps(A, B) \
  11534. ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), \
  11535. (int)(B), (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)-1))
  11536. #define _mm256_mask_roundscale_ps(W, U, A, B) \
  11537. ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), \
  11538. (int)(B), (__v8sf)(__m256)(W), (__mmask8)(U)))
  11539. #define _mm256_maskz_roundscale_ps(U, A, B) \
  11540. ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), \
  11541. (int)(B), (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)(U)))
  /* _mm256_roundscale_pd family.  Each macro expands to one
     __builtin_ia32_rndscalepd_256_mask call on A (as __v4df) with the int
     immediate B.  Third operand: _mm256_setzero_pd () for the unmasked and
     maskz forms, W for the mask form.  Mask operand: -1 for the unmasked
     form, U otherwise.  */
  11542. #define _mm256_roundscale_pd(A, B) \
  11543. ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A), \
  11544. (int)(B), (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)-1))
  11545. #define _mm256_mask_roundscale_pd(W, U, A, B) \
  11546. ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A), \
  11547. (int)(B), (__v4df)(__m256d)(W), (__mmask8)(U)))
  11548. #define _mm256_maskz_roundscale_pd(U, A, B) \
  11549. ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A), \
  11550. (int)(B), (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)(U)))
  /* _mm_roundscale_ps family.  Each macro expands to one
     __builtin_ia32_rndscaleps_128_mask call on A (as __v4sf) with the int
     immediate B.  Third operand: _mm_setzero_ps () for the unmasked and
     maskz forms, W for the mask form.  Mask operand: -1 for the unmasked
     form, U otherwise.  */
  11551. #define _mm_roundscale_ps(A, B) \
  11552. ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A), \
  11553. (int)(B), (__v4sf)(__m128)_mm_setzero_ps (), (__mmask8)-1))
  11554. #define _mm_mask_roundscale_ps(W, U, A, B) \
  11555. ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A), \
  11556. (int)(B), (__v4sf)(__m128)(W), (__mmask8)(U)))
  11557. #define _mm_maskz_roundscale_ps(U, A, B) \
  11558. ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A), \
  11559. (int)(B), (__v4sf)(__m128)_mm_setzero_ps (), (__mmask8)(U)))
  /* _mm_roundscale_pd family.  Each macro expands to one
     __builtin_ia32_rndscalepd_128_mask call on A (as __v2df) with the int
     immediate B.  Third operand: _mm_setzero_pd () for the unmasked and
     maskz forms, W for the mask form.  Mask operand: -1 for the unmasked
     form, U otherwise.  */
  11560. #define _mm_roundscale_pd(A, B) \
  11561. ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A), \
  11562. (int)(B), (__v2df)(__m128d)_mm_setzero_pd (), (__mmask8)-1))
  11563. #define _mm_mask_roundscale_pd(W, U, A, B) \
  11564. ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A), \
  11565. (int)(B), (__v2df)(__m128d)(W), (__mmask8)(U)))
  11566. #define _mm_maskz_roundscale_pd(U, A, B) \
  11567. ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A), \
  11568. (int)(B), (__v2df)(__m128d)_mm_setzero_pd (), (__mmask8)(U)))
  /* _mm256_getmant_ps family.  Each macro expands to one
     __builtin_ia32_getmantps256_mask call on X (as __v8sf).  The two
     selector arguments are merged into a single int immediate as
     ((C) << 2) | (B), i.e. C is placed above the two low bits and B is
     OR-ed in (assumes B fits in two bits -- TODO confirm).  Third operand:
     _mm256_setzero_ps () for the unmasked and maskz forms, W for the mask
     form.  Mask operand: -1 for the unmasked form, U otherwise.  */
  11569. #define _mm256_getmant_ps(X, B, C) \
  11570. ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X), \
  11571. (int)(((C)<<2) | (B)), \
  11572. (__v8sf)(__m256)_mm256_setzero_ps (), \
  11573. (__mmask8)-1))
  11574. #define _mm256_mask_getmant_ps(W, U, X, B, C) \
  11575. ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X), \
  11576. (int)(((C)<<2) | (B)), \
  11577. (__v8sf)(__m256)(W), \
  11578. (__mmask8)(U)))
  11579. #define _mm256_maskz_getmant_ps(U, X, B, C) \
  11580. ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X), \
  11581. (int)(((C)<<2) | (B)), \
  11582. (__v8sf)(__m256)_mm256_setzero_ps (), \
  11583. (__mmask8)(U)))
  /* _mm_getmant_ps family.  Each macro expands to one
     __builtin_ia32_getmantps128_mask call on X (as __v4sf).  The two
     selector arguments are merged into a single int immediate as
     ((C) << 2) | (B), i.e. C is placed above the two low bits and B is
     OR-ed in (assumes B fits in two bits -- TODO confirm).  Third operand:
     _mm_setzero_ps () for the unmasked and maskz forms, W for the mask
     form.  Mask operand: -1 for the unmasked form, U otherwise.  */
  11584. #define _mm_getmant_ps(X, B, C) \
  11585. ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X), \
  11586. (int)(((C)<<2) | (B)), \
  11587. (__v4sf)(__m128)_mm_setzero_ps (), \
  11588. (__mmask8)-1))
  11589. #define _mm_mask_getmant_ps(W, U, X, B, C) \
  11590. ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X), \
  11591. (int)(((C)<<2) | (B)), \
  11592. (__v4sf)(__m128)(W), \
  11593. (__mmask8)(U)))
  11594. #define _mm_maskz_getmant_ps(U, X, B, C) \
  11595. ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X), \
  11596. (int)(((C)<<2) | (B)), \
  11597. (__v4sf)(__m128)_mm_setzero_ps (), \
  11598. (__mmask8)(U)))
  /* _mm256_getmant_pd family.  Each macro expands to one
     __builtin_ia32_getmantpd256_mask call on X (as __v4df).  The two
     selector arguments are merged into a single int immediate as
     ((C) << 2) | (B), i.e. C is placed above the two low bits and B is
     OR-ed in (assumes B fits in two bits -- TODO confirm).  Third operand:
     _mm256_setzero_pd () for the unmasked and maskz forms, W for the mask
     form.  Mask operand: -1 for the unmasked form, U otherwise.  */
  11599. #define _mm256_getmant_pd(X, B, C) \
  11600. ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X), \
  11601. (int)(((C)<<2) | (B)), \
  11602. (__v4df)(__m256d)_mm256_setzero_pd (),\
  11603. (__mmask8)-1))
  11604. #define _mm256_mask_getmant_pd(W, U, X, B, C) \
  11605. ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X), \
  11606. (int)(((C)<<2) | (B)), \
  11607. (__v4df)(__m256d)(W), \
  11608. (__mmask8)(U)))
  11609. #define _mm256_maskz_getmant_pd(U, X, B, C) \
  11610. ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X), \
  11611. (int)(((C)<<2) | (B)), \
  11612. (__v4df)(__m256d)_mm256_setzero_pd (),\
  11613. (__mmask8)(U)))
  /* _mm_getmant_pd family.  Each macro expands to one
     __builtin_ia32_getmantpd128_mask call on X (as __v2df).  The two
     selector arguments are merged into a single int immediate as
     ((C) << 2) | (B), i.e. C is placed above the two low bits and B is
     OR-ed in (assumes B fits in two bits -- TODO confirm).  Third operand:
     _mm_setzero_pd () for the unmasked and maskz forms, W for the mask
     form.  Mask operand: -1 for the unmasked form, U otherwise.  */
  11614. #define _mm_getmant_pd(X, B, C) \
  11615. ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X), \
  11616. (int)(((C)<<2) | (B)), \
  11617. (__v2df)(__m128d)_mm_setzero_pd (), \
  11618. (__mmask8)-1))
  11619. #define _mm_mask_getmant_pd(W, U, X, B, C) \
  11620. ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X), \
  11621. (int)(((C)<<2) | (B)), \
  11622. (__v2df)(__m128d)(W), \
  11623. (__mmask8)(U)))
  11624. #define _mm_maskz_getmant_pd(U, X, B, C) \
  11625. ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X), \
  11626. (int)(((C)<<2) | (B)), \
  11627. (__v2df)(__m128d)_mm_setzero_pd (), \
  11628. (__mmask8)(U)))
  /* Masked gathers of ps/pd with 32-bit indices.  Each macro expands to one
     __builtin_ia32_gather3siv* call taking, in order, V1OLD, ADDR (as
     void const *), INDEX, MASK (as __mmask8) and the int SCALE.  Both pd
     forms take INDEX as a 128-bit __v4si.
     Every macro argument is parenthesized before it is cast, and the whole
     expansion is parenthesized, so an expression argument such as
     "base + off" or "m1 & m2" is cast as a whole and the result can be used
     inside a larger expression.  Each argument is evaluated once.  */
  11629. #define _mm256_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  11630. ((__m256) __builtin_ia32_gather3siv8sf ((__v8sf)(__m256)(V1OLD), \
  11631. (void const *)(ADDR), \
  11632. (__v8si)(__m256i)(INDEX), \
  11633. (__mmask8)(MASK), (int)(SCALE)))
  11634. #define _mm_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  11635. ((__m128) __builtin_ia32_gather3siv4sf ((__v4sf)(__m128)(V1OLD), \
  11636. (void const *)(ADDR), \
  11637. (__v4si)(__m128i)(INDEX), \
  11638. (__mmask8)(MASK), (int)(SCALE)))
  11639. #define _mm256_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  11640. ((__m256d) __builtin_ia32_gather3siv4df ((__v4df)(__m256d)(V1OLD), \
  11641. (void const *)(ADDR), \
  11642. (__v4si)(__m128i)(INDEX), \
  11643. (__mmask8)(MASK), (int)(SCALE)))
  11644. #define _mm_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  11645. ((__m128d) __builtin_ia32_gather3siv2df ((__v2df)(__m128d)(V1OLD), \
  11646. (void const *)(ADDR), \
  11647. (__v4si)(__m128i)(INDEX), \
  11648. (__mmask8)(MASK), (int)(SCALE)))
  /* Masked gathers of ps/pd with 64-bit indices.  Each macro expands to one
     __builtin_ia32_gather3div* call taking, in order, V1OLD, ADDR (as
     void const *), INDEX, MASK (as __mmask8) and the int SCALE.  Note that
     _mm256_mmask_i64gather_ps takes a 256-bit INDEX (__v4di) but a 128-bit
     V1OLD (__v4sf) and yields an __m128.
     Every macro argument is parenthesized before it is cast, and the whole
     expansion is parenthesized, so an expression argument such as
     "base + off" or "m1 & m2" is cast as a whole and the result can be used
     inside a larger expression.  Each argument is evaluated once.  */
  11649. #define _mm256_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  11650. ((__m128) __builtin_ia32_gather3div8sf ((__v4sf)(__m128)(V1OLD), \
  11651. (void const *)(ADDR), \
  11652. (__v4di)(__m256i)(INDEX), \
  11653. (__mmask8)(MASK), (int)(SCALE)))
  11654. #define _mm_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  11655. ((__m128) __builtin_ia32_gather3div4sf ((__v4sf)(__m128)(V1OLD), \
  11656. (void const *)(ADDR), \
  11657. (__v2di)(__m128i)(INDEX), \
  11658. (__mmask8)(MASK), (int)(SCALE)))
  11659. #define _mm256_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  11660. ((__m256d) __builtin_ia32_gather3div4df ((__v4df)(__m256d)(V1OLD), \
  11661. (void const *)(ADDR), \
  11662. (__v4di)(__m256i)(INDEX), \
  11663. (__mmask8)(MASK), (int)(SCALE)))
  11664. #define _mm_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  11665. ((__m128d) __builtin_ia32_gather3div2df ((__v2df)(__m128d)(V1OLD), \
  11666. (void const *)(ADDR), \
  11667. (__v2di)(__m128i)(INDEX), \
  11668. (__mmask8)(MASK), (int)(SCALE)))
  /* Masked gathers of epi32/epi64 with 32-bit indices.  Each macro expands
     to one __builtin_ia32_gather3siv* call taking, in order, V1OLD, ADDR
     (as void const *), INDEX, MASK (as __mmask8) and the int SCALE.  Both
     epi64 forms take INDEX as a 128-bit __v4si.
     Every macro argument is parenthesized before it is cast, and the whole
     expansion is parenthesized, so an expression argument such as
     "base + off" or "m1 & m2" is cast as a whole and the result can be used
     inside a larger expression.  Each argument is evaluated once.  */
  11669. #define _mm256_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  11670. ((__m256i) __builtin_ia32_gather3siv8si ((__v8si)(__m256i)(V1OLD), \
  11671. (void const *)(ADDR), \
  11672. (__v8si)(__m256i)(INDEX), \
  11673. (__mmask8)(MASK), (int)(SCALE)))
  11674. #define _mm_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  11675. ((__m128i) __builtin_ia32_gather3siv4si ((__v4si)(__m128i)(V1OLD), \
  11676. (void const *)(ADDR), \
  11677. (__v4si)(__m128i)(INDEX), \
  11678. (__mmask8)(MASK), (int)(SCALE)))
  11679. #define _mm256_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  11680. ((__m256i) __builtin_ia32_gather3siv4di ((__v4di)(__m256i)(V1OLD), \
  11681. (void const *)(ADDR), \
  11682. (__v4si)(__m128i)(INDEX), \
  11683. (__mmask8)(MASK), (int)(SCALE)))
  11684. #define _mm_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  11685. ((__m128i) __builtin_ia32_gather3siv2di ((__v2di)(__m128i)(V1OLD), \
  11686. (void const *)(ADDR), \
  11687. (__v4si)(__m128i)(INDEX), \
  11688. (__mmask8)(MASK), (int)(SCALE)))
  /* Masked gathers of epi32/epi64 with 64-bit indices.  Each macro expands
     to one __builtin_ia32_gather3div* call taking, in order, V1OLD, ADDR
     (as void const *), INDEX, MASK (as __mmask8) and the int SCALE.  Note
     that _mm256_mmask_i64gather_epi32 takes a 256-bit INDEX (__v4di) but a
     128-bit V1OLD (__v4si) and yields an __m128i.
     Every macro argument is parenthesized before it is cast, and the whole
     expansion is parenthesized, so an expression argument such as
     "base + off" or "m1 & m2" is cast as a whole and the result can be used
     inside a larger expression.  Each argument is evaluated once.  */
  11689. #define _mm256_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  11690. ((__m128i) __builtin_ia32_gather3div8si ((__v4si)(__m128i)(V1OLD), \
  11691. (void const *)(ADDR), \
  11692. (__v4di)(__m256i)(INDEX), \
  11693. (__mmask8)(MASK), (int)(SCALE)))
  11694. #define _mm_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  11695. ((__m128i) __builtin_ia32_gather3div4si ((__v4si)(__m128i)(V1OLD), \
  11696. (void const *)(ADDR), \
  11697. (__v2di)(__m128i)(INDEX), \
  11698. (__mmask8)(MASK), (int)(SCALE)))
  11699. #define _mm256_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  11700. ((__m256i) __builtin_ia32_gather3div4di ((__v4di)(__m256i)(V1OLD), \
  11701. (void const *)(ADDR), \
  11702. (__v4di)(__m256i)(INDEX), \
  11703. (__mmask8)(MASK), (int)(SCALE)))
  11704. #define _mm_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  11705. ((__m128i) __builtin_ia32_gather3div2di ((__v2di)(__m128i)(V1OLD), \
  11706. (void const *)(ADDR), \
  11707. (__v2di)(__m128i)(INDEX), \
  11708. (__mmask8)(MASK), (int)(SCALE)))
  /* Scatters of ps with 32-bit indices.  Each macro expands to one
     __builtin_ia32_scattersiv8sf / __builtin_ia32_scattersiv4sf call taking,
     in order, ADDR (as void *), a mask, INDEX, V1 and the int SCALE.  The
     unmasked forms pass the constant (__mmask8)0xFF as the mask; the mask
     forms pass MASK.
     Every macro argument is parenthesized before it is cast.  Without that,
     "(void *)ADDR" with ADDR = p + 1 would cast only p and then add 1 to a
     void pointer (one byte under the GNU extension) instead of advancing by
     one element.  Each argument is evaluated once.  */
  11709. #define _mm256_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
  11710. __builtin_ia32_scattersiv8sf ((void *)(ADDR), (__mmask8)0xFF, \
  11711. (__v8si)(__m256i)(INDEX), \
  11712. (__v8sf)(__m256)(V1), (int)(SCALE))
  11713. #define _mm256_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  11714. __builtin_ia32_scattersiv8sf ((void *)(ADDR), (__mmask8)(MASK), \
  11715. (__v8si)(__m256i)(INDEX), \
  11716. (__v8sf)(__m256)(V1), (int)(SCALE))
  11717. #define _mm_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
  11718. __builtin_ia32_scattersiv4sf ((void *)(ADDR), (__mmask8)0xFF, \
  11719. (__v4si)(__m128i)(INDEX), \
  11720. (__v4sf)(__m128)(V1), (int)(SCALE))
  11721. #define _mm_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  11722. __builtin_ia32_scattersiv4sf ((void *)(ADDR), (__mmask8)(MASK), \
  11723. (__v4si)(__m128i)(INDEX), \
  11724. (__v4sf)(__m128)(V1), (int)(SCALE))
  /* Scatters of pd with 32-bit indices.  Each macro expands to one
     __builtin_ia32_scattersiv4df / __builtin_ia32_scattersiv2df call taking,
     in order, ADDR (as void *), a mask, INDEX (a 128-bit __v4si in all four
     forms), V1 and the int SCALE.  The unmasked forms pass the constant
     (__mmask8)0xFF as the mask; the mask forms pass MASK.
     Every macro argument is parenthesized before it is cast.  Without that,
     "(void *)ADDR" with ADDR = p + 1 would cast only p and then add 1 to a
     void pointer (one byte under the GNU extension) instead of advancing by
     one element.  Each argument is evaluated once.  */
  11725. #define _mm256_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
  11726. __builtin_ia32_scattersiv4df ((void *)(ADDR), (__mmask8)0xFF, \
  11727. (__v4si)(__m128i)(INDEX), \
  11728. (__v4df)(__m256d)(V1), (int)(SCALE))
  11729. #define _mm256_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  11730. __builtin_ia32_scattersiv4df ((void *)(ADDR), (__mmask8)(MASK), \
  11731. (__v4si)(__m128i)(INDEX), \
  11732. (__v4df)(__m256d)(V1), (int)(SCALE))
  11733. #define _mm_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
  11734. __builtin_ia32_scattersiv2df ((void *)(ADDR), (__mmask8)0xFF, \
  11735. (__v4si)(__m128i)(INDEX), \
  11736. (__v2df)(__m128d)(V1), (int)(SCALE))
  11737. #define _mm_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  11738. __builtin_ia32_scattersiv2df ((void *)(ADDR), (__mmask8)(MASK), \
  11739. (__v4si)(__m128i)(INDEX), \
  11740. (__v2df)(__m128d)(V1), (int)(SCALE))
  /* Scatters of ps with 64-bit indices.  Each macro expands to one
     __builtin_ia32_scatterdiv8sf / __builtin_ia32_scatterdiv4sf call taking,
     in order, ADDR (as void *), a mask, INDEX, V1 (a 128-bit __v4sf in all
     four forms) and the int SCALE.  The unmasked forms pass the constant
     (__mmask8)0xFF as the mask; the mask forms pass MASK.
     Every macro argument is parenthesized before it is cast.  Without that,
     "(void *)ADDR" with ADDR = p + 1 would cast only p and then add 1 to a
     void pointer (one byte under the GNU extension) instead of advancing by
     one element.  Each argument is evaluated once.  */
  11741. #define _mm256_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
  11742. __builtin_ia32_scatterdiv8sf ((void *)(ADDR), (__mmask8)0xFF, \
  11743. (__v4di)(__m256i)(INDEX), \
  11744. (__v4sf)(__m128)(V1), (int)(SCALE))
  11745. #define _mm256_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  11746. __builtin_ia32_scatterdiv8sf ((void *)(ADDR), (__mmask8)(MASK), \
  11747. (__v4di)(__m256i)(INDEX), \
  11748. (__v4sf)(__m128)(V1), (int)(SCALE))
  11749. #define _mm_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
  11750. __builtin_ia32_scatterdiv4sf ((void *)(ADDR), (__mmask8)0xFF, \
  11751. (__v2di)(__m128i)(INDEX), \
  11752. (__v4sf)(__m128)(V1), (int)(SCALE))
  11753. #define _mm_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  11754. __builtin_ia32_scatterdiv4sf ((void *)(ADDR), (__mmask8)(MASK), \
  11755. (__v2di)(__m128i)(INDEX), \
  11756. (__v4sf)(__m128)(V1), (int)(SCALE))
  /* Scatters of pd with 64-bit indices.  Each macro expands to one
     __builtin_ia32_scatterdiv4df / __builtin_ia32_scatterdiv2df call taking,
     in order, ADDR (as void *), a mask, INDEX, V1 and the int SCALE.  The
     unmasked forms pass the constant (__mmask8)0xFF as the mask; the mask
     forms pass MASK.
     Every macro argument is parenthesized before it is cast.  Without that,
     "(void *)ADDR" with ADDR = p + 1 would cast only p and then add 1 to a
     void pointer (one byte under the GNU extension) instead of advancing by
     one element.  Each argument is evaluated once.  */
  11757. #define _mm256_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
  11758. __builtin_ia32_scatterdiv4df ((void *)(ADDR), (__mmask8)0xFF, \
  11759. (__v4di)(__m256i)(INDEX), \
  11760. (__v4df)(__m256d)(V1), (int)(SCALE))
  11761. #define _mm256_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  11762. __builtin_ia32_scatterdiv4df ((void *)(ADDR), (__mmask8)(MASK), \
  11763. (__v4di)(__m256i)(INDEX), \
  11764. (__v4df)(__m256d)(V1), (int)(SCALE))
  11765. #define _mm_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
  11766. __builtin_ia32_scatterdiv2df ((void *)(ADDR), (__mmask8)0xFF, \
  11767. (__v2di)(__m128i)(INDEX), \
  11768. (__v2df)(__m128d)(V1), (int)(SCALE))
  11769. #define _mm_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  11770. __builtin_ia32_scatterdiv2df ((void *)(ADDR), (__mmask8)(MASK), \
  11771. (__v2di)(__m128i)(INDEX), \
  11772. (__v2df)(__m128d)(V1), (int)(SCALE))
  /* 32-bit integer scatter macros taking 32-bit-element index vectors.  The
     _mm256_ forms expand to __builtin_ia32_scattersiv8si (INDEX and V1 as
     __v8si), the _mm_ forms to __builtin_ia32_scattersiv4si (INDEX and V1 as
     __v4si).  Unmasked forms pass the constant mask 0xFF, the _mask_ forms
     pass MASK.
     Every macro argument is parenthesized before it is cast, so a compound
     argument such as `base + n' is cast as a whole instead of having the
     cast bind to its first operand only.  */
  #define _mm256_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
    __builtin_ia32_scattersiv8si ((void *) (ADDR), (__mmask8) 0xFF, \
                                  (__v8si)(__m256i) (INDEX), \
                                  (__v8si)(__m256i) (V1), (int) (SCALE))
  #define _mm256_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
    __builtin_ia32_scattersiv8si ((void *) (ADDR), (__mmask8) (MASK), \
                                  (__v8si)(__m256i) (INDEX), \
                                  (__v8si)(__m256i) (V1), (int) (SCALE))
  #define _mm_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
    __builtin_ia32_scattersiv4si ((void *) (ADDR), (__mmask8) 0xFF, \
                                  (__v4si)(__m128i) (INDEX), \
                                  (__v4si)(__m128i) (V1), (int) (SCALE))
  #define _mm_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
    __builtin_ia32_scattersiv4si ((void *) (ADDR), (__mmask8) (MASK), \
                                  (__v4si)(__m128i) (INDEX), \
                                  (__v4si)(__m128i) (V1), (int) (SCALE))
  /* 64-bit integer scatter macros taking 32-bit-element index vectors.  The
     _mm256_ forms expand to __builtin_ia32_scattersiv4di (V1 as __v4di), the
     _mm_ forms to __builtin_ia32_scattersiv2di (V1 as __v2di); INDEX is
     passed as __v4si in both cases.  Unmasked forms pass the constant mask
     0xFF, the _mask_ forms pass MASK.
     Every macro argument is parenthesized before it is cast, so a compound
     argument such as `base + n' is cast as a whole instead of having the
     cast bind to its first operand only.  */
  #define _mm256_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
    __builtin_ia32_scattersiv4di ((void *) (ADDR), (__mmask8) 0xFF, \
                                  (__v4si)(__m128i) (INDEX), \
                                  (__v4di)(__m256i) (V1), (int) (SCALE))
  #define _mm256_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
    __builtin_ia32_scattersiv4di ((void *) (ADDR), (__mmask8) (MASK), \
                                  (__v4si)(__m128i) (INDEX), \
                                  (__v4di)(__m256i) (V1), (int) (SCALE))
  #define _mm_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
    __builtin_ia32_scattersiv2di ((void *) (ADDR), (__mmask8) 0xFF, \
                                  (__v4si)(__m128i) (INDEX), \
                                  (__v2di)(__m128i) (V1), (int) (SCALE))
  #define _mm_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
    __builtin_ia32_scattersiv2di ((void *) (ADDR), (__mmask8) (MASK), \
                                  (__v4si)(__m128i) (INDEX), \
                                  (__v2di)(__m128i) (V1), (int) (SCALE))
  /* 32-bit integer scatter macros taking 64-bit-element index vectors.  The
     _mm256_ forms expand to __builtin_ia32_scatterdiv8si (INDEX as __v4di),
     the _mm_ forms to __builtin_ia32_scatterdiv4si (INDEX as __v2di); V1 is
     passed as __v4si in both cases.  Unmasked forms pass the constant mask
     0xFF, the _mask_ forms pass MASK.
     Every macro argument is parenthesized before it is cast, so a compound
     argument such as `base + n' is cast as a whole instead of having the
     cast bind to its first operand only.  */
  #define _mm256_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
    __builtin_ia32_scatterdiv8si ((void *) (ADDR), (__mmask8) 0xFF, \
                                  (__v4di)(__m256i) (INDEX), \
                                  (__v4si)(__m128i) (V1), (int) (SCALE))
  #define _mm256_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
    __builtin_ia32_scatterdiv8si ((void *) (ADDR), (__mmask8) (MASK), \
                                  (__v4di)(__m256i) (INDEX), \
                                  (__v4si)(__m128i) (V1), (int) (SCALE))
  #define _mm_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
    __builtin_ia32_scatterdiv4si ((void *) (ADDR), (__mmask8) 0xFF, \
                                  (__v2di)(__m128i) (INDEX), \
                                  (__v4si)(__m128i) (V1), (int) (SCALE))
  #define _mm_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
    __builtin_ia32_scatterdiv4si ((void *) (ADDR), (__mmask8) (MASK), \
                                  (__v2di)(__m128i) (INDEX), \
                                  (__v4si)(__m128i) (V1), (int) (SCALE))
  /* 64-bit integer scatter macros taking 64-bit-element index vectors.  The
     _mm256_ forms expand to __builtin_ia32_scatterdiv4di (INDEX and V1 as
     __v4di), the _mm_ forms to __builtin_ia32_scatterdiv2di (INDEX and V1 as
     __v2di).  Unmasked forms pass the constant mask 0xFF, the _mask_ forms
     pass MASK.
     Every macro argument is parenthesized before it is cast, so a compound
     argument such as `base + n' is cast as a whole instead of having the
     cast bind to its first operand only.  */
  #define _mm256_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
    __builtin_ia32_scatterdiv4di ((void *) (ADDR), (__mmask8) 0xFF, \
                                  (__v4di)(__m256i) (INDEX), \
                                  (__v4di)(__m256i) (V1), (int) (SCALE))
  #define _mm256_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
    __builtin_ia32_scatterdiv4di ((void *) (ADDR), (__mmask8) (MASK), \
                                  (__v4di)(__m256i) (INDEX), \
                                  (__v4di)(__m256i) (V1), (int) (SCALE))
  #define _mm_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
    __builtin_ia32_scatterdiv2di ((void *) (ADDR), (__mmask8) 0xFF, \
                                  (__v2di)(__m128i) (INDEX), \
                                  (__v2di)(__m128i) (V1), (int) (SCALE))
  #define _mm_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
    __builtin_ia32_scatterdiv2di ((void *) (ADDR), (__mmask8) (MASK), \
                                  (__v2di)(__m128i) (INDEX), \
                                  (__v2di)(__m128i) (V1), (int) (SCALE))
  /* Masked epi32 shuffle macros.  Each expands to a pshufd*_mask builtin
     whose operands are, in order: X as the source vector, C as an int, a
     third vector operand, and U as __mmask8.  The _mask_ forms supply W as
     the third operand; the _maskz_ forms supply an all-zero vector.  */
  #define _mm256_mask_shuffle_epi32(W, U, X, C) \
    ((__m256i) __builtin_ia32_pshufd256_mask ( \
        (__v8si)(__m256i)(X), \
        (int)(C), \
        (__v8si)(__m256i)(W), \
        (__mmask8)(U)))
  #define _mm256_maskz_shuffle_epi32(U, X, C) \
    ((__m256i) __builtin_ia32_pshufd256_mask ( \
        (__v8si)(__m256i)(X), \
        (int)(C), \
        (__v8si)(__m256i) _mm256_setzero_si256 (), \
        (__mmask8)(U)))
  #define _mm_mask_shuffle_epi32(W, U, X, C) \
    ((__m128i) __builtin_ia32_pshufd128_mask ( \
        (__v4si)(__m128i)(X), \
        (int)(C), \
        (__v4si)(__m128i)(W), \
        (__mmask8)(U)))
  #define _mm_maskz_shuffle_epi32(U, X, C) \
    ((__m128i) __builtin_ia32_pshufd128_mask ( \
        (__v4si)(__m128i)(X), \
        (int)(C), \
        (__v4si)(__m128i)_mm_setzero_si128 (), \
        (__mmask8)(U)))
  /* rol_epi64 macros (256- and 128-bit).  Each expands to a prolq*_mask
     builtin with operands, in order: A as the source vector, B as an int, a
     third vector operand, and an __mmask8.
       - plain form:   all-zero third operand, mask (__mmask8)-1;
       - _mask_ form:  W as third operand, mask U;
       - _maskz_ form: all-zero third operand, mask U.  */
  #define _mm256_rol_epi64(A, B) \
    ((__m256i)__builtin_ia32_prolq256_mask ( \
        (__v4di)(__m256i)(A), \
        (int)(B), \
        (__v4di)(__m256i)_mm256_setzero_si256 (), \
        (__mmask8)-1))
  #define _mm256_mask_rol_epi64(W, U, A, B) \
    ((__m256i)__builtin_ia32_prolq256_mask ( \
        (__v4di)(__m256i)(A), \
        (int)(B), \
        (__v4di)(__m256i)(W), \
        (__mmask8)(U)))
  #define _mm256_maskz_rol_epi64(U, A, B) \
    ((__m256i)__builtin_ia32_prolq256_mask ( \
        (__v4di)(__m256i)(A), \
        (int)(B), \
        (__v4di)(__m256i)_mm256_setzero_si256 (), \
        (__mmask8)(U)))
  #define _mm_rol_epi64(A, B) \
    ((__m128i)__builtin_ia32_prolq128_mask ( \
        (__v2di)(__m128i)(A), \
        (int)(B), \
        (__v2di)(__m128i)_mm_setzero_si128 (), \
        (__mmask8)-1))
  #define _mm_mask_rol_epi64(W, U, A, B) \
    ((__m128i)__builtin_ia32_prolq128_mask ( \
        (__v2di)(__m128i)(A), \
        (int)(B), \
        (__v2di)(__m128i)(W), \
        (__mmask8)(U)))
  #define _mm_maskz_rol_epi64(U, A, B) \
    ((__m128i)__builtin_ia32_prolq128_mask ( \
        (__v2di)(__m128i)(A), \
        (int)(B), \
        (__v2di)(__m128i)_mm_setzero_si128 (), \
        (__mmask8)(U)))
  /* ror_epi64 macros (256- and 128-bit).  Each expands to a prorq*_mask
     builtin with operands, in order: A as the source vector, B as an int, a
     third vector operand, and an __mmask8.
       - plain form:   all-zero third operand, mask (__mmask8)-1;
       - _mask_ form:  W as third operand, mask U;
       - _maskz_ form: all-zero third operand, mask U.  */
  #define _mm256_ror_epi64(A, B) \
    ((__m256i)__builtin_ia32_prorq256_mask ( \
        (__v4di)(__m256i)(A), \
        (int)(B), \
        (__v4di)(__m256i)_mm256_setzero_si256 (), \
        (__mmask8)-1))
  #define _mm256_mask_ror_epi64(W, U, A, B) \
    ((__m256i)__builtin_ia32_prorq256_mask ( \
        (__v4di)(__m256i)(A), \
        (int)(B), \
        (__v4di)(__m256i)(W), \
        (__mmask8)(U)))
  #define _mm256_maskz_ror_epi64(U, A, B) \
    ((__m256i)__builtin_ia32_prorq256_mask ( \
        (__v4di)(__m256i)(A), \
        (int)(B), \
        (__v4di)(__m256i)_mm256_setzero_si256 (), \
        (__mmask8)(U)))
  #define _mm_ror_epi64(A, B) \
    ((__m128i)__builtin_ia32_prorq128_mask ( \
        (__v2di)(__m128i)(A), \
        (int)(B), \
        (__v2di)(__m128i)_mm_setzero_si128 (), \
        (__mmask8)-1))
  #define _mm_mask_ror_epi64(W, U, A, B) \
    ((__m128i)__builtin_ia32_prorq128_mask ( \
        (__v2di)(__m128i)(A), \
        (int)(B), \
        (__v2di)(__m128i)(W), \
        (__mmask8)(U)))
  #define _mm_maskz_ror_epi64(U, A, B) \
    ((__m128i)__builtin_ia32_prorq128_mask ( \
        (__v2di)(__m128i)(A), \
        (int)(B), \
        (__v2di)(__m128i)_mm_setzero_si128 (), \
        (__mmask8)(U)))
  /* rol_epi32 macros (256- and 128-bit).  Each expands to a prold*_mask
     builtin with operands, in order: A as the source vector, B as an int, a
     third vector operand, and an __mmask8.
       - plain form:   all-zero third operand, mask (__mmask8)-1;
       - _mask_ form:  W as third operand, mask U;
       - _maskz_ form: all-zero third operand, mask U.  */
  #define _mm256_rol_epi32(A, B) \
    ((__m256i)__builtin_ia32_prold256_mask ( \
        (__v8si)(__m256i)(A), \
        (int)(B), \
        (__v8si)(__m256i)_mm256_setzero_si256 (), \
        (__mmask8)-1))
  #define _mm256_mask_rol_epi32(W, U, A, B) \
    ((__m256i)__builtin_ia32_prold256_mask ( \
        (__v8si)(__m256i)(A), \
        (int)(B), \
        (__v8si)(__m256i)(W), \
        (__mmask8)(U)))
  #define _mm256_maskz_rol_epi32(U, A, B) \
    ((__m256i)__builtin_ia32_prold256_mask ( \
        (__v8si)(__m256i)(A), \
        (int)(B), \
        (__v8si)(__m256i)_mm256_setzero_si256 (), \
        (__mmask8)(U)))
  #define _mm_rol_epi32(A, B) \
    ((__m128i)__builtin_ia32_prold128_mask ( \
        (__v4si)(__m128i)(A), \
        (int)(B), \
        (__v4si)(__m128i)_mm_setzero_si128 (), \
        (__mmask8)-1))
  #define _mm_mask_rol_epi32(W, U, A, B) \
    ((__m128i)__builtin_ia32_prold128_mask ( \
        (__v4si)(__m128i)(A), \
        (int)(B), \
        (__v4si)(__m128i)(W), \
        (__mmask8)(U)))
  #define _mm_maskz_rol_epi32(U, A, B) \
    ((__m128i)__builtin_ia32_prold128_mask ( \
        (__v4si)(__m128i)(A), \
        (int)(B), \
        (__v4si)(__m128i)_mm_setzero_si128 (), \
        (__mmask8)(U)))
  /* ror_epi32 macros (256- and 128-bit).  Each expands to a prord*_mask
     builtin with operands, in order: A as the source vector, B as an int, a
     third vector operand, and an __mmask8.
       - plain form:   all-zero third operand, mask (__mmask8)-1;
       - _mask_ form:  W as third operand, mask U;
       - _maskz_ form: all-zero third operand, mask U.  */
  #define _mm256_ror_epi32(A, B) \
    ((__m256i)__builtin_ia32_prord256_mask ( \
        (__v8si)(__m256i)(A), \
        (int)(B), \
        (__v8si)(__m256i)_mm256_setzero_si256 (), \
        (__mmask8)-1))
  #define _mm256_mask_ror_epi32(W, U, A, B) \
    ((__m256i)__builtin_ia32_prord256_mask ( \
        (__v8si)(__m256i)(A), \
        (int)(B), \
        (__v8si)(__m256i)(W), \
        (__mmask8)(U)))
  #define _mm256_maskz_ror_epi32(U, A, B) \
    ((__m256i)__builtin_ia32_prord256_mask ( \
        (__v8si)(__m256i)(A), \
        (int)(B), \
        (__v8si)(__m256i) _mm256_setzero_si256 (), \
        (__mmask8)(U)))
  #define _mm_ror_epi32(A, B) \
    ((__m128i)__builtin_ia32_prord128_mask ( \
        (__v4si)(__m128i)(A), \
        (int)(B), \
        (__v4si)(__m128i)_mm_setzero_si128 (), \
        (__mmask8)-1))
  #define _mm_mask_ror_epi32(W, U, A, B) \
    ((__m128i)__builtin_ia32_prord128_mask ( \
        (__v4si)(__m128i)(A), \
        (int)(B), \
        (__v4si)(__m128i)(W), \
        (__mmask8)(U)))
  #define _mm_maskz_ror_epi32(U, A, B) \
    ((__m128i)__builtin_ia32_prord128_mask ( \
        (__v4si)(__m128i)(A), \
        (int)(B), \
        (__v4si)(__m128i)_mm_setzero_si128 (), \
        (__mmask8)(U)))
  /* 256-bit alignr_epi32 macros.  Each expands to
     __builtin_ia32_alignd256_mask with operands, in order: X, Y (both as
     __v8si), C as an int, a fourth vector operand, and an __mmask8.
       - plain form:   X again as fourth operand, mask (__mmask8)-1
                       (so X appears twice in the expansion);
       - _mask_ form:  W as fourth operand, mask U;
       - _maskz_ form: all-zero fourth operand, mask U.  */
  #define _mm256_alignr_epi32(X, Y, C) \
    ((__m256i)__builtin_ia32_alignd256_mask ( \
        (__v8si)(__m256i)(X), \
        (__v8si)(__m256i)(Y), \
        (int)(C), \
        (__v8si)(__m256i)(X), \
        (__mmask8)-1))
  #define _mm256_mask_alignr_epi32(W, U, X, Y, C) \
    ((__m256i)__builtin_ia32_alignd256_mask ( \
        (__v8si)(__m256i)(X), \
        (__v8si)(__m256i)(Y), \
        (int)(C), \
        (__v8si)(__m256i)(W), \
        (__mmask8)(U)))
  #define _mm256_maskz_alignr_epi32(U, X, Y, C) \
    ((__m256i)__builtin_ia32_alignd256_mask ( \
        (__v8si)(__m256i)(X), \
        (__v8si)(__m256i)(Y), \
        (int)(C), \
        (__v8si)(__m256i)_mm256_setzero_si256 (), \
        (__mmask8)(U)))
  /* 256-bit alignr_epi64 macros.  Each expands to
     __builtin_ia32_alignq256_mask with operands, in order: X, Y (both as
     __v4di), C as an int, a fourth vector operand, and an __mmask8.
       - plain form:   X again as fourth operand, mask (__mmask8)-1
                       (so X appears twice in the expansion);
       - _mask_ form:  W as fourth operand, mask U;
       - _maskz_ form: all-zero fourth operand, mask U.  */
  #define _mm256_alignr_epi64(X, Y, C) \
    ((__m256i)__builtin_ia32_alignq256_mask ( \
        (__v4di)(__m256i)(X), \
        (__v4di)(__m256i)(Y), \
        (int)(C), \
        (__v4di)(__m256i)(X), \
        (__mmask8)-1))
  #define _mm256_mask_alignr_epi64(W, U, X, Y, C) \
    ((__m256i)__builtin_ia32_alignq256_mask ( \
        (__v4di)(__m256i)(X), \
        (__v4di)(__m256i)(Y), \
        (int)(C), \
        (__v4di)(__m256i)(W), \
        (__mmask8)(U)))
  #define _mm256_maskz_alignr_epi64(U, X, Y, C) \
    ((__m256i)__builtin_ia32_alignq256_mask ( \
        (__v4di)(__m256i)(X), \
        (__v4di)(__m256i)(Y), \
        (int)(C), \
        (__v4di)(__m256i)_mm256_setzero_si256 (), \
        (__mmask8)(U)))
  /* 128-bit alignr_epi32 macros.  Each expands to
     __builtin_ia32_alignd128_mask with operands, in order: X, Y (both as
     __v4si), C as an int, a fourth vector operand, and an __mmask8.
       - plain form:   X again as fourth operand, mask (__mmask8)-1
                       (so X appears twice in the expansion);
       - _mask_ form:  W as fourth operand, mask U;
       - _maskz_ form: all-zero fourth operand, mask U.  */
  #define _mm_alignr_epi32(X, Y, C) \
    ((__m128i)__builtin_ia32_alignd128_mask ( \
        (__v4si)(__m128i)(X), \
        (__v4si)(__m128i)(Y), \
        (int)(C), \
        (__v4si)(__m128i)(X), \
        (__mmask8)-1))
  #define _mm_mask_alignr_epi32(W, U, X, Y, C) \
    ((__m128i)__builtin_ia32_alignd128_mask ( \
        (__v4si)(__m128i)(X), \
        (__v4si)(__m128i)(Y), \
        (int)(C), \
        (__v4si)(__m128i)(W), \
        (__mmask8)(U)))
  #define _mm_maskz_alignr_epi32(U, X, Y, C) \
    ((__m128i)__builtin_ia32_alignd128_mask ( \
        (__v4si)(__m128i)(X), \
        (__v4si)(__m128i)(Y), \
        (int)(C), \
        (__v4si)(__m128i)_mm_setzero_si128 (), \
        (__mmask8)(U)))
  /* Unmasked 128-bit alignr_epi64: expands to __builtin_ia32_alignq128_mask
     with X and Y as __v2di, C as an int, X again as the fourth operand and
     an all-ones __mmask8.  Note that X appears twice in the expansion.  */
  #define _mm_alignr_epi64(X, Y, C) \
    ((__m128i)__builtin_ia32_alignq128_mask ( \
        (__v2di)(__m128i)(X), \
        (__v2di)(__m128i)(Y), \
        (int)(C), \
        (__v2di)(__m128i)(X), \
        (__mmask8)-1))
  /* Masked 128-bit alignr_epi64: expands to __builtin_ia32_alignq128_mask
     with X and Y as __v2di, C as an int, W as the fourth operand and U as
     the __mmask8.  W and U must reach the builtin, exactly as in the other
     _mask_alignr_* macros; passing X and an all-ones mask instead would
     make this macro behave like the unmasked _mm_alignr_epi64.  */
  #define _mm_mask_alignr_epi64(W, U, X, Y, C) \
    ((__m128i)__builtin_ia32_alignq128_mask ( \
        (__v2di)(__m128i)(X), \
        (__v2di)(__m128i)(Y), \
        (int)(C), \
        (__v2di)(__m128i)(W), \
        (__mmask8)(U)))
  /* maskz 128-bit alignr_epi64: expands to __builtin_ia32_alignq128_mask
     with X and Y as __v2di, C as an int, an all-zero vector as the fourth
     operand and U as the __mmask8.  */
  #define _mm_maskz_alignr_epi64(U, X, Y, C) \
    ((__m128i)__builtin_ia32_alignq128_mask ( \
        (__v2di)(__m128i)(X), \
        (__v2di)(__m128i)(Y), \
        (int)(C), \
        (__v2di)(__m128i)_mm_setzero_si128 (), \
        (__mmask8)(U)))
  /* Masked cvtps_ph macros.  The _mm_ forms expand to
     __builtin_ia32_vcvtps2ph_mask with A as __v4sf, the _mm256_ forms to
     __builtin_ia32_vcvtps2ph256_mask with A as __v8sf.  Remaining operands,
     in order: I as an int, a __v8hi vector (W for _mask_, all-zero for
     _maskz_) and U as __mmask8.  All four macros yield an __m128i.
     A is parenthesized before it is cast so that a compound argument
     expression is cast as a whole.  */
  #define _mm_mask_cvtps_ph(W, U, A, I) \
    ((__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf)(__m128) (A), \
        (int) (I), (__v8hi)(__m128i) (W), (__mmask8) (U)))
  #define _mm_maskz_cvtps_ph(U, A, I) \
    ((__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf)(__m128) (A), \
        (int) (I), (__v8hi)(__m128i) _mm_setzero_si128 (), (__mmask8) (U)))
  #define _mm256_mask_cvtps_ph(W, U, A, I) \
    ((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf)(__m256) (A), \
        (int) (I), (__v8hi)(__m128i) (W), (__mmask8) (U)))
  #define _mm256_maskz_cvtps_ph(U, A, I) \
    ((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf)(__m256) (A), \
        (int) (I), (__v8hi)(__m128i) _mm_setzero_si128 (), (__mmask8) (U)))
  /* Masked srai_epi32 macros.  Each expands to a psradi*_mask builtin with
     operands, in order: A as the source vector, B as an int, a third vector
     operand (W for _mask_, an all-zero vector for _maskz_) and U as
     __mmask8.  */
  #define _mm256_mask_srai_epi32(W, U, A, B) \
    ((__m256i) __builtin_ia32_psradi256_mask ( \
        (__v8si)(__m256i)(A), \
        (int)(B), \
        (__v8si)(__m256i)(W), \
        (__mmask8)(U)))
  #define _mm256_maskz_srai_epi32(U, A, B) \
    ((__m256i) __builtin_ia32_psradi256_mask ( \
        (__v8si)(__m256i)(A), \
        (int)(B), \
        (__v8si)_mm256_setzero_si256 (), \
        (__mmask8)(U)))
  #define _mm_mask_srai_epi32(W, U, A, B) \
    ((__m128i) __builtin_ia32_psradi128_mask ( \
        (__v4si)(__m128i)(A), \
        (int)(B), \
        (__v4si)(__m128i)(W), \
        (__mmask8)(U)))
  #define _mm_maskz_srai_epi32(U, A, B) \
    ((__m128i) __builtin_ia32_psradi128_mask ( \
        (__v4si)(__m128i)(A), \
        (int)(B), \
        (__v4si)_mm_setzero_si128 (), \
        (__mmask8)(U)))
  /* srai_epi64 macros (256- and 128-bit).  Each expands to a psraqi*_mask
     builtin with operands, in order: A as the source vector, B as an int, a
     third vector operand, and an __mmask8.
       - plain form:   all-zero third operand, mask (__mmask8)-1;
       - _mask_ form:  W as third operand, mask U;
       - _maskz_ form: all-zero third operand, mask U.  */
  #define _mm256_srai_epi64(A, B) \
    ((__m256i) __builtin_ia32_psraqi256_mask ( \
        (__v4di)(__m256i)(A), \
        (int)(B), \
        (__v4di)_mm256_setzero_si256 (), \
        (__mmask8)-1))
  #define _mm256_mask_srai_epi64(W, U, A, B) \
    ((__m256i) __builtin_ia32_psraqi256_mask ( \
        (__v4di)(__m256i)(A), \
        (int)(B), \
        (__v4di)(__m256i)(W), \
        (__mmask8)(U)))
  #define _mm256_maskz_srai_epi64(U, A, B) \
    ((__m256i) __builtin_ia32_psraqi256_mask ( \
        (__v4di)(__m256i)(A), \
        (int)(B), \
        (__v4di)_mm256_setzero_si256 (), \
        (__mmask8)(U)))
  #define _mm_srai_epi64(A, B) \
    ((__m128i) __builtin_ia32_psraqi128_mask ( \
        (__v2di)(__m128i)(A), \
        (int)(B), \
        (__v2di)_mm_setzero_si128 (), \
        (__mmask8)-1))
  #define _mm_mask_srai_epi64(W, U, A, B) \
    ((__m128i) __builtin_ia32_psraqi128_mask ( \
        (__v2di)(__m128i)(A), \
        (int)(B), \
        (__v2di)(__m128i)(W), \
        (__mmask8)(U)))
  #define _mm_maskz_srai_epi64(U, A, B) \
    ((__m128i) __builtin_ia32_psraqi128_mask ( \
        (__v2di)(__m128i)(A), \
        (int)(B), \
        (__v2di)_mm_setzero_si128 (), \
        (__mmask8)(U)))
  /* Masked 256-bit permutex_pd macros.  Both expand to
     __builtin_ia32_permdf256_mask with operands, in order: A as __v4df, B as
     an int, a third __v4df operand (W for _mask_, an all-zero vector for
     _maskz_) and U as __mmask8.  */
  #define _mm256_mask_permutex_pd(W, U, A, B) \
    ((__m256d) __builtin_ia32_permdf256_mask ( \
        (__v4df)(__m256d)(A), \
        (int)(B), \
        (__v4df)(__m256d)(W), \
        (__mmask8)(U)))
  #define _mm256_maskz_permutex_pd(U, A, B) \
    ((__m256d) __builtin_ia32_permdf256_mask ( \
        (__v4df)(__m256d)(A), \
        (int)(B), \
        (__v4df)(__m256d)_mm256_setzero_pd (), \
        (__mmask8)(U)))
  /* Masked 256-bit permute_pd / permute_ps macros.  The _pd forms expand to
     __builtin_ia32_vpermilpd256_mask (vectors as __v4df), the _ps forms to
     __builtin_ia32_vpermilps256_mask (vectors as __v8sf).  Operands, in
     order: X as the source vector, C as an int, a third vector operand (W
     for _mask_, an all-zero vector for _maskz_) and U as __mmask8.  */
  #define _mm256_mask_permute_pd(W, U, X, C) \
    ((__m256d) __builtin_ia32_vpermilpd256_mask ( \
        (__v4df)(__m256d)(X), \
        (int)(C), \
        (__v4df)(__m256d)(W), \
        (__mmask8)(U)))
  #define _mm256_maskz_permute_pd(U, X, C) \
    ((__m256d) __builtin_ia32_vpermilpd256_mask ( \
        (__v4df)(__m256d)(X), \
        (int)(C), \
        (__v4df)(__m256d)_mm256_setzero_pd (), \
        (__mmask8)(U)))
  #define _mm256_mask_permute_ps(W, U, X, C) \
    ((__m256) __builtin_ia32_vpermilps256_mask ( \
        (__v8sf)(__m256)(X), \
        (int)(C), \
        (__v8sf)(__m256)(W), \
        (__mmask8)(U)))
  #define _mm256_maskz_permute_ps(U, X, C) \
    ((__m256) __builtin_ia32_vpermilps256_mask ( \
        (__v8sf)(__m256)(X), \
        (int)(C), \
        (__v8sf)(__m256)_mm256_setzero_ps (), \
        (__mmask8)(U)))
  /* Masked 128-bit permute_pd / permute_ps macros.  The _pd forms expand to
     __builtin_ia32_vpermilpd_mask (vectors as __v2df), the _ps forms to
     __builtin_ia32_vpermilps_mask (vectors as __v4sf).  Operands, in order:
     X as the source vector, C as an int, a third vector operand (W for
     _mask_, an all-zero vector for _maskz_) and U as __mmask8.  */
  #define _mm_mask_permute_pd(W, U, X, C) \
    ((__m128d) __builtin_ia32_vpermilpd_mask ( \
        (__v2df)(__m128d)(X), \
        (int)(C), \
        (__v2df)(__m128d)(W), \
        (__mmask8)(U)))
  #define _mm_maskz_permute_pd(U, X, C) \
    ((__m128d) __builtin_ia32_vpermilpd_mask ( \
        (__v2df)(__m128d)(X), \
        (int)(C), \
        (__v2df)(__m128d)_mm_setzero_pd (), \
        (__mmask8)(U)))
  #define _mm_mask_permute_ps(W, U, X, C) \
    ((__m128) __builtin_ia32_vpermilps_mask ( \
        (__v4sf)(__m128)(X), \
        (int)(C), \
        (__v4sf)(__m128)(W), \
        (__mmask8)(U)))
  #define _mm_maskz_permute_ps(U, X, C) \
    ((__m128) __builtin_ia32_vpermilps_mask ( \
        (__v4sf)(__m128)(X), \
        (int)(C), \
        (__v4sf)(__m128)_mm_setzero_ps (), \
        (__mmask8)(U)))
  /* 256-bit mask_blend macros.  Each expands to a blendm*_256_mask builtin
     taking, in order, __A and __W cast to the element-typed vector and __U
     cast to __mmask8; the builtin result is cast back to the public vector
     type.  */
  #define _mm256_mask_blend_pd(__U, __A, __W) \
    ((__m256d) __builtin_ia32_blendmpd_256_mask ( \
        (__v4df) (__A), \
        (__v4df) (__W), \
        (__mmask8) (__U)))
  #define _mm256_mask_blend_ps(__U, __A, __W) \
    ((__m256) __builtin_ia32_blendmps_256_mask ( \
        (__v8sf) (__A), \
        (__v8sf) (__W), \
        (__mmask8) (__U)))
  #define _mm256_mask_blend_epi64(__U, __A, __W) \
    ((__m256i) __builtin_ia32_blendmq_256_mask ( \
        (__v4di) (__A), \
        (__v4di) (__W), \
        (__mmask8) (__U)))
  #define _mm256_mask_blend_epi32(__U, __A, __W) \
    ((__m256i) __builtin_ia32_blendmd_256_mask ( \
        (__v8si) (__A), \
        (__v8si) (__W), \
        (__mmask8) (__U)))
  /* 128-bit mask_blend macros.  Each expands to a blendm*_128_mask builtin
     taking, in order, __A and __W cast to the element-typed vector and __U
     cast to __mmask8; the builtin result is cast back to the public vector
     type.  */
  #define _mm_mask_blend_pd(__U, __A, __W) \
    ((__m128d) __builtin_ia32_blendmpd_128_mask ( \
        (__v2df) (__A), \
        (__v2df) (__W), \
        (__mmask8) (__U)))
  #define _mm_mask_blend_ps(__U, __A, __W) \
    ((__m128) __builtin_ia32_blendmps_128_mask ( \
        (__v4sf) (__A), \
        (__v4sf) (__W), \
        (__mmask8) (__U)))
  #define _mm_mask_blend_epi64(__U, __A, __W) \
    ((__m128i) __builtin_ia32_blendmq_128_mask ( \
        (__v2di) (__A), \
        (__v2di) (__W), \
        (__mmask8) (__U)))
  #define _mm_mask_blend_epi32(__U, __A, __W) \
    ((__m128i) __builtin_ia32_blendmd_128_mask ( \
        (__v4si) (__A), \
        (__v4si) (__W), \
        (__mmask8) (__U)))
  /* Unmasked 256-bit compare-to-mask macros.  Each expands to a
     (u)cmp*256_mask builtin with operands, in order: X and Y cast to the
     element-typed vector, P as an int, and an all-ones __mmask8.  The
     result is cast to __mmask8.  */
  #define _mm256_cmp_epu32_mask(X, Y, P) \
    ((__mmask8) __builtin_ia32_ucmpd256_mask ( \
        (__v8si)(__m256i)(X), \
        (__v8si)(__m256i)(Y), \
        (int)(P), \
        (__mmask8)-1))
  #define _mm256_cmp_epi64_mask(X, Y, P) \
    ((__mmask8) __builtin_ia32_cmpq256_mask ( \
        (__v4di)(__m256i)(X), \
        (__v4di)(__m256i)(Y), \
        (int)(P), \
        (__mmask8)-1))
  #define _mm256_cmp_epi32_mask(X, Y, P) \
    ((__mmask8) __builtin_ia32_cmpd256_mask ( \
        (__v8si)(__m256i)(X), \
        (__v8si)(__m256i)(Y), \
        (int)(P), \
        (__mmask8)-1))
  #define _mm256_cmp_epu64_mask(X, Y, P) \
    ((__mmask8) __builtin_ia32_ucmpq256_mask ( \
        (__v4di)(__m256i)(X), \
        (__v4di)(__m256i)(Y), \
        (int)(P), \
        (__mmask8)-1))
  #define _mm256_cmp_pd_mask(X, Y, P) \
    ((__mmask8) __builtin_ia32_cmppd256_mask ( \
        (__v4df)(__m256d)(X), \
        (__v4df)(__m256d)(Y), \
        (int)(P), \
        (__mmask8)-1))
  #define _mm256_cmp_ps_mask(X, Y, P) \
    ((__mmask8) __builtin_ia32_cmpps256_mask ( \
        (__v8sf)(__m256)(X), \
        (__v8sf)(__m256)(Y), \
        (int)(P), \
        (__mmask8)-1))
  /* Masked 256-bit compare-to-mask macros.  Each expands to a
     (u)cmp*256_mask builtin with operands, in order: X and Y cast to the
     element-typed vector, P as an int, and M as __mmask8.  The result is
     cast to __mmask8.  */
  #define _mm256_mask_cmp_epi64_mask(M, X, Y, P) \
    ((__mmask8) __builtin_ia32_cmpq256_mask ( \
        (__v4di)(__m256i)(X), \
        (__v4di)(__m256i)(Y), \
        (int)(P), \
        (__mmask8)(M)))
  #define _mm256_mask_cmp_epi32_mask(M, X, Y, P) \
    ((__mmask8) __builtin_ia32_cmpd256_mask ( \
        (__v8si)(__m256i)(X), \
        (__v8si)(__m256i)(Y), \
        (int)(P), \
        (__mmask8)(M)))
  #define _mm256_mask_cmp_epu64_mask(M, X, Y, P) \
    ((__mmask8) __builtin_ia32_ucmpq256_mask ( \
        (__v4di)(__m256i)(X), \
        (__v4di)(__m256i)(Y), \
        (int)(P), \
        (__mmask8)(M)))
  #define _mm256_mask_cmp_epu32_mask(M, X, Y, P) \
    ((__mmask8) __builtin_ia32_ucmpd256_mask ( \
        (__v8si)(__m256i)(X), \
        (__v8si)(__m256i)(Y), \
        (int)(P), \
        (__mmask8)(M)))
  #define _mm256_mask_cmp_pd_mask(M, X, Y, P) \
    ((__mmask8) __builtin_ia32_cmppd256_mask ( \
        (__v4df)(__m256d)(X), \
        (__v4df)(__m256d)(Y), \
        (int)(P), \
        (__mmask8)(M)))
  #define _mm256_mask_cmp_ps_mask(M, X, Y, P) \
    ((__mmask8) __builtin_ia32_cmpps256_mask ( \
        (__v8sf)(__m256)(X), \
        (__v8sf)(__m256)(Y), \
        (int)(P), \
        (__mmask8)(M)))
  /* Unmasked 128-bit compare-to-mask macros.  Each expands to a
     (u)cmp*128_mask builtin with operands, in order: X and Y cast to the
     element-typed vector, P as an int, and an all-ones __mmask8.  The
     result is cast to __mmask8.  */
  #define _mm_cmp_epi64_mask(X, Y, P) \
    ((__mmask8) __builtin_ia32_cmpq128_mask ( \
        (__v2di)(__m128i)(X), \
        (__v2di)(__m128i)(Y), \
        (int)(P), \
        (__mmask8)-1))
  #define _mm_cmp_epi32_mask(X, Y, P) \
    ((__mmask8) __builtin_ia32_cmpd128_mask ( \
        (__v4si)(__m128i)(X), \
        (__v4si)(__m128i)(Y), \
        (int)(P), \
        (__mmask8)-1))
  #define _mm_cmp_epu64_mask(X, Y, P) \
    ((__mmask8) __builtin_ia32_ucmpq128_mask ( \
        (__v2di)(__m128i)(X), \
        (__v2di)(__m128i)(Y), \
        (int)(P), \
        (__mmask8)-1))
  #define _mm_cmp_epu32_mask(X, Y, P) \
    ((__mmask8) __builtin_ia32_ucmpd128_mask ( \
        (__v4si)(__m128i)(X), \
        (__v4si)(__m128i)(Y), \
        (int)(P), \
        (__mmask8)-1))
  #define _mm_cmp_pd_mask(X, Y, P) \
    ((__mmask8) __builtin_ia32_cmppd128_mask ( \
        (__v2df)(__m128d)(X), \
        (__v2df)(__m128d)(Y), \
        (int)(P), \
        (__mmask8)-1))
  #define _mm_cmp_ps_mask(X, Y, P) \
    ((__mmask8) __builtin_ia32_cmpps128_mask ( \
        (__v4sf)(__m128)(X), \
        (__v4sf)(__m128)(Y), \
        (int)(P), \
        (__mmask8)-1))
  /* Masked 128-bit compare-to-mask macros.  Each expands to a
     (u)cmp*128_mask builtin with operands, in order: X and Y cast to the
     element-typed vector, P as an int, and M as __mmask8.  The result is
     cast to __mmask8.  */
  #define _mm_mask_cmp_epi64_mask(M, X, Y, P) \
    ((__mmask8) __builtin_ia32_cmpq128_mask ( \
        (__v2di)(__m128i)(X), \
        (__v2di)(__m128i)(Y), \
        (int)(P), \
        (__mmask8)(M)))
  #define _mm_mask_cmp_epi32_mask(M, X, Y, P) \
    ((__mmask8) __builtin_ia32_cmpd128_mask ( \
        (__v4si)(__m128i)(X), \
        (__v4si)(__m128i)(Y), \
        (int)(P), \
        (__mmask8)(M)))
  #define _mm_mask_cmp_epu64_mask(M, X, Y, P) \
    ((__mmask8) __builtin_ia32_ucmpq128_mask ( \
        (__v2di)(__m128i)(X), \
        (__v2di)(__m128i)(Y), \
        (int)(P), \
        (__mmask8)(M)))
  #define _mm_mask_cmp_epu32_mask(M, X, Y, P) \
    ((__mmask8) __builtin_ia32_ucmpd128_mask ( \
        (__v4si)(__m128i)(X), \
        (__v4si)(__m128i)(Y), \
        (int)(P), \
        (__mmask8)(M)))
  #define _mm_mask_cmp_pd_mask(M, X, Y, P) \
    ((__mmask8) __builtin_ia32_cmppd128_mask ( \
        (__v2df)(__m128d)(X), \
        (__v2df)(__m128d)(Y), \
        (int)(P), \
        (__mmask8)(M)))
  #define _mm_mask_cmp_ps_mask(M, X, Y, P) \
    ((__mmask8) __builtin_ia32_cmpps128_mask ( \
        (__v4sf)(__m128)(X), \
        (__v4sf)(__m128)(Y), \
        (int)(P), \
        (__mmask8)(M)))
  12196. #endif
  /* Alias that forwards to _mm256_permutevar8x32_ps with the two arguments
     swapped: B is passed first, A second.  */
  #define _mm256_permutexvar_ps(A, B) \
    _mm256_permutevar8x32_ps ((B), (A))
  12198. #ifdef __DISABLE_AVX512VL__
  12199. #undef __DISABLE_AVX512VL__
  12200. #pragma GCC pop_options
  12201. #endif /* __DISABLE_AVX512VL__ */
  12202. #endif /* _AVX512VLINTRIN_H_INCLUDED */