avx512vbmi2vlintrin.h 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037
  1. /* Copyright (C) 2013-2019 Free Software Foundation, Inc.
  2. This file is part of GCC.
  3. GCC is free software; you can redistribute it and/or modify
  4. it under the terms of the GNU General Public License as published by
  5. the Free Software Foundation; either version 3, or (at your option)
  6. any later version.
  7. GCC is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU General Public License for more details.
  11. Under Section 7 of GPL version 3, you are granted additional
  12. permissions described in the GCC Runtime Library Exception, version
  13. 3.1, as published by the Free Software Foundation.
  14. You should have received a copy of the GNU General Public License and
  15. a copy of the GCC Runtime Library Exception along with this program;
  16. see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
  17. <http://www.gnu.org/licenses/>. */
  18. #ifndef _IMMINTRIN_H_INCLUDED
  19. #error "Never use <avx512vbmi2vlintrin.h> directly; include <immintrin.h> instead."
  20. #endif
  21. #ifndef _AVX512VBMI2VLINTRIN_H_INCLUDED
  22. #define _AVX512VBMI2VLINTRIN_H_INCLUDED
  23. #if !defined(__AVX512VL__) || !defined(__AVX512VBMI2__)
  24. #pragma GCC push_options
  25. #pragma GCC target("avx512vbmi2,avx512vl")
  26. #define __DISABLE_AVX512VBMI2VL__
  27. #endif /* __AVX512VBMI2VL__ */
/* VPCOMPRESSB, 128-bit: pack the byte elements of __C selected by mask
   __B contiguously into the low lanes of the result; the remaining
   lanes are taken from __A (merge masking).  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_compress_epi8 (__m128i __A, __mmask16 __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi)__C,
						      (__v16qi)__A, (__mmask16)__B);
}

/* Same compress, zero masking: lanes not written by the packed
   selection are cleared (pass-through operand is all zeros).  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_compress_epi8 (__mmask16 __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __B,
						      (__v16qi) _mm_setzero_si128 (), (__mmask16) __A);
}
/* VPCOMPRESSW to memory, 256-bit: store the mask-selected 16-bit
   elements of __C contiguously at the (possibly unaligned) address
   __A.  Only the selected elements are written.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_compressstoreu_epi16 (void * __A, __mmask16 __B, __m256i __C)
{
  __builtin_ia32_compressstoreuhi256_mask ((__v16hi *) __A, (__v16hi) __C,
					   (__mmask16) __B);
}
/* VPCOMPRESSW, 128-bit, merge masking: pack the word elements of __C
   selected by __B into the low lanes; remaining lanes come from __A.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_compress_epi16 (__m128i __A, __mmask8 __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi)__C, (__v8hi)__A,
						      (__mmask8)__B);
}

/* Zero-masking variant: unwritten lanes are zeroed.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_compress_epi16 (__mmask8 __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __B,
						      (__v8hi) _mm_setzero_si128 (), (__mmask8) __A);
}
/* VPCOMPRESSW, 256-bit, merge masking: pack the word elements of __C
   selected by __B into the low lanes; remaining lanes come from __A.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_compress_epi16 (__m256i __A, __mmask16 __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi)__C,
						      (__v16hi)__A, (__mmask16)__B);
}

/* Zero-masking variant: unwritten lanes are zeroed.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_compress_epi16 (__mmask16 __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __B,
						      (__v16hi) _mm256_setzero_si256 (), (__mmask16) __A);
}
/* VPCOMPRESSB to memory, 128-bit: store the mask-selected bytes of __C
   contiguously at unaligned address __A.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_compressstoreu_epi8 (void * __A, __mmask16 __B, __m128i __C)
{
  __builtin_ia32_compressstoreuqi128_mask ((__v16qi *) __A, (__v16qi) __C,
					   (__mmask16) __B);
}

/* VPCOMPRESSW to memory, 128-bit: store the mask-selected words of __C
   contiguously at unaligned address __A.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_compressstoreu_epi16 (void * __A, __mmask8 __B, __m128i __C)
{
  __builtin_ia32_compressstoreuhi128_mask ((__v8hi *) __A, (__v8hi) __C,
					   (__mmask8) __B);
}
/* VPEXPANDB, 128-bit: scatter the low bytes of __C out to the lanes
   selected by mask __B; unselected lanes keep the value from __A
   (merge masking).  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_expand_epi8 (__m128i __A, __mmask16 __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __C,
						    (__v16qi) __A,
						    (__mmask16) __B);
}

/* Zero-masking expand: unselected lanes are zeroed (note the separate
   _maskz builtin, unlike compress which reuses the _mask one).  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_expand_epi8 (__mmask16 __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_expandqi128_maskz ((__v16qi) __B,
						     (__v16qi) _mm_setzero_si128 (), (__mmask16) __A);
}

/* Expand-load: read consecutive bytes from unaligned address __C and
   scatter them to the mask-selected lanes; other lanes from __A.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_expandloadu_epi8 (__m128i __A, __mmask16 __B, const void * __C)
{
  return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *) __C,
							(__v16qi) __A, (__mmask16) __B);
}

/* Expand-load, zero masking.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_expandloadu_epi8 (__mmask16 __A, const void * __B)
{
  return (__m128i) __builtin_ia32_expandloadqi128_maskz ((const __v16qi *) __B,
							 (__v16qi) _mm_setzero_si128 (), (__mmask16) __A);
}
/* VPEXPANDW, 128-bit, merge masking: scatter the low words of __C to
   the lanes selected by __B; unselected lanes keep __A's value.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_expand_epi16 (__m128i __A, __mmask8 __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __C,
						    (__v8hi) __A,
						    (__mmask8) __B);
}

/* Zero-masking variant.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_expand_epi16 (__mmask8 __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_expandhi128_maskz ((__v8hi) __B,
						     (__v8hi) _mm_setzero_si128 (), (__mmask8) __A);
}

/* Expand-load words from unaligned address __C; merge masking.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_expandloadu_epi16 (__m128i __A, __mmask8 __B, const void * __C)
{
  return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *) __C,
							(__v8hi) __A, (__mmask8) __B);
}

/* Expand-load words, zero masking.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_expandloadu_epi16 (__mmask8 __A, const void * __B)
{
  return (__m128i) __builtin_ia32_expandloadhi128_maskz ((const __v8hi *) __B,
							 (__v8hi) _mm_setzero_si128 (), (__mmask8) __A);
}
/* VPEXPANDW, 256-bit, merge masking: scatter the low words of __C to
   the lanes selected by __B; unselected lanes keep __A's value.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_expand_epi16 (__m256i __A, __mmask16 __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __C,
						    (__v16hi) __A,
						    (__mmask16) __B);
}

/* Zero-masking variant.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_expand_epi16 (__mmask16 __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_expandhi256_maskz ((__v16hi) __B,
						     (__v16hi) _mm256_setzero_si256 (), (__mmask16) __A);
}

/* Expand-load words from unaligned address __C; merge masking.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_expandloadu_epi16 (__m256i __A, __mmask16 __B, const void * __C)
{
  return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *) __C,
							(__v16hi) __A, (__mmask16) __B);
}

/* Expand-load words, zero masking.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_expandloadu_epi16 (__mmask16 __A, const void * __B)
{
  return (__m256i) __builtin_ia32_expandloadhi256_maskz ((const __v16hi *) __B,
							 (__v16hi) _mm256_setzero_si256 (), (__mmask16) __A);
}
  178. #ifdef __OPTIMIZE__
/* Concatenated shift right by immediate (VPSHRDW/D/Q), 256-bit.  Each
   result lane is the low part of (__B lane : __A lane) >> __C.  These
   inline forms are only compiled under __OPTIMIZE__, where the count
   argument can be proven constant; the #else branch supplies macro
   equivalents.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shrdi_epi16 (__m256i __A, __m256i __B, int __C)
{
  return (__m256i) __builtin_ia32_vpshrd_v16hi ((__v16hi)__A, (__v16hi) __B,
						__C);
}

/* Merge-masked: result lanes not selected by __B come from __A.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shrdi_epi16 (__m256i __A, __mmask16 __B, __m256i __C, __m256i __D,
			 int __E)
{
  return (__m256i)__builtin_ia32_vpshrd_v16hi_mask ((__v16hi)__C,
						    (__v16hi) __D, __E, (__v16hi) __A, (__mmask16)__B);
}

/* Zero-masked: unselected result lanes are zeroed.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shrdi_epi16 (__mmask16 __A, __m256i __B, __m256i __C, int __D)
{
  return (__m256i)__builtin_ia32_vpshrd_v16hi_mask ((__v16hi)__B,
						    (__v16hi) __C, __D, (__v16hi) _mm256_setzero_si256 (), (__mmask16)__A);
}

/* 32-bit lanes, merge-masked.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shrdi_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D,
			 int __E)
{
  return (__m256i)__builtin_ia32_vpshrd_v8si_mask ((__v8si)__C, (__v8si) __D,
						   __E, (__v8si) __A, (__mmask8)__B);
}

/* 32-bit lanes, zero-masked.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shrdi_epi32 (__mmask8 __A, __m256i __B, __m256i __C, int __D)
{
  return (__m256i)__builtin_ia32_vpshrd_v8si_mask ((__v8si)__B, (__v8si) __C,
						   __D, (__v8si) _mm256_setzero_si256 (), (__mmask8)__A);
}

/* 32-bit lanes, unmasked.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shrdi_epi32 (__m256i __A, __m256i __B, int __C)
{
  return (__m256i) __builtin_ia32_vpshrd_v8si ((__v8si)__A, (__v8si) __B, __C);
}

/* 64-bit lanes, merge-masked.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shrdi_epi64 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D,
			 int __E)
{
  return (__m256i)__builtin_ia32_vpshrd_v4di_mask ((__v4di)__C, (__v4di) __D,
						   __E, (__v4di) __A, (__mmask8)__B);
}

/* 64-bit lanes, zero-masked.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shrdi_epi64 (__mmask8 __A, __m256i __B, __m256i __C, int __D)
{
  return (__m256i)__builtin_ia32_vpshrd_v4di_mask ((__v4di)__B, (__v4di) __C,
						   __D, (__v4di) _mm256_setzero_si256 (), (__mmask8)__A);
}

/* 64-bit lanes, unmasked.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shrdi_epi64 (__m256i __A, __m256i __B, int __C)
{
  return (__m256i) __builtin_ia32_vpshrd_v4di ((__v4di)__A, (__v4di) __B, __C);
}
/* 128-bit immediate-count concatenated right shifts (VPSHRDW/D/Q);
   same semantics as the 256-bit forms above.  Merge-masked, 16-bit.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shrdi_epi16 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
		      int __E)
{
  return (__m128i)__builtin_ia32_vpshrd_v8hi_mask ((__v8hi)__C, (__v8hi) __D,
						   __E, (__v8hi) __A, (__mmask8)__B);
}

/* Zero-masked, 16-bit.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shrdi_epi16 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
{
  return (__m128i)__builtin_ia32_vpshrd_v8hi_mask ((__v8hi)__B, (__v8hi) __C,
						   __D, (__v8hi) _mm_setzero_si128 (), (__mmask8)__A);
}

/* Unmasked, 16-bit.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shrdi_epi16 (__m128i __A, __m128i __B, int __C)
{
  return (__m128i) __builtin_ia32_vpshrd_v8hi ((__v8hi)__A, (__v8hi) __B, __C);
}

/* Merge-masked, 32-bit.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shrdi_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
		      int __E)
{
  return (__m128i)__builtin_ia32_vpshrd_v4si_mask ((__v4si)__C, (__v4si) __D,
						   __E, (__v4si) __A, (__mmask8)__B);
}

/* Zero-masked, 32-bit.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shrdi_epi32 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
{
  return (__m128i)__builtin_ia32_vpshrd_v4si_mask ((__v4si)__B, (__v4si) __C,
						   __D, (__v4si) _mm_setzero_si128 (), (__mmask8)__A);
}

/* Unmasked, 32-bit.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shrdi_epi32 (__m128i __A, __m128i __B, int __C)
{
  return (__m128i) __builtin_ia32_vpshrd_v4si ((__v4si)__A, (__v4si) __B, __C);
}

/* Merge-masked, 64-bit.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shrdi_epi64 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
		      int __E)
{
  return (__m128i)__builtin_ia32_vpshrd_v2di_mask ((__v2di)__C, (__v2di) __D,
						   __E, (__v2di) __A, (__mmask8)__B);
}

/* Zero-masked, 64-bit.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shrdi_epi64 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
{
  return (__m128i)__builtin_ia32_vpshrd_v2di_mask ((__v2di)__B, (__v2di) __C,
						   __D, (__v2di) _mm_setzero_si128 (), (__mmask8)__A);
}

/* Unmasked, 64-bit.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shrdi_epi64 (__m128i __A, __m128i __B, int __C)
{
  return (__m128i) __builtin_ia32_vpshrd_v2di ((__v2di)__A, (__v2di) __B, __C);
}
/* Concatenated shift left by immediate (VPSHLDW/D/Q), 256-bit.  Each
   result lane is the high part of (__A lane : __B lane) << __C.
   Unmasked, 16-bit.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shldi_epi16 (__m256i __A, __m256i __B, int __C)
{
  return (__m256i) __builtin_ia32_vpshld_v16hi ((__v16hi)__A, (__v16hi) __B,
						__C);
}

/* Merge-masked, 16-bit: unselected result lanes come from __A.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shldi_epi16 (__m256i __A, __mmask16 __B, __m256i __C, __m256i __D,
			 int __E)
{
  return (__m256i)__builtin_ia32_vpshld_v16hi_mask ((__v16hi)__C,
						    (__v16hi) __D, __E, (__v16hi) __A, (__mmask16)__B);
}

/* Zero-masked, 16-bit.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shldi_epi16 (__mmask16 __A, __m256i __B, __m256i __C, int __D)
{
  return (__m256i)__builtin_ia32_vpshld_v16hi_mask ((__v16hi)__B,
						    (__v16hi) __C, __D, (__v16hi) _mm256_setzero_si256 (), (__mmask16)__A);
}

/* Merge-masked, 32-bit.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shldi_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D,
			 int __E)
{
  return (__m256i)__builtin_ia32_vpshld_v8si_mask ((__v8si)__C, (__v8si) __D,
						   __E, (__v8si) __A, (__mmask8)__B);
}

/* Zero-masked, 32-bit.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shldi_epi32 (__mmask8 __A, __m256i __B, __m256i __C, int __D)
{
  return (__m256i)__builtin_ia32_vpshld_v8si_mask ((__v8si)__B, (__v8si) __C,
						   __D, (__v8si) _mm256_setzero_si256 (), (__mmask8)__A);
}

/* Unmasked, 32-bit.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shldi_epi32 (__m256i __A, __m256i __B, int __C)
{
  return (__m256i) __builtin_ia32_vpshld_v8si ((__v8si)__A, (__v8si) __B, __C);
}

/* Merge-masked, 64-bit.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shldi_epi64 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D,
			 int __E)
{
  return (__m256i)__builtin_ia32_vpshld_v4di_mask ((__v4di)__C, (__v4di) __D,
						   __E, (__v4di) __A, (__mmask8)__B);
}

/* Zero-masked, 64-bit.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shldi_epi64 (__mmask8 __A, __m256i __B, __m256i __C, int __D)
{
  return (__m256i)__builtin_ia32_vpshld_v4di_mask ((__v4di)__B, (__v4di) __C,
						   __D, (__v4di) _mm256_setzero_si256 (), (__mmask8)__A);
}

/* Unmasked, 64-bit.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shldi_epi64 (__m256i __A, __m256i __B, int __C)
{
  return (__m256i) __builtin_ia32_vpshld_v4di ((__v4di)__A, (__v4di) __B, __C);
}
/* 128-bit immediate-count concatenated left shifts (VPSHLDW/D/Q);
   same semantics as the 256-bit forms above.  Merge-masked, 16-bit.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shldi_epi16 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
		      int __E)
{
  return (__m128i)__builtin_ia32_vpshld_v8hi_mask ((__v8hi)__C, (__v8hi) __D,
						   __E, (__v8hi) __A, (__mmask8)__B);
}

/* Zero-masked, 16-bit.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shldi_epi16 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
{
  return (__m128i)__builtin_ia32_vpshld_v8hi_mask ((__v8hi)__B, (__v8hi) __C,
						   __D, (__v8hi) _mm_setzero_si128 (), (__mmask8)__A);
}

/* Unmasked, 16-bit.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shldi_epi16 (__m128i __A, __m128i __B, int __C)
{
  return (__m128i) __builtin_ia32_vpshld_v8hi ((__v8hi)__A, (__v8hi) __B, __C);
}

/* Merge-masked, 32-bit.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shldi_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
		      int __E)
{
  return (__m128i)__builtin_ia32_vpshld_v4si_mask ((__v4si)__C, (__v4si) __D,
						   __E, (__v4si) __A, (__mmask8)__B);
}

/* Zero-masked, 32-bit.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shldi_epi32 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
{
  return (__m128i)__builtin_ia32_vpshld_v4si_mask ((__v4si)__B, (__v4si) __C,
						   __D, (__v4si) _mm_setzero_si128 (), (__mmask8)__A);
}

/* Unmasked, 32-bit.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shldi_epi32 (__m128i __A, __m128i __B, int __C)
{
  return (__m128i) __builtin_ia32_vpshld_v4si ((__v4si)__A, (__v4si) __B, __C);
}

/* Merge-masked, 64-bit.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shldi_epi64 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
		      int __E)
{
  return (__m128i)__builtin_ia32_vpshld_v2di_mask ((__v2di)__C, (__v2di) __D,
						   __E, (__v2di) __A, (__mmask8)__B);
}

/* Zero-masked, 64-bit.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shldi_epi64 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
{
  return (__m128i)__builtin_ia32_vpshld_v2di_mask ((__v2di)__B, (__v2di) __C,
						   __D, (__v2di) _mm_setzero_si128 (), (__mmask8)__A);
}

/* Unmasked, 64-bit.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shldi_epi64 (__m128i __A, __m128i __B, int __C)
{
  return (__m128i) __builtin_ia32_vpshld_v2di ((__v2di)__A, (__v2di) __B, __C);
}
  433. #else
/* Non-__OPTIMIZE__ fallbacks: macro forms of the shrdi intrinsics so
   the count argument reaches the builtin as an expression even when
   inlining/folding is disabled.  Argument order and casts mirror the
   inline definitions above.  */
#define _mm256_shrdi_epi16(A, B, C) \
  ((__m256i) __builtin_ia32_vpshrd_v16hi ((__v16hi)(__m256i)(A), \
					  (__v16hi)(__m256i)(B),(int)(C)))
#define _mm256_mask_shrdi_epi16(A, B, C, D, E) \
  ((__m256i) __builtin_ia32_vpshrd_v16hi_mask ((__v16hi)(__m256i)(C), \
					       (__v16hi)(__m256i)(D), \
					       (int)(E), \
					       (__v16hi)(__m256i)(A), \
					       (__mmask16)(B)))
#define _mm256_maskz_shrdi_epi16(A, B, C, D) \
  ((__m256i) \
   __builtin_ia32_vpshrd_v16hi_mask ((__v16hi)(__m256i)(B), \
				     (__v16hi)(__m256i)(C),(int)(D), \
				     (__v16hi)(__m256i)_mm256_setzero_si256 (), \
				     (__mmask16)(A)))
#define _mm256_shrdi_epi32(A, B, C) \
  ((__m256i) __builtin_ia32_vpshrd_v8si ((__v8si)(__m256i)(A), \
					 (__v8si)(__m256i)(B),(int)(C)))
#define _mm256_mask_shrdi_epi32(A, B, C, D, E) \
  ((__m256i) __builtin_ia32_vpshrd_v8si_mask ((__v8si)(__m256i)(C), \
					      (__v8si)(__m256i)(D), \
					      (int)(E), \
					      (__v8si)(__m256i)(A), \
					      (__mmask8)(B)))
#define _mm256_maskz_shrdi_epi32(A, B, C, D) \
  ((__m256i) \
   __builtin_ia32_vpshrd_v8si_mask ((__v8si)(__m256i)(B), \
				    (__v8si)(__m256i)(C),(int)(D), \
				    (__v8si)(__m256i)_mm256_setzero_si256 (), \
				    (__mmask8)(A)))
#define _mm256_shrdi_epi64(A, B, C) \
  ((__m256i) __builtin_ia32_vpshrd_v4di ((__v4di)(__m256i)(A), \
					 (__v4di)(__m256i)(B),(int)(C)))
#define _mm256_mask_shrdi_epi64(A, B, C, D, E) \
  ((__m256i) __builtin_ia32_vpshrd_v4di_mask ((__v4di)(__m256i)(C), \
					      (__v4di)(__m256i)(D), (int)(E), \
					      (__v4di)(__m256i)(A), \
					      (__mmask8)(B)))
#define _mm256_maskz_shrdi_epi64(A, B, C, D) \
  ((__m256i) \
   __builtin_ia32_vpshrd_v4di_mask ((__v4di)(__m256i)(B), \
				    (__v4di)(__m256i)(C),(int)(D), \
				    (__v4di)(__m256i)_mm256_setzero_si256 (), \
				    (__mmask8)(A)))
#define _mm_shrdi_epi16(A, B, C) \
  ((__m128i) __builtin_ia32_vpshrd_v8hi ((__v8hi)(__m128i)(A), \
					 (__v8hi)(__m128i)(B),(int)(C)))
#define _mm_mask_shrdi_epi16(A, B, C, D, E) \
  ((__m128i) __builtin_ia32_vpshrd_v8hi_mask ((__v8hi)(__m128i)(C), \
					      (__v8hi)(__m128i)(D), (int)(E), \
					      (__v8hi)(__m128i)(A), \
					      (__mmask8)(B)))
#define _mm_maskz_shrdi_epi16(A, B, C, D) \
  ((__m128i) \
   __builtin_ia32_vpshrd_v8hi_mask ((__v8hi)(__m128i)(B), \
				    (__v8hi)(__m128i)(C),(int)(D), \
				    (__v8hi)(__m128i)_mm_setzero_si128 (), \
				    (__mmask8)(A)))
#define _mm_shrdi_epi32(A, B, C) \
  ((__m128i) __builtin_ia32_vpshrd_v4si ((__v4si)(__m128i)(A), \
					 (__v4si)(__m128i)(B),(int)(C)))
#define _mm_mask_shrdi_epi32(A, B, C, D, E) \
  ((__m128i) __builtin_ia32_vpshrd_v4si_mask ((__v4si)(__m128i)(C), \
					      (__v4si)(__m128i)(D), (int)(E), \
					      (__v4si)(__m128i)(A), \
					      (__mmask8)(B)))
#define _mm_maskz_shrdi_epi32(A, B, C, D) \
  ((__m128i) \
   __builtin_ia32_vpshrd_v4si_mask ((__v4si)(__m128i)(B), \
				    (__v4si)(__m128i)(C),(int)(D), \
				    (__v4si)(__m128i)_mm_setzero_si128 (), \
				    (__mmask8)(A)))
#define _mm_shrdi_epi64(A, B, C) \
  ((__m128i) __builtin_ia32_vpshrd_v2di ((__v2di)(__m128i)(A), \
					 (__v2di)(__m128i)(B),(int)(C)))
#define _mm_mask_shrdi_epi64(A, B, C, D, E) \
  ((__m128i) __builtin_ia32_vpshrd_v2di_mask ((__v2di)(__m128i)(C), \
					      (__v2di)(__m128i)(D), (int)(E), \
					      (__v2di)(__m128i)(A), \
					      (__mmask8)(B)))
#define _mm_maskz_shrdi_epi64(A, B, C, D) \
  ((__m128i) \
   __builtin_ia32_vpshrd_v2di_mask ((__v2di)(__m128i)(B), \
				    (__v2di)(__m128i)(C),(int)(D), \
				    (__v2di)(__m128i)_mm_setzero_si128 (), \
				    (__mmask8)(A)))
/* Macro fallbacks for the shldi (concatenated shift left) intrinsics,
   mirroring the inline definitions in the __OPTIMIZE__ branch.  */
#define _mm256_shldi_epi16(A, B, C) \
  ((__m256i) __builtin_ia32_vpshld_v16hi ((__v16hi)(__m256i)(A), \
					  (__v16hi)(__m256i)(B),(int)(C)))
#define _mm256_mask_shldi_epi16(A, B, C, D, E) \
  ((__m256i) __builtin_ia32_vpshld_v16hi_mask ((__v16hi)(__m256i)(C), \
					       (__v16hi)(__m256i)(D), \
					       (int)(E), \
					       (__v16hi)(__m256i)(A), \
					       (__mmask16)(B)))
#define _mm256_maskz_shldi_epi16(A, B, C, D) \
  ((__m256i) \
   __builtin_ia32_vpshld_v16hi_mask ((__v16hi)(__m256i)(B), \
				     (__v16hi)(__m256i)(C),(int)(D), \
				     (__v16hi)(__m256i)_mm256_setzero_si256 (), \
				     (__mmask16)(A)))
#define _mm256_shldi_epi32(A, B, C) \
  ((__m256i) __builtin_ia32_vpshld_v8si ((__v8si)(__m256i)(A), \
					 (__v8si)(__m256i)(B),(int)(C)))
#define _mm256_mask_shldi_epi32(A, B, C, D, E) \
  ((__m256i) __builtin_ia32_vpshld_v8si_mask ((__v8si)(__m256i)(C), \
					      (__v8si)(__m256i)(D), (int)(E), \
					      (__v8si)(__m256i)(A), \
					      (__mmask8)(B)))
#define _mm256_maskz_shldi_epi32(A, B, C, D) \
  ((__m256i) \
   __builtin_ia32_vpshld_v8si_mask ((__v8si)(__m256i)(B), \
				    (__v8si)(__m256i)(C),(int)(D), \
				    (__v8si)(__m256i)_mm256_setzero_si256 (), \
				    (__mmask8)(A)))
#define _mm256_shldi_epi64(A, B, C) \
  ((__m256i) __builtin_ia32_vpshld_v4di ((__v4di)(__m256i)(A), \
					 (__v4di)(__m256i)(B),(int)(C)))
#define _mm256_mask_shldi_epi64(A, B, C, D, E) \
  ((__m256i) __builtin_ia32_vpshld_v4di_mask ((__v4di)(__m256i)(C), \
					      (__v4di)(__m256i)(D), (int)(E), \
					      (__v4di)(__m256i)(A), \
					      (__mmask8)(B)))
#define _mm256_maskz_shldi_epi64(A, B, C, D) \
  ((__m256i) \
   __builtin_ia32_vpshld_v4di_mask ((__v4di)(__m256i)(B), \
				    (__v4di)(__m256i)(C),(int)(D), \
				    (__v4di)(__m256i)_mm256_setzero_si256 (), \
				    (__mmask8)(A)))
#define _mm_shldi_epi16(A, B, C) \
  ((__m128i) __builtin_ia32_vpshld_v8hi ((__v8hi)(__m128i)(A), \
					 (__v8hi)(__m128i)(B),(int)(C)))
#define _mm_mask_shldi_epi16(A, B, C, D, E) \
  ((__m128i) __builtin_ia32_vpshld_v8hi_mask ((__v8hi)(__m128i)(C), \
					      (__v8hi)(__m128i)(D), (int)(E), \
					      (__v8hi)(__m128i)(A), \
					      (__mmask8)(B)))
#define _mm_maskz_shldi_epi16(A, B, C, D) \
  ((__m128i) \
   __builtin_ia32_vpshld_v8hi_mask ((__v8hi)(__m128i)(B), \
				    (__v8hi)(__m128i)(C),(int)(D), \
				    (__v8hi)(__m128i)_mm_setzero_si128 (), \
				    (__mmask8)(A)))
#define _mm_shldi_epi32(A, B, C) \
  ((__m128i) __builtin_ia32_vpshld_v4si ((__v4si)(__m128i)(A), \
					 (__v4si)(__m128i)(B),(int)(C)))
#define _mm_mask_shldi_epi32(A, B, C, D, E) \
  ((__m128i) __builtin_ia32_vpshld_v4si_mask ((__v4si)(__m128i)(C), \
					      (__v4si)(__m128i)(D), (int)(E), \
					      (__v4si)(__m128i)(A), \
					      (__mmask8)(B)))
#define _mm_maskz_shldi_epi32(A, B, C, D) \
  ((__m128i) \
   __builtin_ia32_vpshld_v4si_mask ((__v4si)(__m128i)(B), \
				    (__v4si)(__m128i)(C),(int)(D), \
				    (__v4si)(__m128i)_mm_setzero_si128 (), \
				    (__mmask8)(A)))
#define _mm_shldi_epi64(A, B, C) \
  ((__m128i) __builtin_ia32_vpshld_v2di ((__v2di)(__m128i)(A), \
					 (__v2di)(__m128i)(B),(int)(C)))
#define _mm_mask_shldi_epi64(A, B, C, D, E) \
  ((__m128i) __builtin_ia32_vpshld_v2di_mask ((__v2di)(__m128i)(C), \
					      (__v2di)(__m128i)(D), (int)(E), \
					      (__v2di)(__m128i)(A), \
					      (__mmask8)(B)))
#define _mm_maskz_shldi_epi64(A, B, C, D) \
  ((__m128i) \
   __builtin_ia32_vpshld_v2di_mask ((__v2di)(__m128i)(B), \
				    (__v2di)(__m128i)(C),(int)(D), \
				    (__v2di)(__m128i)_mm_setzero_si128 (), \
				    (__mmask8)(A)))
  605. #endif
  606. extern __inline __m256i
  607. __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  608. _mm256_shrdv_epi16 (__m256i __A, __m256i __B, __m256i __C)
  609. {
  610. return (__m256i) __builtin_ia32_vpshrdv_v16hi ((__v16hi)__A, (__v16hi) __B,
  611. (__v16hi) __C);
  612. }
  613. extern __inline __m256i
  614. __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  615. _mm256_mask_shrdv_epi16 (__m256i __A, __mmask16 __B, __m256i __C, __m256i __D)
  616. {
  617. return (__m256i)__builtin_ia32_vpshrdv_v16hi_mask ((__v16hi)__A,
  618. (__v16hi) __C, (__v16hi) __D, (__mmask16)__B);
  619. }
  620. extern __inline __m256i
  621. __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  622. _mm256_maskz_shrdv_epi16 (__mmask16 __A, __m256i __B, __m256i __C, __m256i __D)
  623. {
  624. return (__m256i)__builtin_ia32_vpshrdv_v16hi_maskz ((__v16hi)__B,
  625. (__v16hi) __C, (__v16hi) __D, (__mmask16)__A);
  626. }
  627. extern __inline __m256i
  628. __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  629. _mm256_shrdv_epi32 (__m256i __A, __m256i __B, __m256i __C)
  630. {
  631. return (__m256i) __builtin_ia32_vpshrdv_v8si ((__v8si)__A, (__v8si) __B,
  632. (__v8si) __C);
  633. }
  634. extern __inline __m256i
  635. __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  636. _mm256_mask_shrdv_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
  637. {
  638. return (__m256i)__builtin_ia32_vpshrdv_v8si_mask ((__v8si)__A, (__v8si) __C,
  639. (__v8si) __D, (__mmask8)__B);
  640. }
/* Zero-masked VPSHRDVD: per 32-bit lane, low half of (__C:__B) >> __D;
   lanes whose bit in mask __A is clear are zeroed.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shrdv_epi32 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
{
return (__m256i)__builtin_ia32_vpshrdv_v8si_maskz ((__v8si)__B, (__v8si) __C,
(__v8si) __D, (__mmask8)__A);
}
/* VPSHRDVQ: per 64-bit lane, return the low 64 bits of the 128-bit
   concatenation (__B:__A) shifted right by the count in __C (mod 64).  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shrdv_epi64 (__m256i __A, __m256i __B, __m256i __C)
{
return (__m256i) __builtin_ia32_vpshrdv_v4di ((__v4di)__A, (__v4di) __B,
(__v4di) __C);
}
/* Merge-masked VPSHRDVQ: per 64-bit lane, low half of (__C:__A) >> __D;
   lanes whose bit in mask __B is clear keep the original value of __A.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shrdv_epi64 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
{
return (__m256i)__builtin_ia32_vpshrdv_v4di_mask ((__v4di)__A, (__v4di) __C,
(__v4di) __D, (__mmask8)__B);
}
/* Zero-masked VPSHRDVQ: per 64-bit lane, low half of (__C:__B) >> __D;
   lanes whose bit in mask __A is clear are zeroed.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shrdv_epi64 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
{
return (__m256i)__builtin_ia32_vpshrdv_v4di_maskz ((__v4di)__B, (__v4di) __C,
(__v4di) __D, (__mmask8)__A);
}
/* 128-bit VPSHRDVW: per 16-bit lane, return the low 16 bits of the
   32-bit concatenation (__B:__A) shifted right by __C (mod 16).  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shrdv_epi16 (__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_vpshrdv_v8hi ((__v8hi)__A, (__v8hi) __B,
(__v8hi) __C);
}
/* Merge-masked 128-bit VPSHRDVW: low half of (__C:__A) >> __D per lane;
   lanes whose bit in mask __B is clear keep the original value of __A.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shrdv_epi16 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
{
return (__m128i)__builtin_ia32_vpshrdv_v8hi_mask ((__v8hi)__A, (__v8hi) __C,
(__v8hi) __D, (__mmask8)__B);
}
/* Zero-masked 128-bit VPSHRDVW: low half of (__C:__B) >> __D per lane;
   lanes whose bit in mask __A is clear are zeroed.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shrdv_epi16 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
{
return (__m128i)__builtin_ia32_vpshrdv_v8hi_maskz ((__v8hi)__B, (__v8hi) __C,
(__v8hi) __D, (__mmask8)__A);
}
/* 128-bit VPSHRDVD: per 32-bit lane, return the low 32 bits of the
   64-bit concatenation (__B:__A) shifted right by __C (mod 32).  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shrdv_epi32 (__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_vpshrdv_v4si ((__v4si)__A, (__v4si) __B,
(__v4si) __C);
}
/* Merge-masked 128-bit VPSHRDVD: low half of (__C:__A) >> __D per lane;
   lanes whose bit in mask __B is clear keep the original value of __A.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shrdv_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
{
return (__m128i)__builtin_ia32_vpshrdv_v4si_mask ((__v4si)__A, (__v4si) __C,
(__v4si) __D, (__mmask8)__B);
}
/* Zero-masked 128-bit VPSHRDVD: low half of (__C:__B) >> __D per lane;
   lanes whose bit in mask __A is clear are zeroed.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shrdv_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
{
return (__m128i)__builtin_ia32_vpshrdv_v4si_maskz ((__v4si)__B, (__v4si) __C,
(__v4si) __D, (__mmask8)__A);
}
/* 128-bit VPSHRDVQ: per 64-bit lane, return the low 64 bits of the
   128-bit concatenation (__B:__A) shifted right by __C (mod 64).  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shrdv_epi64 (__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_vpshrdv_v2di ((__v2di)__A, (__v2di) __B,
(__v2di) __C);
}
/* Merge-masked 128-bit VPSHRDVQ: low half of (__C:__A) >> __D per lane;
   lanes whose bit in mask __B is clear keep the original value of __A.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shrdv_epi64 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
{
return (__m128i)__builtin_ia32_vpshrdv_v2di_mask ((__v2di)__A, (__v2di) __C,
(__v2di) __D, (__mmask8)__B);
}
/* Zero-masked 128-bit VPSHRDVQ: low half of (__C:__B) >> __D per lane;
   lanes whose bit in mask __A is clear are zeroed.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shrdv_epi64 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
{
return (__m128i)__builtin_ia32_vpshrdv_v2di_maskz ((__v2di)__B, (__v2di) __C,
(__v2di) __D, (__mmask8)__A);
}
/* VPSHLDVW: per 16-bit lane, return the upper 16 bits of the 32-bit
   concatenation (__A:__B) shifted left by the count in __C (mod 16).  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shldv_epi16 (__m256i __A, __m256i __B, __m256i __C)
{
return (__m256i) __builtin_ia32_vpshldv_v16hi ((__v16hi)__A, (__v16hi) __B,
(__v16hi) __C);
}
/* Merge-masked VPSHLDVW: per 16-bit lane, upper half of (__A:__C) << __D;
   lanes whose bit in mask __B is clear keep the original value of __A.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shldv_epi16 (__m256i __A, __mmask16 __B, __m256i __C, __m256i __D)
{
return (__m256i)__builtin_ia32_vpshldv_v16hi_mask ((__v16hi)__A,
(__v16hi) __C, (__v16hi) __D, (__mmask16)__B);
}
/* Zero-masked VPSHLDVW: per 16-bit lane, upper half of (__B:__C) << __D;
   lanes whose bit in mask __A is clear are zeroed.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shldv_epi16 (__mmask16 __A, __m256i __B, __m256i __C, __m256i __D)
{
return (__m256i)__builtin_ia32_vpshldv_v16hi_maskz ((__v16hi)__B,
(__v16hi) __C, (__v16hi) __D, (__mmask16)__A);
}
/* VPSHLDVD: per 32-bit lane, return the upper 32 bits of the 64-bit
   concatenation (__A:__B) shifted left by the count in __C (mod 32).  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shldv_epi32 (__m256i __A, __m256i __B, __m256i __C)
{
return (__m256i) __builtin_ia32_vpshldv_v8si ((__v8si)__A, (__v8si) __B,
(__v8si) __C);
}
/* Merge-masked VPSHLDVD: per 32-bit lane, upper half of (__A:__C) << __D;
   lanes whose bit in mask __B is clear keep the original value of __A.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shldv_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
{
return (__m256i)__builtin_ia32_vpshldv_v8si_mask ((__v8si)__A, (__v8si) __C,
(__v8si) __D, (__mmask8)__B) ;
}
/* Zero-masked VPSHLDVD: per 32-bit lane, upper half of (__B:__C) << __D;
   lanes whose bit in mask __A is clear are zeroed.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shldv_epi32 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
{
return (__m256i)__builtin_ia32_vpshldv_v8si_maskz ((__v8si)__B, (__v8si) __C,
(__v8si) __D, (__mmask8)__A);
}
/* VPSHLDVQ: per 64-bit lane, return the upper 64 bits of the 128-bit
   concatenation (__A:__B) shifted left by the count in __C (mod 64).  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shldv_epi64 (__m256i __A, __m256i __B, __m256i __C)
{
return (__m256i) __builtin_ia32_vpshldv_v4di ((__v4di)__A, (__v4di) __B,
(__v4di) __C);
}
/* Merge-masked VPSHLDVQ: per 64-bit lane, upper half of (__A:__C) << __D;
   lanes whose bit in mask __B is clear keep the original value of __A.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shldv_epi64 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
{
return (__m256i)__builtin_ia32_vpshldv_v4di_mask ((__v4di)__A, (__v4di) __C,
(__v4di) __D, (__mmask8)__B);
}
/* Zero-masked VPSHLDVQ: per 64-bit lane, upper half of (__B:__C) << __D;
   lanes whose bit in mask __A is clear are zeroed.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shldv_epi64 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
{
return (__m256i)__builtin_ia32_vpshldv_v4di_maskz ((__v4di)__B, (__v4di) __C,
(__v4di) __D, (__mmask8)__A);
}
/* 128-bit VPSHLDVW: per 16-bit lane, return the upper 16 bits of the
   32-bit concatenation (__A:__B) shifted left by __C (mod 16).  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shldv_epi16 (__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_vpshldv_v8hi ((__v8hi)__A, (__v8hi) __B,
(__v8hi) __C);
}
/* Merge-masked 128-bit VPSHLDVW: upper half of (__A:__C) << __D per lane;
   lanes whose bit in mask __B is clear keep the original value of __A.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shldv_epi16 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
{
return (__m128i)__builtin_ia32_vpshldv_v8hi_mask ((__v8hi)__A, (__v8hi) __C,
(__v8hi) __D, (__mmask8)__B);
}
/* Zero-masked 128-bit VPSHLDVW: upper half of (__B:__C) << __D per lane;
   lanes whose bit in mask __A is clear are zeroed.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shldv_epi16 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
{
return (__m128i)__builtin_ia32_vpshldv_v8hi_maskz ((__v8hi)__B, (__v8hi) __C,
(__v8hi) __D, (__mmask8)__A);
}
/* 128-bit VPSHLDVD: per 32-bit lane, return the upper 32 bits of the
   64-bit concatenation (__A:__B) shifted left by __C (mod 32).  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shldv_epi32 (__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_vpshldv_v4si ((__v4si)__A, (__v4si) __B,
(__v4si) __C);
}
/* Merge-masked 128-bit VPSHLDVD: upper half of (__A:__C) << __D per lane;
   lanes whose bit in mask __B is clear keep the original value of __A.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shldv_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
{
return (__m128i)__builtin_ia32_vpshldv_v4si_mask ((__v4si)__A, (__v4si) __C,
(__v4si) __D, (__mmask8)__B);
}
/* Zero-masked 128-bit VPSHLDVD: upper half of (__B:__C) << __D per lane;
   lanes whose bit in mask __A is clear are zeroed.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shldv_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
{
return (__m128i)__builtin_ia32_vpshldv_v4si_maskz ((__v4si)__B, (__v4si) __C,
(__v4si) __D, (__mmask8)__A);
}
/* 128-bit VPSHLDVQ: per 64-bit lane, return the upper 64 bits of the
   128-bit concatenation (__A:__B) shifted left by __C (mod 64).  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shldv_epi64 (__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_vpshldv_v2di ((__v2di)__A, (__v2di) __B,
(__v2di) __C);
}
/* Merge-masked 128-bit VPSHLDVQ: upper half of (__A:__C) << __D per lane;
   lanes whose bit in mask __B is clear keep the original value of __A.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shldv_epi64 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
{
return (__m128i)__builtin_ia32_vpshldv_v2di_mask ((__v2di)__A, (__v2di) __C,
(__v2di) __D, (__mmask8)__B);
}
/* Zero-masked 128-bit VPSHLDVQ: upper half of (__B:__C) << __D per lane;
   lanes whose bit in mask __A is clear are zeroed.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shldv_epi64 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
{
return (__m128i)__builtin_ia32_vpshldv_v2di_maskz ((__v2di)__B, (__v2di) __C,
(__v2di) __D, (__mmask8)__A);
}
  858. #ifdef __DISABLE_AVX512VBMI2VL__
  859. #undef __DISABLE_AVX512VBMI2VL__
  860. #pragma GCC pop_options
#endif /* __DISABLE_AVX512VBMI2VL__ */
  862. #if !defined(__AVX512VL__) || !defined(__AVX512VBMI2__) || \
  863. !defined(__AVX512BW__)
  864. #pragma GCC push_options
  865. #pragma GCC target("avx512vbmi2,avx512vl,avx512bw")
  866. #define __DISABLE_AVX512VBMI2VLBW__
#endif /* __AVX512VBMI2VLBW__ */
/* VPCOMPRESSB: pack the bytes of __C whose bit in mask __B is set
   contiguously into the low positions of the result; the remaining
   positions are taken from __A.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_compress_epi8 (__m256i __A, __mmask32 __B, __m256i __C)
{
return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi)__C,
(__v32qi)__A, (__mmask32)__B);
}
/* Zero-masked VPCOMPRESSB: pack the bytes of __B whose bit in mask __A
   is set contiguously into the low positions of the result; the
   remaining positions are zeroed.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_compress_epi8 (__mmask32 __A, __m256i __B)
{
return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __B,
(__v32qi) _mm256_setzero_si256 (), (__mmask32) __A);
}
/* VPCOMPRESSB to memory: contiguously store the bytes of __C whose bit
   in mask __B is set to the (possibly unaligned) address __A.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_compressstoreu_epi8 (void * __A, __mmask32 __B, __m256i __C)
{
__builtin_ia32_compressstoreuqi256_mask ((__v32qi *) __A, (__v32qi) __C,
(__mmask32) __B);
}
/* VPEXPANDB: distribute the low-order bytes of __C, in order, to the
   result positions whose bit in mask __B is set; positions with a
   clear bit keep the corresponding byte of __A.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_expand_epi8 (__m256i __A, __mmask32 __B, __m256i __C)
{
return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __C,
(__v32qi) __A,
(__mmask32) __B);
}
/* Zero-masked VPEXPANDB: distribute the low-order bytes of __B, in
   order, to the result positions whose bit in mask __A is set;
   positions with a clear bit are zeroed.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_expand_epi8 (__mmask32 __A, __m256i __B)
{
return (__m256i) __builtin_ia32_expandqi256_maskz ((__v32qi) __B,
(__v32qi) _mm256_setzero_si256 (), (__mmask32) __A);
}
/* VPEXPANDB from memory: load contiguous bytes from the (possibly
   unaligned) address __C and distribute them, in order, to the result
   positions whose bit in mask __B is set; positions with a clear bit
   keep the corresponding byte of __A.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_expandloadu_epi8 (__m256i __A, __mmask32 __B, const void * __C)
{
return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *) __C,
(__v32qi) __A, (__mmask32) __B);
}
/* Zero-masked VPEXPANDB from memory: load contiguous bytes from the
   (possibly unaligned) address __B and distribute them, in order, to
   the result positions whose bit in mask __A is set; positions with a
   clear bit are zeroed.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_expandloadu_epi8 (__mmask32 __A, const void * __B)
{
return (__m256i) __builtin_ia32_expandloadqi256_maskz ((const __v32qi *) __B,
(__v32qi) _mm256_setzero_si256 (), (__mmask32) __A);
}
  918. #ifdef __DISABLE_AVX512VBMI2VLBW__
  919. #undef __DISABLE_AVX512VBMI2VLBW__
  920. #pragma GCC pop_options
#endif /* __DISABLE_AVX512VBMI2VLBW__ */
#endif /* _AVX512VBMI2VLINTRIN_H_INCLUDED */