/* Copyright (C) 2014-2019 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef _AVX512VLDQINTRIN_H_INCLUDED
#define _AVX512VLDQINTRIN_H_INCLUDED

#if !defined(__AVX512VL__) || !defined(__AVX512DQ__)
#pragma GCC push_options
#pragma GCC target("avx512vl,avx512dq")
#define __DISABLE_AVX512VLDQ__
#endif /* __AVX512VLDQ__ */
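
/* Truncating conversions of packed double precision values to packed
   signed/unsigned 64-bit integers (_cvttpd_epi64 / _cvttpd_epu64), with
   merge-masking (_mask_) and zero-masking (_maskz_) variants.

   Illustrative usage sketch (editorial note, not part of the original
   header):

     __m256d v = _mm256_set_pd (4.7, -3.2, 2.9, -1.5);
     __m256i a = _mm256_cvttpd_epi64 (v);              // { -1, 2, -3, 4 }
     __m256i b = _mm256_maskz_cvttpd_epi64 (0x5, v);   // lanes 1 and 3 zeroed
     __m256i c = _mm256_mask_cvttpd_epi64 (a, 0x5, v); // lanes 1 and 3 from a
*/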
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttpd_epi64 (__m256d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
      (__v4di) _mm256_setzero_si256 (), (__mmask8) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
      (__v4di) __W, (__mmask8) __U);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
      (__v4di) _mm256_setzero_si256 (), (__mmask8) __U);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttpd_epi64 (__m128d __A)
{
  return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
      (__v2di) _mm_setzero_si128 (), (__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A)
{
  return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
      (__v2di) __W, (__mmask8) __U);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A)
{
  return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
      (__v2di) _mm_setzero_si128 (), (__mmask8) __U);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttpd_epu64 (__m256d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
      (__v4di) _mm256_setzero_si256 (), (__mmask8) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
      (__v4di) __W, (__mmask8) __U);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
      (__v4di) _mm256_setzero_si256 (), (__mmask8) __U);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttpd_epu64 (__m128d __A)
{
  return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
      (__v2di) _mm_setzero_si128 (), (__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A)
{
  return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
      (__v2di) __W, (__mmask8) __U);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A)
{
  return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
      (__v2di) _mm_setzero_si128 (), (__mmask8) __U);
}
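
/* The same double precision to signed/unsigned 64-bit integer conversions,
   but rounding according to the current rounding mode (MXCSR) instead of
   truncating.  */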
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtpd_epi64 (__m256d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
      (__v4di) _mm256_setzero_si256 (), (__mmask8) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
      (__v4di) __W, (__mmask8) __U);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
      (__v4di) _mm256_setzero_si256 (), (__mmask8) __U);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpd_epi64 (__m128d __A)
{
  return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
      (__v2di) _mm_setzero_si128 (), (__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A)
{
  return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
      (__v2di) __W, (__mmask8) __U);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A)
{
  return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
      (__v2di) _mm_setzero_si128 (), (__mmask8) __U);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtpd_epu64 (__m256d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
      (__v4di) _mm256_setzero_si256 (), (__mmask8) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
      (__v4di) __W, (__mmask8) __U);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
      (__v4di) _mm256_setzero_si256 (), (__mmask8) __U);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpd_epu64 (__m128d __A)
{
  return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
      (__v2di) _mm_setzero_si128 (), (__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A)
{
  return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
      (__v2di) __W, (__mmask8) __U);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A)
{
  return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
      (__v2di) _mm_setzero_si128 (), (__mmask8) __U);
}
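
/* Truncating conversions of packed single precision values to packed
   signed/unsigned 64-bit integers.  The 256-bit forms widen four floats from
   a __m128 source; the 128-bit forms convert the two low floats.  */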
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttps_epi64 (__m128 __A)
{
  return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
      (__v4di) _mm256_setzero_si256 (), (__mmask8) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A)
{
  return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
      (__v4di) __W, (__mmask8) __U);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A)
{
  return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
      (__v4di) _mm256_setzero_si256 (), (__mmask8) __U);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttps_epi64 (__m128 __A)
{
  return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
      (__v2di) _mm_setzero_si128 (), (__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A)
{
  return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
      (__v2di) __W, (__mmask8) __U);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A)
{
  return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
      (__v2di) _mm_setzero_si128 (), (__mmask8) __U);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttps_epu64 (__m128 __A)
{
  return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
      (__v4di) _mm256_setzero_si256 (), (__mmask8) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A)
{
  return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
      (__v4di) __W, (__mmask8) __U);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A)
{
  return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
      (__v4di) _mm256_setzero_si256 (), (__mmask8) __U);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttps_epu64 (__m128 __A)
{
  return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
      (__v2di) _mm_setzero_si128 (), (__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A)
{
  return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
      (__v2di) __W, (__mmask8) __U);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A)
{
  return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
      (__v2di) _mm_setzero_si128 (), (__mmask8) __U);
}
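
/* Broadcasts of a 128-bit chunk (two doubles or two 64-bit integers) to both
   128-bit halves of a 256-bit destination, with masked variants.  */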
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcast_f64x2 (__m128d __A)
{
  return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A,
      (__v4df) _mm256_undefined_pd (), (__mmask8) -1);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_broadcast_f64x2 (__m256d __O, __mmask8 __M, __m128d __A)
{
  return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A,
      (__v4df) __O, __M);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
{
  return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A,
      (__v4df) _mm256_setzero_pd (), __M);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcast_i64x2 (__m128i __A)
{
  return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) __A,
      (__v4di) _mm256_undefined_si256 (), (__mmask8) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_broadcast_i64x2 (__m256i __O, __mmask8 __M, __m128i __A)
{
  return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) __A,
      (__v4di) __O, __M);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
{
  return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) __A,
      (__v4di) _mm256_setzero_si256 (), __M);
}
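
/* Broadcasts of a 64-bit chunk (two floats or two 32-bit integers) to every
   64-bit position of the destination, in 256-bit and 128-bit forms.  */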
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcast_f32x2 (__m128 __A)
{
  return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
      (__v8sf) _mm256_undefined_ps (), (__mmask8) -1);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
{
  return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
      (__v8sf) __O, __M);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
{
  return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
      (__v8sf) _mm256_setzero_ps (), __M);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcast_i32x2 (__m128i __A)
{
  return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
      (__v8si) _mm256_undefined_si256 (), (__mmask8) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
{
  return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
      (__v8si) __O, __M);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
{
  return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
      (__v8si) _mm256_setzero_si256 (), __M);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_broadcast_i32x2 (__m128i __A)
{
  return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
      (__v4si) _mm_undefined_si128 (), (__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
{
  return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
      (__v4si) __O, __M);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
{
  return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
      (__v4si) _mm_setzero_si128 (), __M);
}
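
/* Element-wise 64-bit multiplication keeping the low 64 bits of each
   product.  The unmasked forms are written as plain vector multiplication so
   the compiler can simplify them.

   Illustrative usage sketch (editorial note, not part of the original
   header):

     __m256i x = _mm256_set1_epi64x (3);
     __m256i y = _mm256_set1_epi64x (-7);
     __m256i p = _mm256_mullo_epi64 (x, y);   // every lane holds -21
*/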
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mullo_epi64 (__m256i __A, __m256i __B)
{
  return (__m256i) ((__v4du) __A * (__v4du) __B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A, (__v4di) __B,
      (__v4di) __W, (__mmask8) __U);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A, (__v4di) __B,
      (__v4di) _mm256_setzero_si256 (), (__mmask8) __U);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mullo_epi64 (__m128i __A, __m128i __B)
{
  return (__m128i) ((__v2du) __A * (__v2du) __B);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A, (__v2di) __B,
      (__v2di) __W, (__mmask8) __U);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A, (__v2di) __B,
      (__v2di) _mm_setzero_si128 (), (__mmask8) __U);
}
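
/* Masked bitwise AND-NOT of packed double/single precision values: each
   selected lane receives (~__A) & __B; unselected lanes are taken from __W
   (merge-masking) or zeroed (zero-masking).  */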
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A, (__v4df) __B,
      (__v4df) __W, (__mmask8) __U);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A, (__v4df) __B,
      (__v4df) _mm256_setzero_pd (), (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A, (__v2df) __B,
      (__v2df) __W, (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A, (__v2df) __B,
      (__v2df) _mm_setzero_pd (), (__mmask8) __U);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A, (__v8sf) __B,
      (__v8sf) __W, (__mmask8) __U);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A, (__v8sf) __B,
      (__v8sf) _mm256_setzero_ps (), (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A, (__v4sf) __B,
      (__v4sf) __W, (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A, (__v4sf) __B,
      (__v4sf) _mm_setzero_ps (), (__mmask8) __U);
}
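
/* Conversions of packed single precision values to packed signed/unsigned
   64-bit integers using the current rounding mode.  */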
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtps_epi64 (__m128 __A)
{
  return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
      (__v4di) _mm256_setzero_si256 (), (__mmask8) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A)
{
  return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
      (__v4di) __W, (__mmask8) __U);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A)
{
  return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
      (__v4di) _mm256_setzero_si256 (), (__mmask8) __U);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtps_epi64 (__m128 __A)
{
  return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
      (__v2di) _mm_setzero_si128 (), (__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A)
{
  return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
      (__v2di) __W, (__mmask8) __U);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A)
{
  return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
      (__v2di) _mm_setzero_si128 (), (__mmask8) __U);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtps_epu64 (__m128 __A)
{
  return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
      (__v4di) _mm256_setzero_si256 (), (__mmask8) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A)
{
  return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
      (__v4di) __W, (__mmask8) __U);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A)
{
  return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
      (__v4di) _mm256_setzero_si256 (), (__mmask8) __U);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtps_epu64 (__m128 __A)
{
  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
      (__v2di) _mm_setzero_si128 (), (__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A)
{
  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
      (__v2di) __W, (__mmask8) __U);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A)
{
  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
      (__v2di) _mm_setzero_si128 (), (__mmask8) __U);
}
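
/* Conversions of packed signed/unsigned 64-bit integers to packed single
   precision values.  Both the 256-bit and the 128-bit integer sources
   produce a __m128 result; the 128-bit source fills only the two low float
   lanes.  */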
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi64_ps (__m256i __A)
{
  return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
      (__v4sf) _mm_setzero_ps (), (__mmask8) -1);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A)
{
  return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
      (__v4sf) __W, (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A)
{
  return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
      (__v4sf) _mm_setzero_ps (), (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi64_ps (__m128i __A)
{
  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
      (__v4sf) _mm_setzero_ps (), (__mmask8) -1);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A)
{
  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
      (__v4sf) __W, (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A)
{
  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
      (__v4sf) _mm_setzero_ps (), (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu64_ps (__m256i __A)
{
  return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
      (__v4sf) _mm_setzero_ps (), (__mmask8) -1);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A)
{
  return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
      (__v4sf) __W, (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A)
{
  return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
      (__v4sf) _mm_setzero_ps (), (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu64_ps (__m128i __A)
{
  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
      (__v4sf) _mm_setzero_ps (), (__mmask8) -1);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A)
{
  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
      (__v4sf) __W, (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A)
{
  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
      (__v4sf) _mm_setzero_ps (), (__mmask8) __U);
}
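
/* Conversions of packed signed/unsigned 64-bit integers to packed double
   precision values (same element count in and out).  */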
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi64_pd (__m256i __A)
{
  return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
      (__v4df) _mm256_setzero_pd (), (__mmask8) -1);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A)
{
  return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
      (__v4df) __W, (__mmask8) __U);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A)
{
  return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
      (__v4df) _mm256_setzero_pd (), (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi64_pd (__m128i __A)
{
  return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
      (__v2df) _mm_setzero_pd (), (__mmask8) -1);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A)
{
  return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
      (__v2df) __W, (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A)
{
  return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
      (__v2df) _mm_setzero_pd (), (__mmask8) __U);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu64_pd (__m256i __A)
{
  return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
      (__v4df) _mm256_setzero_pd (), (__mmask8) -1);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A)
{
  return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
      (__v4df) __W, (__mmask8) __U);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A)
{
  return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
      (__v4df) _mm256_setzero_pd (), (__mmask8) __U);
}
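
/* Masked bitwise AND of packed double/single precision values; only the
   _mask_/_maskz_ forms are new here, the unmasked operations already exist
   in earlier instruction set extensions.  */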
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A, (__v4df) __B,
      (__v4df) __W, (__mmask8) __U);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A, (__v4df) __B,
      (__v4df) _mm256_setzero_pd (), (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A, (__v2df) __B,
      (__v2df) __W, (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A, (__v2df) __B,
      (__v2df) _mm_setzero_pd (), (__mmask8) __U);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A, (__v8sf) __B,
      (__v8sf) __W, (__mmask8) __U);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A, (__v8sf) __B,
      (__v8sf) _mm256_setzero_ps (), (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A, (__v4sf) __B,
      (__v4sf) __W, (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A, (__v4sf) __B,
      (__v4sf) _mm_setzero_ps (), (__mmask8) __U);
}
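
/* 128-bit form of the unsigned 64-bit integer to double precision
   conversion; see the 256-bit variants above.  */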
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu64_pd (__m128i __A)
{
  return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
      (__v2df) _mm_setzero_pd (), (__mmask8) -1);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A)
{
  return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
      (__v2df) __W, (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A)
{
  return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
      (__v2df) _mm_setzero_pd (), (__mmask8) __U);
}
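
/* Masked bitwise XOR of packed double/single precision values.  */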
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A, (__v4df) __B,
      (__v4df) __W, (__mmask8) __U);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A, (__v4df) __B,
      (__v4df) _mm256_setzero_pd (), (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A, (__v2df) __B,
      (__v2df) __W, (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A, (__v2df) __B,
      (__v2df) _mm_setzero_pd (), (__mmask8) __U);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A, (__v8sf) __B,
      (__v8sf) __W, (__mmask8) __U);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A, (__v8sf) __B,
      (__v8sf) _mm256_setzero_ps (), (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A, (__v4sf) __B,
      (__v4sf) __W, (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A, (__v4sf) __B,
      (__v4sf) _mm_setzero_ps (), (__mmask8) __U);
}
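
/* Masked bitwise OR of packed double/single precision values.  */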
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A, (__v4df) __B,
      (__v4df) __W, (__mmask8) __U);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A, (__v4df) __B,
      (__v4df) _mm256_setzero_pd (), (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A, (__v2df) __B,
      (__v2df) __W, (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A, (__v2df) __B,
      (__v2df) _mm_setzero_pd (), (__mmask8) __U);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A, (__v8sf) __B,
      (__v8sf) __W, (__mmask8) __U);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A, (__v8sf) __B,
      (__v8sf) _mm256_setzero_ps (), (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A, (__v4sf) __B,
      (__v4sf) __W, (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A, (__v4sf) __B,
      (__v4sf) _mm_setzero_ps (), (__mmask8) __U);
}
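
/* Mask-to-vector moves: each mask bit becomes an all-ones (bit set) or
   all-zeros (bit clear) 32-bit or 64-bit element.

   Illustrative usage sketch (editorial note, not part of the original
   header):

     __m128i m = _mm_movm_epi32 (0x9);   // lanes { -1, 0, 0, -1 }
*/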
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_movm_epi32 (__mmask8 __A)
{
  return (__m128i) __builtin_ia32_cvtmask2d128 (__A);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movm_epi32 (__mmask8 __A)
{
  return (__m256i) __builtin_ia32_cvtmask2d256 (__A);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_movm_epi64 (__mmask8 __A)
{
  return (__m128i) __builtin_ia32_cvtmask2q128 (__A);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movm_epi64 (__mmask8 __A)
{
  return (__m256i) __builtin_ia32_cvtmask2q256 (__A);
}
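
/* Vector-to-mask moves: the most significant bit of each 32-bit or 64-bit
   element becomes the corresponding bit of the result mask.  */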
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_movepi32_mask (__m128i __A)
{
  return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movepi32_mask (__m256i __A)
{
  return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_movepi64_mask (__m128i __A)
{
  return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movepi64_mask (__m256i __A)
{
  return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
}
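
/* The intrinsics below take immediate operands that must be compile-time
   constants, so inline definitions are only provided when optimizing.
   First: extraction of one 128-bit field, selected by __imm, from a 256-bit
   vector.  */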
#ifdef __OPTIMIZE__
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extractf64x2_pd (__m256d __A, const int __imm)
{
  return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A, __imm,
      (__v2df) _mm_setzero_pd (), (__mmask8) -1);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_extractf64x2_pd (__m128d __W, __mmask8 __U, __m256d __A,
    const int __imm)
{
  return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A, __imm,
      (__v2df) __W, (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_extractf64x2_pd (__mmask8 __U, __m256d __A, const int __imm)
{
  return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A, __imm,
      (__v2df) _mm_setzero_pd (), (__mmask8) __U);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extracti64x2_epi64 (__m256i __A, const int __imm)
{
  return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A, __imm,
      (__v2di) _mm_setzero_si128 (), (__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_extracti64x2_epi64 (__m128i __W, __mmask8 __U, __m256i __A,
    const int __imm)
{
  return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A, __imm,
      (__v2di) __W, (__mmask8) __U);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_extracti64x2_epi64 (__mmask8 __U, __m256i __A, const int __imm)
{
  return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A, __imm,
      (__v2di) _mm_setzero_si128 (), (__mmask8) __U);
}
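
/* Reduction of packed double/single precision values: each lane becomes the
   difference between the source and the source rounded to the number of
   fraction bits selected by the immediate (which also encodes the rounding
   mode).  */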
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_reduce_pd (__m256d __A, int __B)
{
  return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
      (__v4df) _mm256_setzero_pd (), (__mmask8) -1);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_reduce_pd (__m256d __W, __mmask8 __U, __m256d __A, int __B)
{
  return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
      (__v4df) __W, (__mmask8) __U);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_reduce_pd (__mmask8 __U, __m256d __A, int __B)
{
  return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
      (__v4df) _mm256_setzero_pd (), (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_pd (__m128d __A, int __B)
{
  return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
      (__v2df) _mm_setzero_pd (), (__mmask8) -1);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_reduce_pd (__m128d __W, __mmask8 __U, __m128d __A, int __B)
{
  return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
      (__v2df) __W, (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_reduce_pd (__mmask8 __U, __m128d __A, int __B)
{
  return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
      (__v2df) _mm_setzero_pd (), (__mmask8) __U);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_reduce_ps (__m256 __A, int __B)
{
  return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
      (__v8sf) _mm256_setzero_ps (), (__mmask8) -1);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_reduce_ps (__m256 __W, __mmask8 __U, __m256 __A, int __B)
{
  return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
      (__v8sf) __W, (__mmask8) __U);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_reduce_ps (__mmask8 __U, __m256 __A, int __B)
{
  return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
      (__v8sf) _mm256_setzero_ps (), (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_ps (__m128 __A, int __B)
{
  return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
      (__v4sf) _mm_setzero_ps (), (__mmask8) -1);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_reduce_ps (__m128 __W, __mmask8 __U, __m128 __A, int __B)
{
  return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
      (__v4sf) __W, (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_reduce_ps (__mmask8 __U, __m128 __A, int __B)
{
  return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
      (__v4sf) _mm_setzero_ps (), (__mmask8) __U);
}
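
/* Range selection on packed double/single precision values: the immediate
   selects, per lane, the minimum, maximum, or the value of smaller/larger
   magnitude of the two sources, plus sign-control bits.  */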
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_range_pd (__m256d __A, __m256d __B, int __C)
{
  return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
      (__v4df) __B, __C, (__v4df) _mm256_setzero_pd (), (__mmask8) -1);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_range_pd (__m256d __W, __mmask8 __U,
                      __m256d __A, __m256d __B, int __C)
{
  return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
      (__v4df) __B, __C, (__v4df) __W, (__mmask8) __U);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_range_pd (__mmask8 __U, __m256d __A, __m256d __B, int __C)
{
  return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
      (__v4df) __B, __C, (__v4df) _mm256_setzero_pd (), (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_range_pd (__m128d __A, __m128d __B, int __C)
{
  return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
      (__v2df) __B, __C, (__v2df) _mm_setzero_pd (), (__mmask8) -1);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_range_pd (__m128d __W, __mmask8 __U,
                   __m128d __A, __m128d __B, int __C)
{
  return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
      (__v2df) __B, __C, (__v2df) __W, (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_range_pd (__mmask8 __U, __m128d __A, __m128d __B, int __C)
{
  return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
      (__v2df) __B, __C, (__v2df) _mm_setzero_pd (), (__mmask8) __U);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_range_ps (__m256 __A, __m256 __B, int __C)
{
  return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
      (__v8sf) __B, __C, (__v8sf) _mm256_setzero_ps (), (__mmask8) -1);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_range_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B,
                      int __C)
{
  return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
      (__v8sf) __B, __C, (__v8sf) __W, (__mmask8) __U);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_range_ps (__mmask8 __U, __m256 __A, __m256 __B, int __C)
{
  return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
      (__v8sf) __B, __C, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_range_ps (__m128 __A, __m128 __B, int __C)
{
  return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
      (__v4sf) __B, __C, (__v4sf) _mm_setzero_ps (), (__mmask8) -1);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_range_ps (__m128 __W, __mmask8 __U,
                   __m128 __A, __m128 __B, int __C)
{
  return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
      (__v4sf) __B, __C, (__v4sf) __W, (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_range_ps (__mmask8 __U, __m128 __A, __m128 __B, int __C)
{
  return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
      (__v4sf) __B, __C, (__v4sf) _mm_setzero_ps (), (__mmask8) __U);
}
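
/* The _mm*_fpclass_{pd,ps}_mask wrappers below expose the
   VFPCLASSPD/VFPCLASSPS builtins: each element is tested against the
   categories selected by the immediate (per Intel's documentation the imm8
   bits cover QNaN, SNaN, positive/negative zero, positive/negative
   infinity, denormal and negative finite), and the outcome is returned as
   one bit per element in an __mmask8.  A minimal usage sketch
   (illustrative only):

     __mmask8 nan_lanes = _mm256_fpclass_pd_mask (x, 0x81);

   assuming bits 0 and 7 select QNaN and SNaN as in the ISA reference.  */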
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fpclass_pd_mask (__mmask8 __U, __m256d __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) __A,
      __imm, __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fpclass_pd_mask (__m256d __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) __A,
      __imm, (__mmask8) -1);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fpclass_ps_mask (__mmask8 __U, __m256 __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) __A,
      __imm, __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fpclass_ps_mask (__m256 __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) __A,
      __imm, (__mmask8) -1);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fpclass_pd_mask (__mmask8 __U, __m128d __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) __A,
      __imm, __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fpclass_pd_mask (__m128d __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) __A,
      __imm, (__mmask8) -1);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fpclass_ps_mask (__mmask8 __U, __m128 __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) __A,
      __imm, __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fpclass_ps_mask (__m128 __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) __A,
      __imm, (__mmask8) -1);
}
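
/* The 64x2 insert wrappers below expose VINSERTI64X2/VINSERTF64X2: the
   128-bit source __B replaces one 128-bit lane of __A, with the low bit of
   the immediate selecting the lower or upper lane.  A minimal usage sketch
   (illustrative only):

     __m256i r = _mm256_inserti64x2 (a, b, 1);

   which replaces the upper 128 bits of a with b.  */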
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_inserti64x2 (__m256i __A, __m128i __B, const int __imm)
{
  return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
      (__v2di) __B, __imm, (__v4di) _mm256_setzero_si256 (), (__mmask8) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_inserti64x2 (__m256i __W, __mmask8 __U, __m256i __A,
                         __m128i __B, const int __imm)
{
  return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
      (__v2di) __B, __imm, (__v4di) __W, (__mmask8) __U);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_inserti64x2 (__mmask8 __U, __m256i __A, __m128i __B,
                          const int __imm)
{
  return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
      (__v2di) __B, __imm, (__v4di) _mm256_setzero_si256 (), (__mmask8) __U);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_insertf64x2 (__m256d __A, __m128d __B, const int __imm)
{
  return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
      (__v2df) __B, __imm, (__v4df) _mm256_setzero_pd (), (__mmask8) -1);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_insertf64x2 (__m256d __W, __mmask8 __U, __m256d __A,
                         __m128d __B, const int __imm)
{
  return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
      (__v2df) __B, __imm, (__v4df) __W, (__mmask8) __U);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_insertf64x2 (__mmask8 __U, __m256d __A, __m128d __B,
                          const int __imm)
{
  return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
      (__v2df) __B, __imm, (__v4df) _mm256_setzero_pd (), (__mmask8) __U);
}
#else
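/* When not optimizing, the always_inline wrappers above may not be folded,
   so the immediate arguments of these builtins would not be seen as
   compile-time constants.  The macro forms below keep the immediates as
   literal constant expressions instead.  */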
#define _mm256_insertf64x2(X, Y, C) \
  ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X), \
      (__v2df)(__m128d) (Y), (int) (C), \
      (__v4df)(__m256d)_mm256_setzero_pd(), \
      (__mmask8)-1))
#define _mm256_mask_insertf64x2(W, U, X, Y, C) \
  ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X), \
      (__v2df)(__m128d) (Y), (int) (C), \
      (__v4df)(__m256d)(W), \
      (__mmask8)(U)))
#define _mm256_maskz_insertf64x2(U, X, Y, C) \
  ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X), \
      (__v2df)(__m128d) (Y), (int) (C), \
      (__v4df)(__m256d)_mm256_setzero_pd(), \
      (__mmask8)(U)))
#define _mm256_inserti64x2(X, Y, C) \
  ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X), \
      (__v2di)(__m128i) (Y), (int) (C), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)-1))
#define _mm256_mask_inserti64x2(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X), \
      (__v2di)(__m128i) (Y), (int) (C), \
      (__v4di)(__m256i)(W), \
      (__mmask8)(U)))
#define _mm256_maskz_inserti64x2(U, X, Y, C) \
  ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X), \
      (__v2di)(__m128i) (Y), (int) (C), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)(U)))
#define _mm256_extractf64x2_pd(X, C) \
  ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X), \
      (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8)-1))
#define _mm256_mask_extractf64x2_pd(W, U, X, C) \
  ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X), \
      (int) (C), (__v2df)(__m128d) (W), (__mmask8) (U)))
#define _mm256_maskz_extractf64x2_pd(U, X, C) \
  ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X), \
      (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8) (U)))
#define _mm256_extracti64x2_epi64(X, C) \
  ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X), \
      (int) (C), (__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8)-1))
#define _mm256_mask_extracti64x2_epi64(W, U, X, C) \
  ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X), \
      (int) (C), (__v2di)(__m128i) (W), (__mmask8) (U)))
#define _mm256_maskz_extracti64x2_epi64(U, X, C) \
  ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X), \
      (int) (C), (__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8) (U)))
#define _mm256_reduce_pd(A, B) \
  ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A), \
      (int)(B), (__v4df)_mm256_setzero_pd(), (__mmask8)-1))
#define _mm256_mask_reduce_pd(W, U, A, B) \
  ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A), \
      (int)(B), (__v4df)(__m256d)(W), (__mmask8)(U)))
#define _mm256_maskz_reduce_pd(U, A, B) \
  ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A), \
      (int)(B), (__v4df)_mm256_setzero_pd(), (__mmask8)(U)))
#define _mm_reduce_pd(A, B) \
  ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A), \
      (int)(B), (__v2df)_mm_setzero_pd(), (__mmask8)-1))
#define _mm_mask_reduce_pd(W, U, A, B) \
  ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A), \
      (int)(B), (__v2df)(__m128d)(W), (__mmask8)(U)))
#define _mm_maskz_reduce_pd(U, A, B) \
  ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A), \
      (int)(B), (__v2df)_mm_setzero_pd(), (__mmask8)(U)))
#define _mm256_reduce_ps(A, B) \
  ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A), \
      (int)(B), (__v8sf)_mm256_setzero_ps(), (__mmask8)-1))
#define _mm256_mask_reduce_ps(W, U, A, B) \
  ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A), \
      (int)(B), (__v8sf)(__m256)(W), (__mmask8)(U)))
#define _mm256_maskz_reduce_ps(U, A, B) \
  ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A), \
      (int)(B), (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)))
#define _mm_reduce_ps(A, B) \
  ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf)(__m128)(A), \
      (int)(B), (__v4sf)_mm_setzero_ps(), (__mmask8)-1))
#define _mm_mask_reduce_ps(W, U, A, B) \
  ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf)(__m128)(A), \
      (int)(B), (__v4sf)(__m128)(W), (__mmask8)(U)))
#define _mm_maskz_reduce_ps(U, A, B) \
  ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf)(__m128)(A), \
      (int)(B), (__v4sf)_mm_setzero_ps(), (__mmask8)(U)))
#define _mm256_range_pd(A, B, C) \
  ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A), \
      (__v4df)(__m256d)(B), (int)(C), \
      (__v4df)_mm256_setzero_pd(), (__mmask8)-1))
#define _mm256_maskz_range_pd(U, A, B, C) \
  ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A), \
      (__v4df)(__m256d)(B), (int)(C), \
      (__v4df)_mm256_setzero_pd(), (__mmask8)(U)))
#define _mm_range_pd(A, B, C) \
  ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), (int)(C), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1))
#define _mm256_range_ps(A, B, C) \
  ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A), \
      (__v8sf)(__m256)(B), (int)(C), \
      (__v8sf)_mm256_setzero_ps(), (__mmask8)-1))
#define _mm256_mask_range_ps(W, U, A, B, C) \
  ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A), \
      (__v8sf)(__m256)(B), (int)(C), \
      (__v8sf)(__m256)(W), (__mmask8)(U)))
#define _mm256_maskz_range_ps(U, A, B, C) \
  ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A), \
      (__v8sf)(__m256)(B), (int)(C), \
      (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)))
#define _mm_range_ps(A, B, C) \
  ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), (int)(C), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1))
#define _mm_mask_range_ps(W, U, A, B, C) \
  ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), (int)(C), \
      (__v4sf)(__m128)(W), (__mmask8)(U)))
#define _mm_maskz_range_ps(U, A, B, C) \
  ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), (int)(C), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U)))
#define _mm256_mask_range_pd(W, U, A, B, C) \
  ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A), \
      (__v4df)(__m256d)(B), (int)(C), \
      (__v4df)(__m256d)(W), (__mmask8)(U)))
#define _mm_mask_range_pd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), (int)(C), \
      (__v2df)(__m128d)(W), (__mmask8)(U)))
#define _mm_maskz_range_pd(U, A, B, C) \
  ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), (int)(C), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U)))
#define _mm256_mask_fpclass_pd_mask(u, X, C) \
  ((__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) (__m256d) (X), \
      (int) (C), (__mmask8)(u)))
#define _mm256_mask_fpclass_ps_mask(u, X, C) \
  ((__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) (__m256) (X), \
      (int) (C), (__mmask8)(u)))
#define _mm_mask_fpclass_pd_mask(u, X, C) \
  ((__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) (__m128d) (X), \
      (int) (C), (__mmask8)(u)))
#define _mm_mask_fpclass_ps_mask(u, X, C) \
  ((__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) (__m128) (X), \
      (int) (C), (__mmask8)(u)))
#define _mm256_fpclass_pd_mask(X, C) \
  ((__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) (__m256d) (X), \
      (int) (C), (__mmask8)-1))
#define _mm256_fpclass_ps_mask(X, C) \
  ((__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) (__m256) (X), \
      (int) (C), (__mmask8)-1))
#define _mm_fpclass_pd_mask(X, C) \
  ((__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) (__m128d) (X), \
      (int) (C), (__mmask8)-1))
#define _mm_fpclass_ps_mask(X, C) \
  ((__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) (__m128) (X), \
      (int) (C), (__mmask8)-1))
#endif

#ifdef __DISABLE_AVX512VLDQ__
#undef __DISABLE_AVX512VLDQ__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512VLDQ__ */

#endif /* _AVX512VLDQINTRIN_H_INCLUDED */