/* Copyright (C) 2019-2022 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512fp16vlintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512FP16VLINTRIN_H_INCLUDED
#define __AVX512FP16VLINTRIN_H_INCLUDED

#if !defined(__AVX512VL__) || !defined(__AVX512FP16__)
#pragma GCC push_options
#pragma GCC target("avx512fp16,avx512vl")
#define __DISABLE_AVX512FP16VL__
#endif /* __AVX512FP16VL__ */

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_castph_ps (__m128h __a)
{
  return (__m128) __a;
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castph_ps (__m256h __a)
{
  return (__m256) __a;
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_castph_pd (__m128h __a)
{
  return (__m128d) __a;
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castph_pd (__m256h __a)
{
  return (__m256d) __a;
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_castph_si128 (__m128h __a)
{
  return (__m128i) __a;
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castph_si256 (__m256h __a)
{
  return (__m256i) __a;
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_castps_ph (__m128 __a)
{
  return (__m128h) __a;
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castps_ph (__m256 __a)
{
  return (__m256h) __a;
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_castpd_ph (__m128d __a)
{
  return (__m128h) __a;
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castpd_ph (__m256d __a)
{
  return (__m256h) __a;
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_castsi128_ph (__m128i __a)
{
  return (__m128h) __a;
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castsi256_ph (__m256i __a)
{
  return (__m256h) __a;
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castph256_ph128 (__m256h __A)
{
  union
  {
    __m128h a[2];
    __m256h v;
  } u = { .v = __A };
  return u.a[0];
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castph128_ph256 (__m128h __A)
{
  union
  {
    __m128h a[2];
    __m256h v;
  } u;
  u.a[0] = __A;
  return u.v;
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_zextph128_ph256 (__m128h __A)
{
  return (__m256h) _mm256_insertf128_ps (_mm256_setzero_ps (),
					 (__m128) __A, 0);
}
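
/* Usage sketch (illustrative comment, not part of the GCC header): the
   cast intrinsics are zero-cost bit reinterpretations, which is what
   makes integer-level manipulation of half-precision lanes possible.
   Assuming <immintrin.h> is included and -mavx512fp16 -mavx512vl:

     __m128h h    = _mm_set1_ph ((_Float16) -2.0f);
     __m128i bits = _mm_castph_si128 (h);     // no instruction emitted
     __m128h back = _mm_castsi128_ph (bits);  // round-trips exactly

   _mm256_zextph128_ph256, by contrast, is not a pure cast: it
   guarantees the upper 128 bits of the result are zeroed.  */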
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_conj_pch (__m256h __A)
{
  return (__m256h) _mm256_xor_epi32 ((__m256i) __A, _mm256_set1_epi32 (1<<31));
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_conj_pch (__m256h __W, __mmask8 __U, __m256h __A)
{
  return (__m256h) __builtin_ia32_movaps256_mask ((__v8sf)
						  _mm256_conj_pch (__A),
						  (__v8sf) __W,
						  (__mmask8) __U);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_conj_pch (__mmask8 __U, __m256h __A)
{
  return (__m256h) __builtin_ia32_movaps256_mask ((__v8sf)
						  _mm256_conj_pch (__A),
						  (__v8sf)
						  _mm256_setzero_ps (),
						  (__mmask8) __U);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_conj_pch (__m128h __A)
{
  return (__m128h) _mm_xor_epi32 ((__m128i) __A, _mm_set1_epi32 (1<<31));
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_conj_pch (__m128h __W, __mmask8 __U, __m128h __A)
{
  return (__m128h) __builtin_ia32_movaps128_mask ((__v4sf) _mm_conj_pch (__A),
						  (__v4sf) __W,
						  (__mmask8) __U);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_conj_pch (__mmask8 __U, __m128h __A)
{
  return (__m128h) __builtin_ia32_movaps128_mask ((__v4sf) _mm_conj_pch (__A),
						  (__v4sf) _mm_setzero_ps (),
						  (__mmask8) __U);
}
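
/* Usage sketch (illustrative comment, not part of the GCC header):
   _mm_conj_pch treats each 32-bit lane as one complex half-precision
   value (real part in bits 0-15, imaginary part in bits 16-31) and
   negates the imaginary part by XORing the lane's top bit with 1<<31.
   Assuming <immintrin.h>:

     __m128h z = _mm_setr_ph ((_Float16) 1.0f, (_Float16) 2.0f,  // 1+2i
			      (_Float16) 3.0f, (_Float16) 4.0f,  // 3+4i
			      0, 0, 0, 0);
     __m128h zbar = _mm_conj_pch (z);  // 1-2i, 3-4i, 0, 0  */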
/* Intrinsics v[add,sub,mul,div]ph. */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_ph (__m128h __A, __m128h __B)
{
  return (__m128h) ((__v8hf) __A + (__v8hf) __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_add_ph (__m256h __A, __m256h __B)
{
  return (__m256h) ((__v16hf) __A + (__v16hf) __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_addph128_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_add_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
{
  return __builtin_ia32_addph256_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_ph (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_addph128_mask (__B, __C, _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_add_ph (__mmask16 __A, __m256h __B, __m256h __C)
{
  return __builtin_ia32_addph256_mask (__B, __C, _mm256_setzero_ph (), __A);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_ph (__m128h __A, __m128h __B)
{
  return (__m128h) ((__v8hf) __A - (__v8hf) __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sub_ph (__m256h __A, __m256h __B)
{
  return (__m256h) ((__v16hf) __A - (__v16hf) __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_subph128_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_sub_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
{
  return __builtin_ia32_subph256_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_ph (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_subph128_mask (__B, __C, _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_sub_ph (__mmask16 __A, __m256h __B, __m256h __C)
{
  return __builtin_ia32_subph256_mask (__B, __C, _mm256_setzero_ph (), __A);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_ph (__m128h __A, __m128h __B)
{
  return (__m128h) ((__v8hf) __A * (__v8hf) __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mul_ph (__m256h __A, __m256h __B)
{
  return (__m256h) ((__v16hf) __A * (__v16hf) __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_mul_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_mulph128_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_mul_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
{
  return __builtin_ia32_mulph256_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_mul_ph (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_mulph128_mask (__B, __C, _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_mul_ph (__mmask16 __A, __m256h __B, __m256h __C)
{
  return __builtin_ia32_mulph256_mask (__B, __C, _mm256_setzero_ph (), __A);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_ph (__m128h __A, __m128h __B)
{
  return (__m128h) ((__v8hf) __A / (__v8hf) __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_div_ph (__m256h __A, __m256h __B)
{
  return (__m256h) ((__v16hf) __A / (__v16hf) __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_div_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_divph128_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_div_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
{
  return __builtin_ia32_divph256_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_div_ph (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_divph128_mask (__B, __C, _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_div_ph (__mmask16 __A, __m256h __B, __m256h __C)
{
  return __builtin_ia32_divph256_mask (__B, __C, _mm256_setzero_ph (), __A);
}
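
/* Usage sketch (illustrative comment, not part of the GCC header): the
   mask variants take the pass-through vector first and the mask second,
   so destination lanes whose mask bit is clear keep the pass-through
   value, while the maskz variants zero them instead.  Assuming
   <immintrin.h>:

     __m128h a   = _mm_set1_ph ((_Float16) 1.0f);
     __m128h b   = _mm_set1_ph ((_Float16) 2.0f);
     __m128h src = _mm_setzero_ph ();
     __m128h r   = _mm_mask_add_ph (src, 0x0F, a, b);
     // lanes 0-3 hold 3.0; lanes 4-7 hold 0.0 (taken from src)  */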
/* Intrinsics v[max,min]ph. */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_ph (__m128h __A, __m128h __B)
{
  return __builtin_ia32_maxph128_mask (__A, __B, _mm_setzero_ph (),
				       (__mmask8) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_max_ph (__m256h __A, __m256h __B)
{
  return __builtin_ia32_maxph256_mask (__A, __B, _mm256_setzero_ph (),
				       (__mmask16) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_max_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_maxph128_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_max_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
{
  return __builtin_ia32_maxph256_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_max_ph (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_maxph128_mask (__B, __C, _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_max_ph (__mmask16 __A, __m256h __B, __m256h __C)
{
  return __builtin_ia32_maxph256_mask (__B, __C, _mm256_setzero_ph (), __A);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_ph (__m128h __A, __m128h __B)
{
  return __builtin_ia32_minph128_mask (__A, __B, _mm_setzero_ph (),
				       (__mmask8) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_min_ph (__m256h __A, __m256h __B)
{
  return __builtin_ia32_minph256_mask (__A, __B, _mm256_setzero_ph (),
				       (__mmask16) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_min_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_minph128_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_min_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
{
  return __builtin_ia32_minph256_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_min_ph (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_minph128_mask (__B, __C, _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_min_ph (__mmask16 __A, __m256h __B, __m256h __C)
{
  return __builtin_ia32_minph256_mask (__B, __C, _mm256_setzero_ph (), __A);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_ph (__m128h __A)
{
  return (__m128h) _mm_and_si128 (_mm_set1_epi32 (0x7FFF7FFF),
				  (__m128i) __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_abs_ph (__m256h __A)
{
  return (__m256h) _mm256_and_si256 (_mm256_set1_epi32 (0x7FFF7FFF),
				     (__m256i) __A);
}
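
/* Note (illustrative comment, not part of the GCC header): _mm_abs_ph
   has no dedicated instruction; it clears bit 15 of every 16-bit lane
   (the FP16 sign bit) by ANDing with 0x7FFF, packed two lanes per
   32-bit element as 0x7FFF7FFF.  Sketch:

     __m128h x  = _mm_set1_ph ((_Float16) -3.5f);
     __m128h ax = _mm_abs_ph (x);   // every lane becomes +3.5  */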
/* vcmpph */
#ifdef __OPTIMIZE__
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_ph_mask (__m128h __A, __m128h __B, const int __C)
{
  return (__mmask8) __builtin_ia32_cmpph128_mask (__A, __B, __C,
						  (__mmask8) -1);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmp_ph_mask (__mmask8 __A, __m128h __B, __m128h __C,
		      const int __D)
{
  return (__mmask8) __builtin_ia32_cmpph128_mask (__B, __C, __D, __A);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmp_ph_mask (__m256h __A, __m256h __B, const int __C)
{
  return (__mmask16) __builtin_ia32_cmpph256_mask (__A, __B, __C,
						   (__mmask16) -1);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmp_ph_mask (__mmask16 __A, __m256h __B, __m256h __C,
			 const int __D)
{
  return (__mmask16) __builtin_ia32_cmpph256_mask (__B, __C, __D, __A);
}

#else
#define _mm_cmp_ph_mask(A, B, C) \
  (__builtin_ia32_cmpph128_mask ((A), (B), (C), (-1)))
#define _mm_mask_cmp_ph_mask(A, B, C, D) \
  (__builtin_ia32_cmpph128_mask ((B), (C), (D), (A)))
#define _mm256_cmp_ph_mask(A, B, C) \
  (__builtin_ia32_cmpph256_mask ((A), (B), (C), (-1)))
#define _mm256_mask_cmp_ph_mask(A, B, C, D) \
  (__builtin_ia32_cmpph256_mask ((B), (C), (D), (A)))
#endif /* __OPTIMIZE__ */
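
/* Usage sketch (illustrative comment, not part of the GCC header): the
   comparison predicate is the same immediate used by _mm_cmp_ps, e.g.
   _CMP_LT_OS; each bit of the result mask reports one lane.  Assuming
   <immintrin.h>:

     __m128h a = _mm_set1_ph ((_Float16) 1.0f);
     __m128h b = _mm_set1_ph ((_Float16) 2.0f);
     __mmask8 lt = _mm_cmp_ph_mask (a, b, _CMP_LT_OS);   // 0xFF  */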
/* Intrinsics vsqrtph. */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_ph (__m128h __A)
{
  return __builtin_ia32_sqrtph128_mask (__A, _mm_setzero_ph (),
					(__mmask8) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sqrt_ph (__m256h __A)
{
  return __builtin_ia32_sqrtph256_mask (__A, _mm256_setzero_ph (),
					(__mmask16) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sqrt_ph (__m128h __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_sqrtph128_mask (__C, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_sqrt_ph (__m256h __A, __mmask16 __B, __m256h __C)
{
  return __builtin_ia32_sqrtph256_mask (__C, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sqrt_ph (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_sqrtph128_mask (__B, _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_sqrt_ph (__mmask16 __A, __m256h __B)
{
  return __builtin_ia32_sqrtph256_mask (__B, _mm256_setzero_ph (), __A);
}

/* Intrinsics vrsqrtph. */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt_ph (__m128h __A)
{
  return __builtin_ia32_rsqrtph128_mask (__A, _mm_setzero_ph (),
					 (__mmask8) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_rsqrt_ph (__m256h __A)
{
  return __builtin_ia32_rsqrtph256_mask (__A, _mm256_setzero_ph (),
					 (__mmask16) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rsqrt_ph (__m128h __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_rsqrtph128_mask (__C, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_rsqrt_ph (__m256h __A, __mmask16 __B, __m256h __C)
{
  return __builtin_ia32_rsqrtph256_mask (__C, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rsqrt_ph (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_rsqrtph128_mask (__B, _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_rsqrt_ph (__mmask16 __A, __m256h __B)
{
  return __builtin_ia32_rsqrtph256_mask (__B, _mm256_setzero_ph (), __A);
}

/* Intrinsics vrcpph. */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp_ph (__m128h __A)
{
  return __builtin_ia32_rcpph128_mask (__A, _mm_setzero_ph (),
				       (__mmask8) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_rcp_ph (__m256h __A)
{
  return __builtin_ia32_rcpph256_mask (__A, _mm256_setzero_ph (),
				       (__mmask16) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rcp_ph (__m128h __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_rcpph128_mask (__C, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_rcp_ph (__m256h __A, __mmask16 __B, __m256h __C)
{
  return __builtin_ia32_rcpph256_mask (__C, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rcp_ph (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_rcpph128_mask (__B, _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_rcp_ph (__mmask16 __A, __m256h __B)
{
  return __builtin_ia32_rcpph256_mask (__B, _mm256_setzero_ph (), __A);
}
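
/* Note (illustrative comment, not part of the GCC header): vrsqrtph and
   vrcpph are hardware approximations (relative error bounded by the
   ISA, on the order of 2^-11) rather than correctly rounded results;
   vsqrtph is exact.  A Newton-Raphson refinement sketch for 1/x,
   assuming <immintrin.h>:

     __m128h x   = _mm_set1_ph ((_Float16) 3.0f);
     __m128h r0  = _mm_rcp_ph (x);                 // ~1/3
     __m128h two = _mm_set1_ph ((_Float16) 2.0f);
     // one refinement step: r1 = r0 * (2 - x * r0)
     __m128h r1 = _mm_mul_ph (r0, _mm_sub_ph (two, _mm_mul_ph (x, r0)));  */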
/* Intrinsics vscalefph. */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_scalef_ph (__m128h __A, __m128h __B)
{
  return __builtin_ia32_scalefph128_mask (__A, __B, _mm_setzero_ph (),
					  (__mmask8) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_scalef_ph (__m256h __A, __m256h __B)
{
  return __builtin_ia32_scalefph256_mask (__A, __B, _mm256_setzero_ph (),
					  (__mmask16) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_scalef_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_scalefph128_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_scalef_ph (__m256h __A, __mmask16 __B, __m256h __C,
		       __m256h __D)
{
  return __builtin_ia32_scalefph256_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_scalef_ph (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_scalefph128_mask (__B, __C, _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_scalef_ph (__mmask16 __A, __m256h __B, __m256h __C)
{
  return __builtin_ia32_scalefph256_mask (__B, __C, _mm256_setzero_ph (),
					  __A);
}
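
/* Usage sketch (illustrative comment, not part of the GCC header):
   vscalefph computes __A * 2^floor(__B) per lane (with special-case
   handling defined by the ISA), the vector analogue of scalbnf:

     __m128h x = _mm_set1_ph ((_Float16) 1.5f);
     __m128h e = _mm_set1_ph ((_Float16) 3.0f);
     __m128h y = _mm_scalef_ph (x, e);   // every lane: 1.5 * 2^3 = 12.0  */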
/* Intrinsics vreduceph. */
#ifdef __OPTIMIZE__
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_ph (__m128h __A, int __B)
{
  return __builtin_ia32_reduceph128_mask (__A, __B, _mm_setzero_ph (),
					  (__mmask8) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_reduce_ph (__m128h __A, __mmask8 __B, __m128h __C, int __D)
{
  return __builtin_ia32_reduceph128_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_reduce_ph (__mmask8 __A, __m128h __B, int __C)
{
  return __builtin_ia32_reduceph128_mask (__B, __C, _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_reduce_ph (__m256h __A, int __B)
{
  return __builtin_ia32_reduceph256_mask (__A, __B, _mm256_setzero_ph (),
					  (__mmask16) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_reduce_ph (__m256h __A, __mmask16 __B, __m256h __C, int __D)
{
  return __builtin_ia32_reduceph256_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_reduce_ph (__mmask16 __A, __m256h __B, int __C)
{
  return __builtin_ia32_reduceph256_mask (__B, __C, _mm256_setzero_ph (),
					  __A);
}

#else
#define _mm_reduce_ph(A, B) \
  (__builtin_ia32_reduceph128_mask ((A), (B), _mm_setzero_ph (), \
				    ((__mmask8)-1)))
#define _mm_mask_reduce_ph(A, B, C, D) \
  (__builtin_ia32_reduceph128_mask ((C), (D), (A), (B)))
#define _mm_maskz_reduce_ph(A, B, C) \
  (__builtin_ia32_reduceph128_mask ((B), (C), _mm_setzero_ph (), (A)))
#define _mm256_reduce_ph(A, B) \
  (__builtin_ia32_reduceph256_mask ((A), (B), _mm256_setzero_ph (), \
				    ((__mmask16)-1)))
#define _mm256_mask_reduce_ph(A, B, C, D) \
  (__builtin_ia32_reduceph256_mask ((C), (D), (A), (B)))
#define _mm256_maskz_reduce_ph(A, B, C) \
  (__builtin_ia32_reduceph256_mask ((B), (C), _mm256_setzero_ph (), (A)))
#endif /* __OPTIMIZE__ */
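
/* Usage sketch (illustrative comment, not part of the GCC header):
   per the VREDUCE immediate encoding, the upper four bits select how
   many fraction bits to keep and the low two bits select the rounding
   direction, so the instruction extracts the part of each value below
   that granularity:

     __m128h x = _mm_reduce_ph (_mm_set1_ph ((_Float16) 2.75f), 0x01);
     // imm 0x01: keep 0 fraction bits, round toward -inf;
     // each lane becomes 2.75 - floor(2.75) = 0.75  */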
/* Intrinsics vrndscaleph. */
#ifdef __OPTIMIZE__
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_roundscale_ph (__m128h __A, int __B)
{
  return __builtin_ia32_rndscaleph128_mask (__A, __B, _mm_setzero_ph (),
					    (__mmask8) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_roundscale_ph (__m128h __A, __mmask8 __B, __m128h __C, int __D)
{
  return __builtin_ia32_rndscaleph128_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_roundscale_ph (__mmask8 __A, __m128h __B, int __C)
{
  return __builtin_ia32_rndscaleph128_mask (__B, __C, _mm_setzero_ph (),
					    __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_roundscale_ph (__m256h __A, int __B)
{
  return __builtin_ia32_rndscaleph256_mask (__A, __B, _mm256_setzero_ph (),
					    (__mmask16) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_roundscale_ph (__m256h __A, __mmask16 __B, __m256h __C,
			   int __D)
{
  return __builtin_ia32_rndscaleph256_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_roundscale_ph (__mmask16 __A, __m256h __B, int __C)
{
  return __builtin_ia32_rndscaleph256_mask (__B, __C, _mm256_setzero_ph (),
					    __A);
}

#else
#define _mm_roundscale_ph(A, B) \
  (__builtin_ia32_rndscaleph128_mask ((A), (B), _mm_setzero_ph (), \
				      ((__mmask8)-1)))
#define _mm_mask_roundscale_ph(A, B, C, D) \
  (__builtin_ia32_rndscaleph128_mask ((C), (D), (A), (B)))
#define _mm_maskz_roundscale_ph(A, B, C) \
  (__builtin_ia32_rndscaleph128_mask ((B), (C), _mm_setzero_ph (), (A)))
#define _mm256_roundscale_ph(A, B) \
  (__builtin_ia32_rndscaleph256_mask ((A), (B), _mm256_setzero_ph (), \
				      ((__mmask16)-1)))
#define _mm256_mask_roundscale_ph(A, B, C, D) \
  (__builtin_ia32_rndscaleph256_mask ((C), (D), (A), (B)))
#define _mm256_maskz_roundscale_ph(A, B, C) \
  (__builtin_ia32_rndscaleph256_mask ((B), (C), \
				      _mm256_setzero_ph (), (A)))
#endif /* __OPTIMIZE__ */
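
/* Usage sketch (illustrative comment, not part of the GCC header): in
   the vrndscale immediate, the low two bits pick the rounding direction
   (0 nearest, 1 down, 2 up, 3 truncate) and the high four bits pick a
   scale M, rounding each lane to a multiple of 2^-M:

     __m128h x  = _mm_set1_ph ((_Float16) 2.7f);
     __m128h fl = _mm_roundscale_ph (x, 0x01);  // floor -> 2.0
     __m128h q  = _mm_roundscale_ph (x, 0x10);  // nearest multiple
						// of 0.5 -> 2.5  */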
/* Intrinsics vfpclassph. */
#ifdef __OPTIMIZE__
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fpclass_ph_mask (__mmask8 __U, __m128h __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) __A,
						      __imm, __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fpclass_ph_mask (__m128h __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) __A,
						      __imm,
						      (__mmask8) -1);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fpclass_ph_mask (__mmask16 __U, __m256h __A, const int __imm)
{
  return (__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) __A,
						       __imm, __U);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fpclass_ph_mask (__m256h __A, const int __imm)
{
  return (__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) __A,
						       __imm,
						       (__mmask16) -1);
}

#else
#define _mm_fpclass_ph_mask(X, C) \
  ((__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) (__m128h) (X), \
						(int) (C), (__mmask8) -1))
#define _mm_mask_fpclass_ph_mask(u, X, C) \
  ((__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) (__m128h) (X), \
						(int) (C), (__mmask8) (u)))
#define _mm256_fpclass_ph_mask(X, C) \
  ((__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) (__m256h) (X), \
						 (int) (C), (__mmask16) -1))
#define _mm256_mask_fpclass_ph_mask(u, X, C) \
  ((__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) (__m256h) (X), \
						 (int) (C), (__mmask16) (u)))
#endif /* __OPTIMIZE__ */
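
/* Usage sketch (illustrative comment, not part of the GCC header): the
   vfpclass immediate is a bit-set of categories (0x01 QNaN, 0x02 +0,
   0x04 -0, 0x08 +inf, 0x10 -inf, 0x20 denormal, 0x40 negative finite,
   0x80 SNaN), which may be ORed together:

     __m128h x = _mm_set1_ph ((_Float16) 0.0f);
     __mmask8 nan_or_inf = _mm_fpclass_ph_mask (x, 0x99);
     // 0x99 = SNaN | -inf | +inf | QNaN; here the mask is 0  */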
/* Intrinsics vgetexpph, vgetexpsh. */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_getexp_ph (__m256h __A)
{
  return (__m256h) __builtin_ia32_getexpph256_mask ((__v16hf) __A,
						    (__v16hf)
						    _mm256_setzero_ph (),
						    (__mmask16) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_getexp_ph (__m256h __W, __mmask16 __U, __m256h __A)
{
  return (__m256h) __builtin_ia32_getexpph256_mask ((__v16hf) __A,
						    (__v16hf) __W,
						    (__mmask16) __U);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_getexp_ph (__mmask16 __U, __m256h __A)
{
  return (__m256h) __builtin_ia32_getexpph256_mask ((__v16hf) __A,
						    (__v16hf)
						    _mm256_setzero_ph (),
						    (__mmask16) __U);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_getexp_ph (__m128h __A)
{
  return (__m128h) __builtin_ia32_getexpph128_mask ((__v8hf) __A,
						    (__v8hf)
						    _mm_setzero_ph (),
						    (__mmask8) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_getexp_ph (__m128h __W, __mmask8 __U, __m128h __A)
{
  return (__m128h) __builtin_ia32_getexpph128_mask ((__v8hf) __A,
						    (__v8hf) __W,
						    (__mmask8) __U);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_getexp_ph (__mmask8 __U, __m128h __A)
{
  return (__m128h) __builtin_ia32_getexpph128_mask ((__v8hf) __A,
						    (__v8hf)
						    _mm_setzero_ph (),
						    (__mmask8) __U);
}
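
/* Usage sketch (illustrative comment, not part of the GCC header):
   vgetexpph returns each lane's unbiased exponent as a half-precision
   float, i.e. floor(log2(|x|)):

     __m128h x = _mm_set1_ph ((_Float16) 10.0f);
     __m128h e = _mm_getexp_ph (x);   // every lane: 3.0,
				      // since 2^3 <= 10 < 2^4  */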
/* Intrinsics vgetmantph, vgetmantsh. */
#ifdef __OPTIMIZE__
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_getmant_ph (__m256h __A, _MM_MANTISSA_NORM_ENUM __B,
		   _MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m256h) __builtin_ia32_getmantph256_mask ((__v16hf) __A,
						     (__C << 2) | __B,
						     (__v16hf)
						     _mm256_setzero_ph (),
						     (__mmask16) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_getmant_ph (__m256h __W, __mmask16 __U, __m256h __A,
			_MM_MANTISSA_NORM_ENUM __B,
			_MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m256h) __builtin_ia32_getmantph256_mask ((__v16hf) __A,
						     (__C << 2) | __B,
						     (__v16hf) __W,
						     (__mmask16) __U);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_getmant_ph (__mmask16 __U, __m256h __A,
			 _MM_MANTISSA_NORM_ENUM __B,
			 _MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m256h) __builtin_ia32_getmantph256_mask ((__v16hf) __A,
						     (__C << 2) | __B,
						     (__v16hf)
						     _mm256_setzero_ph (),
						     (__mmask16) __U);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_getmant_ph (__m128h __A, _MM_MANTISSA_NORM_ENUM __B,
		_MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m128h) __builtin_ia32_getmantph128_mask ((__v8hf) __A,
						     (__C << 2) | __B,
						     (__v8hf)
						     _mm_setzero_ph (),
						     (__mmask8) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_getmant_ph (__m128h __W, __mmask8 __U, __m128h __A,
		     _MM_MANTISSA_NORM_ENUM __B,
		     _MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m128h) __builtin_ia32_getmantph128_mask ((__v8hf) __A,
						     (__C << 2) | __B,
						     (__v8hf) __W,
						     (__mmask8) __U);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_getmant_ph (__mmask8 __U, __m128h __A,
		      _MM_MANTISSA_NORM_ENUM __B,
		      _MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m128h) __builtin_ia32_getmantph128_mask ((__v8hf) __A,
						     (__C << 2) | __B,
						     (__v8hf)
						     _mm_setzero_ph (),
						     (__mmask8) __U);
}

#else
#define _mm256_getmant_ph(X, B, C) \
  ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X), \
					       (int)(((C)<<2) | (B)), \
					       (__v16hf)(__m256h)_mm256_setzero_ph (), \
					       (__mmask16)-1))
#define _mm256_mask_getmant_ph(W, U, X, B, C) \
  ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X), \
					       (int)(((C)<<2) | (B)), \
					       (__v16hf)(__m256h)(W), \
					       (__mmask16)(U)))
#define _mm256_maskz_getmant_ph(U, X, B, C) \
  ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X), \
					       (int)(((C)<<2) | (B)), \
					       (__v16hf)(__m256h)_mm256_setzero_ph (), \
					       (__mmask16)(U)))
#define _mm_getmant_ph(X, B, C) \
  ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X), \
					       (int)(((C)<<2) | (B)), \
					       (__v8hf)(__m128h)_mm_setzero_ph (), \
					       (__mmask8)-1))
#define _mm_mask_getmant_ph(W, U, X, B, C) \
  ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X), \
					       (int)(((C)<<2) | (B)), \
					       (__v8hf)(__m128h)(W), \
					       (__mmask8)(U)))
#define _mm_maskz_getmant_ph(U, X, B, C) \
  ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X), \
					       (int)(((C)<<2) | (B)), \
					       (__v8hf)(__m128h)_mm_setzero_ph (), \
					       (__mmask8)(U)))
#endif /* __OPTIMIZE__ */
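
/* Usage sketch (illustrative comment, not part of the GCC header):
   vgetmantph normalizes each lane's mantissa into the interval chosen
   by the enum, so x == getmant(x) * 2^getexp(x) for the default [1,2)
   interval:

     __m128h x = _mm_set1_ph ((_Float16) 10.0f);
     __m128h m = _mm_getmant_ph (x, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
     // every lane: 1.25, since 10 = 1.25 * 2^3  */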
/* Intrinsics vcvtph2dq. */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtph_epi32 (__m128h __A)
{
  return (__m128i)
    __builtin_ia32_vcvtph2dq128_mask (__A,
				      (__v4si) _mm_setzero_si128 (),
				      (__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtph_epi32 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return (__m128i)
    __builtin_ia32_vcvtph2dq128_mask (__C, (__v4si) __A, __B);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtph_epi32 (__mmask8 __A, __m128h __B)
{
  return (__m128i)
    __builtin_ia32_vcvtph2dq128_mask (__B,
				      (__v4si) _mm_setzero_si128 (),
				      __A);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtph_epi32 (__m128h __A)
{
  return (__m256i)
    __builtin_ia32_vcvtph2dq256_mask (__A,
				      (__v8si) _mm256_setzero_si256 (),
				      (__mmask8) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtph_epi32 (__m256i __A, __mmask8 __B, __m128h __C)
{
  return (__m256i)
    __builtin_ia32_vcvtph2dq256_mask (__C, (__v8si) __A, __B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtph_epi32 (__mmask8 __A, __m128h __B)
{
  return (__m256i)
    __builtin_ia32_vcvtph2dq256_mask (__B,
				      (__v8si) _mm256_setzero_si256 (),
				      __A);
}
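
/* Usage sketch (illustrative comment, not part of the GCC header):
   vcvtph2dq widens, so eight FP16 values in a __m128h become eight
   32-bit ints and the 256-bit variant still takes a __m128h source:

     __m128h h = _mm_set1_ph ((_Float16) 7.5f);
     __m256i i = _mm256_cvtph_epi32 (h);  // eight lanes of 8, rounded
					  // under the current MXCSR mode  */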
/* Intrinsics vcvtph2udq. */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtph_epu32 (__m128h __A)
{
  return (__m128i)
    __builtin_ia32_vcvtph2udq128_mask (__A,
				       (__v4si) _mm_setzero_si128 (),
				       (__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtph_epu32 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return (__m128i)
    __builtin_ia32_vcvtph2udq128_mask (__C, (__v4si) __A, __B);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtph_epu32 (__mmask8 __A, __m128h __B)
{
  return (__m128i)
    __builtin_ia32_vcvtph2udq128_mask (__B,
				       (__v4si) _mm_setzero_si128 (),
				       __A);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtph_epu32 (__m128h __A)
{
  return (__m256i)
    __builtin_ia32_vcvtph2udq256_mask (__A,
				       (__v8si) _mm256_setzero_si256 (),
				       (__mmask8) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtph_epu32 (__m256i __A, __mmask8 __B, __m128h __C)
{
  return (__m256i)
    __builtin_ia32_vcvtph2udq256_mask (__C, (__v8si) __A, __B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtph_epu32 (__mmask8 __A, __m128h __B)
{
  return (__m256i)
    __builtin_ia32_vcvtph2udq256_mask (__B,
				       (__v8si) _mm256_setzero_si256 (),
				       __A);
}

/* Intrinsics vcvttph2dq. */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttph_epi32 (__m128h __A)
{
  return (__m128i)
    __builtin_ia32_vcvttph2dq128_mask (__A,
				       (__v4si) _mm_setzero_si128 (),
				       (__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvttph_epi32 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return (__m128i)
    __builtin_ia32_vcvttph2dq128_mask (__C,
				       (__v4si) __A,
				       __B);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvttph_epi32 (__mmask8 __A, __m128h __B)
{
  return (__m128i)
    __builtin_ia32_vcvttph2dq128_mask (__B,
				       (__v4si) _mm_setzero_si128 (),
				       __A);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttph_epi32 (__m128h __A)
{
  return (__m256i)
    __builtin_ia32_vcvttph2dq256_mask (__A,
				       (__v8si) _mm256_setzero_si256 (),
				       (__mmask8) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvttph_epi32 (__m256i __A, __mmask8 __B, __m128h __C)
{
  return (__m256i)
    __builtin_ia32_vcvttph2dq256_mask (__C,
				       (__v8si) __A,
				       __B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvttph_epi32 (__mmask8 __A, __m128h __B)
{
  return (__m256i)
    __builtin_ia32_vcvttph2dq256_mask (__B,
				       (__v8si) _mm256_setzero_si256 (),
				       __A);
}

/* Intrinsics vcvttph2udq. */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttph_epu32 (__m128h __A)
{
  return (__m128i)
    __builtin_ia32_vcvttph2udq128_mask (__A,
					(__v4si) _mm_setzero_si128 (),
					(__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvttph_epu32 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return (__m128i)
    __builtin_ia32_vcvttph2udq128_mask (__C,
					(__v4si) __A,
					__B);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvttph_epu32 (__mmask8 __A, __m128h __B)
{
  return (__m128i)
    __builtin_ia32_vcvttph2udq128_mask (__B,
					(__v4si) _mm_setzero_si128 (),
					__A);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttph_epu32 (__m128h __A)
{
  return (__m256i)
    __builtin_ia32_vcvttph2udq256_mask (__A,
					(__v8si) _mm256_setzero_si256 (),
					(__mmask8) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvttph_epu32 (__m256i __A, __mmask8 __B, __m128h __C)
{
  return (__m256i)
    __builtin_ia32_vcvttph2udq256_mask (__C,
					(__v8si) __A,
					__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvttph_epu32 (__mmask8 __A, __m128h __B)
{
  return (__m256i)
    __builtin_ia32_vcvttph2udq256_mask (__B,
					(__v8si) _mm256_setzero_si256 (),
					__A);
}
  1186. /* Intrinsics vcvtdq2ph. */
  1187. extern __inline __m128h
  1188. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1189. _mm_cvtepi32_ph (__m128i __A)
  1190. {
  1191. return __builtin_ia32_vcvtdq2ph128_mask ((__v4si) __A,
  1192. _mm_setzero_ph (),
  1193. (__mmask8) -1);
  1194. }
  1195. extern __inline __m128h
  1196. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1197. _mm_mask_cvtepi32_ph (__m128h __A, __mmask8 __B, __m128i __C)
  1198. {
  1199. return __builtin_ia32_vcvtdq2ph128_mask ((__v4si) __C, __A, __B);
  1200. }
  1201. extern __inline __m128h
  1202. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1203. _mm_maskz_cvtepi32_ph (__mmask8 __A, __m128i __B)
  1204. {
  1205. return __builtin_ia32_vcvtdq2ph128_mask ((__v4si) __B,
  1206. _mm_setzero_ph (),
  1207. __A);
  1208. }
  1209. extern __inline __m128h
  1210. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1211. _mm256_cvtepi32_ph (__m256i __A)
  1212. {
  1213. return __builtin_ia32_vcvtdq2ph256_mask ((__v8si) __A,
  1214. _mm_setzero_ph (),
  1215. (__mmask8) -1);
  1216. }
  1217. extern __inline __m128h
  1218. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1219. _mm256_mask_cvtepi32_ph (__m128h __A, __mmask8 __B, __m256i __C)
  1220. {
  1221. return __builtin_ia32_vcvtdq2ph256_mask ((__v8si) __C, __A, __B);
  1222. }
  1223. extern __inline __m128h
  1224. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1225. _mm256_maskz_cvtepi32_ph (__mmask8 __A, __m256i __B)
  1226. {
  1227. return __builtin_ia32_vcvtdq2ph256_mask ((__v8si) __B,
  1228. _mm_setzero_ph (),
  1229. __A);
  1230. }
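/* Usage sketch (illustrative; names are hypothetical): the merge-masked
   form converts only the lanes selected by the mask and copies the
   others from the passthrough operand:

     __m128h __keep = _mm_set1_ph ((_Float16) -1.0f);
     __m128i __ints = _mm_set1_epi32 (7);
     __m128h __res  = _mm_mask_cvtepi32_ph (__keep, 0x3, __ints);

   Lanes 0-1 of __res hold 7.0, lanes 2-3 keep -1.0, and the upper
   64 bits of the 128-bit result are zeroed.  */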
/* Intrinsics vcvtudq2ph.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu32_ph (__m128i __A)
{
  return __builtin_ia32_vcvtudq2ph128_mask
    ((__v4si) __A, _mm_setzero_ph (), (__mmask8) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepu32_ph (__m128h __A, __mmask8 __B, __m128i __C)
{
  return __builtin_ia32_vcvtudq2ph128_mask ((__v4si) __C, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepu32_ph (__mmask8 __A, __m128i __B)
{
  return __builtin_ia32_vcvtudq2ph128_mask
    ((__v4si) __B, _mm_setzero_ph (), __A);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu32_ph (__m256i __A)
{
  return __builtin_ia32_vcvtudq2ph256_mask
    ((__v8si) __A, _mm_setzero_ph (), (__mmask8) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepu32_ph (__m128h __A, __mmask8 __B, __m256i __C)
{
  return __builtin_ia32_vcvtudq2ph256_mask ((__v8si) __C, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepu32_ph (__mmask8 __A, __m256i __B)
{
  return __builtin_ia32_vcvtudq2ph256_mask
    ((__v8si) __B, _mm_setzero_ph (), __A);
}

/* Intrinsics vcvtph2qq.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtph_epi64 (__m128h __A)
{
  return __builtin_ia32_vcvtph2qq128_mask
    (__A, _mm_setzero_si128 (), (__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtph_epi64 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvtph2qq128_mask (__C, __A, __B);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtph_epi64 (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvtph2qq128_mask (__B, _mm_setzero_si128 (), __A);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtph_epi64 (__m128h __A)
{
  return __builtin_ia32_vcvtph2qq256_mask
    (__A, _mm256_setzero_si256 (), (__mmask8) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtph_epi64 (__m256i __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvtph2qq256_mask (__C, __A, __B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtph_epi64 (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvtph2qq256_mask (__B, _mm256_setzero_si256 (), __A);
}

/* Intrinsics vcvtph2uqq.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtph_epu64 (__m128h __A)
{
  return __builtin_ia32_vcvtph2uqq128_mask
    (__A, _mm_setzero_si128 (), (__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtph_epu64 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvtph2uqq128_mask (__C, __A, __B);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtph_epu64 (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvtph2uqq128_mask (__B, _mm_setzero_si128 (), __A);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtph_epu64 (__m128h __A)
{
  return __builtin_ia32_vcvtph2uqq256_mask
    (__A, _mm256_setzero_si256 (), (__mmask8) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtph_epu64 (__m256i __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvtph2uqq256_mask (__C, __A, __B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtph_epu64 (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvtph2uqq256_mask (__B, _mm256_setzero_si256 (), __A);
}

/* Intrinsics vcvttph2qq.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttph_epi64 (__m128h __A)
{
  return __builtin_ia32_vcvttph2qq128_mask
    (__A, _mm_setzero_si128 (), (__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvttph_epi64 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvttph2qq128_mask (__C, __A, __B);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvttph_epi64 (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvttph2qq128_mask (__B, _mm_setzero_si128 (), __A);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttph_epi64 (__m128h __A)
{
  return __builtin_ia32_vcvttph2qq256_mask
    (__A, _mm256_setzero_si256 (), (__mmask8) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvttph_epi64 (__m256i __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvttph2qq256_mask (__C, __A, __B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvttph_epi64 (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvttph2qq256_mask (__B, _mm256_setzero_si256 (), __A);
}

/* Intrinsics vcvttph2uqq.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttph_epu64 (__m128h __A)
{
  return __builtin_ia32_vcvttph2uqq128_mask
    (__A, _mm_setzero_si128 (), (__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvttph_epu64 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvttph2uqq128_mask (__C, __A, __B);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvttph_epu64 (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvttph2uqq128_mask (__B, _mm_setzero_si128 (), __A);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttph_epu64 (__m128h __A)
{
  return __builtin_ia32_vcvttph2uqq256_mask
    (__A, _mm256_setzero_si256 (), (__mmask8) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvttph_epu64 (__m256i __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvttph2uqq256_mask (__C, __A, __B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvttph_epu64 (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvttph2uqq256_mask (__B, _mm256_setzero_si256 (), __A);
}
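/* Usage sketch (illustrative; names are hypothetical): the qq/uqq
   conversions widen FP16 to 64-bit integers, so only the low FP16
   lanes of the source participate: two for the 128-bit forms, four
   for the 256-bit forms.

     __m128h __h = _mm_set1_ph ((_Float16) 9.75f);
     __m256i __q = _mm256_cvttph_epi64 (__h);

   Each of the four 64-bit lanes of __q holds 9 (truncated).  */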
/* Intrinsics vcvtqq2ph.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi64_ph (__m128i __A)
{
  return __builtin_ia32_vcvtqq2ph128_mask
    ((__v2di) __A, _mm_setzero_ph (), (__mmask8) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepi64_ph (__m128h __A, __mmask8 __B, __m128i __C)
{
  return __builtin_ia32_vcvtqq2ph128_mask ((__v2di) __C, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepi64_ph (__mmask8 __A, __m128i __B)
{
  return __builtin_ia32_vcvtqq2ph128_mask
    ((__v2di) __B, _mm_setzero_ph (), __A);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi64_ph (__m256i __A)
{
  return __builtin_ia32_vcvtqq2ph256_mask
    ((__v4di) __A, _mm_setzero_ph (), (__mmask8) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi64_ph (__m128h __A, __mmask8 __B, __m256i __C)
{
  return __builtin_ia32_vcvtqq2ph256_mask ((__v4di) __C, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepi64_ph (__mmask8 __A, __m256i __B)
{
  return __builtin_ia32_vcvtqq2ph256_mask
    ((__v4di) __B, _mm_setzero_ph (), __A);
}

/* Intrinsics vcvtuqq2ph.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu64_ph (__m128i __A)
{
  return __builtin_ia32_vcvtuqq2ph128_mask
    ((__v2di) __A, _mm_setzero_ph (), (__mmask8) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepu64_ph (__m128h __A, __mmask8 __B, __m128i __C)
{
  return __builtin_ia32_vcvtuqq2ph128_mask ((__v2di) __C, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepu64_ph (__mmask8 __A, __m128i __B)
{
  return __builtin_ia32_vcvtuqq2ph128_mask
    ((__v2di) __B, _mm_setzero_ph (), __A);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu64_ph (__m256i __A)
{
  return __builtin_ia32_vcvtuqq2ph256_mask
    ((__v4di) __A, _mm_setzero_ph (), (__mmask8) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepu64_ph (__m128h __A, __mmask8 __B, __m256i __C)
{
  return __builtin_ia32_vcvtuqq2ph256_mask ((__v4di) __C, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepu64_ph (__mmask8 __A, __m256i __B)
{
  return __builtin_ia32_vcvtuqq2ph256_mask
    ((__v4di) __B, _mm_setzero_ph (), __A);
}
/* Intrinsics vcvtph2w.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtph_epi16 (__m128h __A)
{
  return (__m128i) __builtin_ia32_vcvtph2w128_mask
    (__A, (__v8hi) _mm_setzero_si128 (), (__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtph_epi16 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return (__m128i) __builtin_ia32_vcvtph2w128_mask (__C, (__v8hi) __A, __B);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtph_epi16 (__mmask8 __A, __m128h __B)
{
  return (__m128i) __builtin_ia32_vcvtph2w128_mask
    (__B, (__v8hi) _mm_setzero_si128 (), __A);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtph_epi16 (__m256h __A)
{
  return (__m256i) __builtin_ia32_vcvtph2w256_mask
    (__A, (__v16hi) _mm256_setzero_si256 (), (__mmask16) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtph_epi16 (__m256i __A, __mmask16 __B, __m256h __C)
{
  return (__m256i) __builtin_ia32_vcvtph2w256_mask (__C, (__v16hi) __A, __B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtph_epi16 (__mmask16 __A, __m256h __B)
{
  return (__m256i) __builtin_ia32_vcvtph2w256_mask
    (__B, (__v16hi) _mm256_setzero_si256 (), __A);
}

/* Intrinsics vcvtph2uw.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtph_epu16 (__m128h __A)
{
  return (__m128i) __builtin_ia32_vcvtph2uw128_mask
    (__A, (__v8hi) _mm_setzero_si128 (), (__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtph_epu16 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return (__m128i) __builtin_ia32_vcvtph2uw128_mask (__C, (__v8hi) __A, __B);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtph_epu16 (__mmask8 __A, __m128h __B)
{
  return (__m128i) __builtin_ia32_vcvtph2uw128_mask
    (__B, (__v8hi) _mm_setzero_si128 (), __A);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtph_epu16 (__m256h __A)
{
  return (__m256i) __builtin_ia32_vcvtph2uw256_mask
    (__A, (__v16hi) _mm256_setzero_si256 (), (__mmask16) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtph_epu16 (__m256i __A, __mmask16 __B, __m256h __C)
{
  return (__m256i) __builtin_ia32_vcvtph2uw256_mask (__C, (__v16hi) __A, __B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtph_epu16 (__mmask16 __A, __m256h __B)
{
  return (__m256i) __builtin_ia32_vcvtph2uw256_mask
    (__B, (__v16hi) _mm256_setzero_si256 (), __A);
}

/* Intrinsics vcvttph2w.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttph_epi16 (__m128h __A)
{
  return (__m128i) __builtin_ia32_vcvttph2w128_mask
    (__A, (__v8hi) _mm_setzero_si128 (), (__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvttph_epi16 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return (__m128i) __builtin_ia32_vcvttph2w128_mask (__C, (__v8hi) __A, __B);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvttph_epi16 (__mmask8 __A, __m128h __B)
{
  return (__m128i) __builtin_ia32_vcvttph2w128_mask
    (__B, (__v8hi) _mm_setzero_si128 (), __A);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttph_epi16 (__m256h __A)
{
  return (__m256i) __builtin_ia32_vcvttph2w256_mask
    (__A, (__v16hi) _mm256_setzero_si256 (), (__mmask16) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvttph_epi16 (__m256i __A, __mmask16 __B, __m256h __C)
{
  return (__m256i) __builtin_ia32_vcvttph2w256_mask (__C, (__v16hi) __A, __B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvttph_epi16 (__mmask16 __A, __m256h __B)
{
  return (__m256i) __builtin_ia32_vcvttph2w256_mask
    (__B, (__v16hi) _mm256_setzero_si256 (), __A);
}

/* Intrinsics vcvttph2uw.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttph_epu16 (__m128h __A)
{
  return (__m128i) __builtin_ia32_vcvttph2uw128_mask
    (__A, (__v8hi) _mm_setzero_si128 (), (__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvttph_epu16 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return (__m128i) __builtin_ia32_vcvttph2uw128_mask (__C, (__v8hi) __A, __B);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvttph_epu16 (__mmask8 __A, __m128h __B)
{
  return (__m128i) __builtin_ia32_vcvttph2uw128_mask
    (__B, (__v8hi) _mm_setzero_si128 (), __A);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttph_epu16 (__m256h __A)
{
  return (__m256i) __builtin_ia32_vcvttph2uw256_mask
    (__A, (__v16hi) _mm256_setzero_si256 (), (__mmask16) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvttph_epu16 (__m256i __A, __mmask16 __B, __m256h __C)
{
  return (__m256i) __builtin_ia32_vcvttph2uw256_mask (__C, (__v16hi) __A, __B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvttph_epu16 (__mmask16 __A, __m256h __B)
{
  return (__m256i) __builtin_ia32_vcvttph2uw256_mask
    (__B, (__v16hi) _mm256_setzero_si256 (), __A);
}
/* Intrinsics vcvtw2ph.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi16_ph (__m128i __A)
{
  return __builtin_ia32_vcvtw2ph128_mask
    ((__v8hi) __A, _mm_setzero_ph (), (__mmask8) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepi16_ph (__m128h __A, __mmask8 __B, __m128i __C)
{
  return __builtin_ia32_vcvtw2ph128_mask ((__v8hi) __C, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepi16_ph (__mmask8 __A, __m128i __B)
{
  return __builtin_ia32_vcvtw2ph128_mask
    ((__v8hi) __B, _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi16_ph (__m256i __A)
{
  return __builtin_ia32_vcvtw2ph256_mask
    ((__v16hi) __A, _mm256_setzero_ph (), (__mmask16) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi16_ph (__m256h __A, __mmask16 __B, __m256i __C)
{
  return __builtin_ia32_vcvtw2ph256_mask ((__v16hi) __C, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepi16_ph (__mmask16 __A, __m256i __B)
{
  return __builtin_ia32_vcvtw2ph256_mask
    ((__v16hi) __B, _mm256_setzero_ph (), __A);
}
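/* Usage sketch (illustrative; names are hypothetical): vcvtw2ph is a
   lane-for-lane conversion, so the 256-bit form converts sixteen
   16-bit integers and takes a full __mmask16.

     __m256i __w  = _mm256_set1_epi16 (-3);
     __m256h __ph = _mm256_maskz_cvtepi16_ph (0x00FF, __w);

   Lanes 0-7 of __ph hold -3.0; lanes 8-15 are zeroed.  */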
/* Intrinsics vcvtuw2ph.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu16_ph (__m128i __A)
{
  return __builtin_ia32_vcvtuw2ph128_mask
    ((__v8hi) __A, _mm_setzero_ph (), (__mmask8) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepu16_ph (__m128h __A, __mmask8 __B, __m128i __C)
{
  return __builtin_ia32_vcvtuw2ph128_mask ((__v8hi) __C, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepu16_ph (__mmask8 __A, __m128i __B)
{
  return __builtin_ia32_vcvtuw2ph128_mask
    ((__v8hi) __B, _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu16_ph (__m256i __A)
{
  return __builtin_ia32_vcvtuw2ph256_mask
    ((__v16hi) __A, _mm256_setzero_ph (), (__mmask16) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepu16_ph (__m256h __A, __mmask16 __B, __m256i __C)
{
  return __builtin_ia32_vcvtuw2ph256_mask ((__v16hi) __C, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepu16_ph (__mmask16 __A, __m256i __B)
{
  return __builtin_ia32_vcvtuw2ph256_mask
    ((__v16hi) __B, _mm256_setzero_ph (), __A);
}

/* Intrinsics vcvtph2pd.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtph_pd (__m128h __A)
{
  return __builtin_ia32_vcvtph2pd128_mask
    (__A, _mm_setzero_pd (), (__mmask8) -1);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtph_pd (__m128d __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvtph2pd128_mask (__C, __A, __B);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtph_pd (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvtph2pd128_mask (__B, _mm_setzero_pd (), __A);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtph_pd (__m128h __A)
{
  return __builtin_ia32_vcvtph2pd256_mask
    (__A, _mm256_setzero_pd (), (__mmask8) -1);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtph_pd (__m256d __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvtph2pd256_mask (__C, __A, __B);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtph_pd (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvtph2pd256_mask (__B, _mm256_setzero_pd (), __A);
}
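/* Usage sketch (illustrative; names are hypothetical): vcvtph2pd
   widens FP16 to double, so a __m128h source feeds at most the low
   four lanes of a __m256d result.

     __m128h __h = _mm_set1_ph ((_Float16) 1.5f);
     __m256d __d = _mm256_cvtph_pd (__h);

   All four double lanes of __d hold 1.5 exactly.  */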
/* Intrinsics vcvtph2ps.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtxph_ps (__m128h __A)
{
  return __builtin_ia32_vcvtph2psx128_mask
    (__A, _mm_setzero_ps (), (__mmask8) -1);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtxph_ps (__m128 __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvtph2psx128_mask (__C, __A, __B);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtxph_ps (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvtph2psx128_mask (__B, _mm_setzero_ps (), __A);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtxph_ps (__m128h __A)
{
  return __builtin_ia32_vcvtph2psx256_mask
    (__A, _mm256_setzero_ps (), (__mmask8) -1);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtxph_ps (__m256 __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvtph2psx256_mask (__C, __A, __B);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtxph_ps (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvtph2psx256_mask (__B, _mm256_setzero_ps (), __A);
}

/* Intrinsics vcvtxps2ph.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtxps_ph (__m128 __A)
{
  return __builtin_ia32_vcvtps2phx128_mask
    ((__v4sf) __A, _mm_setzero_ph (), (__mmask8) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtxps_ph (__m128h __A, __mmask8 __B, __m128 __C)
{
  return __builtin_ia32_vcvtps2phx128_mask ((__v4sf) __C, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtxps_ph (__mmask8 __A, __m128 __B)
{
  return __builtin_ia32_vcvtps2phx128_mask
    ((__v4sf) __B, _mm_setzero_ph (), __A);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtxps_ph (__m256 __A)
{
  return __builtin_ia32_vcvtps2phx256_mask
    ((__v8sf) __A, _mm_setzero_ph (), (__mmask8) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtxps_ph (__m128h __A, __mmask8 __B, __m256 __C)
{
  return __builtin_ia32_vcvtps2phx256_mask ((__v8sf) __C, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtxps_ph (__mmask8 __A, __m256 __B)
{
  return __builtin_ia32_vcvtps2phx256_mask
    ((__v8sf) __B, _mm_setzero_ph (), __A);
}
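/* Usage sketch (illustrative; names are hypothetical): the "x" forms
   convert between FP16 and float; narrowing eight floats yields a full
   __m128h, while narrowing four floats fills only its low half.

     __m256  __f = _mm256_set1_ps (0.5f);
     __m128h __h = _mm256_cvtxps_ph (__f);

   All eight FP16 lanes of __h hold 0.5.  */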
/* Intrinsics vcvtpd2ph.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpd_ph (__m128d __A)
{
  return __builtin_ia32_vcvtpd2ph128_mask
    ((__v2df) __A, _mm_setzero_ph (), (__mmask8) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtpd_ph (__m128h __A, __mmask8 __B, __m128d __C)
{
  return __builtin_ia32_vcvtpd2ph128_mask ((__v2df) __C, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtpd_ph (__mmask8 __A, __m128d __B)
{
  return __builtin_ia32_vcvtpd2ph128_mask
    ((__v2df) __B, _mm_setzero_ph (), __A);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtpd_ph (__m256d __A)
{
  return __builtin_ia32_vcvtpd2ph256_mask
    ((__v4df) __A, _mm_setzero_ph (), (__mmask8) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtpd_ph (__m128h __A, __mmask8 __B, __m256d __C)
{
  return __builtin_ia32_vcvtpd2ph256_mask ((__v4df) __C, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtpd_ph (__mmask8 __A, __m256d __B)
{
  return __builtin_ia32_vcvtpd2ph256_mask
    ((__v4df) __B, _mm_setzero_ph (), __A);
}
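/* Usage sketch (illustrative; names are hypothetical): narrowing
   double to FP16 fills only as many result lanes as there are source
   elements; the remaining lanes of the __m128h result are zeroed.

     __m256d __d = _mm256_set1_pd (2.25);
     __m128h __h = _mm256_cvtpd_ph (__d);

   Lanes 0-3 of __h hold 2.25; lanes 4-7 are zero.  */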
/* Intrinsics vfmaddsub[132,213,231]ph.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmaddsub_ph (__m256h __A, __m256h __B, __m256h __C)
{
  return (__m256h) __builtin_ia32_vfmaddsubph256_mask
    ((__v16hf) __A, (__v16hf) __B, (__v16hf) __C, (__mmask16) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fmaddsub_ph (__m256h __A, __mmask16 __U, __m256h __B, __m256h __C)
{
  return (__m256h) __builtin_ia32_vfmaddsubph256_mask
    ((__v16hf) __A, (__v16hf) __B, (__v16hf) __C, (__mmask16) __U);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask3_fmaddsub_ph (__m256h __A, __m256h __B, __m256h __C, __mmask16 __U)
{
  return (__m256h) __builtin_ia32_vfmaddsubph256_mask3
    ((__v16hf) __A, (__v16hf) __B, (__v16hf) __C, (__mmask16) __U);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fmaddsub_ph (__mmask16 __U, __m256h __A, __m256h __B, __m256h __C)
{
  return (__m256h) __builtin_ia32_vfmaddsubph256_maskz
    ((__v16hf) __A, (__v16hf) __B, (__v16hf) __C, (__mmask16) __U);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmaddsub_ph (__m128h __A, __m128h __B, __m128h __C)
{
  return (__m128h) __builtin_ia32_vfmaddsubph128_mask
    ((__v8hf) __A, (__v8hf) __B, (__v8hf) __C, (__mmask8) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmaddsub_ph (__m128h __A, __mmask8 __U, __m128h __B, __m128h __C)
{
  return (__m128h) __builtin_ia32_vfmaddsubph128_mask
    ((__v8hf) __A, (__v8hf) __B, (__v8hf) __C, (__mmask8) __U);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fmaddsub_ph (__m128h __A, __m128h __B, __m128h __C, __mmask8 __U)
{
  return (__m128h) __builtin_ia32_vfmaddsubph128_mask3
    ((__v8hf) __A, (__v8hf) __B, (__v8hf) __C, (__mmask8) __U);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmaddsub_ph (__mmask8 __U, __m128h __A, __m128h __B, __m128h __C)
{
  return (__m128h) __builtin_ia32_vfmaddsubph128_maskz
    ((__v8hf) __A, (__v8hf) __B, (__v8hf) __C, (__mmask8) __U);
}
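/* Usage sketch (illustrative; names are hypothetical): fmaddsub
   computes __A * __B, then subtracts __C in the even-indexed lanes
   and adds __C in the odd-indexed lanes.

     __m128h __r = _mm_fmaddsub_ph (_mm_set1_ph ((_Float16) 2.0f),
                                    _mm_set1_ph ((_Float16) 3.0f),
                                    _mm_set1_ph ((_Float16) 1.0f));

   Even lanes of __r hold 5.0 (2*3 - 1); odd lanes hold 7.0 (2*3 + 1).  */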
/* Intrinsics vfmsubadd[132,213,231]ph.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmsubadd_ph (__m256h __A, __m256h __B, __m256h __C)
{
  return (__m256h) __builtin_ia32_vfmsubaddph256_mask
    ((__v16hf) __A, (__v16hf) __B, (__v16hf) __C, (__mmask16) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fmsubadd_ph (__m256h __A, __mmask16 __U, __m256h __B, __m256h __C)
{
  return (__m256h) __builtin_ia32_vfmsubaddph256_mask
    ((__v16hf) __A, (__v16hf) __B, (__v16hf) __C, (__mmask16) __U);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask3_fmsubadd_ph (__m256h __A, __m256h __B, __m256h __C, __mmask16 __U)
{
  return (__m256h) __builtin_ia32_vfmsubaddph256_mask3
    ((__v16hf) __A, (__v16hf) __B, (__v16hf) __C, (__mmask16) __U);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fmsubadd_ph (__mmask16 __U, __m256h __A, __m256h __B, __m256h __C)
{
  return (__m256h) __builtin_ia32_vfmsubaddph256_maskz
    ((__v16hf) __A, (__v16hf) __B, (__v16hf) __C, (__mmask16) __U);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsubadd_ph (__m128h __A, __m128h __B, __m128h __C)
{
  return (__m128h) __builtin_ia32_vfmsubaddph128_mask
    ((__v8hf) __A, (__v8hf) __B, (__v8hf) __C, (__mmask8) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmsubadd_ph (__m128h __A, __mmask8 __U, __m128h __B, __m128h __C)
{
  return (__m128h) __builtin_ia32_vfmsubaddph128_mask
    ((__v8hf) __A, (__v8hf) __B, (__v8hf) __C, (__mmask8) __U);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fmsubadd_ph (__m128h __A, __m128h __B, __m128h __C, __mmask8 __U)
{
  return (__m128h) __builtin_ia32_vfmsubaddph128_mask3
    ((__v8hf) __A, (__v8hf) __B, (__v8hf) __C, (__mmask8) __U);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmsubadd_ph (__mmask8 __U, __m128h __A, __m128h __B, __m128h __C)
{
  return (__m128h) __builtin_ia32_vfmsubaddph128_maskz
    ((__v8hf) __A, (__v8hf) __B, (__v8hf) __C, (__mmask8) __U);
}

/* Intrinsics vfmadd[132,213,231]ph.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmadd_ph (__m256h __A, __m256h __B, __m256h __C)
{
  return (__m256h) __builtin_ia32_vfmaddph256_mask
    ((__v16hf) __A, (__v16hf) __B, (__v16hf) __C, (__mmask16) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fmadd_ph (__m256h __A, __mmask16 __U, __m256h __B, __m256h __C)
{
  return (__m256h) __builtin_ia32_vfmaddph256_mask
    ((__v16hf) __A, (__v16hf) __B, (__v16hf) __C, (__mmask16) __U);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask3_fmadd_ph (__m256h __A, __m256h __B, __m256h __C, __mmask16 __U)
{
  return (__m256h) __builtin_ia32_vfmaddph256_mask3
    ((__v16hf) __A, (__v16hf) __B, (__v16hf) __C, (__mmask16) __U);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fmadd_ph (__mmask16 __U, __m256h __A, __m256h __B, __m256h __C)
{
  return (__m256h) __builtin_ia32_vfmaddph256_maskz
    ((__v16hf) __A, (__v16hf) __B, (__v16hf) __C, (__mmask16) __U);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_ph (__m128h __A, __m128h __B, __m128h __C)
{
  return (__m128h) __builtin_ia32_vfmaddph128_mask
    ((__v8hf) __A, (__v8hf) __B, (__v8hf) __C, (__mmask8) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmadd_ph (__m128h __A, __mmask8 __U, __m128h __B, __m128h __C)
{
  return (__m128h) __builtin_ia32_vfmaddph128_mask
    ((__v8hf) __A, (__v8hf) __B, (__v8hf) __C, (__mmask8) __U);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fmadd_ph (__m128h __A, __m128h __B, __m128h __C, __mmask8 __U)
{
  return (__m128h) __builtin_ia32_vfmaddph128_mask3
    ((__v8hf) __A, (__v8hf) __B, (__v8hf) __C, (__mmask8) __U);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmadd_ph (__mmask8 __U, __m128h __A, __m128h __B, __m128h __C)
{
  return (__m128h) __builtin_ia32_vfmaddph128_maskz
    ((__v8hf) __A, (__v8hf) __B, (__v8hf) __C, (__mmask8) __U);
}
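/* Usage sketch (illustrative; __a, __b, __c and __m are hypothetical):
   the three masked forms differ only in what the unselected lanes
   receive.

     __m128h __r1 = _mm_mask_fmadd_ph (__a, __m, __b, __c);
     __m128h __r2 = _mm_mask3_fmadd_ph (__a, __b, __c, __m);
     __m128h __r3 = _mm_maskz_fmadd_ph (__m, __a, __b, __c);

   Where a bit of __m is 0, __r1 keeps the lane of __a, __r2 keeps the
   lane of the addend __c, and __r3 is zeroed.  */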
/* Intrinsics vfnmadd[132,213,231]ph.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fnmadd_ph (__m256h __A, __m256h __B, __m256h __C)
{
  return (__m256h) __builtin_ia32_vfnmaddph256_mask
    ((__v16hf) __A, (__v16hf) __B, (__v16hf) __C, (__mmask16) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fnmadd_ph (__m256h __A, __mmask16 __U, __m256h __B, __m256h __C)
{
  return (__m256h) __builtin_ia32_vfnmaddph256_mask
    ((__v16hf) __A, (__v16hf) __B, (__v16hf) __C, (__mmask16) __U);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask3_fnmadd_ph (__m256h __A, __m256h __B, __m256h __C, __mmask16 __U)
{
  return (__m256h) __builtin_ia32_vfnmaddph256_mask3
    ((__v16hf) __A, (__v16hf) __B, (__v16hf) __C, (__mmask16) __U);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fnmadd_ph (__mmask16 __U, __m256h __A, __m256h __B, __m256h __C)
{
  return (__m256h) __builtin_ia32_vfnmaddph256_maskz
    ((__v16hf) __A, (__v16hf) __B, (__v16hf) __C, (__mmask16) __U);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmadd_ph (__m128h __A, __m128h __B, __m128h __C)
{
  return (__m128h) __builtin_ia32_vfnmaddph128_mask
    ((__v8hf) __A, (__v8hf) __B, (__v8hf) __C, (__mmask8) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fnmadd_ph (__m128h __A, __mmask8 __U, __m128h __B, __m128h __C)
{
  return (__m128h) __builtin_ia32_vfnmaddph128_mask
    ((__v8hf) __A, (__v8hf) __B, (__v8hf) __C, (__mmask8) __U);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fnmadd_ph (__m128h __A, __m128h __B, __m128h __C, __mmask8 __U)
{
  return (__m128h) __builtin_ia32_vfnmaddph128_mask3
    ((__v8hf) __A, (__v8hf) __B, (__v8hf) __C, (__mmask8) __U);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fnmadd_ph (__mmask8 __U, __m128h __A, __m128h __B, __m128h __C)
{
  return (__m128h) __builtin_ia32_vfnmaddph128_maskz
    ((__v8hf) __A, (__v8hf) __B, (__v8hf) __C, (__mmask8) __U);
}

/* Intrinsics vfmsub[132,213,231]ph.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmsub_ph (__m256h __A, __m256h __B, __m256h __C)
{
  return (__m256h) __builtin_ia32_vfmsubph256_mask
    ((__v16hf) __A, (__v16hf) __B, (__v16hf) __C, (__mmask16) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fmsub_ph (__m256h __A, __mmask16 __U, __m256h __B, __m256h __C)
{
  return (__m256h) __builtin_ia32_vfmsubph256_mask
    ((__v16hf) __A, (__v16hf) __B, (__v16hf) __C, (__mmask16) __U);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask3_fmsub_ph (__m256h __A, __m256h __B, __m256h __C, __mmask16 __U)
{
  return (__m256h) __builtin_ia32_vfmsubph256_mask3
    ((__v16hf) __A, (__v16hf) __B, (__v16hf) __C, (__mmask16) __U);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fmsub_ph (__mmask16 __U, __m256h __A, __m256h __B, __m256h __C)
{
  return (__m256h) __builtin_ia32_vfmsubph256_maskz
    ((__v16hf) __A, (__v16hf) __B, (__v16hf) __C, (__mmask16) __U);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_ph (__m128h __A, __m128h __B, __m128h __C)
{
  return (__m128h) __builtin_ia32_vfmsubph128_mask
    ((__v8hf) __A, (__v8hf) __B, (__v8hf) __C, (__mmask8) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmsub_ph (__m128h __A, __mmask8 __U, __m128h __B, __m128h __C)
{
  return (__m128h) __builtin_ia32_vfmsubph128_mask
    ((__v8hf) __A, (__v8hf) __B, (__v8hf) __C, (__mmask8) __U);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fmsub_ph (__m128h __A, __m128h __B, __m128h __C, __mmask8 __U)
{
  return (__m128h) __builtin_ia32_vfmsubph128_mask3
    ((__v8hf) __A, (__v8hf) __B, (__v8hf) __C, (__mmask8) __U);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmsub_ph (__mmask8 __U, __m128h __A, __m128h __B, __m128h __C)
{
  return (__m128h) __builtin_ia32_vfmsubph128_maskz
    ((__v8hf) __A, (__v8hf) __B, (__v8hf) __C, (__mmask8) __U);
}

/* Intrinsics vfnmsub[132,213,231]ph.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fnmsub_ph (__m256h __A, __m256h __B, __m256h __C)
{
  return (__m256h) __builtin_ia32_vfnmsubph256_mask
    ((__v16hf) __A, (__v16hf) __B, (__v16hf) __C, (__mmask16) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fnmsub_ph (__m256h __A, __mmask16 __U, __m256h __B, __m256h __C)
{
  return (__m256h) __builtin_ia32_vfnmsubph256_mask
    ((__v16hf) __A, (__v16hf) __B, (__v16hf) __C, (__mmask16) __U);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask3_fnmsub_ph (__m256h __A, __m256h __B, __m256h __C, __mmask16 __U)
{
  return (__m256h) __builtin_ia32_vfnmsubph256_mask3
    ((__v16hf) __A, (__v16hf) __B, (__v16hf) __C, (__mmask16) __U);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fnmsub_ph (__mmask16 __U, __m256h __A, __m256h __B, __m256h __C)
{
  return (__m256h) __builtin_ia32_vfnmsubph256_maskz
    ((__v16hf) __A, (__v16hf) __B, (__v16hf) __C, (__mmask16) __U);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_ph (__m128h __A, __m128h __B, __m128h __C)
{
  return (__m128h) __builtin_ia32_vfnmsubph128_mask
    ((__v8hf) __A, (__v8hf) __B, (__v8hf) __C, (__mmask8) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fnmsub_ph (__m128h __A, __mmask8 __U, __m128h __B, __m128h __C)
{
  return (__m128h) __builtin_ia32_vfnmsubph128_mask
    ((__v8hf) __A, (__v8hf) __B, (__v8hf) __C, (__mmask8) __U);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fnmsub_ph (__m128h __A, __m128h __B, __m128h __C, __mmask8 __U)
{
  return (__m128h) __builtin_ia32_vfnmsubph128_mask3
    ((__v8hf) __A, (__v8hf) __B, (__v8hf) __C, (__mmask8) __U);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fnmsub_ph (__mmask8 __U, __m128h __A, __m128h __B, __m128h __C)
{
  return (__m128h) __builtin_ia32_vfnmsubph128_maskz
    ((__v8hf) __A, (__v8hf) __B, (__v8hf) __C, (__mmask8) __U);
}
/* Intrinsics vf[,c]maddcph.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_pch (__m128h __A, __m128h __B, __m128h __C)
{
  return (__m128h) __builtin_ia32_vfmaddcph128
    ((__v8hf) __A, (__v8hf) __B, (__v8hf) __C);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmadd_pch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return (__m128h) __builtin_ia32_vfmaddcph128_mask
    ((__v8hf) __A, (__v8hf) __C, (__v8hf) __D, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fmadd_pch (__m128h __A, __m128h __B, __m128h __C, __mmask8 __D)
{
  return (__m128h) __builtin_ia32_vfmaddcph128_mask3
    ((__v8hf) __A, (__v8hf) __B, (__v8hf) __C, __D);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmadd_pch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D)
{
  return (__m128h) __builtin_ia32_vfmaddcph128_maskz
    ((__v8hf) __B, (__v8hf) __C, (__v8hf) __D, __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmadd_pch (__m256h __A, __m256h __B, __m256h __C)
{
  return (__m256h) __builtin_ia32_vfmaddcph256
    ((__v16hf) __A, (__v16hf) __B, (__v16hf) __C);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fmadd_pch (__m256h __A, __mmask8 __B, __m256h __C, __m256h __D)
{
  return (__m256h) __builtin_ia32_vfmaddcph256_mask
    ((__v16hf) __A, (__v16hf) __C, (__v16hf) __D, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask3_fmadd_pch (__m256h __A, __m256h __B, __m256h __C, __mmask8 __D)
{
  return (__m256h) __builtin_ia32_vfmaddcph256_mask3
    ((__v16hf) __A, (__v16hf) __B, (__v16hf) __C, __D);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fmadd_pch (__mmask8 __A, __m256h __B, __m256h __C, __m256h __D)
{
  return (__m256h) __builtin_ia32_vfmaddcph256_maskz
    ((__v16hf) __B, (__v16hf) __C, (__v16hf) __D, __A);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fcmadd_pch (__m128h __A, __m128h __B, __m128h __C)
{
  return (__m128h) __builtin_ia32_vfcmaddcph128
    ((__v8hf) __A, (__v8hf) __B, (__v8hf) __C);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fcmadd_pch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return (__m128h) __builtin_ia32_vfcmaddcph128_mask
    ((__v8hf) __A, (__v8hf) __C, (__v8hf) __D, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fcmadd_pch (__m128h __A, __m128h __B, __m128h __C, __mmask8 __D)
{
  return (__m128h) __builtin_ia32_vfcmaddcph128_mask3
    ((__v8hf) __A, (__v8hf) __B, (__v8hf) __C, __D);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fcmadd_pch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D)
{
  return (__m128h) __builtin_ia32_vfcmaddcph128_maskz
    ((__v8hf) __B, (__v8hf) __C, (__v8hf) __D, __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fcmadd_pch (__m256h __A, __m256h __B, __m256h __C)
{
  return (__m256h) __builtin_ia32_vfcmaddcph256
    ((__v16hf) __A, (__v16hf) __B, (__v16hf) __C);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fcmadd_pch (__m256h __A, __mmask8 __B, __m256h __C, __m256h __D)
{
  return (__m256h) __builtin_ia32_vfcmaddcph256_mask
    ((__v16hf) __A, (__v16hf) __C, (__v16hf) __D, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask3_fcmadd_pch (__m256h __A, __m256h __B, __m256h __C, __mmask8 __D)
{
  return (__m256h) __builtin_ia32_vfcmaddcph256_mask3
    ((__v16hf) __A, (__v16hf) __B, (__v16hf) __C, __D);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fcmadd_pch (__mmask8 __A, __m256h __B, __m256h __C, __m256h __D)
{
  return (__m256h) __builtin_ia32_vfcmaddcph256_maskz
    ((__v16hf) __B, (__v16hf) __C, (__v16hf) __D, __A);
}
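/* Usage sketch (illustrative; names are hypothetical): the pch forms
   treat each even/odd pair of FP16 lanes as one complex number
   (even lane = real part, odd lane = imaginary part), so a __m128h
   carries four complex values and the mask has one bit per complex
   element.

     __m128h __r  = _mm_fmadd_pch (__x, __y, __acc);
     __m128h __rc = _mm_fcmadd_pch (__x, __y, __acc);

   __r accumulates __acc + __x*__y in complex arithmetic per lane pair;
   __rc is the conjugate variant, which multiplies by the complex
   conjugate of the second operand.  */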
/* Intrinsics vf[,c]mulcph.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmul_pch (__m128h __A, __m128h __B)
{
  return (__m128h) __builtin_ia32_vfmulcph128 ((__v8hf) __A, (__v8hf) __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmul_pch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return (__m128h) __builtin_ia32_vfmulcph128_mask ((__v8hf) __C,
                                                    (__v8hf) __D,
                                                    (__v8hf) __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmul_pch (__mmask8 __A, __m128h __B, __m128h __C)
{
  return (__m128h) __builtin_ia32_vfmulcph128_mask ((__v8hf) __B,
                                                    (__v8hf) __C,
                                                    _mm_setzero_ph (),
                                                    __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmul_pch (__m256h __A, __m256h __B)
{
  return (__m256h) __builtin_ia32_vfmulcph256 ((__v16hf) __A,
                                               (__v16hf) __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fmul_pch (__m256h __A, __mmask8 __B, __m256h __C, __m256h __D)
{
  return (__m256h) __builtin_ia32_vfmulcph256_mask ((__v16hf) __C,
                                                    (__v16hf) __D,
                                                    (__v16hf) __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fmul_pch (__mmask8 __A, __m256h __B, __m256h __C)
{
  return (__m256h) __builtin_ia32_vfmulcph256_mask ((__v16hf) __B,
                                                    (__v16hf) __C,
                                                    _mm256_setzero_ph (),
                                                    __A);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fcmul_pch (__m128h __A, __m128h __B)
{
  return (__m128h) __builtin_ia32_vfcmulcph128 ((__v8hf) __A,
                                                (__v8hf) __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fcmul_pch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return (__m128h) __builtin_ia32_vfcmulcph128_mask ((__v8hf) __C,
                                                     (__v8hf) __D,
                                                     (__v8hf) __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fcmul_pch (__mmask8 __A, __m128h __B, __m128h __C)
{
  return (__m128h) __builtin_ia32_vfcmulcph128_mask ((__v8hf) __B,
                                                     (__v8hf) __C,
                                                     _mm_setzero_ph (),
                                                     __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fcmul_pch (__m256h __A, __m256h __B)
{
  return (__m256h) __builtin_ia32_vfcmulcph256 ((__v16hf) __A, (__v16hf) __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fcmul_pch (__m256h __A, __mmask8 __B, __m256h __C, __m256h __D)
{
  return (__m256h) __builtin_ia32_vfcmulcph256_mask ((__v16hf) __C,
                                                     (__v16hf) __D,
                                                     (__v16hf) __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fcmul_pch (__mmask8 __A, __m256h __B, __m256h __C)
{
  return (__m256h) __builtin_ia32_vfcmulcph256_mask ((__v16hf) __B,
                                                     (__v16hf) __C,
                                                     _mm256_setzero_ph (),
                                                     __A);
}
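/* Usage sketch for the f[,c]mul_pch family above (illustrative only):
   _mm_fmul_pch forms the elementwise complex product __A * __B, while
   _mm_fcmul_pch conjugates the second operand first.  Note that the
   maskz variants reuse the _mask builtin with a zero vector as the
   pass-through operand rather than a dedicated _maskz builtin:

     __m128h __p = _mm_fmul_pch (__a, __b);            // __a * __b
     __m128h __q = _mm_fcmul_pch (__a, __b);           // __a * conj (__b)
     __m128h __r = _mm_maskz_fmul_pch (0x5, __a, __b); // lanes 1, 3 zeroed

   The mask 0x5 keeps complex lanes 0 and 2; __a and __b are
   hypothetical __m128h values.  */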
#define _MM256_REDUCE_OP(op) \
  __m128h __T1 = (__m128h) _mm256_extractf128_pd ((__m256d) __A, 0); \
  __m128h __T2 = (__m128h) _mm256_extractf128_pd ((__m256d) __A, 1); \
  __m128h __T3 = (__T1 op __T2); \
  __m128h __T4 = (__m128h) __builtin_shuffle (__T3, \
                 (__v8hi) { 4, 5, 6, 7, 0, 1, 2, 3 }); \
  __m128h __T5 = (__T3) op (__T4); \
  __m128h __T6 = (__m128h) __builtin_shuffle (__T5, \
                 (__v8hi) { 2, 3, 0, 1, 4, 5, 6, 7 }); \
  __m128h __T7 = __T5 op __T6; \
  return __T7[0] op __T7[1]

extern __inline _Float16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_reduce_add_ph (__m256h __A)
{
  _MM256_REDUCE_OP (+);
}

extern __inline _Float16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_reduce_mul_ph (__m256h __A)
{
  _MM256_REDUCE_OP (*);
}

#undef _MM256_REDUCE_OP
#define _MM256_REDUCE_OP(op) \
  __m128h __T1 = (__m128h) _mm256_extractf128_pd ((__m256d) __A, 0); \
  __m128h __T2 = (__m128h) _mm256_extractf128_pd ((__m256d) __A, 1); \
  __m128h __T3 = _mm_##op (__T1, __T2); \
  __m128h __T4 = (__m128h) __builtin_shuffle (__T3, \
                 (__v8hi) { 2, 3, 0, 1, 6, 7, 4, 5 }); \
  __m128h __T5 = _mm_##op (__T3, __T4); \
  __m128h __T6 = (__m128h) __builtin_shuffle (__T5, (__v8hi) { 4, 5 }); \
  __m128h __T7 = _mm_##op (__T5, __T6); \
  __m128h __T8 = (__m128h) __builtin_shuffle (__T7, (__v8hi) { 1, 0 }); \
  __m128h __T9 = _mm_##op (__T7, __T8); \
  return __T9[0]

extern __inline _Float16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_reduce_min_ph (__m256h __A)
{
  _MM256_REDUCE_OP (min_ph);
}

extern __inline _Float16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_reduce_max_ph (__m256h __A)
{
  _MM256_REDUCE_OP (max_ph);
}

#define _MM_REDUCE_OP(op) \
  __m128h __T1 = (__m128h) __builtin_shuffle (__A, \
                 (__v8hi) { 4, 5, 6, 7, 0, 1, 2, 3 }); \
  __m128h __T2 = (__A) op (__T1); \
  __m128h __T3 = (__m128h) __builtin_shuffle (__T2, \
                 (__v8hi) { 2, 3, 0, 1, 4, 5, 6, 7 }); \
  __m128h __T4 = __T2 op __T3; \
  return __T4[0] op __T4[1]

extern __inline _Float16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_add_ph (__m128h __A)
{
  _MM_REDUCE_OP (+);
}

extern __inline _Float16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_mul_ph (__m128h __A)
{
  _MM_REDUCE_OP (*);
}

#undef _MM_REDUCE_OP
#define _MM_REDUCE_OP(op) \
  __m128h __T1 = (__m128h) __builtin_shuffle (__A, \
                 (__v8hi) { 2, 3, 0, 1, 6, 7, 4, 5 }); \
  __m128h __T2 = _mm_##op (__A, __T1); \
  __m128h __T3 = (__m128h) __builtin_shuffle (__T2, (__v8hi) { 4, 5 }); \
  __m128h __T4 = _mm_##op (__T2, __T3); \
  __m128h __T5 = (__m128h) __builtin_shuffle (__T4, (__v8hi) { 1, 0 }); \
  __m128h __T6 = _mm_##op (__T4, __T5); \
  return __T6[0]

extern __inline _Float16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_min_ph (__m128h __A)
{
  _MM_REDUCE_OP (min_ph);
}

extern __inline _Float16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_max_ph (__m128h __A)
{
  _MM_REDUCE_OP (max_ph);
}

#undef _MM256_REDUCE_OP
#undef _MM_REDUCE_OP
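/* Sketch of the reduction pattern used above (illustrative only): each
   macro folds the vector in half with __builtin_shuffle and combines the
   halves, so an n-element horizontal reduction completes in log2(n)
   steps.  A scalar equivalent of _mm_reduce_add_ph, for a hypothetical
   __m128h value __x:

     _Float16 __sum = 0.0f16;
     for (int __i = 0; __i < 8; __i++)
       __sum += ((__v8hf) __x)[__i];

   The shuffled tree may round differently from this left-to-right loop,
   since _Float16 addition is not associative.  */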
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_blend_ph (__mmask16 __U, __m256h __A, __m256h __W)
{
  return (__m256h) __builtin_ia32_movdquhi256_mask ((__v16hi) __W,
                                                    (__v16hi) __A,
                                                    (__mmask16) __U);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutex2var_ph (__m256h __A, __m256i __I, __m256h __B)
{
  return (__m256h) __builtin_ia32_vpermi2varhi256_mask ((__v16hi) __A,
                                                        (__v16hi) __I,
                                                        (__v16hi) __B,
                                                        (__mmask16)-1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutexvar_ph (__m256i __A, __m256h __B)
{
  return (__m256h) __builtin_ia32_permvarhi256_mask ((__v16hi) __B,
                                                     (__v16hi) __A,
                                                     (__v16hi)
                                                     (_mm256_setzero_ph ()),
                                                     (__mmask16)-1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_blend_ph (__mmask8 __U, __m128h __A, __m128h __W)
{
  return (__m128h) __builtin_ia32_movdquhi128_mask ((__v8hi) __W,
                                                    (__v8hi) __A,
                                                    (__mmask8) __U);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_permutex2var_ph (__m128h __A, __m128i __I, __m128h __B)
{
  return (__m128h) __builtin_ia32_vpermi2varhi128_mask ((__v8hi) __A,
                                                        (__v8hi) __I,
                                                        (__v8hi) __B,
                                                        (__mmask8)-1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_permutexvar_ph (__m128i __A, __m128h __B)
{
  return (__m128h) __builtin_ia32_permvarhi128_mask ((__v8hi) __B,
                                                     (__v8hi) __A,
                                                     (__v8hi)
                                                     (_mm_setzero_ph ()),
                                                     (__mmask8)-1);
}
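/* Usage sketch for the permute intrinsics above (illustrative only):
   the index vectors are read as 16-bit selectors, one per _Float16
   element.  For _mm_permutex2var_ph, bits [2:0] of each selector pick an
   element and bit 3 chooses between __A and __B.  Reversing a single
   vector with _mm_permutexvar_ph:

     __m128i __idx = _mm_set_epi16 (0, 1, 2, 3, 4, 5, 6, 7);
     __m128h __rev = _mm_permutexvar_ph (__idx, __x);  // __x hypothetical

   _mm_set_epi16 lists elements from index 7 down to 0, so __idx maps
   result element __i to source element 7 - __i.  */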
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set1_pch (_Float16 _Complex __A)
{
  union
  {
    _Float16 _Complex a;
    float b;
  } u = { .a = __A };

  return (__m256h) _mm256_set1_ps (u.b);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_pch (_Float16 _Complex __A)
{
  union
  {
    _Float16 _Complex a;
    float b;
  } u = { .a = __A };

  return (__m128h) _mm_set1_ps (u.b);
}
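/* The unions above type-pun a _Float16 _Complex, whose two adjacent
   half-precision members occupy exactly 32 bits, into a float so the
   existing 32-bit broadcasts can be reused.  Usage sketch (illustrative
   only, assuming <complex.h> is included):

     __m128h __v = _mm_set1_pch (1.0f16 + 2.0f16 * I);
     // __v holds { 1, 2, 1, 2, 1, 2, 1, 2 } as _Float16 elements.  */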
/* Intrinsics below are aliases for f[,c]mul_pch.  */
#define _mm_mul_pch(A, B) _mm_fmul_pch ((A), (B))
#define _mm_mask_mul_pch(W, U, A, B) _mm_mask_fmul_pch ((W), (U), (A), (B))
#define _mm_maskz_mul_pch(U, A, B) _mm_maskz_fmul_pch ((U), (A), (B))
#define _mm256_mul_pch(A, B) _mm256_fmul_pch ((A), (B))
#define _mm256_mask_mul_pch(W, U, A, B) \
  _mm256_mask_fmul_pch ((W), (U), (A), (B))
#define _mm256_maskz_mul_pch(U, A, B) _mm256_maskz_fmul_pch ((U), (A), (B))
#define _mm_cmul_pch(A, B) _mm_fcmul_pch ((A), (B))
#define _mm_mask_cmul_pch(W, U, A, B) _mm_mask_fcmul_pch ((W), (U), (A), (B))
#define _mm_maskz_cmul_pch(U, A, B) _mm_maskz_fcmul_pch ((U), (A), (B))
#define _mm256_cmul_pch(A, B) _mm256_fcmul_pch ((A), (B))
#define _mm256_mask_cmul_pch(W, U, A, B) \
  _mm256_mask_fcmul_pch ((W), (U), (A), (B))
#define _mm256_maskz_cmul_pch(U, A, B) _mm256_maskz_fcmul_pch ((U), (A), (B))
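/* Usage sketch (illustrative only): the aliases make the mul/cmul names
   interchangeable with the fmul/fcmul intrinsics they expand to, e.g.

     __m128h __p = _mm_mul_pch (__a, __b);   // same as _mm_fmul_pch
     __m128h __q = _mm_cmul_pch (__a, __b);  // same as _mm_fcmul_pch

   where __a and __b are hypothetical __m128h values.  */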
#ifdef __DISABLE_AVX512FP16VL__
#undef __DISABLE_AVX512FP16VL__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512FP16VL__ */

#endif /* __AVX512FP16VLINTRIN_H_INCLUDED */