/* Copyright (C) 2019-2022 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512fp16intrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512FP16INTRIN_H_INCLUDED
#define __AVX512FP16INTRIN_H_INCLUDED

#ifndef __AVX512FP16__
#pragma GCC push_options
#pragma GCC target("avx512fp16")
#define __DISABLE_AVX512FP16__
#endif /* __AVX512FP16__ */

/* Internal data types for implementing the intrinsics.  */
typedef _Float16 __v8hf __attribute__ ((__vector_size__ (16)));
typedef _Float16 __v16hf __attribute__ ((__vector_size__ (32)));
typedef _Float16 __v32hf __attribute__ ((__vector_size__ (64)));

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef _Float16 __m128h __attribute__ ((__vector_size__ (16), __may_alias__));
typedef _Float16 __m256h __attribute__ ((__vector_size__ (32), __may_alias__));
typedef _Float16 __m512h __attribute__ ((__vector_size__ (64), __may_alias__));

/* Unaligned version of the same type.  */
typedef _Float16 __m128h_u __attribute__ ((__vector_size__ (16), \
					   __may_alias__, __aligned__ (1)));
typedef _Float16 __m256h_u __attribute__ ((__vector_size__ (32), \
					   __may_alias__, __aligned__ (1)));
typedef _Float16 __m512h_u __attribute__ ((__vector_size__ (64), \
					   __may_alias__, __aligned__ (1)));

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_ph (_Float16 __A7, _Float16 __A6, _Float16 __A5,
	    _Float16 __A4, _Float16 __A3, _Float16 __A2,
	    _Float16 __A1, _Float16 __A0)
{
  return __extension__ (__m128h)(__v8hf){ __A0, __A1, __A2, __A3,
					  __A4, __A5, __A6, __A7 };
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set_ph (_Float16 __A15, _Float16 __A14, _Float16 __A13,
	       _Float16 __A12, _Float16 __A11, _Float16 __A10,
	       _Float16 __A9, _Float16 __A8, _Float16 __A7,
	       _Float16 __A6, _Float16 __A5, _Float16 __A4,
	       _Float16 __A3, _Float16 __A2, _Float16 __A1,
	       _Float16 __A0)
{
  return __extension__ (__m256h)(__v16hf){ __A0, __A1, __A2, __A3,
					   __A4, __A5, __A6, __A7,
					   __A8, __A9, __A10, __A11,
					   __A12, __A13, __A14, __A15 };
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_ph (_Float16 __A31, _Float16 __A30, _Float16 __A29,
	       _Float16 __A28, _Float16 __A27, _Float16 __A26,
	       _Float16 __A25, _Float16 __A24, _Float16 __A23,
	       _Float16 __A22, _Float16 __A21, _Float16 __A20,
	       _Float16 __A19, _Float16 __A18, _Float16 __A17,
	       _Float16 __A16, _Float16 __A15, _Float16 __A14,
	       _Float16 __A13, _Float16 __A12, _Float16 __A11,
	       _Float16 __A10, _Float16 __A9, _Float16 __A8,
	       _Float16 __A7, _Float16 __A6, _Float16 __A5,
	       _Float16 __A4, _Float16 __A3, _Float16 __A2,
	       _Float16 __A1, _Float16 __A0)
{
  return __extension__ (__m512h)(__v32hf){ __A0, __A1, __A2, __A3,
					   __A4, __A5, __A6, __A7,
					   __A8, __A9, __A10, __A11,
					   __A12, __A13, __A14, __A15,
					   __A16, __A17, __A18, __A19,
					   __A20, __A21, __A22, __A23,
					   __A24, __A25, __A26, __A27,
					   __A28, __A29, __A30, __A31 };
}

/* Create vectors of elements in the reversed order from _mm_set_ph,
   _mm256_set_ph and _mm512_set_ph functions.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_ph (_Float16 __A0, _Float16 __A1, _Float16 __A2,
	     _Float16 __A3, _Float16 __A4, _Float16 __A5,
	     _Float16 __A6, _Float16 __A7)
{
  return _mm_set_ph (__A7, __A6, __A5, __A4, __A3, __A2, __A1, __A0);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setr_ph (_Float16 __A0, _Float16 __A1, _Float16 __A2,
		_Float16 __A3, _Float16 __A4, _Float16 __A5,
		_Float16 __A6, _Float16 __A7, _Float16 __A8,
		_Float16 __A9, _Float16 __A10, _Float16 __A11,
		_Float16 __A12, _Float16 __A13, _Float16 __A14,
		_Float16 __A15)
{
  return _mm256_set_ph (__A15, __A14, __A13, __A12, __A11, __A10, __A9,
			__A8, __A7, __A6, __A5, __A4, __A3, __A2, __A1,
			__A0);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setr_ph (_Float16 __A0, _Float16 __A1, _Float16 __A2,
		_Float16 __A3, _Float16 __A4, _Float16 __A5,
		_Float16 __A6, _Float16 __A7, _Float16 __A8,
		_Float16 __A9, _Float16 __A10, _Float16 __A11,
		_Float16 __A12, _Float16 __A13, _Float16 __A14,
		_Float16 __A15, _Float16 __A16, _Float16 __A17,
		_Float16 __A18, _Float16 __A19, _Float16 __A20,
		_Float16 __A21, _Float16 __A22, _Float16 __A23,
		_Float16 __A24, _Float16 __A25, _Float16 __A26,
		_Float16 __A27, _Float16 __A28, _Float16 __A29,
		_Float16 __A30, _Float16 __A31)
{
  return _mm512_set_ph (__A31, __A30, __A29, __A28, __A27, __A26, __A25,
			__A24, __A23, __A22, __A21, __A20, __A19, __A18,
			__A17, __A16, __A15, __A14, __A13, __A12, __A11,
			__A10, __A9, __A8, __A7, __A6, __A5, __A4, __A3,
			__A2, __A1, __A0);
}

/* Broadcast _Float16 to vector.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_ph (_Float16 __A)
{
  return _mm_set_ph (__A, __A, __A, __A, __A, __A, __A, __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set1_ph (_Float16 __A)
{
  return _mm256_set_ph (__A, __A, __A, __A, __A, __A, __A, __A,
			__A, __A, __A, __A, __A, __A, __A, __A);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_ph (_Float16 __A)
{
  return _mm512_set_ph (__A, __A, __A, __A, __A, __A, __A, __A,
			__A, __A, __A, __A, __A, __A, __A, __A,
			__A, __A, __A, __A, __A, __A, __A, __A,
			__A, __A, __A, __A, __A, __A, __A, __A);
}
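
/* Usage sketch (illustrative only, not part of the original header):
   _mm_set_ph takes its arguments from the highest element down to element 0,
   while _mm_setr_ph takes them in memory order, so both calls below build the
   vector { 0, 1, ..., 7 }; _mm512_set1_ph broadcasts one value to all 32
   lanes.

     __m128h __a = _mm_set_ph  (7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f);
     __m128h __b = _mm_setr_ph (0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f);
     __m512h __c = _mm512_set1_ph (1.5f);
*/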

/* Create a vector with all zeros.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_setzero_ph (void)
{
  return _mm_set1_ph (0.0f);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setzero_ph (void)
{
  return _mm256_set1_ph (0.0f);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_ph (void)
{
  return _mm512_set1_ph (0.0f);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_undefined_ph (void)
{
  __m128h __Y = __Y;
  return __Y;
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_undefined_ph (void)
{
  __m256h __Y = __Y;
  return __Y;
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_undefined_ph (void)
{
  __m512h __Y = __Y;
  return __Y;
}
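
/* Note (editorial): the self-initialization `__Y = __Y' above is the idiom
   GCC's intrinsic headers use to return an intentionally unspecified value
   without tripping uninitialized-variable warnings; the contents of the
   result of the _mm*_undefined_ph functions must not be relied upon.  */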

extern __inline _Float16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsh_h (__m128h __A)
{
  return __A[0];
}

extern __inline _Float16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtsh_h (__m256h __A)
{
  return __A[0];
}

extern __inline _Float16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtsh_h (__m512h __A)
{
  return __A[0];
}
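
/* Usage sketch (illustrative only): the _mm*_cvtsh_h functions above read
   back element 0 of a vector as a _Float16 scalar, so

     _Float16 __lo = _mm_cvtsh_h (_mm_set1_ph (2.5f));

   leaves __lo equal to 2.5.  */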

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castph_ps (__m512h __a)
{
  return (__m512) __a;
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castph_pd (__m512h __a)
{
  return (__m512d) __a;
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castph_si512 (__m512h __a)
{
  return (__m512i) __a;
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castph512_ph128 (__m512h __A)
{
  union
  {
    __m128h a[4];
    __m512h v;
  } u = { .v = __A };
  return u.a[0];
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castph512_ph256 (__m512h __A)
{
  union
  {
    __m256h a[2];
    __m512h v;
  } u = { .v = __A };
  return u.a[0];
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castph128_ph512 (__m128h __A)
{
  union
  {
    __m128h a[4];
    __m512h v;
  } u;
  u.a[0] = __A;
  return u.v;
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castph256_ph512 (__m256h __A)
{
  union
  {
    __m256h a[2];
    __m512h v;
  } u;
  u.a[0] = __A;
  return u.v;
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_zextph128_ph512 (__m128h __A)
{
  return (__m512h) _mm512_insertf32x4 (_mm512_setzero_ps (),
				       (__m128) __A, 0);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_zextph256_ph512 (__m256h __A)
{
  return (__m512h) _mm512_insertf64x4 (_mm512_setzero_pd (),
				       (__m256d) __A, 0);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castps_ph (__m512 __a)
{
  return (__m512h) __a;
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castpd_ph (__m512d __a)
{
  return (__m512h) __a;
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castsi512_ph (__m512i __a)
{
  return (__m512h) __a;
}
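
/* Note (editorial): the cast functions above only reinterpret the bit
   pattern; no data is converted or moved.  _mm512_castph512_ph128/_ph256
   return the low 128/256 bits, _mm512_castph128_ph512 and
   _mm512_castph256_ph512 widen with the upper bits left undefined, while the
   _mm512_zextph128_ph512/_mm512_zextph256_ph512 variants widen with the
   upper bits zeroed.  */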

/* Create a vector with element 0 as F and the rest zero.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_sh (_Float16 __F)
{
  return _mm_set_ph (0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, __F);
}

/* Create a vector with element 0 as *P and the rest zero.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_sh (void const *__P)
{
  return _mm_set_ph (0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
		     *(_Float16 const *) __P);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_ph (void const *__P)
{
  return *(const __m512h *) __P;
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_load_ph (void const *__P)
{
  return *(const __m256h *) __P;
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_ph (void const *__P)
{
  return *(const __m128h *) __P;
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_loadu_ph (void const *__P)
{
  return *(const __m512h_u *) __P;
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_loadu_ph (void const *__P)
{
  return *(const __m256h_u *) __P;
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadu_ph (void const *__P)
{
  return *(const __m128h_u *) __P;
}

/* Stores the lower _Float16 value.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_sh (void *__P, __m128h __A)
{
  *(_Float16 *) __P = ((__v8hf)__A)[0];
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_ph (void *__P, __m512h __A)
{
  *(__m512h *) __P = __A;
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_store_ph (void *__P, __m256h __A)
{
  *(__m256h *) __P = __A;
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_ph (void *__P, __m128h __A)
{
  *(__m128h *) __P = __A;
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_storeu_ph (void *__P, __m512h __A)
{
  *(__m512h_u *) __P = __A;
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_storeu_ph (void *__P, __m256h __A)
{
  *(__m256h_u *) __P = __A;
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_storeu_ph (void *__P, __m128h __A)
{
  *(__m128h_u *) __P = __A;
}
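
/* Usage sketch (illustrative only): the _mm*_load_ph/_mm*_store_ph forms
   require the pointer to be 16/32/64-byte aligned, while the loadu/storeu
   forms accept any alignment.  Assuming a buffer of at least 8 _Float16
   values:

     _Float16 __buf[8] = { 0 };
     __m128h __v = _mm_loadu_ph (__buf);
     _mm_storeu_ph (__buf, __v);
*/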

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_abs_ph (__m512h __A)
{
  return (__m512h) _mm512_and_epi32 (_mm512_set1_epi32 (0x7FFF7FFF),
				     (__m512i) __A);
}
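
/* Note (editorial): _mm512_abs_ph computes the absolute value by clearing
   the sign bit of every 16-bit lane; the 32-bit constant 0x7FFF7FFF supplies
   the 0x7FFF mask for each pair of adjacent _Float16 elements.  */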

/* Intrinsics v[add,sub,mul,div]ph.  */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_ph (__m512h __A, __m512h __B)
{
  return (__m512h) ((__v32hf) __A + (__v32hf) __B);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
{
  return __builtin_ia32_addph512_mask (__C, __D, __A, __B);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_ph (__mmask32 __A, __m512h __B, __m512h __C)
{
  return __builtin_ia32_addph512_mask (__B, __C,
				       _mm512_setzero_ph (), __A);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_ph (__m512h __A, __m512h __B)
{
  return (__m512h) ((__v32hf) __A - (__v32hf) __B);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
{
  return __builtin_ia32_subph512_mask (__C, __D, __A, __B);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_ph (__mmask32 __A, __m512h __B, __m512h __C)
{
  return __builtin_ia32_subph512_mask (__B, __C,
				       _mm512_setzero_ph (), __A);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_ph (__m512h __A, __m512h __B)
{
  return (__m512h) ((__v32hf) __A * (__v32hf) __B);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
{
  return __builtin_ia32_mulph512_mask (__C, __D, __A, __B);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_ph (__mmask32 __A, __m512h __B, __m512h __C)
{
  return __builtin_ia32_mulph512_mask (__B, __C,
				       _mm512_setzero_ph (), __A);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_div_ph (__m512h __A, __m512h __B)
{
  return (__m512h) ((__v32hf) __A / (__v32hf) __B);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_div_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
{
  return __builtin_ia32_divph512_mask (__C, __D, __A, __B);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_div_ph (__mmask32 __A, __m512h __B, __m512h __C)
{
  return __builtin_ia32_divph512_mask (__B, __C,
				       _mm512_setzero_ph (), __A);
}
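
/* Usage sketch (illustrative only): in the mask_/maskz_ forms above the
   __mmask32 selects which of the 32 lanes receive the arithmetic result;
   lanes whose mask bit is clear are taken from the pass-through operand
   (mask_) or zeroed (maskz_).  With mask 0x55555555 the even lanes below
   get 1.0 + 2.0 = 3.0 while the odd lanes keep 9.0 or become 0.0:

     __m512h __src = _mm512_set1_ph (9.0f);
     __m512h __x   = _mm512_set1_ph (1.0f);
     __m512h __y   = _mm512_set1_ph (2.0f);
     __m512h __r0  = _mm512_mask_add_ph (__src, 0x55555555, __x, __y);
     __m512h __r1  = _mm512_maskz_add_ph (0x55555555, __x, __y);
*/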

#ifdef __OPTIMIZE__
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_round_ph (__m512h __A, __m512h __B, const int __C)
{
  return __builtin_ia32_addph512_mask_round (__A, __B,
					     _mm512_setzero_ph (),
					     (__mmask32) -1, __C);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
			  __m512h __D, const int __E)
{
  return __builtin_ia32_addph512_mask_round (__C, __D, __A, __B, __E);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
			   const int __D)
{
  return __builtin_ia32_addph512_mask_round (__B, __C,
					     _mm512_setzero_ph (),
					     __A, __D);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_round_ph (__m512h __A, __m512h __B, const int __C)
{
  return __builtin_ia32_subph512_mask_round (__A, __B,
					     _mm512_setzero_ph (),
					     (__mmask32) -1, __C);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
			  __m512h __D, const int __E)
{
  return __builtin_ia32_subph512_mask_round (__C, __D, __A, __B, __E);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
			   const int __D)
{
  return __builtin_ia32_subph512_mask_round (__B, __C,
					     _mm512_setzero_ph (),
					     __A, __D);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_round_ph (__m512h __A, __m512h __B, const int __C)
{
  return __builtin_ia32_mulph512_mask_round (__A, __B,
					     _mm512_setzero_ph (),
					     (__mmask32) -1, __C);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
			  __m512h __D, const int __E)
{
  return __builtin_ia32_mulph512_mask_round (__C, __D, __A, __B, __E);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
			   const int __D)
{
  return __builtin_ia32_mulph512_mask_round (__B, __C,
					     _mm512_setzero_ph (),
					     __A, __D);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_div_round_ph (__m512h __A, __m512h __B, const int __C)
{
  return __builtin_ia32_divph512_mask_round (__A, __B,
					     _mm512_setzero_ph (),
					     (__mmask32) -1, __C);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_div_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
			  __m512h __D, const int __E)
{
  return __builtin_ia32_divph512_mask_round (__C, __D, __A, __B, __E);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_div_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
			   const int __D)
{
  return __builtin_ia32_divph512_mask_round (__B, __C,
					     _mm512_setzero_ph (),
					     __A, __D);
}

#else
#define _mm512_add_round_ph(A, B, C) \
  ((__m512h)__builtin_ia32_addph512_mask_round((A), (B), \
						_mm512_setzero_ph (), \
						(__mmask32)-1, (C)))
#define _mm512_mask_add_round_ph(A, B, C, D, E) \
  ((__m512h)__builtin_ia32_addph512_mask_round((C), (D), (A), (B), (E)))
#define _mm512_maskz_add_round_ph(A, B, C, D) \
  ((__m512h)__builtin_ia32_addph512_mask_round((B), (C), \
						_mm512_setzero_ph (), \
						(A), (D)))
#define _mm512_sub_round_ph(A, B, C) \
  ((__m512h)__builtin_ia32_subph512_mask_round((A), (B), \
						_mm512_setzero_ph (), \
						(__mmask32)-1, (C)))
#define _mm512_mask_sub_round_ph(A, B, C, D, E) \
  ((__m512h)__builtin_ia32_subph512_mask_round((C), (D), (A), (B), (E)))
#define _mm512_maskz_sub_round_ph(A, B, C, D) \
  ((__m512h)__builtin_ia32_subph512_mask_round((B), (C), \
						_mm512_setzero_ph (), \
						(A), (D)))
#define _mm512_mul_round_ph(A, B, C) \
  ((__m512h)__builtin_ia32_mulph512_mask_round((A), (B), \
						_mm512_setzero_ph (), \
						(__mmask32)-1, (C)))
#define _mm512_mask_mul_round_ph(A, B, C, D, E) \
  ((__m512h)__builtin_ia32_mulph512_mask_round((C), (D), (A), (B), (E)))
#define _mm512_maskz_mul_round_ph(A, B, C, D) \
  ((__m512h)__builtin_ia32_mulph512_mask_round((B), (C), \
						_mm512_setzero_ph (), \
						(A), (D)))
#define _mm512_div_round_ph(A, B, C) \
  ((__m512h)__builtin_ia32_divph512_mask_round((A), (B), \
						_mm512_setzero_ph (), \
						(__mmask32)-1, (C)))
#define _mm512_mask_div_round_ph(A, B, C, D, E) \
  ((__m512h)__builtin_ia32_divph512_mask_round((C), (D), (A), (B), (E)))
#define _mm512_maskz_div_round_ph(A, B, C, D) \
  ((__m512h)__builtin_ia32_divph512_mask_round((B), (C), \
						_mm512_setzero_ph (), \
						(A), (D)))
#endif /* __OPTIMIZE__ */
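
/* Usage sketch (illustrative only): the *_round_ph forms take an extra
   compile-time-constant immediate that selects the rounding behaviour for
   this one operation instead of the current MXCSR mode; it is normally built
   from the _MM_FROUND_* constants, e.g.

     __m512h __sum = _mm512_add_round_ph (__x, __y,
					  _MM_FROUND_TO_NEAREST_INT
					  | _MM_FROUND_NO_EXC);

   where __x and __y are __m512h values defined elsewhere.  */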

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_conj_pch (__m512h __A)
{
  return (__m512h) _mm512_xor_epi32 ((__m512i) __A, _mm512_set1_epi32 (1<<31));
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_conj_pch (__m512h __W, __mmask16 __U, __m512h __A)
{
  return (__m512h)
    __builtin_ia32_movaps512_mask ((__v16sf) _mm512_conj_pch (__A),
				   (__v16sf) __W,
				   (__mmask16) __U);
}

extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_conj_pch (__mmask16 __U, __m512h __A)
{
  return (__m512h)
    __builtin_ia32_movaps512_mask ((__v16sf) _mm512_conj_pch (__A),
				   (__v16sf) _mm512_setzero_ps (),
				   (__mmask16) __U);
}
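
/* Note (editorial): _mm512_conj_pch treats the 512-bit vector as 16 complex
   numbers made of _Float16 (real, imaginary) pairs and negates each
   imaginary part by XOR-ing the sign bit of the high half of every 32-bit
   lane, which is why the masked forms take an __mmask16 (one bit per complex
   element).  */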

/* Intrinsics of v[add,sub,mul,div]sh.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_sh (__m128h __A, __m128h __B)
{
  __A[0] += __B[0];
  return __A;
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_addsh_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_sh (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_addsh_mask (__B, __C, _mm_setzero_ph (),
				    __A);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_sh (__m128h __A, __m128h __B)
{
  __A[0] -= __B[0];
  return __A;
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_subsh_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_sh (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_subsh_mask (__B, __C, _mm_setzero_ph (),
				    __A);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_sh (__m128h __A, __m128h __B)
{
  __A[0] *= __B[0];
  return __A;
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_mul_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_mulsh_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_mul_sh (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_mulsh_mask (__B, __C, _mm_setzero_ph (), __A);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_sh (__m128h __A, __m128h __B)
{
  __A[0] /= __B[0];
  return __A;
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_div_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_divsh_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_div_sh (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_divsh_mask (__B, __C, _mm_setzero_ph (),
				    __A);
}
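
/* Usage sketch (illustrative only): the scalar _sh operations above combine
   element 0 of the two operands and copy elements 1-7 of the result from the
   first operand, matching the VADDSH/VSUBSH/VMULSH/VDIVSH instructions.  For
   example, with lanes written low to high:

     __m128h __a = _mm_set_sh (3.0f);	      builds { 3, 0, ..., 0 }
     __m128h __b = _mm_set_sh (4.0f);
     __m128h __r = _mm_add_sh (__a, __b);     yields { 7, 0, ..., 0 }
*/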

#ifdef __OPTIMIZE__
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_round_sh (__m128h __A, __m128h __B, const int __C)
{
  return __builtin_ia32_addsh_mask_round (__A, __B,
					  _mm_setzero_ph (),
					  (__mmask8) -1, __C);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
		       __m128h __D, const int __E)
{
  return __builtin_ia32_addsh_mask_round (__C, __D, __A, __B, __E);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
			const int __D)
{
  return __builtin_ia32_addsh_mask_round (__B, __C,
					  _mm_setzero_ph (),
					  __A, __D);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_round_sh (__m128h __A, __m128h __B, const int __C)
{
  return __builtin_ia32_subsh_mask_round (__A, __B,
					  _mm_setzero_ph (),
					  (__mmask8) -1, __C);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
		       __m128h __D, const int __E)
{
  return __builtin_ia32_subsh_mask_round (__C, __D, __A, __B, __E);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
			const int __D)
{
  return __builtin_ia32_subsh_mask_round (__B, __C,
					  _mm_setzero_ph (),
					  __A, __D);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_round_sh (__m128h __A, __m128h __B, const int __C)
{
  return __builtin_ia32_mulsh_mask_round (__A, __B,
					  _mm_setzero_ph (),
					  (__mmask8) -1, __C);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_mul_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
		       __m128h __D, const int __E)
{
  return __builtin_ia32_mulsh_mask_round (__C, __D, __A, __B, __E);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_mul_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
			const int __D)
{
  return __builtin_ia32_mulsh_mask_round (__B, __C,
					  _mm_setzero_ph (),
					  __A, __D);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_round_sh (__m128h __A, __m128h __B, const int __C)
{
  return __builtin_ia32_divsh_mask_round (__A, __B,
					  _mm_setzero_ph (),
					  (__mmask8) -1, __C);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_div_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
		       __m128h __D, const int __E)
{
  return __builtin_ia32_divsh_mask_round (__C, __D, __A, __B, __E);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_div_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
			const int __D)
{
  return __builtin_ia32_divsh_mask_round (__B, __C,
					  _mm_setzero_ph (),
					  __A, __D);
}

#else
#define _mm_add_round_sh(A, B, C) \
  ((__m128h)__builtin_ia32_addsh_mask_round ((A), (B), \
					     _mm_setzero_ph (), \
					     (__mmask8)-1, (C)))
#define _mm_mask_add_round_sh(A, B, C, D, E) \
  ((__m128h)__builtin_ia32_addsh_mask_round ((C), (D), (A), (B), (E)))
#define _mm_maskz_add_round_sh(A, B, C, D) \
  ((__m128h)__builtin_ia32_addsh_mask_round ((B), (C), \
					     _mm_setzero_ph (), \
					     (A), (D)))
#define _mm_sub_round_sh(A, B, C) \
  ((__m128h)__builtin_ia32_subsh_mask_round ((A), (B), \
					     _mm_setzero_ph (), \
  842. (__mmask8)-1, (C)))
  843. #define _mm_mask_sub_round_sh(A, B, C, D, E) \
  844. ((__m128h)__builtin_ia32_subsh_mask_round ((C), (D), (A), (B), (E)))
  845. #define _mm_maskz_sub_round_sh(A, B, C, D) \
  846. ((__m128h)__builtin_ia32_subsh_mask_round ((B), (C), \
  847. _mm_setzero_ph (), \
  848. (A), (D)))
  849. #define _mm_mul_round_sh(A, B, C) \
  850. ((__m128h)__builtin_ia32_mulsh_mask_round ((A), (B), \
  851. _mm_setzero_ph (), \
  852. (__mmask8)-1, (C)))
  853. #define _mm_mask_mul_round_sh(A, B, C, D, E) \
  854. ((__m128h)__builtin_ia32_mulsh_mask_round ((C), (D), (A), (B), (E)))
  855. #define _mm_maskz_mul_round_sh(A, B, C, D) \
  856. ((__m128h)__builtin_ia32_mulsh_mask_round ((B), (C), \
  857. _mm_setzero_ph (), \
  858. (A), (D)))
  859. #define _mm_div_round_sh(A, B, C) \
  860. ((__m128h)__builtin_ia32_divsh_mask_round ((A), (B), \
  861. _mm_setzero_ph (), \
  862. (__mmask8)-1, (C)))
  863. #define _mm_mask_div_round_sh(A, B, C, D, E) \
  864. ((__m128h)__builtin_ia32_divsh_mask_round ((C), (D), (A), (B), (E)))
  865. #define _mm_maskz_div_round_sh(A, B, C, D) \
  866. ((__m128h)__builtin_ia32_divsh_mask_round ((B), (C), \
  867. _mm_setzero_ph (), \
  868. (A), (D)))
  869. #endif /* __OPTIMIZE__ */
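/* Illustrative usage sketch (editorial addition, not part of the original
   header): the masked scalar forms operate on element 0 only and take the
   unselected element from the first (source) operand; the _round_ forms take
   an explicit rounding-mode constant.  Variable names are hypothetical.

     __m128h src = _mm_set_sh ((_Float16) 9.0f);
     __m128h a   = _mm_set_sh ((_Float16) 1.0f);
     __m128h b   = _mm_set_sh ((_Float16) 2.0f);
     __m128h r0  = _mm_mask_add_sh (src, 0x0, a, b);   // element 0 == 9.0
     __m128h r1  = _mm_mask_add_sh (src, 0x1, a, b);   // element 0 == 3.0
     __m128h r2  = _mm_add_round_sh (a, b,
                                     _MM_FROUND_TO_NEAREST_INT
                                     | _MM_FROUND_NO_EXC);
*/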
  870. /* Intrinsic vmaxph vminph. */
  871. extern __inline __m512h
  872. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  873. _mm512_max_ph (__m512h __A, __m512h __B)
  874. {
  875. return __builtin_ia32_maxph512_mask (__A, __B,
  876. _mm512_setzero_ph (),
  877. (__mmask32) -1);
  878. }
  879. extern __inline __m512h
  880. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  881. _mm512_mask_max_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
  882. {
  883. return __builtin_ia32_maxph512_mask (__C, __D, __A, __B);
  884. }
  885. extern __inline __m512h
  886. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  887. _mm512_maskz_max_ph (__mmask32 __A, __m512h __B, __m512h __C)
  888. {
  889. return __builtin_ia32_maxph512_mask (__B, __C,
  890. _mm512_setzero_ph (), __A);
  891. }
  892. extern __inline __m512h
  893. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  894. _mm512_min_ph (__m512h __A, __m512h __B)
  895. {
  896. return __builtin_ia32_minph512_mask (__A, __B,
  897. _mm512_setzero_ph (),
  898. (__mmask32) -1);
  899. }
  900. extern __inline __m512h
  901. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  902. _mm512_mask_min_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
  903. {
  904. return __builtin_ia32_minph512_mask (__C, __D, __A, __B);
  905. }
  906. extern __inline __m512h
  907. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  908. _mm512_maskz_min_ph (__mmask32 __A, __m512h __B, __m512h __C)
  909. {
  910. return __builtin_ia32_minph512_mask (__B, __C,
  911. _mm512_setzero_ph (), __A);
  912. }
  913. #ifdef __OPTIMIZE__
  914. extern __inline __m512h
  915. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  916. _mm512_max_round_ph (__m512h __A, __m512h __B, const int __C)
  917. {
  918. return __builtin_ia32_maxph512_mask_round (__A, __B,
  919. _mm512_setzero_ph (),
  920. (__mmask32) -1, __C);
  921. }
  922. extern __inline __m512h
  923. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  924. _mm512_mask_max_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
  925. __m512h __D, const int __E)
  926. {
  927. return __builtin_ia32_maxph512_mask_round (__C, __D, __A, __B, __E);
  928. }
  929. extern __inline __m512h
  930. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  931. _mm512_maskz_max_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
  932. const int __D)
  933. {
  934. return __builtin_ia32_maxph512_mask_round (__B, __C,
  935. _mm512_setzero_ph (),
  936. __A, __D);
  937. }
  938. extern __inline __m512h
  939. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  940. _mm512_min_round_ph (__m512h __A, __m512h __B, const int __C)
  941. {
  942. return __builtin_ia32_minph512_mask_round (__A, __B,
  943. _mm512_setzero_ph (),
  944. (__mmask32) -1, __C);
  945. }
  946. extern __inline __m512h
  947. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  948. _mm512_mask_min_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
  949. __m512h __D, const int __E)
  950. {
  951. return __builtin_ia32_minph512_mask_round (__C, __D, __A, __B, __E);
  952. }
  953. extern __inline __m512h
  954. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  955. _mm512_maskz_min_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
  956. const int __D)
  957. {
  958. return __builtin_ia32_minph512_mask_round (__B, __C,
  959. _mm512_setzero_ph (),
  960. __A, __D);
  961. }
  962. #else
  963. #define _mm512_max_round_ph(A, B, C) \
  964. (__builtin_ia32_maxph512_mask_round ((A), (B), \
  965. _mm512_setzero_ph (), \
  966. (__mmask32)-1, (C)))
  967. #define _mm512_mask_max_round_ph(A, B, C, D, E) \
  968. (__builtin_ia32_maxph512_mask_round ((C), (D), (A), (B), (E)))
  969. #define _mm512_maskz_max_round_ph(A, B, C, D) \
  970. (__builtin_ia32_maxph512_mask_round ((B), (C), \
  971. _mm512_setzero_ph (), \
  972. (A), (D)))
  973. #define _mm512_min_round_ph(A, B, C) \
  974. (__builtin_ia32_minph512_mask_round ((A), (B), \
  975. _mm512_setzero_ph (), \
  976. (__mmask32)-1, (C)))
  977. #define _mm512_mask_min_round_ph(A, B, C, D, E) \
  978. (__builtin_ia32_minph512_mask_round ((C), (D), (A), (B), (E)))
  979. #define _mm512_maskz_min_round_ph(A, B, C, D) \
  980. (__builtin_ia32_minph512_mask_round ((B), (C), \
  981. _mm512_setzero_ph (), \
  982. (A), (D)))
  983. #endif /* __OPTIMIZE__ */
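/* Illustrative sketch (editorial addition): element-wise max/min with merge
   masking; lanes whose mask bit is clear are taken from the first (source)
   operand.  Names are hypothetical.

     __m512h a = _mm512_set1_ph ((_Float16) 1.0f);
     __m512h b = _mm512_set1_ph ((_Float16) 2.0f);
     __m512h m = _mm512_mask_max_ph (a, 0x0000ffffU, a, b);
     // low 16 lanes hold max(1,2) == 2.0, high 16 lanes keep 1.0 from 'a'
*/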
  984. /* Intrinsic vmaxsh vminsh. */
  985. extern __inline __m128h
  986. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  987. _mm_max_sh (__m128h __A, __m128h __B)
  988. {
  989. __A[0] = __A[0] > __B[0] ? __A[0] : __B[0];
  990. return __A;
  991. }
  992. extern __inline __m128h
  993. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  994. _mm_mask_max_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
  995. {
  996. return __builtin_ia32_maxsh_mask (__C, __D, __A, __B);
  997. }
  998. extern __inline __m128h
  999. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1000. _mm_maskz_max_sh (__mmask8 __A, __m128h __B, __m128h __C)
  1001. {
  1002. return __builtin_ia32_maxsh_mask (__B, __C, _mm_setzero_ph (),
  1003. __A);
  1004. }
  1005. extern __inline __m128h
  1006. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1007. _mm_min_sh (__m128h __A, __m128h __B)
  1008. {
  1009. __A[0] = __A[0] < __B[0] ? __A[0] : __B[0];
  1010. return __A;
  1011. }
  1012. extern __inline __m128h
  1013. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1014. _mm_mask_min_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
  1015. {
  1016. return __builtin_ia32_minsh_mask (__C, __D, __A, __B);
  1017. }
  1018. extern __inline __m128h
  1019. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1020. _mm_maskz_min_sh (__mmask8 __A, __m128h __B, __m128h __C)
  1021. {
  1022. return __builtin_ia32_minsh_mask (__B, __C, _mm_setzero_ph (),
  1023. __A);
  1024. }
  1025. #ifdef __OPTIMIZE__
  1026. extern __inline __m128h
  1027. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1028. _mm_max_round_sh (__m128h __A, __m128h __B, const int __C)
  1029. {
  1030. return __builtin_ia32_maxsh_mask_round (__A, __B,
  1031. _mm_setzero_ph (),
  1032. (__mmask8) -1, __C);
  1033. }
  1034. extern __inline __m128h
  1035. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1036. _mm_mask_max_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
  1037. __m128h __D, const int __E)
  1038. {
  1039. return __builtin_ia32_maxsh_mask_round (__C, __D, __A, __B, __E);
  1040. }
  1041. extern __inline __m128h
  1042. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1043. _mm_maskz_max_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
  1044. const int __D)
  1045. {
  1046. return __builtin_ia32_maxsh_mask_round (__B, __C,
  1047. _mm_setzero_ph (),
  1048. __A, __D);
  1049. }
  1050. extern __inline __m128h
  1051. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1052. _mm_min_round_sh (__m128h __A, __m128h __B, const int __C)
  1053. {
  1054. return __builtin_ia32_minsh_mask_round (__A, __B,
  1055. _mm_setzero_ph (),
  1056. (__mmask8) -1, __C);
  1057. }
  1058. extern __inline __m128h
  1059. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1060. _mm_mask_min_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
  1061. __m128h __D, const int __E)
  1062. {
  1063. return __builtin_ia32_minsh_mask_round (__C, __D, __A, __B, __E);
  1064. }
  1065. extern __inline __m128h
  1066. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1067. _mm_maskz_min_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
  1068. const int __D)
  1069. {
  1070. return __builtin_ia32_minsh_mask_round (__B, __C,
  1071. _mm_setzero_ph (),
  1072. __A, __D);
  1073. }
  1074. #else
  1075. #define _mm_max_round_sh(A, B, C) \
  1076. (__builtin_ia32_maxsh_mask_round ((A), (B), \
  1077. _mm_setzero_ph (), \
  1078. (__mmask8)-1, (C)))
  1079. #define _mm_mask_max_round_sh(A, B, C, D, E) \
  1080. (__builtin_ia32_maxsh_mask_round ((C), (D), (A), (B), (E)))
  1081. #define _mm_maskz_max_round_sh(A, B, C, D) \
  1082. (__builtin_ia32_maxsh_mask_round ((B), (C), \
  1083. _mm_setzero_ph (), \
  1084. (A), (D)))
  1085. #define _mm_min_round_sh(A, B, C) \
  1086. (__builtin_ia32_minsh_mask_round ((A), (B), \
  1087. _mm_setzero_ph (), \
  1088. (__mmask8)-1, (C)))
  1089. #define _mm_mask_min_round_sh(A, B, C, D, E) \
  1090. (__builtin_ia32_minsh_mask_round ((C), (D), (A), (B), (E)))
  1091. #define _mm_maskz_min_round_sh(A, B, C, D) \
  1092. (__builtin_ia32_minsh_mask_round ((B), (C), \
  1093. _mm_setzero_ph (), \
  1094. (A), (D)))
  1095. #endif /* __OPTIMIZE__ */
  1096. /* vcmpph */
1097. #ifdef __OPTIMIZE__
  1098. extern __inline __mmask32
  1099. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1100. _mm512_cmp_ph_mask (__m512h __A, __m512h __B, const int __C)
  1101. {
  1102. return (__mmask32) __builtin_ia32_cmpph512_mask (__A, __B, __C,
  1103. (__mmask32) -1);
  1104. }
  1105. extern __inline __mmask32
  1106. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1107. _mm512_mask_cmp_ph_mask (__mmask32 __A, __m512h __B, __m512h __C,
  1108. const int __D)
  1109. {
  1110. return (__mmask32) __builtin_ia32_cmpph512_mask (__B, __C, __D,
  1111. __A);
  1112. }
  1113. extern __inline __mmask32
  1114. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1115. _mm512_cmp_round_ph_mask (__m512h __A, __m512h __B, const int __C,
  1116. const int __D)
  1117. {
  1118. return (__mmask32) __builtin_ia32_cmpph512_mask_round (__A, __B,
  1119. __C, (__mmask32) -1,
  1120. __D);
  1121. }
  1122. extern __inline __mmask32
  1123. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1124. _mm512_mask_cmp_round_ph_mask (__mmask32 __A, __m512h __B, __m512h __C,
  1125. const int __D, const int __E)
  1126. {
  1127. return (__mmask32) __builtin_ia32_cmpph512_mask_round (__B, __C,
  1128. __D, __A,
  1129. __E);
  1130. }
  1131. #else
  1132. #define _mm512_cmp_ph_mask(A, B, C) \
  1133. (__builtin_ia32_cmpph512_mask ((A), (B), (C), (-1)))
  1134. #define _mm512_mask_cmp_ph_mask(A, B, C, D) \
  1135. (__builtin_ia32_cmpph512_mask ((B), (C), (D), (A)))
  1136. #define _mm512_cmp_round_ph_mask(A, B, C, D) \
  1137. (__builtin_ia32_cmpph512_mask_round ((A), (B), (C), (-1), (D)))
  1138. #define _mm512_mask_cmp_round_ph_mask(A, B, C, D, E) \
  1139. (__builtin_ia32_cmpph512_mask_round ((B), (C), (D), (A), (E)))
  1140. #endif /* __OPTIMIZE__ */
  1141. /* Intrinsics vcmpsh. */
  1142. #ifdef __OPTIMIZE__
  1143. extern __inline __mmask8
  1144. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1145. _mm_cmp_sh_mask (__m128h __A, __m128h __B, const int __C)
  1146. {
  1147. return (__mmask8)
  1148. __builtin_ia32_cmpsh_mask_round (__A, __B,
  1149. __C, (__mmask8) -1,
  1150. _MM_FROUND_CUR_DIRECTION);
  1151. }
  1152. extern __inline __mmask8
  1153. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1154. _mm_mask_cmp_sh_mask (__mmask8 __A, __m128h __B, __m128h __C,
  1155. const int __D)
  1156. {
  1157. return (__mmask8)
  1158. __builtin_ia32_cmpsh_mask_round (__B, __C,
  1159. __D, __A,
  1160. _MM_FROUND_CUR_DIRECTION);
  1161. }
  1162. extern __inline __mmask8
  1163. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1164. _mm_cmp_round_sh_mask (__m128h __A, __m128h __B, const int __C,
  1165. const int __D)
  1166. {
  1167. return (__mmask8) __builtin_ia32_cmpsh_mask_round (__A, __B,
  1168. __C, (__mmask8) -1,
  1169. __D);
  1170. }
  1171. extern __inline __mmask8
  1172. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1173. _mm_mask_cmp_round_sh_mask (__mmask8 __A, __m128h __B, __m128h __C,
  1174. const int __D, const int __E)
  1175. {
  1176. return (__mmask8) __builtin_ia32_cmpsh_mask_round (__B, __C,
  1177. __D, __A,
  1178. __E);
  1179. }
  1180. #else
  1181. #define _mm_cmp_sh_mask(A, B, C) \
  1182. (__builtin_ia32_cmpsh_mask_round ((A), (B), (C), (-1), \
  1183. (_MM_FROUND_CUR_DIRECTION)))
  1184. #define _mm_mask_cmp_sh_mask(A, B, C, D) \
  1185. (__builtin_ia32_cmpsh_mask_round ((B), (C), (D), (A), \
  1186. (_MM_FROUND_CUR_DIRECTION)))
  1187. #define _mm_cmp_round_sh_mask(A, B, C, D) \
  1188. (__builtin_ia32_cmpsh_mask_round ((A), (B), (C), (-1), (D)))
  1189. #define _mm_mask_cmp_round_sh_mask(A, B, C, D, E) \
  1190. (__builtin_ia32_cmpsh_mask_round ((B), (C), (D), (A), (E)))
  1191. #endif /* __OPTIMIZE__ */
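/* Illustrative sketch (editorial addition): the vector and scalar compares
   return a mask rather than a vector; the predicate is one of the _CMP_*
   constants provided by <immintrin.h>.  Names are hypothetical.

     __m512h a = _mm512_set1_ph ((_Float16) 1.0f);
     __m512h b = _mm512_setzero_ph ();
     __mmask32 gt = _mm512_cmp_ph_mask (a, b, _CMP_GT_OQ);  // all 32 bits set
     __mmask8  eq = _mm_cmp_sh_mask (_mm_set_sh ((_Float16) 0.0f),
                                     _mm_setzero_ph (), _CMP_EQ_OQ);  // bit 0 set
*/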
  1192. /* Intrinsics vcomish. */
  1193. extern __inline int
  1194. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1195. _mm_comieq_sh (__m128h __A, __m128h __B)
  1196. {
  1197. return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_EQ_OS,
  1198. (__mmask8) -1,
  1199. _MM_FROUND_CUR_DIRECTION);
  1200. }
  1201. extern __inline int
  1202. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1203. _mm_comilt_sh (__m128h __A, __m128h __B)
  1204. {
  1205. return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LT_OS,
  1206. (__mmask8) -1,
  1207. _MM_FROUND_CUR_DIRECTION);
  1208. }
  1209. extern __inline int
  1210. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1211. _mm_comile_sh (__m128h __A, __m128h __B)
  1212. {
  1213. return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LE_OS,
  1214. (__mmask8) -1,
  1215. _MM_FROUND_CUR_DIRECTION);
  1216. }
  1217. extern __inline int
  1218. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1219. _mm_comigt_sh (__m128h __A, __m128h __B)
  1220. {
  1221. return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GT_OS,
  1222. (__mmask8) -1,
  1223. _MM_FROUND_CUR_DIRECTION);
  1224. }
  1225. extern __inline int
  1226. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1227. _mm_comige_sh (__m128h __A, __m128h __B)
  1228. {
  1229. return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GE_OS,
  1230. (__mmask8) -1,
  1231. _MM_FROUND_CUR_DIRECTION);
  1232. }
  1233. extern __inline int
  1234. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1235. _mm_comineq_sh (__m128h __A, __m128h __B)
  1236. {
  1237. return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_NEQ_US,
  1238. (__mmask8) -1,
  1239. _MM_FROUND_CUR_DIRECTION);
  1240. }
  1241. extern __inline int
  1242. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1243. _mm_ucomieq_sh (__m128h __A, __m128h __B)
  1244. {
  1245. return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_EQ_OQ,
  1246. (__mmask8) -1,
  1247. _MM_FROUND_CUR_DIRECTION);
  1248. }
  1249. extern __inline int
  1250. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1251. _mm_ucomilt_sh (__m128h __A, __m128h __B)
  1252. {
  1253. return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LT_OQ,
  1254. (__mmask8) -1,
  1255. _MM_FROUND_CUR_DIRECTION);
  1256. }
  1257. extern __inline int
  1258. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1259. _mm_ucomile_sh (__m128h __A, __m128h __B)
  1260. {
  1261. return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LE_OQ,
  1262. (__mmask8) -1,
  1263. _MM_FROUND_CUR_DIRECTION);
  1264. }
  1265. extern __inline int
  1266. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1267. _mm_ucomigt_sh (__m128h __A, __m128h __B)
  1268. {
  1269. return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GT_OQ,
  1270. (__mmask8) -1,
  1271. _MM_FROUND_CUR_DIRECTION);
  1272. }
  1273. extern __inline int
  1274. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1275. _mm_ucomige_sh (__m128h __A, __m128h __B)
  1276. {
  1277. return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GE_OQ,
  1278. (__mmask8) -1,
  1279. _MM_FROUND_CUR_DIRECTION);
  1280. }
  1281. extern __inline int
  1282. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1283. _mm_ucomineq_sh (__m128h __A, __m128h __B)
  1284. {
  1285. return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_NEQ_UQ,
  1286. (__mmask8) -1,
  1287. _MM_FROUND_CUR_DIRECTION);
  1288. }
  1289. #ifdef __OPTIMIZE__
  1290. extern __inline int
  1291. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1292. _mm_comi_sh (__m128h __A, __m128h __B, const int __P)
  1293. {
  1294. return __builtin_ia32_cmpsh_mask_round (__A, __B, __P,
  1295. (__mmask8) -1,
  1296. _MM_FROUND_CUR_DIRECTION);
  1297. }
  1298. extern __inline int
  1299. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1300. _mm_comi_round_sh (__m128h __A, __m128h __B, const int __P, const int __R)
  1301. {
  1302. return __builtin_ia32_cmpsh_mask_round (__A, __B, __P,
1303. (__mmask8) -1, __R);
  1304. }
  1305. #else
  1306. #define _mm_comi_round_sh(A, B, P, R) \
  1307. (__builtin_ia32_cmpsh_mask_round ((A), (B), (P), (__mmask8) (-1), (R)))
  1308. #define _mm_comi_sh(A, B, P) \
  1309. (__builtin_ia32_cmpsh_mask_round ((A), (B), (P), (__mmask8) (-1), \
  1310. _MM_FROUND_CUR_DIRECTION))
  1311. #endif /* __OPTIMIZE__ */
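/* Illustrative sketch (editorial addition): the comi/ucomi helpers compare
   element 0 of two __m128h values and return an int (0 or 1), so they can be
   used directly in conditions.  Names are hypothetical.

     __m128h x = _mm_set_sh ((_Float16) 1.0f);
     __m128h y = _mm_set_sh ((_Float16) 2.0f);
     if (_mm_comilt_sh (x, y))                      // true: 1.0 < 2.0
       do_something ();
     int eq = _mm_comi_sh (x, y, _CMP_EQ_OS);       // 0 here
*/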
  1312. /* Intrinsics vsqrtph. */
  1313. extern __inline __m512h
  1314. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1315. _mm512_sqrt_ph (__m512h __A)
  1316. {
  1317. return __builtin_ia32_sqrtph512_mask_round (__A,
  1318. _mm512_setzero_ph(),
  1319. (__mmask32) -1,
  1320. _MM_FROUND_CUR_DIRECTION);
  1321. }
  1322. extern __inline __m512h
  1323. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1324. _mm512_mask_sqrt_ph (__m512h __A, __mmask32 __B, __m512h __C)
  1325. {
  1326. return __builtin_ia32_sqrtph512_mask_round (__C, __A, __B,
  1327. _MM_FROUND_CUR_DIRECTION);
  1328. }
  1329. extern __inline __m512h
  1330. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1331. _mm512_maskz_sqrt_ph (__mmask32 __A, __m512h __B)
  1332. {
  1333. return __builtin_ia32_sqrtph512_mask_round (__B,
  1334. _mm512_setzero_ph (),
  1335. __A,
  1336. _MM_FROUND_CUR_DIRECTION);
  1337. }
  1338. #ifdef __OPTIMIZE__
  1339. extern __inline __m512h
  1340. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1341. _mm512_sqrt_round_ph (__m512h __A, const int __B)
  1342. {
  1343. return __builtin_ia32_sqrtph512_mask_round (__A,
  1344. _mm512_setzero_ph(),
  1345. (__mmask32) -1, __B);
  1346. }
  1347. extern __inline __m512h
  1348. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1349. _mm512_mask_sqrt_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
  1350. const int __D)
  1351. {
  1352. return __builtin_ia32_sqrtph512_mask_round (__C, __A, __B, __D);
  1353. }
  1354. extern __inline __m512h
  1355. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1356. _mm512_maskz_sqrt_round_ph (__mmask32 __A, __m512h __B, const int __C)
  1357. {
  1358. return __builtin_ia32_sqrtph512_mask_round (__B,
  1359. _mm512_setzero_ph (),
  1360. __A, __C);
  1361. }
  1362. #else
  1363. #define _mm512_sqrt_round_ph(A, B) \
  1364. (__builtin_ia32_sqrtph512_mask_round ((A), \
  1365. _mm512_setzero_ph (), \
  1366. (__mmask32)-1, (B)))
  1367. #define _mm512_mask_sqrt_round_ph(A, B, C, D) \
  1368. (__builtin_ia32_sqrtph512_mask_round ((C), (A), (B), (D)))
  1369. #define _mm512_maskz_sqrt_round_ph(A, B, C) \
  1370. (__builtin_ia32_sqrtph512_mask_round ((B), \
  1371. _mm512_setzero_ph (), \
  1372. (A), (C)))
  1373. #endif /* __OPTIMIZE__ */
  1374. /* Intrinsics vrsqrtph. */
  1375. extern __inline __m512h
  1376. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1377. _mm512_rsqrt_ph (__m512h __A)
  1378. {
  1379. return __builtin_ia32_rsqrtph512_mask (__A, _mm512_setzero_ph (),
  1380. (__mmask32) -1);
  1381. }
  1382. extern __inline __m512h
  1383. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1384. _mm512_mask_rsqrt_ph (__m512h __A, __mmask32 __B, __m512h __C)
  1385. {
  1386. return __builtin_ia32_rsqrtph512_mask (__C, __A, __B);
  1387. }
  1388. extern __inline __m512h
  1389. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1390. _mm512_maskz_rsqrt_ph (__mmask32 __A, __m512h __B)
  1391. {
  1392. return __builtin_ia32_rsqrtph512_mask (__B, _mm512_setzero_ph (),
  1393. __A);
  1394. }
  1395. /* Intrinsics vrsqrtsh. */
  1396. extern __inline __m128h
  1397. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1398. _mm_rsqrt_sh (__m128h __A, __m128h __B)
  1399. {
  1400. return __builtin_ia32_rsqrtsh_mask (__B, __A, _mm_setzero_ph (),
  1401. (__mmask8) -1);
  1402. }
  1403. extern __inline __m128h
  1404. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1405. _mm_mask_rsqrt_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
  1406. {
  1407. return __builtin_ia32_rsqrtsh_mask (__D, __C, __A, __B);
  1408. }
  1409. extern __inline __m128h
  1410. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1411. _mm_maskz_rsqrt_sh (__mmask8 __A, __m128h __B, __m128h __C)
  1412. {
  1413. return __builtin_ia32_rsqrtsh_mask (__C, __B, _mm_setzero_ph (),
  1414. __A);
  1415. }
  1416. /* Intrinsics vsqrtsh. */
  1417. extern __inline __m128h
  1418. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1419. _mm_sqrt_sh (__m128h __A, __m128h __B)
  1420. {
  1421. return __builtin_ia32_sqrtsh_mask_round (__B, __A,
  1422. _mm_setzero_ph (),
  1423. (__mmask8) -1,
  1424. _MM_FROUND_CUR_DIRECTION);
  1425. }
  1426. extern __inline __m128h
  1427. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1428. _mm_mask_sqrt_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
  1429. {
  1430. return __builtin_ia32_sqrtsh_mask_round (__D, __C, __A, __B,
  1431. _MM_FROUND_CUR_DIRECTION);
  1432. }
  1433. extern __inline __m128h
  1434. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1435. _mm_maskz_sqrt_sh (__mmask8 __A, __m128h __B, __m128h __C)
  1436. {
  1437. return __builtin_ia32_sqrtsh_mask_round (__C, __B,
  1438. _mm_setzero_ph (),
  1439. __A, _MM_FROUND_CUR_DIRECTION);
  1440. }
  1441. #ifdef __OPTIMIZE__
  1442. extern __inline __m128h
  1443. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1444. _mm_sqrt_round_sh (__m128h __A, __m128h __B, const int __C)
  1445. {
  1446. return __builtin_ia32_sqrtsh_mask_round (__B, __A,
  1447. _mm_setzero_ph (),
  1448. (__mmask8) -1, __C);
  1449. }
  1450. extern __inline __m128h
  1451. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1452. _mm_mask_sqrt_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
  1453. __m128h __D, const int __E)
  1454. {
  1455. return __builtin_ia32_sqrtsh_mask_round (__D, __C, __A, __B,
  1456. __E);
  1457. }
  1458. extern __inline __m128h
  1459. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1460. _mm_maskz_sqrt_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
  1461. const int __D)
  1462. {
  1463. return __builtin_ia32_sqrtsh_mask_round (__C, __B,
  1464. _mm_setzero_ph (),
  1465. __A, __D);
  1466. }
  1467. #else
  1468. #define _mm_sqrt_round_sh(A, B, C) \
  1469. (__builtin_ia32_sqrtsh_mask_round ((B), (A), \
  1470. _mm_setzero_ph (), \
  1471. (__mmask8)-1, (C)))
  1472. #define _mm_mask_sqrt_round_sh(A, B, C, D, E) \
  1473. (__builtin_ia32_sqrtsh_mask_round ((D), (C), (A), (B), (E)))
  1474. #define _mm_maskz_sqrt_round_sh(A, B, C, D) \
  1475. (__builtin_ia32_sqrtsh_mask_round ((C), (B), \
  1476. _mm_setzero_ph (), \
  1477. (A), (D)))
  1478. #endif /* __OPTIMIZE__ */
  1479. /* Intrinsics vrcpph. */
  1480. extern __inline __m512h
  1481. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1482. _mm512_rcp_ph (__m512h __A)
  1483. {
  1484. return __builtin_ia32_rcpph512_mask (__A, _mm512_setzero_ph (),
  1485. (__mmask32) -1);
  1486. }
  1487. extern __inline __m512h
  1488. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1489. _mm512_mask_rcp_ph (__m512h __A, __mmask32 __B, __m512h __C)
  1490. {
  1491. return __builtin_ia32_rcpph512_mask (__C, __A, __B);
  1492. }
  1493. extern __inline __m512h
  1494. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1495. _mm512_maskz_rcp_ph (__mmask32 __A, __m512h __B)
  1496. {
  1497. return __builtin_ia32_rcpph512_mask (__B, _mm512_setzero_ph (),
  1498. __A);
  1499. }
  1500. /* Intrinsics vrcpsh. */
  1501. extern __inline __m128h
  1502. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1503. _mm_rcp_sh (__m128h __A, __m128h __B)
  1504. {
  1505. return __builtin_ia32_rcpsh_mask (__B, __A, _mm_setzero_ph (),
  1506. (__mmask8) -1);
  1507. }
  1508. extern __inline __m128h
  1509. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1510. _mm_mask_rcp_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
  1511. {
  1512. return __builtin_ia32_rcpsh_mask (__D, __C, __A, __B);
  1513. }
  1514. extern __inline __m128h
  1515. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1516. _mm_maskz_rcp_sh (__mmask8 __A, __m128h __B, __m128h __C)
  1517. {
  1518. return __builtin_ia32_rcpsh_mask (__C, __B, _mm_setzero_ph (),
  1519. __A);
  1520. }
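/* Illustrative sketch (editorial addition): vsqrtph computes an IEEE square
   root rounded per the selected rounding mode, while vrsqrtph/vrcpph return
   fast hardware approximations of 1/sqrt(x) and 1/x.  Names are
   hypothetical.

     __m512h x  = _mm512_set1_ph ((_Float16) 4.0f);
     __m512h s  = _mm512_sqrt_ph (x);     // 2.0 in every lane
     __m512h rs = _mm512_rsqrt_ph (x);    // approximately 0.5
     __m512h rc = _mm512_rcp_ph (x);      // approximately 0.25
*/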
  1521. /* Intrinsics vscalefph. */
  1522. extern __inline __m512h
  1523. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1524. _mm512_scalef_ph (__m512h __A, __m512h __B)
  1525. {
  1526. return __builtin_ia32_scalefph512_mask_round (__A, __B,
  1527. _mm512_setzero_ph (),
  1528. (__mmask32) -1,
  1529. _MM_FROUND_CUR_DIRECTION);
  1530. }
  1531. extern __inline __m512h
  1532. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1533. _mm512_mask_scalef_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
  1534. {
  1535. return __builtin_ia32_scalefph512_mask_round (__C, __D, __A, __B,
  1536. _MM_FROUND_CUR_DIRECTION);
  1537. }
  1538. extern __inline __m512h
  1539. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1540. _mm512_maskz_scalef_ph (__mmask32 __A, __m512h __B, __m512h __C)
  1541. {
  1542. return __builtin_ia32_scalefph512_mask_round (__B, __C,
  1543. _mm512_setzero_ph (),
  1544. __A,
  1545. _MM_FROUND_CUR_DIRECTION);
  1546. }
  1547. #ifdef __OPTIMIZE__
  1548. extern __inline __m512h
  1549. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1550. _mm512_scalef_round_ph (__m512h __A, __m512h __B, const int __C)
  1551. {
  1552. return __builtin_ia32_scalefph512_mask_round (__A, __B,
  1553. _mm512_setzero_ph (),
  1554. (__mmask32) -1, __C);
  1555. }
  1556. extern __inline __m512h
  1557. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1558. _mm512_mask_scalef_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
  1559. __m512h __D, const int __E)
  1560. {
  1561. return __builtin_ia32_scalefph512_mask_round (__C, __D, __A, __B,
  1562. __E);
  1563. }
  1564. extern __inline __m512h
  1565. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1566. _mm512_maskz_scalef_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
  1567. const int __D)
  1568. {
  1569. return __builtin_ia32_scalefph512_mask_round (__B, __C,
  1570. _mm512_setzero_ph (),
  1571. __A, __D);
  1572. }
  1573. #else
  1574. #define _mm512_scalef_round_ph(A, B, C) \
  1575. (__builtin_ia32_scalefph512_mask_round ((A), (B), \
  1576. _mm512_setzero_ph (), \
  1577. (__mmask32)-1, (C)))
  1578. #define _mm512_mask_scalef_round_ph(A, B, C, D, E) \
  1579. (__builtin_ia32_scalefph512_mask_round ((C), (D), (A), (B), (E)))
  1580. #define _mm512_maskz_scalef_round_ph(A, B, C, D) \
  1581. (__builtin_ia32_scalefph512_mask_round ((B), (C), \
  1582. _mm512_setzero_ph (), \
  1583. (A), (D)))
  1584. #endif /* __OPTIMIZE__ */
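/* Illustrative sketch (editorial addition): vscalefph computes
   a * 2^floor(b) per element, the usual way to reapply an exponent obtained
   from vgetexpph further below.  Names are hypothetical.

     __m512h a = _mm512_set1_ph ((_Float16) 3.0f);
     __m512h e = _mm512_set1_ph ((_Float16) 2.0f);
     __m512h r = _mm512_scalef_ph (a, e);   // 3.0 * 2^2 == 12.0 in every lane
*/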
  1585. /* Intrinsics vscalefsh. */
  1586. extern __inline __m128h
  1587. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1588. _mm_scalef_sh (__m128h __A, __m128h __B)
  1589. {
  1590. return __builtin_ia32_scalefsh_mask_round (__A, __B,
  1591. _mm_setzero_ph (),
  1592. (__mmask8) -1,
  1593. _MM_FROUND_CUR_DIRECTION);
  1594. }
  1595. extern __inline __m128h
  1596. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1597. _mm_mask_scalef_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
  1598. {
  1599. return __builtin_ia32_scalefsh_mask_round (__C, __D, __A, __B,
  1600. _MM_FROUND_CUR_DIRECTION);
  1601. }
  1602. extern __inline __m128h
  1603. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1604. _mm_maskz_scalef_sh (__mmask8 __A, __m128h __B, __m128h __C)
  1605. {
  1606. return __builtin_ia32_scalefsh_mask_round (__B, __C,
  1607. _mm_setzero_ph (),
  1608. __A,
  1609. _MM_FROUND_CUR_DIRECTION);
  1610. }
  1611. #ifdef __OPTIMIZE__
  1612. extern __inline __m128h
  1613. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1614. _mm_scalef_round_sh (__m128h __A, __m128h __B, const int __C)
  1615. {
  1616. return __builtin_ia32_scalefsh_mask_round (__A, __B,
  1617. _mm_setzero_ph (),
  1618. (__mmask8) -1, __C);
  1619. }
  1620. extern __inline __m128h
  1621. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1622. _mm_mask_scalef_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
  1623. __m128h __D, const int __E)
  1624. {
  1625. return __builtin_ia32_scalefsh_mask_round (__C, __D, __A, __B,
  1626. __E);
  1627. }
  1628. extern __inline __m128h
  1629. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1630. _mm_maskz_scalef_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
  1631. const int __D)
  1632. {
  1633. return __builtin_ia32_scalefsh_mask_round (__B, __C,
  1634. _mm_setzero_ph (),
  1635. __A, __D);
  1636. }
  1637. #else
  1638. #define _mm_scalef_round_sh(A, B, C) \
  1639. (__builtin_ia32_scalefsh_mask_round ((A), (B), \
  1640. _mm_setzero_ph (), \
  1641. (__mmask8)-1, (C)))
  1642. #define _mm_mask_scalef_round_sh(A, B, C, D, E) \
  1643. (__builtin_ia32_scalefsh_mask_round ((C), (D), (A), (B), (E)))
  1644. #define _mm_maskz_scalef_round_sh(A, B, C, D) \
  1645. (__builtin_ia32_scalefsh_mask_round ((B), (C), _mm_setzero_ph (), \
  1646. (A), (D)))
  1647. #endif /* __OPTIMIZE__ */
  1648. /* Intrinsics vreduceph. */
  1649. #ifdef __OPTIMIZE__
  1650. extern __inline __m512h
  1651. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1652. _mm512_reduce_ph (__m512h __A, int __B)
  1653. {
  1654. return __builtin_ia32_reduceph512_mask_round (__A, __B,
  1655. _mm512_setzero_ph (),
  1656. (__mmask32) -1,
  1657. _MM_FROUND_CUR_DIRECTION);
  1658. }
  1659. extern __inline __m512h
  1660. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1661. _mm512_mask_reduce_ph (__m512h __A, __mmask32 __B, __m512h __C, int __D)
  1662. {
  1663. return __builtin_ia32_reduceph512_mask_round (__C, __D, __A, __B,
  1664. _MM_FROUND_CUR_DIRECTION);
  1665. }
  1666. extern __inline __m512h
  1667. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1668. _mm512_maskz_reduce_ph (__mmask32 __A, __m512h __B, int __C)
  1669. {
  1670. return __builtin_ia32_reduceph512_mask_round (__B, __C,
  1671. _mm512_setzero_ph (),
  1672. __A,
  1673. _MM_FROUND_CUR_DIRECTION);
  1674. }
  1675. extern __inline __m512h
  1676. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1677. _mm512_reduce_round_ph (__m512h __A, int __B, const int __C)
  1678. {
  1679. return __builtin_ia32_reduceph512_mask_round (__A, __B,
  1680. _mm512_setzero_ph (),
  1681. (__mmask32) -1, __C);
  1682. }
  1683. extern __inline __m512h
  1684. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1685. _mm512_mask_reduce_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
  1686. int __D, const int __E)
  1687. {
  1688. return __builtin_ia32_reduceph512_mask_round (__C, __D, __A, __B,
  1689. __E);
  1690. }
  1691. extern __inline __m512h
  1692. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1693. _mm512_maskz_reduce_round_ph (__mmask32 __A, __m512h __B, int __C,
  1694. const int __D)
  1695. {
  1696. return __builtin_ia32_reduceph512_mask_round (__B, __C,
  1697. _mm512_setzero_ph (),
  1698. __A, __D);
  1699. }
  1700. #else
  1701. #define _mm512_reduce_ph(A, B) \
  1702. (__builtin_ia32_reduceph512_mask_round ((A), (B), \
  1703. _mm512_setzero_ph (), \
  1704. (__mmask32)-1, \
  1705. _MM_FROUND_CUR_DIRECTION))
  1706. #define _mm512_mask_reduce_ph(A, B, C, D) \
  1707. (__builtin_ia32_reduceph512_mask_round ((C), (D), (A), (B), \
  1708. _MM_FROUND_CUR_DIRECTION))
  1709. #define _mm512_maskz_reduce_ph(A, B, C) \
  1710. (__builtin_ia32_reduceph512_mask_round ((B), (C), \
  1711. _mm512_setzero_ph (), \
  1712. (A), _MM_FROUND_CUR_DIRECTION))
  1713. #define _mm512_reduce_round_ph(A, B, C) \
  1714. (__builtin_ia32_reduceph512_mask_round ((A), (B), \
  1715. _mm512_setzero_ph (), \
  1716. (__mmask32)-1, (C)))
  1717. #define _mm512_mask_reduce_round_ph(A, B, C, D, E) \
  1718. (__builtin_ia32_reduceph512_mask_round ((C), (D), (A), (B), (E)))
  1719. #define _mm512_maskz_reduce_round_ph(A, B, C, D) \
  1720. (__builtin_ia32_reduceph512_mask_round ((B), (C), \
  1721. _mm512_setzero_ph (), \
  1722. (A), (D)))
  1723. #endif /* __OPTIMIZE__ */
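/* Illustrative sketch (editorial addition): vreduceph subtracts from each
   element its value rounded to the precision selected by the immediate,
   leaving the residual fraction; with an immediate of 0 (round to the
   nearest integer, keep no fraction bits) it behaves like x - nearbyint(x).
   The immediate encoding is an assumption based on the other vreduce
   intrinsics; names are hypothetical.

     __m512h x = _mm512_set1_ph ((_Float16) 1.25f);
     __m512h f = _mm512_reduce_ph (x, 0);    // 1.25 - 1.0 == 0.25
*/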
  1724. /* Intrinsics vreducesh. */
  1725. #ifdef __OPTIMIZE__
  1726. extern __inline __m128h
  1727. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1728. _mm_reduce_sh (__m128h __A, __m128h __B, int __C)
  1729. {
  1730. return __builtin_ia32_reducesh_mask_round (__A, __B, __C,
  1731. _mm_setzero_ph (),
  1732. (__mmask8) -1,
  1733. _MM_FROUND_CUR_DIRECTION);
  1734. }
  1735. extern __inline __m128h
  1736. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1737. _mm_mask_reduce_sh (__m128h __A, __mmask8 __B, __m128h __C,
  1738. __m128h __D, int __E)
  1739. {
  1740. return __builtin_ia32_reducesh_mask_round (__C, __D, __E, __A, __B,
  1741. _MM_FROUND_CUR_DIRECTION);
  1742. }
  1743. extern __inline __m128h
  1744. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1745. _mm_maskz_reduce_sh (__mmask8 __A, __m128h __B, __m128h __C, int __D)
  1746. {
  1747. return __builtin_ia32_reducesh_mask_round (__B, __C, __D,
  1748. _mm_setzero_ph (), __A,
  1749. _MM_FROUND_CUR_DIRECTION);
  1750. }
  1751. extern __inline __m128h
  1752. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1753. _mm_reduce_round_sh (__m128h __A, __m128h __B, int __C, const int __D)
  1754. {
  1755. return __builtin_ia32_reducesh_mask_round (__A, __B, __C,
  1756. _mm_setzero_ph (),
  1757. (__mmask8) -1, __D);
  1758. }
  1759. extern __inline __m128h
  1760. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1761. _mm_mask_reduce_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
  1762. __m128h __D, int __E, const int __F)
  1763. {
  1764. return __builtin_ia32_reducesh_mask_round (__C, __D, __E, __A,
  1765. __B, __F);
  1766. }
  1767. extern __inline __m128h
  1768. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1769. _mm_maskz_reduce_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
  1770. int __D, const int __E)
  1771. {
  1772. return __builtin_ia32_reducesh_mask_round (__B, __C, __D,
  1773. _mm_setzero_ph (),
  1774. __A, __E);
  1775. }
  1776. #else
  1777. #define _mm_reduce_sh(A, B, C) \
  1778. (__builtin_ia32_reducesh_mask_round ((A), (B), (C), \
  1779. _mm_setzero_ph (), \
  1780. (__mmask8)-1, \
  1781. _MM_FROUND_CUR_DIRECTION))
  1782. #define _mm_mask_reduce_sh(A, B, C, D, E) \
  1783. (__builtin_ia32_reducesh_mask_round ((C), (D), (E), (A), (B), \
  1784. _MM_FROUND_CUR_DIRECTION))
  1785. #define _mm_maskz_reduce_sh(A, B, C, D) \
  1786. (__builtin_ia32_reducesh_mask_round ((B), (C), (D), \
  1787. _mm_setzero_ph (), \
  1788. (A), _MM_FROUND_CUR_DIRECTION))
  1789. #define _mm_reduce_round_sh(A, B, C, D) \
  1790. (__builtin_ia32_reducesh_mask_round ((A), (B), (C), \
  1791. _mm_setzero_ph (), \
  1792. (__mmask8)-1, (D)))
  1793. #define _mm_mask_reduce_round_sh(A, B, C, D, E, F) \
  1794. (__builtin_ia32_reducesh_mask_round ((C), (D), (E), (A), (B), (F)))
  1795. #define _mm_maskz_reduce_round_sh(A, B, C, D, E) \
  1796. (__builtin_ia32_reducesh_mask_round ((B), (C), (D), \
  1797. _mm_setzero_ph (), \
  1798. (A), (E)))
  1799. #endif /* __OPTIMIZE__ */
  1800. /* Intrinsics vrndscaleph. */
  1801. #ifdef __OPTIMIZE__
  1802. extern __inline __m512h
  1803. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1804. _mm512_roundscale_ph (__m512h __A, int __B)
  1805. {
  1806. return __builtin_ia32_rndscaleph512_mask_round (__A, __B,
  1807. _mm512_setzero_ph (),
  1808. (__mmask32) -1,
  1809. _MM_FROUND_CUR_DIRECTION);
  1810. }
  1811. extern __inline __m512h
  1812. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1813. _mm512_mask_roundscale_ph (__m512h __A, __mmask32 __B,
  1814. __m512h __C, int __D)
  1815. {
  1816. return __builtin_ia32_rndscaleph512_mask_round (__C, __D, __A, __B,
  1817. _MM_FROUND_CUR_DIRECTION);
  1818. }
  1819. extern __inline __m512h
  1820. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1821. _mm512_maskz_roundscale_ph (__mmask32 __A, __m512h __B, int __C)
  1822. {
  1823. return __builtin_ia32_rndscaleph512_mask_round (__B, __C,
  1824. _mm512_setzero_ph (),
  1825. __A,
  1826. _MM_FROUND_CUR_DIRECTION);
  1827. }
  1828. extern __inline __m512h
  1829. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1830. _mm512_roundscale_round_ph (__m512h __A, int __B, const int __C)
  1831. {
  1832. return __builtin_ia32_rndscaleph512_mask_round (__A, __B,
  1833. _mm512_setzero_ph (),
  1834. (__mmask32) -1,
  1835. __C);
  1836. }
  1837. extern __inline __m512h
  1838. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1839. _mm512_mask_roundscale_round_ph (__m512h __A, __mmask32 __B,
  1840. __m512h __C, int __D, const int __E)
  1841. {
  1842. return __builtin_ia32_rndscaleph512_mask_round (__C, __D, __A,
  1843. __B, __E);
  1844. }
  1845. extern __inline __m512h
  1846. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1847. _mm512_maskz_roundscale_round_ph (__mmask32 __A, __m512h __B, int __C,
  1848. const int __D)
  1849. {
  1850. return __builtin_ia32_rndscaleph512_mask_round (__B, __C,
  1851. _mm512_setzero_ph (),
  1852. __A, __D);
  1853. }
  1854. #else
  1855. #define _mm512_roundscale_ph(A, B) \
  1856. (__builtin_ia32_rndscaleph512_mask_round ((A), (B), \
  1857. _mm512_setzero_ph (), \
  1858. (__mmask32)-1, \
  1859. _MM_FROUND_CUR_DIRECTION))
  1860. #define _mm512_mask_roundscale_ph(A, B, C, D) \
  1861. (__builtin_ia32_rndscaleph512_mask_round ((C), (D), (A), (B), \
  1862. _MM_FROUND_CUR_DIRECTION))
  1863. #define _mm512_maskz_roundscale_ph(A, B, C) \
  1864. (__builtin_ia32_rndscaleph512_mask_round ((B), (C), \
  1865. _mm512_setzero_ph (), \
  1866. (A), \
  1867. _MM_FROUND_CUR_DIRECTION))
  1868. #define _mm512_roundscale_round_ph(A, B, C) \
  1869. (__builtin_ia32_rndscaleph512_mask_round ((A), (B), \
  1870. _mm512_setzero_ph (), \
  1871. (__mmask32)-1, (C)))
  1872. #define _mm512_mask_roundscale_round_ph(A, B, C, D, E) \
  1873. (__builtin_ia32_rndscaleph512_mask_round ((C), (D), (A), (B), (E)))
  1874. #define _mm512_maskz_roundscale_round_ph(A, B, C, D) \
  1875. (__builtin_ia32_rndscaleph512_mask_round ((B), (C), \
  1876. _mm512_setzero_ph (), \
  1877. (A), (D)))
  1878. #endif /* __OPTIMIZE__ */
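/* Illustrative sketch (editorial addition): vrndscaleph rounds each element
   to the precision selected by the immediate (bits 7:4 give the number of
   fraction bits kept, the low bits select the rounding behaviour -- an
   assumption based on the other vrndscale intrinsics).  An immediate of 0
   rounds to the nearest integer.  Names are hypothetical.

     __m512h x = _mm512_set1_ph ((_Float16) 2.5f);
     __m512h r = _mm512_roundscale_ph (x, 0);   // 2.0 (round to nearest even)
*/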
  1879. /* Intrinsics vrndscalesh. */
  1880. #ifdef __OPTIMIZE__
  1881. extern __inline __m128h
  1882. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1883. _mm_roundscale_sh (__m128h __A, __m128h __B, int __C)
  1884. {
  1885. return __builtin_ia32_rndscalesh_mask_round (__A, __B, __C,
  1886. _mm_setzero_ph (),
  1887. (__mmask8) -1,
  1888. _MM_FROUND_CUR_DIRECTION);
  1889. }
  1890. extern __inline __m128h
  1891. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1892. _mm_mask_roundscale_sh (__m128h __A, __mmask8 __B, __m128h __C,
  1893. __m128h __D, int __E)
  1894. {
  1895. return __builtin_ia32_rndscalesh_mask_round (__C, __D, __E, __A, __B,
  1896. _MM_FROUND_CUR_DIRECTION);
  1897. }
  1898. extern __inline __m128h
  1899. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1900. _mm_maskz_roundscale_sh (__mmask8 __A, __m128h __B, __m128h __C, int __D)
  1901. {
  1902. return __builtin_ia32_rndscalesh_mask_round (__B, __C, __D,
  1903. _mm_setzero_ph (), __A,
  1904. _MM_FROUND_CUR_DIRECTION);
  1905. }
  1906. extern __inline __m128h
  1907. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1908. _mm_roundscale_round_sh (__m128h __A, __m128h __B, int __C, const int __D)
  1909. {
  1910. return __builtin_ia32_rndscalesh_mask_round (__A, __B, __C,
  1911. _mm_setzero_ph (),
  1912. (__mmask8) -1,
  1913. __D);
  1914. }
  1915. extern __inline __m128h
  1916. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1917. _mm_mask_roundscale_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
  1918. __m128h __D, int __E, const int __F)
  1919. {
  1920. return __builtin_ia32_rndscalesh_mask_round (__C, __D, __E,
  1921. __A, __B, __F);
  1922. }
  1923. extern __inline __m128h
  1924. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1925. _mm_maskz_roundscale_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
  1926. int __D, const int __E)
  1927. {
  1928. return __builtin_ia32_rndscalesh_mask_round (__B, __C, __D,
  1929. _mm_setzero_ph (),
  1930. __A, __E);
  1931. }
  1932. #else
  1933. #define _mm_roundscale_sh(A, B, C) \
  1934. (__builtin_ia32_rndscalesh_mask_round ((A), (B), (C), \
  1935. _mm_setzero_ph (), \
  1936. (__mmask8)-1, \
  1937. _MM_FROUND_CUR_DIRECTION))
  1938. #define _mm_mask_roundscale_sh(A, B, C, D, E) \
  1939. (__builtin_ia32_rndscalesh_mask_round ((C), (D), (E), (A), (B), \
  1940. _MM_FROUND_CUR_DIRECTION))
  1941. #define _mm_maskz_roundscale_sh(A, B, C, D) \
  1942. (__builtin_ia32_rndscalesh_mask_round ((B), (C), (D), \
  1943. _mm_setzero_ph (), \
  1944. (A), _MM_FROUND_CUR_DIRECTION))
  1945. #define _mm_roundscale_round_sh(A, B, C, D) \
  1946. (__builtin_ia32_rndscalesh_mask_round ((A), (B), (C), \
  1947. _mm_setzero_ph (), \
  1948. (__mmask8)-1, (D)))
  1949. #define _mm_mask_roundscale_round_sh(A, B, C, D, E, F) \
  1950. (__builtin_ia32_rndscalesh_mask_round ((C), (D), (E), (A), (B), (F)))
  1951. #define _mm_maskz_roundscale_round_sh(A, B, C, D, E) \
  1952. (__builtin_ia32_rndscalesh_mask_round ((B), (C), (D), \
  1953. _mm_setzero_ph (), \
  1954. (A), (E)))
  1955. #endif /* __OPTIMIZE__ */
  1956. /* Intrinsics vfpclasssh. */
  1957. #ifdef __OPTIMIZE__
  1958. extern __inline __mmask8
  1959. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1960. _mm_fpclass_sh_mask (__m128h __A, const int __imm)
  1961. {
  1962. return (__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) __A, __imm,
  1963. (__mmask8) -1);
  1964. }
  1965. extern __inline __mmask8
  1966. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1967. _mm_mask_fpclass_sh_mask (__mmask8 __U, __m128h __A, const int __imm)
  1968. {
  1969. return (__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) __A, __imm, __U);
  1970. }
  1971. #else
  1972. #define _mm_fpclass_sh_mask(X, C) \
  1973. ((__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) (__m128h) (X), \
1974. (int) (C), (__mmask8) (-1)))
  1975. #define _mm_mask_fpclass_sh_mask(U, X, C) \
  1976. ((__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) (__m128h) (X), \
  1977. (int) (C), (__mmask8) (U)))
  1978. #endif /* __OPTIMIZE__ */
  1979. /* Intrinsics vfpclassph. */
  1980. #ifdef __OPTIMIZE__
  1981. extern __inline __mmask32
  1982. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1983. _mm512_mask_fpclass_ph_mask (__mmask32 __U, __m512h __A,
  1984. const int __imm)
  1985. {
  1986. return (__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) __A,
  1987. __imm, __U);
  1988. }
  1989. extern __inline __mmask32
  1990. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1991. _mm512_fpclass_ph_mask (__m512h __A, const int __imm)
  1992. {
  1993. return (__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) __A,
  1994. __imm,
  1995. (__mmask32) -1);
  1996. }
  1997. #else
  1998. #define _mm512_mask_fpclass_ph_mask(u, x, c) \
  1999. ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \
2000. (int) (c), (__mmask32) (u)))
  2001. #define _mm512_fpclass_ph_mask(x, c) \
  2002. ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \
2003. (int) (c), (__mmask32) -1))
2004. #endif /* __OPTIMIZE__ */
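/* Illustrative sketch (editorial addition): vfpclasssh/vfpclassph test each
   element against the categories selected by the immediate (NaN, +/-0,
   +/-Inf, denormal, negative, ...) and set the corresponding mask bit when
   any selected category matches.  The 0x18 immediate below assumes the usual
   AVX-512 encoding of +Inf (0x08) and -Inf (0x10); names are hypothetical.

     __m512h   x   = _mm512_set1_ph ((_Float16) 1.0f);
     __mmask32 inf = _mm512_fpclass_ph_mask (x, 0x18);   // 0: no lane is Inf
*/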
  2005. /* Intrinsics vgetexpph, vgetexpsh. */
  2006. extern __inline __m128h
  2007. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2008. _mm_getexp_sh (__m128h __A, __m128h __B)
  2009. {
  2010. return (__m128h)
  2011. __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, (__v8hf) __B,
  2012. (__v8hf) _mm_setzero_ph (),
  2013. (__mmask8) -1,
  2014. _MM_FROUND_CUR_DIRECTION);
  2015. }
  2016. extern __inline __m128h
  2017. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2018. _mm_mask_getexp_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
  2019. {
  2020. return (__m128h)
  2021. __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, (__v8hf) __B,
  2022. (__v8hf) __W, (__mmask8) __U,
  2023. _MM_FROUND_CUR_DIRECTION);
  2024. }
  2025. extern __inline __m128h
  2026. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2027. _mm_maskz_getexp_sh (__mmask8 __U, __m128h __A, __m128h __B)
  2028. {
  2029. return (__m128h)
  2030. __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, (__v8hf) __B,
  2031. (__v8hf) _mm_setzero_ph (),
  2032. (__mmask8) __U,
  2033. _MM_FROUND_CUR_DIRECTION);
  2034. }
  2035. extern __inline __m512h
  2036. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2037. _mm512_getexp_ph (__m512h __A)
  2038. {
  2039. return (__m512h)
  2040. __builtin_ia32_getexpph512_mask ((__v32hf) __A,
  2041. (__v32hf) _mm512_setzero_ph (),
  2042. (__mmask32) -1, _MM_FROUND_CUR_DIRECTION);
  2043. }
  2044. extern __inline __m512h
  2045. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2046. _mm512_mask_getexp_ph (__m512h __W, __mmask32 __U, __m512h __A)
  2047. {
  2048. return (__m512h)
  2049. __builtin_ia32_getexpph512_mask ((__v32hf) __A, (__v32hf) __W,
  2050. (__mmask32) __U, _MM_FROUND_CUR_DIRECTION);
  2051. }
  2052. extern __inline __m512h
  2053. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2054. _mm512_maskz_getexp_ph (__mmask32 __U, __m512h __A)
  2055. {
  2056. return (__m512h)
  2057. __builtin_ia32_getexpph512_mask ((__v32hf) __A,
  2058. (__v32hf) _mm512_setzero_ph (),
  2059. (__mmask32) __U, _MM_FROUND_CUR_DIRECTION);
  2060. }
  2061. #ifdef __OPTIMIZE__
  2062. extern __inline __m128h
  2063. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2064. _mm_getexp_round_sh (__m128h __A, __m128h __B, const int __R)
  2065. {
  2066. return (__m128h) __builtin_ia32_getexpsh_mask_round ((__v8hf) __A,
  2067. (__v8hf) __B,
  2068. _mm_setzero_ph (),
  2069. (__mmask8) -1,
  2070. __R);
  2071. }
  2072. extern __inline __m128h
  2073. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2074. _mm_mask_getexp_round_sh (__m128h __W, __mmask8 __U, __m128h __A,
  2075. __m128h __B, const int __R)
  2076. {
  2077. return (__m128h) __builtin_ia32_getexpsh_mask_round ((__v8hf) __A,
  2078. (__v8hf) __B,
  2079. (__v8hf) __W,
  2080. (__mmask8) __U, __R);
  2081. }
  2082. extern __inline __m128h
  2083. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2084. _mm_maskz_getexp_round_sh (__mmask8 __U, __m128h __A, __m128h __B,
  2085. const int __R)
  2086. {
  2087. return (__m128h) __builtin_ia32_getexpsh_mask_round ((__v8hf) __A,
  2088. (__v8hf) __B,
  2089. (__v8hf)
  2090. _mm_setzero_ph (),
  2091. (__mmask8) __U, __R);
  2092. }
  2093. extern __inline __m512h
  2094. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2095. _mm512_getexp_round_ph (__m512h __A, const int __R)
  2096. {
  2097. return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
  2098. (__v32hf)
  2099. _mm512_setzero_ph (),
  2100. (__mmask32) -1, __R);
  2101. }
  2102. extern __inline __m512h
  2103. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2104. _mm512_mask_getexp_round_ph (__m512h __W, __mmask32 __U, __m512h __A,
  2105. const int __R)
  2106. {
  2107. return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
  2108. (__v32hf) __W,
  2109. (__mmask32) __U, __R);
  2110. }
  2111. extern __inline __m512h
  2112. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2113. _mm512_maskz_getexp_round_ph (__mmask32 __U, __m512h __A, const int __R)
  2114. {
  2115. return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
  2116. (__v32hf)
  2117. _mm512_setzero_ph (),
  2118. (__mmask32) __U, __R);
  2119. }
  2120. #else
  2121. #define _mm_getexp_round_sh(A, B, R) \
  2122. ((__m128h)__builtin_ia32_getexpsh_mask_round((__v8hf)(__m128h)(A), \
  2123. (__v8hf)(__m128h)(B), \
  2124. (__v8hf)_mm_setzero_ph(), \
  2125. (__mmask8)-1, R))
  2126. #define _mm_mask_getexp_round_sh(W, U, A, B, C) \
  2127. (__m128h)__builtin_ia32_getexpsh_mask_round(A, B, W, U, C)
  2128. #define _mm_maskz_getexp_round_sh(U, A, B, C) \
  2129. (__m128h)__builtin_ia32_getexpsh_mask_round(A, B, \
  2130. (__v8hf)_mm_setzero_ph(), \
  2131. U, C)
  2132. #define _mm512_getexp_round_ph(A, R) \
  2133. ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \
  2134. (__v32hf)_mm512_setzero_ph(), (__mmask32)-1, R))
  2135. #define _mm512_mask_getexp_round_ph(W, U, A, R) \
  2136. ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \
  2137. (__v32hf)(__m512h)(W), (__mmask32)(U), R))
  2138. #define _mm512_maskz_getexp_round_ph(U, A, R) \
  2139. ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \
  2140. (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), R))
  2141. #endif /* __OPTIMIZE__ */
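/* Illustrative sketch (editorial addition): vgetexpph returns
   floor(log2(|x|)) of each element as a half-precision value, the
   counterpart of vscalefph above.  Names are hypothetical.

     __m512h x = _mm512_set1_ph ((_Float16) 12.0f);
     __m512h e = _mm512_getexp_ph (x);   // 3.0 in every lane (2^3 <= 12 < 2^4)
*/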
  2142. /* Intrinsics vgetmantph, vgetmantsh. */
  2143. #ifdef __OPTIMIZE__
  2144. extern __inline __m128h
  2145. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2146. _mm_getmant_sh (__m128h __A, __m128h __B,
  2147. _MM_MANTISSA_NORM_ENUM __C,
  2148. _MM_MANTISSA_SIGN_ENUM __D)
  2149. {
  2150. return (__m128h)
  2151. __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, (__v8hf) __B,
  2152. (__D << 2) | __C, _mm_setzero_ph (),
  2153. (__mmask8) -1,
  2154. _MM_FROUND_CUR_DIRECTION);
  2155. }
  2156. extern __inline __m128h
  2157. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2158. _mm_mask_getmant_sh (__m128h __W, __mmask8 __U, __m128h __A,
  2159. __m128h __B, _MM_MANTISSA_NORM_ENUM __C,
  2160. _MM_MANTISSA_SIGN_ENUM __D)
  2161. {
  2162. return (__m128h)
  2163. __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, (__v8hf) __B,
  2164. (__D << 2) | __C, (__v8hf) __W,
  2165. __U, _MM_FROUND_CUR_DIRECTION);
  2166. }
  2167. extern __inline __m128h
  2168. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2169. _mm_maskz_getmant_sh (__mmask8 __U, __m128h __A, __m128h __B,
  2170. _MM_MANTISSA_NORM_ENUM __C,
  2171. _MM_MANTISSA_SIGN_ENUM __D)
  2172. {
  2173. return (__m128h)
  2174. __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, (__v8hf) __B,
  2175. (__D << 2) | __C,
  2176. (__v8hf) _mm_setzero_ph(),
  2177. __U, _MM_FROUND_CUR_DIRECTION);
  2178. }
  2179. extern __inline __m512h
  2180. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2181. _mm512_getmant_ph (__m512h __A, _MM_MANTISSA_NORM_ENUM __B,
  2182. _MM_MANTISSA_SIGN_ENUM __C)
  2183. {
  2184. return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
  2185. (__C << 2) | __B,
  2186. _mm512_setzero_ph (),
  2187. (__mmask32) -1,
  2188. _MM_FROUND_CUR_DIRECTION);
  2189. }
  2190. extern __inline __m512h
  2191. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2192. _mm512_mask_getmant_ph (__m512h __W, __mmask32 __U, __m512h __A,
  2193. _MM_MANTISSA_NORM_ENUM __B,
  2194. _MM_MANTISSA_SIGN_ENUM __C)
  2195. {
  2196. return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
  2197. (__C << 2) | __B,
  2198. (__v32hf) __W, __U,
  2199. _MM_FROUND_CUR_DIRECTION);
  2200. }
  2201. extern __inline __m512h
  2202. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2203. _mm512_maskz_getmant_ph (__mmask32 __U, __m512h __A,
  2204. _MM_MANTISSA_NORM_ENUM __B,
  2205. _MM_MANTISSA_SIGN_ENUM __C)
  2206. {
  2207. return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
  2208. (__C << 2) | __B,
  2209. (__v32hf)
  2210. _mm512_setzero_ph (),
  2211. __U,
  2212. _MM_FROUND_CUR_DIRECTION);
  2213. }
  2214. extern __inline __m128h
  2215. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2216. _mm_getmant_round_sh (__m128h __A, __m128h __B,
  2217. _MM_MANTISSA_NORM_ENUM __C,
  2218. _MM_MANTISSA_SIGN_ENUM __D, const int __R)
  2219. {
  2220. return (__m128h) __builtin_ia32_getmantsh_mask_round ((__v8hf) __A,
  2221. (__v8hf) __B,
  2222. (__D << 2) | __C,
  2223. _mm_setzero_ph (),
  2224. (__mmask8) -1,
  2225. __R);
  2226. }
  2227. extern __inline __m128h
  2228. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2229. _mm_mask_getmant_round_sh (__m128h __W, __mmask8 __U, __m128h __A,
  2230. __m128h __B, _MM_MANTISSA_NORM_ENUM __C,
  2231. _MM_MANTISSA_SIGN_ENUM __D, const int __R)
  2232. {
  2233. return (__m128h) __builtin_ia32_getmantsh_mask_round ((__v8hf) __A,
  2234. (__v8hf) __B,
  2235. (__D << 2) | __C,
  2236. (__v8hf) __W,
  2237. __U, __R);
  2238. }
  2239. extern __inline __m128h
  2240. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2241. _mm_maskz_getmant_round_sh (__mmask8 __U, __m128h __A, __m128h __B,
  2242. _MM_MANTISSA_NORM_ENUM __C,
  2243. _MM_MANTISSA_SIGN_ENUM __D, const int __R)
  2244. {
  2245. return (__m128h) __builtin_ia32_getmantsh_mask_round ((__v8hf) __A,
  2246. (__v8hf) __B,
  2247. (__D << 2) | __C,
  2248. (__v8hf)
  2249. _mm_setzero_ph(),
  2250. __U, __R);
  2251. }
  2252. extern __inline __m512h
  2253. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2254. _mm512_getmant_round_ph (__m512h __A, _MM_MANTISSA_NORM_ENUM __B,
  2255. _MM_MANTISSA_SIGN_ENUM __C, const int __R)
  2256. {
  2257. return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
  2258. (__C << 2) | __B,
  2259. _mm512_setzero_ph (),
  2260. (__mmask32) -1, __R);
  2261. }
  2262. extern __inline __m512h
  2263. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2264. _mm512_mask_getmant_round_ph (__m512h __W, __mmask32 __U, __m512h __A,
  2265. _MM_MANTISSA_NORM_ENUM __B,
  2266. _MM_MANTISSA_SIGN_ENUM __C, const int __R)
  2267. {
  2268. return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
  2269. (__C << 2) | __B,
  2270. (__v32hf) __W, __U,
  2271. __R);
  2272. }
  2273. extern __inline __m512h
  2274. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2275. _mm512_maskz_getmant_round_ph (__mmask32 __U, __m512h __A,
  2276. _MM_MANTISSA_NORM_ENUM __B,
  2277. _MM_MANTISSA_SIGN_ENUM __C, const int __R)
  2278. {
  2279. return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
  2280. (__C << 2) | __B,
  2281. (__v32hf)
  2282. _mm512_setzero_ph (),
  2283. __U, __R);
  2284. }
  2285. #else
  2286. #define _mm512_getmant_ph(X, B, C) \
  2287. ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
  2288. (int)(((C)<<2) | (B)), \
  2289. (__v32hf)(__m512h) \
  2290. _mm512_setzero_ph(), \
  2291. (__mmask32)-1, \
  2292. _MM_FROUND_CUR_DIRECTION))
  2293. #define _mm512_mask_getmant_ph(W, U, X, B, C) \
  2294. ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
  2295. (int)(((C)<<2) | (B)), \
  2296. (__v32hf)(__m512h)(W), \
  2297. (__mmask32)(U), \
  2298. _MM_FROUND_CUR_DIRECTION))
  2299. #define _mm512_maskz_getmant_ph(U, X, B, C) \
  2300. ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
  2301. (int)(((C)<<2) | (B)), \
  2302. (__v32hf)(__m512h) \
  2303. _mm512_setzero_ph(), \
  2304. (__mmask32)(U), \
  2305. _MM_FROUND_CUR_DIRECTION))
  2306. #define _mm_getmant_sh(X, Y, C, D) \
  2307. ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \
  2308. (__v8hf)(__m128h)(Y), \
  2309. (int)(((D)<<2) | (C)), \
  2310. (__v8hf)(__m128h) \
  2311. _mm_setzero_ph (), \
  2312. (__mmask8)-1, \
  2313. _MM_FROUND_CUR_DIRECTION))
  2314. #define _mm_mask_getmant_sh(W, U, X, Y, C, D) \
  2315. ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \
  2316. (__v8hf)(__m128h)(Y), \
  2317. (int)(((D)<<2) | (C)), \
  2318. (__v8hf)(__m128h)(W), \
  2319. (__mmask8)(U), \
  2320. _MM_FROUND_CUR_DIRECTION))
  2321. #define _mm_maskz_getmant_sh(U, X, Y, C, D) \
  2322. ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \
  2323. (__v8hf)(__m128h)(Y), \
  2324. (int)(((D)<<2) | (C)), \
  2325. (__v8hf)(__m128h) \
  2326. _mm_setzero_ph(), \
  2327. (__mmask8)(U), \
  2328. _MM_FROUND_CUR_DIRECTION))
  2329. #define _mm512_getmant_round_ph(X, B, C, R) \
  2330. ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
  2331. (int)(((C)<<2) | (B)), \
  2332. (__v32hf)(__m512h) \
  2333. _mm512_setzero_ph(), \
  2334. (__mmask32)-1, \
  2335. (R)))
  2336. #define _mm512_mask_getmant_round_ph(W, U, X, B, C, R) \
  2337. ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
  2338. (int)(((C)<<2) | (B)), \
  2339. (__v32hf)(__m512h)(W), \
  2340. (__mmask32)(U), \
  2341. (R)))
  2342. #define _mm512_maskz_getmant_round_ph(U, X, B, C, R) \
  2343. ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
  2344. (int)(((C)<<2) | (B)), \
  2345. (__v32hf)(__m512h) \
  2346. _mm512_setzero_ph(), \
  2347. (__mmask32)(U), \
  2348. (R)))
  2349. #define _mm_getmant_round_sh(X, Y, C, D, R) \
  2350. ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \
  2351. (__v8hf)(__m128h)(Y), \
  2352. (int)(((D)<<2) | (C)), \
  2353. (__v8hf)(__m128h) \
  2354. _mm_setzero_ph (), \
  2355. (__mmask8)-1, \
  2356. (R)))
  2357. #define _mm_mask_getmant_round_sh(W, U, X, Y, C, D, R) \
  2358. ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \
  2359. (__v8hf)(__m128h)(Y), \
  2360. (int)(((D)<<2) | (C)), \
  2361. (__v8hf)(__m128h)(W), \
  2362. (__mmask8)(U), \
  2363. (R)))
  2364. #define _mm_maskz_getmant_round_sh(U, X, Y, C, D, R) \
  2365. ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \
  2366. (__v8hf)(__m128h)(Y), \
  2367. (int)(((D)<<2) | (C)), \
  2368. (__v8hf)(__m128h) \
  2369. _mm_setzero_ph(), \
  2370. (__mmask8)(U), \
  2371. (R)))
  2372. #endif /* __OPTIMIZE__ */
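/* Usage sketch (illustration only, kept disabled): combining the scalar
   getexp/getmant intrinsics above to split one _Float16 into exponent and
   normalized mantissa.  Assumes _mm_set_sh, _mm_cvtsh_h and the
   non-rounding _mm_getexp_sh from earlier in this header, plus the
   _MM_MANT_* enumerators from avx512fintrin.h.  */
#if 0
static _Float16
example_decompose_half (_Float16 __x, _Float16 *__exp)
{
  __m128h __v = _mm_set_sh (__x);
  /* Exponent of __x, returned as a half-precision value.  */
  *__exp = _mm_cvtsh_h (_mm_getexp_sh (__v, __v));
  /* Mantissa normalized into [1, 2), keeping the sign of the source.  */
  return _mm_cvtsh_h (_mm_getmant_sh (__v, __v, _MM_MANT_NORM_1_2,
				      _MM_MANT_SIGN_src));
}
#endif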
  2373. /* Intrinsics vmovw. */
  2374. extern __inline __m128i
  2375. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2376. _mm_cvtsi16_si128 (short __A)
  2377. {
  2378. return _mm_set_epi16 (0, 0, 0, 0, 0, 0, 0, __A);
  2379. }
  2380. extern __inline short
  2381. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2382. _mm_cvtsi128_si16 (__m128i __A)
  2383. {
  2384. return __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, 0);
  2385. }
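/* Usage sketch (illustration only, kept disabled): the vmovw pair above
   moves a 16-bit integer in and out of element 0 of a vector; all other
   elements are zeroed on the way in.  */
#if 0
static short
example_roundtrip_word (short __w)
{
  __m128i __v = _mm_cvtsi16_si128 (__w);  /* __w in element 0, rest zero.  */
  return _mm_cvtsi128_si16 (__v);	  /* read element 0 back.  */
}
#endif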
  2386. /* Intrinsics vmovsh. */
  2387. extern __inline __m128h
  2388. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2389. _mm_mask_load_sh (__m128h __A, __mmask8 __B, _Float16 const* __C)
  2390. {
  2391. return __builtin_ia32_loadsh_mask (__C, __A, __B);
  2392. }
  2393. extern __inline __m128h
  2394. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2395. _mm_maskz_load_sh (__mmask8 __A, _Float16 const* __B)
  2396. {
  2397. return __builtin_ia32_loadsh_mask (__B, _mm_setzero_ph (), __A);
  2398. }
  2399. extern __inline void
  2400. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2401. _mm_mask_store_sh (_Float16 const* __A, __mmask8 __B, __m128h __C)
  2402. {
  2403. __builtin_ia32_storesh_mask (__A, __C, __B);
  2404. }
  2405. extern __inline __m128h
  2406. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2407. _mm_move_sh (__m128h __A, __m128h __B)
  2408. {
  2409. __A[0] = __B[0];
  2410. return __A;
  2411. }
  2412. extern __inline __m128h
  2413. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2414. _mm_mask_move_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
  2415. {
  2416. return __builtin_ia32_vmovsh_mask (__C, __D, __A, __B);
  2417. }
  2418. extern __inline __m128h
  2419. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2420. _mm_maskz_move_sh (__mmask8 __A, __m128h __B, __m128h __C)
  2421. {
  2422. return __builtin_ia32_vmovsh_mask (__B, __C, _mm_setzero_ph (), __A);
  2423. }
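/* Usage sketch (illustration only, kept disabled): masked scalar load and
   store of a single _Float16.  Only bit 0 of the mask is consulted; with
   the bit clear the load falls back to the zero vector passed as merge
   source and the store writes nothing.  */
#if 0
static void
example_copy_low_half (_Float16 *__dst, _Float16 const *__src, __mmask8 __m)
{
  __m128h __t = _mm_mask_load_sh (_mm_setzero_ph (), __m, __src);
  _mm_mask_store_sh (__dst, __m, __t);
}
#endif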
  2424. /* Intrinsics vcvtph2dq. */
  2425. extern __inline __m512i
  2426. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2427. _mm512_cvtph_epi32 (__m256h __A)
  2428. {
  2429. return (__m512i)
  2430. __builtin_ia32_vcvtph2dq512_mask_round (__A,
  2431. (__v16si)
  2432. _mm512_setzero_si512 (),
  2433. (__mmask16) -1,
  2434. _MM_FROUND_CUR_DIRECTION);
  2435. }
  2436. extern __inline __m512i
  2437. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2438. _mm512_mask_cvtph_epi32 (__m512i __A, __mmask16 __B, __m256h __C)
  2439. {
  2440. return (__m512i)
  2441. __builtin_ia32_vcvtph2dq512_mask_round (__C,
  2442. (__v16si) __A,
  2443. __B,
  2444. _MM_FROUND_CUR_DIRECTION);
  2445. }
  2446. extern __inline __m512i
  2447. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2448. _mm512_maskz_cvtph_epi32 (__mmask16 __A, __m256h __B)
  2449. {
  2450. return (__m512i)
  2451. __builtin_ia32_vcvtph2dq512_mask_round (__B,
  2452. (__v16si)
  2453. _mm512_setzero_si512 (),
  2454. __A,
  2455. _MM_FROUND_CUR_DIRECTION);
  2456. }
  2457. #ifdef __OPTIMIZE__
  2458. extern __inline __m512i
  2459. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2460. _mm512_cvt_roundph_epi32 (__m256h __A, int __B)
  2461. {
  2462. return (__m512i)
  2463. __builtin_ia32_vcvtph2dq512_mask_round (__A,
  2464. (__v16si)
  2465. _mm512_setzero_si512 (),
  2466. (__mmask16) -1,
  2467. __B);
  2468. }
  2469. extern __inline __m512i
  2470. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2471. _mm512_mask_cvt_roundph_epi32 (__m512i __A, __mmask16 __B, __m256h __C, int __D)
  2472. {
  2473. return (__m512i)
  2474. __builtin_ia32_vcvtph2dq512_mask_round (__C,
  2475. (__v16si) __A,
  2476. __B,
  2477. __D);
  2478. }
  2479. extern __inline __m512i
  2480. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2481. _mm512_maskz_cvt_roundph_epi32 (__mmask16 __A, __m256h __B, int __C)
  2482. {
  2483. return (__m512i)
  2484. __builtin_ia32_vcvtph2dq512_mask_round (__B,
  2485. (__v16si)
  2486. _mm512_setzero_si512 (),
  2487. __A,
  2488. __C);
  2489. }
  2490. #else
  2491. #define _mm512_cvt_roundph_epi32(A, B) \
  2492. ((__m512i) \
  2493. __builtin_ia32_vcvtph2dq512_mask_round ((A), \
  2494. (__v16si) \
  2495. _mm512_setzero_si512 (), \
  2496. (__mmask16)-1, \
  2497. (B)))
  2498. #define _mm512_mask_cvt_roundph_epi32(A, B, C, D) \
  2499. ((__m512i) \
  2500. __builtin_ia32_vcvtph2dq512_mask_round ((C), (__v16si)(A), (B), (D)))
  2501. #define _mm512_maskz_cvt_roundph_epi32(A, B, C) \
  2502. ((__m512i) \
  2503. __builtin_ia32_vcvtph2dq512_mask_round ((B), \
  2504. (__v16si) \
  2505. _mm512_setzero_si512 (), \
  2506. (A), \
  2507. (C)))
  2508. #endif /* __OPTIMIZE__ */
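/* Usage sketch (illustration only, kept disabled): widening conversion of
   sixteen _Float16 values to 32-bit integers with an explicit rounding
   request; _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC is the usual way
   to ask for round-to-nearest-even without raising exceptions.  */
#if 0
static __m512i
example_ph_to_epi32 (__m256h __v)
{
  return _mm512_cvt_roundph_epi32 (__v, _MM_FROUND_TO_NEAREST_INT
					| _MM_FROUND_NO_EXC);
}
#endif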
  2509. /* Intrinsics vcvtph2udq. */
  2510. extern __inline __m512i
  2511. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2512. _mm512_cvtph_epu32 (__m256h __A)
  2513. {
  2514. return (__m512i)
  2515. __builtin_ia32_vcvtph2udq512_mask_round (__A,
  2516. (__v16si)
  2517. _mm512_setzero_si512 (),
  2518. (__mmask16) -1,
  2519. _MM_FROUND_CUR_DIRECTION);
  2520. }
  2521. extern __inline __m512i
  2522. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2523. _mm512_mask_cvtph_epu32 (__m512i __A, __mmask16 __B, __m256h __C)
  2524. {
  2525. return (__m512i)
  2526. __builtin_ia32_vcvtph2udq512_mask_round (__C,
  2527. (__v16si) __A,
  2528. __B,
  2529. _MM_FROUND_CUR_DIRECTION);
  2530. }
  2531. extern __inline __m512i
  2532. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2533. _mm512_maskz_cvtph_epu32 (__mmask16 __A, __m256h __B)
  2534. {
  2535. return (__m512i)
  2536. __builtin_ia32_vcvtph2udq512_mask_round (__B,
  2537. (__v16si)
  2538. _mm512_setzero_si512 (),
  2539. __A,
  2540. _MM_FROUND_CUR_DIRECTION);
  2541. }
  2542. #ifdef __OPTIMIZE__
  2543. extern __inline __m512i
  2544. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2545. _mm512_cvt_roundph_epu32 (__m256h __A, int __B)
  2546. {
  2547. return (__m512i)
  2548. __builtin_ia32_vcvtph2udq512_mask_round (__A,
  2549. (__v16si)
  2550. _mm512_setzero_si512 (),
  2551. (__mmask16) -1,
  2552. __B);
  2553. }
  2554. extern __inline __m512i
  2555. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2556. _mm512_mask_cvt_roundph_epu32 (__m512i __A, __mmask16 __B, __m256h __C, int __D)
  2557. {
  2558. return (__m512i)
  2559. __builtin_ia32_vcvtph2udq512_mask_round (__C,
  2560. (__v16si) __A,
  2561. __B,
  2562. __D);
  2563. }
  2564. extern __inline __m512i
  2565. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2566. _mm512_maskz_cvt_roundph_epu32 (__mmask16 __A, __m256h __B, int __C)
  2567. {
  2568. return (__m512i)
  2569. __builtin_ia32_vcvtph2udq512_mask_round (__B,
  2570. (__v16si)
  2571. _mm512_setzero_si512 (),
  2572. __A,
  2573. __C);
  2574. }
  2575. #else
  2576. #define _mm512_cvt_roundph_epu32(A, B) \
  2577. ((__m512i) \
  2578. __builtin_ia32_vcvtph2udq512_mask_round ((A), \
  2579. (__v16si) \
  2580. _mm512_setzero_si512 (), \
  2581. (__mmask16)-1, \
  2582. (B)))
  2583. #define _mm512_mask_cvt_roundph_epu32(A, B, C, D) \
  2584. ((__m512i) \
  2585. __builtin_ia32_vcvtph2udq512_mask_round ((C), (__v16si)(A), (B), (D)))
  2586. #define _mm512_maskz_cvt_roundph_epu32(A, B, C) \
  2587. ((__m512i) \
  2588. __builtin_ia32_vcvtph2udq512_mask_round ((B), \
  2589. (__v16si) \
  2590. _mm512_setzero_si512 (), \
  2591. (A), \
  2592. (C)))
  2593. #endif /* __OPTIMIZE__ */
  2594. /* Intrinsics vcvttph2dq. */
  2595. extern __inline __m512i
  2596. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2597. _mm512_cvttph_epi32 (__m256h __A)
  2598. {
  2599. return (__m512i)
  2600. __builtin_ia32_vcvttph2dq512_mask_round (__A,
  2601. (__v16si)
  2602. _mm512_setzero_si512 (),
  2603. (__mmask16) -1,
  2604. _MM_FROUND_CUR_DIRECTION);
  2605. }
  2606. extern __inline __m512i
  2607. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2608. _mm512_mask_cvttph_epi32 (__m512i __A, __mmask16 __B, __m256h __C)
  2609. {
  2610. return (__m512i)
  2611. __builtin_ia32_vcvttph2dq512_mask_round (__C,
  2612. (__v16si) __A,
  2613. __B,
  2614. _MM_FROUND_CUR_DIRECTION);
  2615. }
  2616. extern __inline __m512i
  2617. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2618. _mm512_maskz_cvttph_epi32 (__mmask16 __A, __m256h __B)
  2619. {
  2620. return (__m512i)
  2621. __builtin_ia32_vcvttph2dq512_mask_round (__B,
  2622. (__v16si)
  2623. _mm512_setzero_si512 (),
  2624. __A,
  2625. _MM_FROUND_CUR_DIRECTION);
  2626. }
  2627. #ifdef __OPTIMIZE__
  2628. extern __inline __m512i
  2629. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2630. _mm512_cvtt_roundph_epi32 (__m256h __A, int __B)
  2631. {
  2632. return (__m512i)
  2633. __builtin_ia32_vcvttph2dq512_mask_round (__A,
  2634. (__v16si)
  2635. _mm512_setzero_si512 (),
  2636. (__mmask16) -1,
  2637. __B);
  2638. }
  2639. extern __inline __m512i
  2640. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2641. _mm512_mask_cvtt_roundph_epi32 (__m512i __A, __mmask16 __B,
  2642. __m256h __C, int __D)
  2643. {
  2644. return (__m512i)
  2645. __builtin_ia32_vcvttph2dq512_mask_round (__C,
  2646. (__v16si) __A,
  2647. __B,
  2648. __D);
  2649. }
  2650. extern __inline __m512i
  2651. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2652. _mm512_maskz_cvtt_roundph_epi32 (__mmask16 __A, __m256h __B, int __C)
  2653. {
  2654. return (__m512i)
  2655. __builtin_ia32_vcvttph2dq512_mask_round (__B,
  2656. (__v16si)
  2657. _mm512_setzero_si512 (),
  2658. __A,
  2659. __C);
  2660. }
  2661. #else
  2662. #define _mm512_cvtt_roundph_epi32(A, B) \
  2663. ((__m512i) \
  2664. __builtin_ia32_vcvttph2dq512_mask_round ((A), \
  2665. (__v16si) \
  2666. (_mm512_setzero_si512 ()), \
  2667. (__mmask16)(-1), (B)))
  2668. #define _mm512_mask_cvtt_roundph_epi32(A, B, C, D) \
  2669. ((__m512i) \
  2670. __builtin_ia32_vcvttph2dq512_mask_round ((C), \
  2671. (__v16si)(A), \
  2672. (B), \
  2673. (D)))
  2674. #define _mm512_maskz_cvtt_roundph_epi32(A, B, C) \
  2675. ((__m512i) \
  2676. __builtin_ia32_vcvttph2dq512_mask_round ((B), \
  2677. (__v16si) \
  2678. _mm512_setzero_si512 (), \
  2679. (A), \
  2680. (C)))
  2681. #endif /* __OPTIMIZE__ */
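/* Usage sketch (illustration only, kept disabled): the vcvttph2dq forms
   above always truncate toward zero, so the extra argument of the
   *_roundph_* variants is only useful for suppressing exceptions.  */
#if 0
static __m512i
example_ph_to_epi32_trunc (__m256h __v, __m512i __fallback, __mmask16 __m)
{
  /* Elements whose mask bit is clear keep the corresponding value from
     __fallback.  */
  return _mm512_mask_cvttph_epi32 (__fallback, __m, __v);
}
#endif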
  2682. /* Intrinsics vcvttph2udq. */
  2683. extern __inline __m512i
  2684. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2685. _mm512_cvttph_epu32 (__m256h __A)
  2686. {
  2687. return (__m512i)
  2688. __builtin_ia32_vcvttph2udq512_mask_round (__A,
  2689. (__v16si)
  2690. _mm512_setzero_si512 (),
  2691. (__mmask16) -1,
  2692. _MM_FROUND_CUR_DIRECTION);
  2693. }
  2694. extern __inline __m512i
  2695. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2696. _mm512_mask_cvttph_epu32 (__m512i __A, __mmask16 __B, __m256h __C)
  2697. {
  2698. return (__m512i)
  2699. __builtin_ia32_vcvttph2udq512_mask_round (__C,
  2700. (__v16si) __A,
  2701. __B,
  2702. _MM_FROUND_CUR_DIRECTION);
  2703. }
  2704. extern __inline __m512i
  2705. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2706. _mm512_maskz_cvttph_epu32 (__mmask16 __A, __m256h __B)
  2707. {
  2708. return (__m512i)
  2709. __builtin_ia32_vcvttph2udq512_mask_round (__B,
  2710. (__v16si)
  2711. _mm512_setzero_si512 (),
  2712. __A,
  2713. _MM_FROUND_CUR_DIRECTION);
  2714. }
  2715. #ifdef __OPTIMIZE__
  2716. extern __inline __m512i
  2717. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2718. _mm512_cvtt_roundph_epu32 (__m256h __A, int __B)
  2719. {
  2720. return (__m512i)
  2721. __builtin_ia32_vcvttph2udq512_mask_round (__A,
  2722. (__v16si)
  2723. _mm512_setzero_si512 (),
  2724. (__mmask16) -1,
  2725. __B);
  2726. }
  2727. extern __inline __m512i
  2728. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2729. _mm512_mask_cvtt_roundph_epu32 (__m512i __A, __mmask16 __B,
  2730. __m256h __C, int __D)
  2731. {
  2732. return (__m512i)
  2733. __builtin_ia32_vcvttph2udq512_mask_round (__C,
  2734. (__v16si) __A,
  2735. __B,
  2736. __D);
  2737. }
  2738. extern __inline __m512i
  2739. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2740. _mm512_maskz_cvtt_roundph_epu32 (__mmask16 __A, __m256h __B, int __C)
  2741. {
  2742. return (__m512i)
  2743. __builtin_ia32_vcvttph2udq512_mask_round (__B,
  2744. (__v16si)
  2745. _mm512_setzero_si512 (),
  2746. __A,
  2747. __C);
  2748. }
  2749. #else
  2750. #define _mm512_cvtt_roundph_epu32(A, B) \
  2751. ((__m512i) \
  2752. __builtin_ia32_vcvttph2udq512_mask_round ((A), \
  2753. (__v16si) \
  2754. _mm512_setzero_si512 (), \
  2755. (__mmask16)-1, \
  2756. (B)))
  2757. #define _mm512_mask_cvtt_roundph_epu32(A, B, C, D) \
  2758. ((__m512i) \
  2759. __builtin_ia32_vcvttph2udq512_mask_round ((C), \
  2760. (__v16si)(A), \
  2761. (B), \
  2762. (D)))
  2763. #define _mm512_maskz_cvtt_roundph_epu32(A, B, C) \
  2764. ((__m512i) \
  2765. __builtin_ia32_vcvttph2udq512_mask_round ((B), \
  2766. (__v16si) \
  2767. _mm512_setzero_si512 (), \
  2768. (A), \
  2769. (C)))
  2770. #endif /* __OPTIMIZE__ */
  2771. /* Intrinsics vcvtdq2ph. */
  2772. extern __inline __m256h
  2773. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2774. _mm512_cvtepi32_ph (__m512i __A)
  2775. {
  2776. return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __A,
  2777. _mm256_setzero_ph (),
  2778. (__mmask16) -1,
  2779. _MM_FROUND_CUR_DIRECTION);
  2780. }
  2781. extern __inline __m256h
  2782. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2783. _mm512_mask_cvtepi32_ph (__m256h __A, __mmask16 __B, __m512i __C)
  2784. {
  2785. return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __C,
  2786. __A,
  2787. __B,
  2788. _MM_FROUND_CUR_DIRECTION);
  2789. }
  2790. extern __inline __m256h
  2791. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2792. _mm512_maskz_cvtepi32_ph (__mmask16 __A, __m512i __B)
  2793. {
  2794. return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __B,
  2795. _mm256_setzero_ph (),
  2796. __A,
  2797. _MM_FROUND_CUR_DIRECTION);
  2798. }
  2799. #ifdef __OPTIMIZE__
  2800. extern __inline __m256h
  2801. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2802. _mm512_cvt_roundepi32_ph (__m512i __A, int __B)
  2803. {
  2804. return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __A,
  2805. _mm256_setzero_ph (),
  2806. (__mmask16) -1,
  2807. __B);
  2808. }
  2809. extern __inline __m256h
  2810. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2811. _mm512_mask_cvt_roundepi32_ph (__m256h __A, __mmask16 __B, __m512i __C, int __D)
  2812. {
  2813. return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __C,
  2814. __A,
  2815. __B,
  2816. __D);
  2817. }
  2818. extern __inline __m256h
  2819. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2820. _mm512_maskz_cvt_roundepi32_ph (__mmask16 __A, __m512i __B, int __C)
  2821. {
  2822. return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __B,
  2823. _mm256_setzero_ph (),
  2824. __A,
  2825. __C);
  2826. }
  2827. #else
  2828. #define _mm512_cvt_roundepi32_ph(A, B) \
  2829. (__builtin_ia32_vcvtdq2ph512_mask_round ((__v16si)(A), \
  2830. _mm256_setzero_ph (), \
  2831. (__mmask16)-1, \
  2832. (B)))
  2833. #define _mm512_mask_cvt_roundepi32_ph(A, B, C, D) \
  2834. (__builtin_ia32_vcvtdq2ph512_mask_round ((__v16si)(C), \
  2835. (A), \
  2836. (B), \
  2837. (D)))
  2838. #define _mm512_maskz_cvt_roundepi32_ph(A, B, C) \
  2839. (__builtin_ia32_vcvtdq2ph512_mask_round ((__v16si)(B), \
  2840. _mm256_setzero_ph (), \
  2841. (A), \
  2842. (C)))
  2843. #endif /* __OPTIMIZE__ */
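/* Usage sketch (illustration only, kept disabled): the reverse direction,
   narrowing sixteen 32-bit integers to _Float16; the result only fills a
   256-bit vector because each element shrinks from 32 to 16 bits.  */
#if 0
static __m256h
example_epi32_to_ph (__m512i __v)
{
  return _mm512_cvtepi32_ph (__v);
}
#endif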
  2844. /* Intrinsics vcvtudq2ph. */
  2845. extern __inline __m256h
  2846. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2847. _mm512_cvtepu32_ph (__m512i __A)
  2848. {
  2849. return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __A,
  2850. _mm256_setzero_ph (),
  2851. (__mmask16) -1,
  2852. _MM_FROUND_CUR_DIRECTION);
  2853. }
  2854. extern __inline __m256h
  2855. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2856. _mm512_mask_cvtepu32_ph (__m256h __A, __mmask16 __B, __m512i __C)
  2857. {
  2858. return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __C,
  2859. __A,
  2860. __B,
  2861. _MM_FROUND_CUR_DIRECTION);
  2862. }
  2863. extern __inline __m256h
  2864. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2865. _mm512_maskz_cvtepu32_ph (__mmask16 __A, __m512i __B)
  2866. {
  2867. return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __B,
  2868. _mm256_setzero_ph (),
  2869. __A,
  2870. _MM_FROUND_CUR_DIRECTION);
  2871. }
  2872. #ifdef __OPTIMIZE__
  2873. extern __inline __m256h
  2874. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2875. _mm512_cvt_roundepu32_ph (__m512i __A, int __B)
  2876. {
  2877. return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __A,
  2878. _mm256_setzero_ph (),
  2879. (__mmask16) -1,
  2880. __B);
  2881. }
  2882. extern __inline __m256h
  2883. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2884. _mm512_mask_cvt_roundepu32_ph (__m256h __A, __mmask16 __B, __m512i __C, int __D)
  2885. {
  2886. return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __C,
  2887. __A,
  2888. __B,
  2889. __D);
  2890. }
  2891. extern __inline __m256h
  2892. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2893. _mm512_maskz_cvt_roundepu32_ph (__mmask16 __A, __m512i __B, int __C)
  2894. {
  2895. return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __B,
  2896. _mm256_setzero_ph (),
  2897. __A,
  2898. __C);
  2899. }
  2900. #else
#define _mm512_cvt_roundepu32_ph(A, B) \
  (__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si)(A), \
					     _mm256_setzero_ph (), \
					     (__mmask16)-1, \
					     (B)))
#define _mm512_mask_cvt_roundepu32_ph(A, B, C, D) \
  (__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si)(C), \
					     (A), \
					     (B), \
					     (D)))
#define _mm512_maskz_cvt_roundepu32_ph(A, B, C) \
  (__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si)(B), \
					     _mm256_setzero_ph (), \
					     (A), \
					     (C)))
  2916. #endif /* __OPTIMIZE__ */
  2917. /* Intrinsics vcvtph2qq. */
  2918. extern __inline __m512i
  2919. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2920. _mm512_cvtph_epi64 (__m128h __A)
  2921. {
  2922. return __builtin_ia32_vcvtph2qq512_mask_round (__A,
  2923. _mm512_setzero_si512 (),
  2924. (__mmask8) -1,
  2925. _MM_FROUND_CUR_DIRECTION);
  2926. }
  2927. extern __inline __m512i
  2928. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2929. _mm512_mask_cvtph_epi64 (__m512i __A, __mmask8 __B, __m128h __C)
  2930. {
  2931. return __builtin_ia32_vcvtph2qq512_mask_round (__C, __A, __B,
  2932. _MM_FROUND_CUR_DIRECTION);
  2933. }
  2934. extern __inline __m512i
  2935. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2936. _mm512_maskz_cvtph_epi64 (__mmask8 __A, __m128h __B)
  2937. {
  2938. return __builtin_ia32_vcvtph2qq512_mask_round (__B,
  2939. _mm512_setzero_si512 (),
  2940. __A,
  2941. _MM_FROUND_CUR_DIRECTION);
  2942. }
  2943. #ifdef __OPTIMIZE__
  2944. extern __inline __m512i
  2945. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2946. _mm512_cvt_roundph_epi64 (__m128h __A, int __B)
  2947. {
  2948. return __builtin_ia32_vcvtph2qq512_mask_round (__A,
  2949. _mm512_setzero_si512 (),
  2950. (__mmask8) -1,
  2951. __B);
  2952. }
  2953. extern __inline __m512i
  2954. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2955. _mm512_mask_cvt_roundph_epi64 (__m512i __A, __mmask8 __B, __m128h __C, int __D)
  2956. {
  2957. return __builtin_ia32_vcvtph2qq512_mask_round (__C, __A, __B, __D);
  2958. }
  2959. extern __inline __m512i
  2960. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2961. _mm512_maskz_cvt_roundph_epi64 (__mmask8 __A, __m128h __B, int __C)
  2962. {
  2963. return __builtin_ia32_vcvtph2qq512_mask_round (__B,
  2964. _mm512_setzero_si512 (),
  2965. __A,
  2966. __C);
  2967. }
  2968. #else
  2969. #define _mm512_cvt_roundph_epi64(A, B) \
  2970. (__builtin_ia32_vcvtph2qq512_mask_round ((A), \
  2971. _mm512_setzero_si512 (), \
  2972. (__mmask8)-1, \
  2973. (B)))
  2974. #define _mm512_mask_cvt_roundph_epi64(A, B, C, D) \
  2975. (__builtin_ia32_vcvtph2qq512_mask_round ((C), (A), (B), (D)))
  2976. #define _mm512_maskz_cvt_roundph_epi64(A, B, C) \
  2977. (__builtin_ia32_vcvtph2qq512_mask_round ((B), \
  2978. _mm512_setzero_si512 (), \
  2979. (A), \
  2980. (C)))
  2981. #endif /* __OPTIMIZE__ */
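/* Usage sketch (illustration only, kept disabled): conversion to 64-bit
   integers consumes only the eight _Float16 values held in a 128-bit
   source, each widening to one quadword of the 512-bit result.  */
#if 0
static __m512i
example_ph_to_epi64 (__m128h __v)
{
  return _mm512_cvtph_epi64 (__v);
}
#endif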
  2982. /* Intrinsics vcvtph2uqq. */
  2983. extern __inline __m512i
  2984. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2985. _mm512_cvtph_epu64 (__m128h __A)
  2986. {
  2987. return __builtin_ia32_vcvtph2uqq512_mask_round (__A,
  2988. _mm512_setzero_si512 (),
  2989. (__mmask8) -1,
  2990. _MM_FROUND_CUR_DIRECTION);
  2991. }
  2992. extern __inline __m512i
  2993. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2994. _mm512_mask_cvtph_epu64 (__m512i __A, __mmask8 __B, __m128h __C)
  2995. {
  2996. return __builtin_ia32_vcvtph2uqq512_mask_round (__C, __A, __B,
  2997. _MM_FROUND_CUR_DIRECTION);
  2998. }
  2999. extern __inline __m512i
  3000. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3001. _mm512_maskz_cvtph_epu64 (__mmask8 __A, __m128h __B)
  3002. {
  3003. return __builtin_ia32_vcvtph2uqq512_mask_round (__B,
  3004. _mm512_setzero_si512 (),
  3005. __A,
  3006. _MM_FROUND_CUR_DIRECTION);
  3007. }
  3008. #ifdef __OPTIMIZE__
  3009. extern __inline __m512i
  3010. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3011. _mm512_cvt_roundph_epu64 (__m128h __A, int __B)
  3012. {
  3013. return __builtin_ia32_vcvtph2uqq512_mask_round (__A,
  3014. _mm512_setzero_si512 (),
  3015. (__mmask8) -1,
  3016. __B);
  3017. }
  3018. extern __inline __m512i
  3019. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3020. _mm512_mask_cvt_roundph_epu64 (__m512i __A, __mmask8 __B, __m128h __C, int __D)
  3021. {
  3022. return __builtin_ia32_vcvtph2uqq512_mask_round (__C, __A, __B, __D);
  3023. }
  3024. extern __inline __m512i
  3025. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3026. _mm512_maskz_cvt_roundph_epu64 (__mmask8 __A, __m128h __B, int __C)
  3027. {
  3028. return __builtin_ia32_vcvtph2uqq512_mask_round (__B,
  3029. _mm512_setzero_si512 (),
  3030. __A,
  3031. __C);
  3032. }
  3033. #else
  3034. #define _mm512_cvt_roundph_epu64(A, B) \
  3035. (__builtin_ia32_vcvtph2uqq512_mask_round ((A), \
  3036. _mm512_setzero_si512 (), \
  3037. (__mmask8)-1, \
  3038. (B)))
  3039. #define _mm512_mask_cvt_roundph_epu64(A, B, C, D) \
  3040. (__builtin_ia32_vcvtph2uqq512_mask_round ((C), (A), (B), (D)))
  3041. #define _mm512_maskz_cvt_roundph_epu64(A, B, C) \
  3042. (__builtin_ia32_vcvtph2uqq512_mask_round ((B), \
  3043. _mm512_setzero_si512 (), \
  3044. (A), \
  3045. (C)))
  3046. #endif /* __OPTIMIZE__ */
  3047. /* Intrinsics vcvttph2qq. */
  3048. extern __inline __m512i
  3049. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3050. _mm512_cvttph_epi64 (__m128h __A)
  3051. {
  3052. return __builtin_ia32_vcvttph2qq512_mask_round (__A,
  3053. _mm512_setzero_si512 (),
  3054. (__mmask8) -1,
  3055. _MM_FROUND_CUR_DIRECTION);
  3056. }
  3057. extern __inline __m512i
  3058. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3059. _mm512_mask_cvttph_epi64 (__m512i __A, __mmask8 __B, __m128h __C)
  3060. {
  3061. return __builtin_ia32_vcvttph2qq512_mask_round (__C, __A, __B,
  3062. _MM_FROUND_CUR_DIRECTION);
  3063. }
  3064. extern __inline __m512i
  3065. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3066. _mm512_maskz_cvttph_epi64 (__mmask8 __A, __m128h __B)
  3067. {
  3068. return __builtin_ia32_vcvttph2qq512_mask_round (__B,
  3069. _mm512_setzero_si512 (),
  3070. __A,
  3071. _MM_FROUND_CUR_DIRECTION);
  3072. }
  3073. #ifdef __OPTIMIZE__
  3074. extern __inline __m512i
  3075. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3076. _mm512_cvtt_roundph_epi64 (__m128h __A, int __B)
  3077. {
  3078. return __builtin_ia32_vcvttph2qq512_mask_round (__A,
  3079. _mm512_setzero_si512 (),
  3080. (__mmask8) -1,
  3081. __B);
  3082. }
  3083. extern __inline __m512i
  3084. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3085. _mm512_mask_cvtt_roundph_epi64 (__m512i __A, __mmask8 __B, __m128h __C, int __D)
  3086. {
  3087. return __builtin_ia32_vcvttph2qq512_mask_round (__C, __A, __B, __D);
  3088. }
  3089. extern __inline __m512i
  3090. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3091. _mm512_maskz_cvtt_roundph_epi64 (__mmask8 __A, __m128h __B, int __C)
  3092. {
  3093. return __builtin_ia32_vcvttph2qq512_mask_round (__B,
  3094. _mm512_setzero_si512 (),
  3095. __A,
  3096. __C);
  3097. }
  3098. #else
  3099. #define _mm512_cvtt_roundph_epi64(A, B) \
  3100. (__builtin_ia32_vcvttph2qq512_mask_round ((A), \
  3101. _mm512_setzero_si512 (), \
  3102. (__mmask8)-1, \
  3103. (B)))
#define _mm512_mask_cvtt_roundph_epi64(A, B, C, D) \
  (__builtin_ia32_vcvttph2qq512_mask_round ((C), (A), (B), (D)))
  3106. #define _mm512_maskz_cvtt_roundph_epi64(A, B, C) \
  3107. (__builtin_ia32_vcvttph2qq512_mask_round ((B), \
  3108. _mm512_setzero_si512 (), \
  3109. (A), \
  3110. (C)))
  3111. #endif /* __OPTIMIZE__ */
  3112. /* Intrinsics vcvttph2uqq. */
  3113. extern __inline __m512i
  3114. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3115. _mm512_cvttph_epu64 (__m128h __A)
  3116. {
  3117. return __builtin_ia32_vcvttph2uqq512_mask_round (__A,
  3118. _mm512_setzero_si512 (),
  3119. (__mmask8) -1,
  3120. _MM_FROUND_CUR_DIRECTION);
  3121. }
  3122. extern __inline __m512i
  3123. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3124. _mm512_mask_cvttph_epu64 (__m512i __A, __mmask8 __B, __m128h __C)
  3125. {
  3126. return __builtin_ia32_vcvttph2uqq512_mask_round (__C, __A, __B,
  3127. _MM_FROUND_CUR_DIRECTION);
  3128. }
  3129. extern __inline __m512i
  3130. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3131. _mm512_maskz_cvttph_epu64 (__mmask8 __A, __m128h __B)
  3132. {
  3133. return __builtin_ia32_vcvttph2uqq512_mask_round (__B,
  3134. _mm512_setzero_si512 (),
  3135. __A,
  3136. _MM_FROUND_CUR_DIRECTION);
  3137. }
  3138. #ifdef __OPTIMIZE__
  3139. extern __inline __m512i
  3140. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3141. _mm512_cvtt_roundph_epu64 (__m128h __A, int __B)
  3142. {
  3143. return __builtin_ia32_vcvttph2uqq512_mask_round (__A,
  3144. _mm512_setzero_si512 (),
  3145. (__mmask8) -1,
  3146. __B);
  3147. }
  3148. extern __inline __m512i
  3149. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3150. _mm512_mask_cvtt_roundph_epu64 (__m512i __A, __mmask8 __B, __m128h __C, int __D)
  3151. {
  3152. return __builtin_ia32_vcvttph2uqq512_mask_round (__C, __A, __B, __D);
  3153. }
  3154. extern __inline __m512i
  3155. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3156. _mm512_maskz_cvtt_roundph_epu64 (__mmask8 __A, __m128h __B, int __C)
  3157. {
  3158. return __builtin_ia32_vcvttph2uqq512_mask_round (__B,
  3159. _mm512_setzero_si512 (),
  3160. __A,
  3161. __C);
  3162. }
  3163. #else
  3164. #define _mm512_cvtt_roundph_epu64(A, B) \
  3165. (__builtin_ia32_vcvttph2uqq512_mask_round ((A), \
  3166. _mm512_setzero_si512 (), \
  3167. (__mmask8)-1, \
  3168. (B)))
#define _mm512_mask_cvtt_roundph_epu64(A, B, C, D) \
  (__builtin_ia32_vcvttph2uqq512_mask_round ((C), (A), (B), (D)))
  3171. #define _mm512_maskz_cvtt_roundph_epu64(A, B, C) \
  3172. (__builtin_ia32_vcvttph2uqq512_mask_round ((B), \
  3173. _mm512_setzero_si512 (), \
  3174. (A), \
  3175. (C)))
  3176. #endif /* __OPTIMIZE__ */
  3177. /* Intrinsics vcvtqq2ph. */
  3178. extern __inline __m128h
  3179. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3180. _mm512_cvtepi64_ph (__m512i __A)
  3181. {
  3182. return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __A,
  3183. _mm_setzero_ph (),
  3184. (__mmask8) -1,
  3185. _MM_FROUND_CUR_DIRECTION);
  3186. }
  3187. extern __inline __m128h
  3188. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3189. _mm512_mask_cvtepi64_ph (__m128h __A, __mmask8 __B, __m512i __C)
  3190. {
  3191. return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __C,
  3192. __A,
  3193. __B,
  3194. _MM_FROUND_CUR_DIRECTION);
  3195. }
  3196. extern __inline __m128h
  3197. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3198. _mm512_maskz_cvtepi64_ph (__mmask8 __A, __m512i __B)
  3199. {
  3200. return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __B,
  3201. _mm_setzero_ph (),
  3202. __A,
  3203. _MM_FROUND_CUR_DIRECTION);
  3204. }
  3205. #ifdef __OPTIMIZE__
  3206. extern __inline __m128h
  3207. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3208. _mm512_cvt_roundepi64_ph (__m512i __A, int __B)
  3209. {
  3210. return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __A,
  3211. _mm_setzero_ph (),
  3212. (__mmask8) -1,
  3213. __B);
  3214. }
  3215. extern __inline __m128h
  3216. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3217. _mm512_mask_cvt_roundepi64_ph (__m128h __A, __mmask8 __B, __m512i __C, int __D)
  3218. {
  3219. return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __C,
  3220. __A,
  3221. __B,
  3222. __D);
  3223. }
  3224. extern __inline __m128h
  3225. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3226. _mm512_maskz_cvt_roundepi64_ph (__mmask8 __A, __m512i __B, int __C)
  3227. {
  3228. return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __B,
  3229. _mm_setzero_ph (),
  3230. __A,
  3231. __C);
  3232. }
  3233. #else
  3234. #define _mm512_cvt_roundepi64_ph(A, B) \
  3235. (__builtin_ia32_vcvtqq2ph512_mask_round ((__v8di)(A), \
  3236. _mm_setzero_ph (), \
  3237. (__mmask8)-1, \
  3238. (B)))
  3239. #define _mm512_mask_cvt_roundepi64_ph(A, B, C, D) \
  3240. (__builtin_ia32_vcvtqq2ph512_mask_round ((__v8di)(C), (A), (B), (D)))
  3241. #define _mm512_maskz_cvt_roundepi64_ph(A, B, C) \
  3242. (__builtin_ia32_vcvtqq2ph512_mask_round ((__v8di)(B), \
  3243. _mm_setzero_ph (), \
  3244. (A), \
  3245. (C)))
  3246. #endif /* __OPTIMIZE__ */
  3247. /* Intrinsics vcvtuqq2ph. */
  3248. extern __inline __m128h
  3249. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3250. _mm512_cvtepu64_ph (__m512i __A)
  3251. {
  3252. return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __A,
  3253. _mm_setzero_ph (),
  3254. (__mmask8) -1,
  3255. _MM_FROUND_CUR_DIRECTION);
  3256. }
  3257. extern __inline __m128h
  3258. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3259. _mm512_mask_cvtepu64_ph (__m128h __A, __mmask8 __B, __m512i __C)
  3260. {
  3261. return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __C,
  3262. __A,
  3263. __B,
  3264. _MM_FROUND_CUR_DIRECTION);
  3265. }
  3266. extern __inline __m128h
  3267. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3268. _mm512_maskz_cvtepu64_ph (__mmask8 __A, __m512i __B)
  3269. {
  3270. return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __B,
  3271. _mm_setzero_ph (),
  3272. __A,
  3273. _MM_FROUND_CUR_DIRECTION);
  3274. }
  3275. #ifdef __OPTIMIZE__
  3276. extern __inline __m128h
  3277. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3278. _mm512_cvt_roundepu64_ph (__m512i __A, int __B)
  3279. {
  3280. return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __A,
  3281. _mm_setzero_ph (),
  3282. (__mmask8) -1,
  3283. __B);
  3284. }
  3285. extern __inline __m128h
  3286. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3287. _mm512_mask_cvt_roundepu64_ph (__m128h __A, __mmask8 __B, __m512i __C, int __D)
  3288. {
  3289. return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __C,
  3290. __A,
  3291. __B,
  3292. __D);
  3293. }
  3294. extern __inline __m128h
  3295. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3296. _mm512_maskz_cvt_roundepu64_ph (__mmask8 __A, __m512i __B, int __C)
  3297. {
  3298. return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __B,
  3299. _mm_setzero_ph (),
  3300. __A,
  3301. __C);
  3302. }
  3303. #else
  3304. #define _mm512_cvt_roundepu64_ph(A, B) \
  3305. (__builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di)(A), \
  3306. _mm_setzero_ph (), \
  3307. (__mmask8)-1, \
  3308. (B)))
  3309. #define _mm512_mask_cvt_roundepu64_ph(A, B, C, D) \
  3310. (__builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di)(C), (A), (B), (D)))
  3311. #define _mm512_maskz_cvt_roundepu64_ph(A, B, C) \
  3312. (__builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di)(B), \
  3313. _mm_setzero_ph (), \
  3314. (A), \
  3315. (C)))
  3316. #endif /* __OPTIMIZE__ */
  3317. /* Intrinsics vcvtph2w. */
  3318. extern __inline __m512i
  3319. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3320. _mm512_cvtph_epi16 (__m512h __A)
  3321. {
  3322. return (__m512i)
  3323. __builtin_ia32_vcvtph2w512_mask_round (__A,
  3324. (__v32hi)
  3325. _mm512_setzero_si512 (),
  3326. (__mmask32) -1,
  3327. _MM_FROUND_CUR_DIRECTION);
  3328. }
  3329. extern __inline __m512i
  3330. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3331. _mm512_mask_cvtph_epi16 (__m512i __A, __mmask32 __B, __m512h __C)
  3332. {
  3333. return (__m512i)
  3334. __builtin_ia32_vcvtph2w512_mask_round (__C,
  3335. (__v32hi) __A,
  3336. __B,
  3337. _MM_FROUND_CUR_DIRECTION);
  3338. }
  3339. extern __inline __m512i
  3340. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3341. _mm512_maskz_cvtph_epi16 (__mmask32 __A, __m512h __B)
  3342. {
  3343. return (__m512i)
  3344. __builtin_ia32_vcvtph2w512_mask_round (__B,
  3345. (__v32hi)
  3346. _mm512_setzero_si512 (),
  3347. __A,
  3348. _MM_FROUND_CUR_DIRECTION);
  3349. }
  3350. #ifdef __OPTIMIZE__
  3351. extern __inline __m512i
  3352. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3353. _mm512_cvt_roundph_epi16 (__m512h __A, int __B)
  3354. {
  3355. return (__m512i)
  3356. __builtin_ia32_vcvtph2w512_mask_round (__A,
  3357. (__v32hi)
  3358. _mm512_setzero_si512 (),
  3359. (__mmask32) -1,
  3360. __B);
  3361. }
  3362. extern __inline __m512i
  3363. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3364. _mm512_mask_cvt_roundph_epi16 (__m512i __A, __mmask32 __B, __m512h __C, int __D)
  3365. {
  3366. return (__m512i)
  3367. __builtin_ia32_vcvtph2w512_mask_round (__C,
  3368. (__v32hi) __A,
  3369. __B,
  3370. __D);
  3371. }
  3372. extern __inline __m512i
  3373. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3374. _mm512_maskz_cvt_roundph_epi16 (__mmask32 __A, __m512h __B, int __C)
  3375. {
  3376. return (__m512i)
  3377. __builtin_ia32_vcvtph2w512_mask_round (__B,
  3378. (__v32hi)
  3379. _mm512_setzero_si512 (),
  3380. __A,
  3381. __C);
  3382. }
  3383. #else
  3384. #define _mm512_cvt_roundph_epi16(A, B) \
  3385. ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((A), \
  3386. (__v32hi) \
  3387. _mm512_setzero_si512 (), \
  3388. (__mmask32)-1, \
  3389. (B)))
  3390. #define _mm512_mask_cvt_roundph_epi16(A, B, C, D) \
  3391. ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((C), \
  3392. (__v32hi)(A), \
  3393. (B), \
  3394. (D)))
  3395. #define _mm512_maskz_cvt_roundph_epi16(A, B, C) \
  3396. ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((B), \
  3397. (__v32hi) \
  3398. _mm512_setzero_si512 (), \
  3399. (A), \
  3400. (C)))
  3401. #endif /* __OPTIMIZE__ */
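/* Usage sketch (illustration only, kept disabled): half to 16-bit integer
   conversion keeps all 32 elements, so source and destination are both
   512 bits wide; the maskz form zeroes elements whose mask bit is clear.  */
#if 0
static __m512i
example_ph_to_epi16 (__m512h __v, __mmask32 __m)
{
  return _mm512_maskz_cvtph_epi16 (__m, __v);
}
#endif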
  3402. /* Intrinsics vcvtph2uw. */
  3403. extern __inline __m512i
  3404. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3405. _mm512_cvtph_epu16 (__m512h __A)
  3406. {
  3407. return (__m512i)
  3408. __builtin_ia32_vcvtph2uw512_mask_round (__A,
  3409. (__v32hi)
  3410. _mm512_setzero_si512 (),
  3411. (__mmask32) -1,
  3412. _MM_FROUND_CUR_DIRECTION);
  3413. }
  3414. extern __inline __m512i
  3415. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3416. _mm512_mask_cvtph_epu16 (__m512i __A, __mmask32 __B, __m512h __C)
  3417. {
  3418. return (__m512i)
  3419. __builtin_ia32_vcvtph2uw512_mask_round (__C, (__v32hi) __A, __B,
  3420. _MM_FROUND_CUR_DIRECTION);
  3421. }
  3422. extern __inline __m512i
  3423. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3424. _mm512_maskz_cvtph_epu16 (__mmask32 __A, __m512h __B)
  3425. {
  3426. return (__m512i)
  3427. __builtin_ia32_vcvtph2uw512_mask_round (__B,
  3428. (__v32hi)
  3429. _mm512_setzero_si512 (),
  3430. __A,
  3431. _MM_FROUND_CUR_DIRECTION);
  3432. }
  3433. #ifdef __OPTIMIZE__
  3434. extern __inline __m512i
  3435. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3436. _mm512_cvt_roundph_epu16 (__m512h __A, int __B)
  3437. {
  3438. return (__m512i)
  3439. __builtin_ia32_vcvtph2uw512_mask_round (__A,
  3440. (__v32hi)
  3441. _mm512_setzero_si512 (),
  3442. (__mmask32) -1,
  3443. __B);
  3444. }
  3445. extern __inline __m512i
  3446. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3447. _mm512_mask_cvt_roundph_epu16 (__m512i __A, __mmask32 __B, __m512h __C, int __D)
  3448. {
  3449. return (__m512i)
  3450. __builtin_ia32_vcvtph2uw512_mask_round (__C, (__v32hi) __A, __B, __D);
  3451. }
  3452. extern __inline __m512i
  3453. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3454. _mm512_maskz_cvt_roundph_epu16 (__mmask32 __A, __m512h __B, int __C)
  3455. {
  3456. return (__m512i)
  3457. __builtin_ia32_vcvtph2uw512_mask_round (__B,
  3458. (__v32hi)
  3459. _mm512_setzero_si512 (),
  3460. __A,
  3461. __C);
  3462. }
  3463. #else
  3464. #define _mm512_cvt_roundph_epu16(A, B) \
  3465. ((__m512i) \
  3466. __builtin_ia32_vcvtph2uw512_mask_round ((A), \
  3467. (__v32hi) \
  3468. _mm512_setzero_si512 (), \
  3469. (__mmask32)-1, (B)))
  3470. #define _mm512_mask_cvt_roundph_epu16(A, B, C, D) \
  3471. ((__m512i) \
  3472. __builtin_ia32_vcvtph2uw512_mask_round ((C), (__v32hi)(A), (B), (D)))
  3473. #define _mm512_maskz_cvt_roundph_epu16(A, B, C) \
  3474. ((__m512i) \
  3475. __builtin_ia32_vcvtph2uw512_mask_round ((B), \
  3476. (__v32hi) \
  3477. _mm512_setzero_si512 (), \
  3478. (A), \
  3479. (C)))
  3480. #endif /* __OPTIMIZE__ */
  3481. /* Intrinsics vcvttph2w. */
  3482. extern __inline __m512i
  3483. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3484. _mm512_cvttph_epi16 (__m512h __A)
  3485. {
  3486. return (__m512i)
  3487. __builtin_ia32_vcvttph2w512_mask_round (__A,
  3488. (__v32hi)
  3489. _mm512_setzero_si512 (),
  3490. (__mmask32) -1,
  3491. _MM_FROUND_CUR_DIRECTION);
  3492. }
  3493. extern __inline __m512i
  3494. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3495. _mm512_mask_cvttph_epi16 (__m512i __A, __mmask32 __B, __m512h __C)
  3496. {
  3497. return (__m512i)
  3498. __builtin_ia32_vcvttph2w512_mask_round (__C,
  3499. (__v32hi) __A,
  3500. __B,
  3501. _MM_FROUND_CUR_DIRECTION);
  3502. }
  3503. extern __inline __m512i
  3504. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3505. _mm512_maskz_cvttph_epi16 (__mmask32 __A, __m512h __B)
  3506. {
  3507. return (__m512i)
  3508. __builtin_ia32_vcvttph2w512_mask_round (__B,
  3509. (__v32hi)
  3510. _mm512_setzero_si512 (),
  3511. __A,
  3512. _MM_FROUND_CUR_DIRECTION);
  3513. }
  3514. #ifdef __OPTIMIZE__
  3515. extern __inline __m512i
  3516. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3517. _mm512_cvtt_roundph_epi16 (__m512h __A, int __B)
  3518. {
  3519. return (__m512i)
  3520. __builtin_ia32_vcvttph2w512_mask_round (__A,
  3521. (__v32hi)
  3522. _mm512_setzero_si512 (),
  3523. (__mmask32) -1,
  3524. __B);
  3525. }
  3526. extern __inline __m512i
  3527. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3528. _mm512_mask_cvtt_roundph_epi16 (__m512i __A, __mmask32 __B,
  3529. __m512h __C, int __D)
  3530. {
  3531. return (__m512i)
  3532. __builtin_ia32_vcvttph2w512_mask_round (__C,
  3533. (__v32hi) __A,
  3534. __B,
  3535. __D);
  3536. }
  3537. extern __inline __m512i
  3538. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3539. _mm512_maskz_cvtt_roundph_epi16 (__mmask32 __A, __m512h __B, int __C)
  3540. {
  3541. return (__m512i)
  3542. __builtin_ia32_vcvttph2w512_mask_round (__B,
  3543. (__v32hi)
  3544. _mm512_setzero_si512 (),
  3545. __A,
  3546. __C);
  3547. }
  3548. #else
  3549. #define _mm512_cvtt_roundph_epi16(A, B) \
  3550. ((__m512i) \
  3551. __builtin_ia32_vcvttph2w512_mask_round ((A), \
  3552. (__v32hi) \
  3553. _mm512_setzero_si512 (), \
  3554. (__mmask32)-1, \
  3555. (B)))
  3556. #define _mm512_mask_cvtt_roundph_epi16(A, B, C, D) \
  3557. ((__m512i) \
  3558. __builtin_ia32_vcvttph2w512_mask_round ((C), \
  3559. (__v32hi)(A), \
  3560. (B), \
  3561. (D)))
  3562. #define _mm512_maskz_cvtt_roundph_epi16(A, B, C) \
  3563. ((__m512i) \
  3564. __builtin_ia32_vcvttph2w512_mask_round ((B), \
  3565. (__v32hi) \
  3566. _mm512_setzero_si512 (), \
  3567. (A), \
  3568. (C)))
  3569. #endif /* __OPTIMIZE__ */
  3570. /* Intrinsics vcvttph2uw. */
  3571. extern __inline __m512i
  3572. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3573. _mm512_cvttph_epu16 (__m512h __A)
  3574. {
  3575. return (__m512i)
  3576. __builtin_ia32_vcvttph2uw512_mask_round (__A,
  3577. (__v32hi)
  3578. _mm512_setzero_si512 (),
  3579. (__mmask32) -1,
  3580. _MM_FROUND_CUR_DIRECTION);
  3581. }
  3582. extern __inline __m512i
  3583. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3584. _mm512_mask_cvttph_epu16 (__m512i __A, __mmask32 __B, __m512h __C)
  3585. {
  3586. return (__m512i)
  3587. __builtin_ia32_vcvttph2uw512_mask_round (__C,
  3588. (__v32hi) __A,
  3589. __B,
  3590. _MM_FROUND_CUR_DIRECTION);
  3591. }
  3592. extern __inline __m512i
  3593. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3594. _mm512_maskz_cvttph_epu16 (__mmask32 __A, __m512h __B)
  3595. {
  3596. return (__m512i)
  3597. __builtin_ia32_vcvttph2uw512_mask_round (__B,
  3598. (__v32hi)
  3599. _mm512_setzero_si512 (),
  3600. __A,
  3601. _MM_FROUND_CUR_DIRECTION);
  3602. }
  3603. #ifdef __OPTIMIZE__
  3604. extern __inline __m512i
  3605. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3606. _mm512_cvtt_roundph_epu16 (__m512h __A, int __B)
  3607. {
  3608. return (__m512i)
  3609. __builtin_ia32_vcvttph2uw512_mask_round (__A,
  3610. (__v32hi)
  3611. _mm512_setzero_si512 (),
  3612. (__mmask32) -1,
  3613. __B);
  3614. }
  3615. extern __inline __m512i
  3616. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3617. _mm512_mask_cvtt_roundph_epu16 (__m512i __A, __mmask32 __B,
  3618. __m512h __C, int __D)
  3619. {
  3620. return (__m512i)
  3621. __builtin_ia32_vcvttph2uw512_mask_round (__C,
  3622. (__v32hi) __A,
  3623. __B,
  3624. __D);
  3625. }
  3626. extern __inline __m512i
  3627. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3628. _mm512_maskz_cvtt_roundph_epu16 (__mmask32 __A, __m512h __B, int __C)
  3629. {
  3630. return (__m512i)
  3631. __builtin_ia32_vcvttph2uw512_mask_round (__B,
  3632. (__v32hi)
  3633. _mm512_setzero_si512 (),
  3634. __A,
  3635. __C);
  3636. }
  3637. #else
  3638. #define _mm512_cvtt_roundph_epu16(A, B) \
  3639. ((__m512i) \
  3640. __builtin_ia32_vcvttph2uw512_mask_round ((A), \
  3641. (__v32hi) \
  3642. _mm512_setzero_si512 (), \
  3643. (__mmask32)-1, \
  3644. (B)))
  3645. #define _mm512_mask_cvtt_roundph_epu16(A, B, C, D) \
  3646. ((__m512i) \
  3647. __builtin_ia32_vcvttph2uw512_mask_round ((C), \
  3648. (__v32hi)(A), \
  3649. (B), \
  3650. (D)))
  3651. #define _mm512_maskz_cvtt_roundph_epu16(A, B, C) \
  3652. ((__m512i) \
  3653. __builtin_ia32_vcvttph2uw512_mask_round ((B), \
  3654. (__v32hi) \
  3655. _mm512_setzero_si512 (), \
  3656. (A), \
  3657. (C)))
  3658. #endif /* __OPTIMIZE__ */
  3659. /* Intrinsics vcvtw2ph. */
  3660. extern __inline __m512h
  3661. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3662. _mm512_cvtepi16_ph (__m512i __A)
  3663. {
  3664. return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __A,
  3665. _mm512_setzero_ph (),
  3666. (__mmask32) -1,
  3667. _MM_FROUND_CUR_DIRECTION);
  3668. }
  3669. extern __inline __m512h
  3670. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3671. _mm512_mask_cvtepi16_ph (__m512h __A, __mmask32 __B, __m512i __C)
  3672. {
  3673. return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __C,
  3674. __A,
  3675. __B,
  3676. _MM_FROUND_CUR_DIRECTION);
  3677. }
  3678. extern __inline __m512h
  3679. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3680. _mm512_maskz_cvtepi16_ph (__mmask32 __A, __m512i __B)
  3681. {
  3682. return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __B,
  3683. _mm512_setzero_ph (),
  3684. __A,
  3685. _MM_FROUND_CUR_DIRECTION);
  3686. }
  3687. #ifdef __OPTIMIZE__
  3688. extern __inline __m512h
  3689. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3690. _mm512_cvt_roundepi16_ph (__m512i __A, int __B)
  3691. {
  3692. return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __A,
  3693. _mm512_setzero_ph (),
  3694. (__mmask32) -1,
  3695. __B);
  3696. }
  3697. extern __inline __m512h
  3698. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3699. _mm512_mask_cvt_roundepi16_ph (__m512h __A, __mmask32 __B, __m512i __C, int __D)
  3700. {
  3701. return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __C,
  3702. __A,
  3703. __B,
  3704. __D);
  3705. }
  3706. extern __inline __m512h
  3707. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3708. _mm512_maskz_cvt_roundepi16_ph (__mmask32 __A, __m512i __B, int __C)
  3709. {
  3710. return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __B,
  3711. _mm512_setzero_ph (),
  3712. __A,
  3713. __C);
  3714. }
  3715. #else
  3716. #define _mm512_cvt_roundepi16_ph(A, B) \
  3717. (__builtin_ia32_vcvtw2ph512_mask_round ((__v32hi)(A), \
  3718. _mm512_setzero_ph (), \
  3719. (__mmask32)-1, \
  3720. (B)))
  3721. #define _mm512_mask_cvt_roundepi16_ph(A, B, C, D) \
  3722. (__builtin_ia32_vcvtw2ph512_mask_round ((__v32hi)(C), \
  3723. (A), \
  3724. (B), \
  3725. (D)))
  3726. #define _mm512_maskz_cvt_roundepi16_ph(A, B, C) \
  3727. (__builtin_ia32_vcvtw2ph512_mask_round ((__v32hi)(B), \
  3728. _mm512_setzero_ph (), \
  3729. (A), \
  3730. (C)))
  3731. #endif /* __OPTIMIZE__ */
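/* Usage sketch (illustration only, kept disabled): converting 16-bit
   integers to _Float16; magnitudes up to 2048 convert exactly, larger
   values are rounded according to the requested mode.  */
#if 0
static __m512h
example_epi16_to_ph (__m512i __v)
{
  return _mm512_cvt_roundepi16_ph (__v, _MM_FROUND_TO_NEAREST_INT
					| _MM_FROUND_NO_EXC);
}
#endif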
  3732. /* Intrinsics vcvtuw2ph. */
  3733. extern __inline __m512h
  3734. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3735. _mm512_cvtepu16_ph (__m512i __A)
  3736. {
  3737. return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __A,
  3738. _mm512_setzero_ph (),
  3739. (__mmask32) -1,
  3740. _MM_FROUND_CUR_DIRECTION);
  3741. }
  3742. extern __inline __m512h
  3743. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3744. _mm512_mask_cvtepu16_ph (__m512h __A, __mmask32 __B, __m512i __C)
  3745. {
  3746. return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __C,
  3747. __A,
  3748. __B,
  3749. _MM_FROUND_CUR_DIRECTION);
  3750. }
  3751. extern __inline __m512h
  3752. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3753. _mm512_maskz_cvtepu16_ph (__mmask32 __A, __m512i __B)
  3754. {
  3755. return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __B,
  3756. _mm512_setzero_ph (),
  3757. __A,
  3758. _MM_FROUND_CUR_DIRECTION);
  3759. }
  3760. #ifdef __OPTIMIZE__
  3761. extern __inline __m512h
  3762. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3763. _mm512_cvt_roundepu16_ph (__m512i __A, int __B)
  3764. {
  3765. return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __A,
  3766. _mm512_setzero_ph (),
  3767. (__mmask32) -1,
  3768. __B);
  3769. }
  3770. extern __inline __m512h
  3771. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3772. _mm512_mask_cvt_roundepu16_ph (__m512h __A, __mmask32 __B, __m512i __C, int __D)
  3773. {
  3774. return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __C,
  3775. __A,
  3776. __B,
  3777. __D);
  3778. }
  3779. extern __inline __m512h
  3780. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3781. _mm512_maskz_cvt_roundepu16_ph (__mmask32 __A, __m512i __B, int __C)
  3782. {
  3783. return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __B,
  3784. _mm512_setzero_ph (),
  3785. __A,
  3786. __C);
  3787. }
  3788. #else
  3789. #define _mm512_cvt_roundepu16_ph(A, B) \
  3790. (__builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi)(A), \
  3791. _mm512_setzero_ph (), \
  3792. (__mmask32)-1, \
  3793. (B)))
  3794. #define _mm512_mask_cvt_roundepu16_ph(A, B, C, D) \
  3795. (__builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi)(C), \
  3796. (A), \
  3797. (B), \
  3798. (D)))
  3799. #define _mm512_maskz_cvt_roundepu16_ph(A, B, C) \
  3800. (__builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi)(B), \
  3801. _mm512_setzero_ph (), \
  3802. (A), \
  3803. (C)))
  3804. #endif /* __OPTIMIZE__ */
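/* Editorial usage sketch (not part of the original header): zero-masked
   unsigned conversion.  Lanes whose mask bit is clear are zeroed; the
   merge-masking form instead takes the pass-through vector as its first
   argument.

     __m512i u16s = _mm512_set1_epi16 (1000);
     __m512h lo16 = _mm512_maskz_cvtepu16_ph ((__mmask32) 0xFFFF, u16s);
*/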
3805. /* Intrinsics vcvtsh2si, vcvtsh2usi. */
  3806. extern __inline int
  3807. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3808. _mm_cvtsh_i32 (__m128h __A)
  3809. {
  3810. return (int) __builtin_ia32_vcvtsh2si32_round (__A, _MM_FROUND_CUR_DIRECTION);
  3811. }
  3812. extern __inline unsigned
  3813. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3814. _mm_cvtsh_u32 (__m128h __A)
  3815. {
3816. return (unsigned) __builtin_ia32_vcvtsh2usi32_round (__A,
  3817. _MM_FROUND_CUR_DIRECTION);
  3818. }
  3819. #ifdef __OPTIMIZE__
  3820. extern __inline int
  3821. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3822. _mm_cvt_roundsh_i32 (__m128h __A, const int __R)
  3823. {
  3824. return (int) __builtin_ia32_vcvtsh2si32_round (__A, __R);
  3825. }
  3826. extern __inline unsigned
  3827. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3828. _mm_cvt_roundsh_u32 (__m128h __A, const int __R)
  3829. {
3830. return (unsigned) __builtin_ia32_vcvtsh2usi32_round (__A, __R);
  3831. }
  3832. #else
  3833. #define _mm_cvt_roundsh_i32(A, B) \
  3834. ((int)__builtin_ia32_vcvtsh2si32_round ((A), (B)))
  3835. #define _mm_cvt_roundsh_u32(A, B) \
3836. ((unsigned)__builtin_ia32_vcvtsh2usi32_round ((A), (B)))
  3837. #endif /* __OPTIMIZE__ */
  3838. #ifdef __x86_64__
  3839. extern __inline long long
  3840. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3841. _mm_cvtsh_i64 (__m128h __A)
  3842. {
  3843. return (long long)
  3844. __builtin_ia32_vcvtsh2si64_round (__A, _MM_FROUND_CUR_DIRECTION);
  3845. }
  3846. extern __inline unsigned long long
  3847. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3848. _mm_cvtsh_u64 (__m128h __A)
  3849. {
3850. return (unsigned long long)
  3851. __builtin_ia32_vcvtsh2usi64_round (__A, _MM_FROUND_CUR_DIRECTION);
  3852. }
  3853. #ifdef __OPTIMIZE__
  3854. extern __inline long long
  3855. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3856. _mm_cvt_roundsh_i64 (__m128h __A, const int __R)
  3857. {
  3858. return (long long) __builtin_ia32_vcvtsh2si64_round (__A, __R);
  3859. }
  3860. extern __inline unsigned long long
  3861. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3862. _mm_cvt_roundsh_u64 (__m128h __A, const int __R)
  3863. {
3864. return (unsigned long long) __builtin_ia32_vcvtsh2usi64_round (__A, __R);
  3865. }
  3866. #else
  3867. #define _mm_cvt_roundsh_i64(A, B) \
  3868. ((long long)__builtin_ia32_vcvtsh2si64_round ((A), (B)))
  3869. #define _mm_cvt_roundsh_u64(A, B) \
3870. ((unsigned long long)__builtin_ia32_vcvtsh2usi64_round ((A), (B)))
  3871. #endif /* __OPTIMIZE__ */
  3872. #endif /* __x86_64__ */
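/* Editorial usage sketch (not part of the original header): scalar
   _Float16 to integer with an explicit rounding mode.  _mm_set_sh is
   assumed to be the scalar-set helper declared earlier in this header.

     __m128h h    = _mm_set_sh ((_Float16) 2.5f);
     int     down = _mm_cvt_roundsh_i32 (h, _MM_FROUND_TO_NEG_INF
					    | _MM_FROUND_NO_EXC);   /* 2 */
*/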
3873. /* Intrinsics vcvttsh2si, vcvttsh2usi. */
  3874. extern __inline int
  3875. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3876. _mm_cvttsh_i32 (__m128h __A)
  3877. {
  3878. return (int)
  3879. __builtin_ia32_vcvttsh2si32_round (__A, _MM_FROUND_CUR_DIRECTION);
  3880. }
  3881. extern __inline unsigned
  3882. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3883. _mm_cvttsh_u32 (__m128h __A)
  3884. {
3885. return (unsigned)
  3886. __builtin_ia32_vcvttsh2usi32_round (__A, _MM_FROUND_CUR_DIRECTION);
  3887. }
  3888. #ifdef __OPTIMIZE__
  3889. extern __inline int
  3890. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3891. _mm_cvtt_roundsh_i32 (__m128h __A, const int __R)
  3892. {
  3893. return (int) __builtin_ia32_vcvttsh2si32_round (__A, __R);
  3894. }
  3895. extern __inline unsigned
  3896. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3897. _mm_cvtt_roundsh_u32 (__m128h __A, const int __R)
  3898. {
3899. return (unsigned) __builtin_ia32_vcvttsh2usi32_round (__A, __R);
  3900. }
  3901. #else
  3902. #define _mm_cvtt_roundsh_i32(A, B) \
  3903. ((int)__builtin_ia32_vcvttsh2si32_round ((A), (B)))
  3904. #define _mm_cvtt_roundsh_u32(A, B) \
3905. ((unsigned)__builtin_ia32_vcvttsh2usi32_round ((A), (B)))
  3906. #endif /* __OPTIMIZE__ */
  3907. #ifdef __x86_64__
  3908. extern __inline long long
  3909. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3910. _mm_cvttsh_i64 (__m128h __A)
  3911. {
  3912. return (long long)
  3913. __builtin_ia32_vcvttsh2si64_round (__A, _MM_FROUND_CUR_DIRECTION);
  3914. }
  3915. extern __inline unsigned long long
  3916. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3917. _mm_cvttsh_u64 (__m128h __A)
  3918. {
3919. return (unsigned long long)
  3920. __builtin_ia32_vcvttsh2usi64_round (__A, _MM_FROUND_CUR_DIRECTION);
  3921. }
  3922. #ifdef __OPTIMIZE__
  3923. extern __inline long long
  3924. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3925. _mm_cvtt_roundsh_i64 (__m128h __A, const int __R)
  3926. {
  3927. return (long long) __builtin_ia32_vcvttsh2si64_round (__A, __R);
  3928. }
  3929. extern __inline unsigned long long
  3930. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3931. _mm_cvtt_roundsh_u64 (__m128h __A, const int __R)
  3932. {
3933. return (unsigned long long) __builtin_ia32_vcvttsh2usi64_round (__A, __R);
  3934. }
  3935. #else
  3936. #define _mm_cvtt_roundsh_i64(A, B) \
  3937. ((long long)__builtin_ia32_vcvttsh2si64_round ((A), (B)))
  3938. #define _mm_cvtt_roundsh_u64(A, B) \
3939. ((unsigned long long)__builtin_ia32_vcvttsh2usi64_round ((A), (B)))
  3940. #endif /* __OPTIMIZE__ */
  3941. #endif /* __x86_64__ */
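/* Editorial usage sketch (not part of the original header): the
   vcvttsh2si/vcvttsh2usi forms always truncate toward zero, so the round
   variants only accept _MM_FROUND_NO_EXC (suppress exceptions), not a
   rounding mode.  _mm_set_sh is assumed from earlier in this header.

     __m128h h = _mm_set_sh ((_Float16) -1.7f);
     int     t = _mm_cvtt_roundsh_i32 (h, _MM_FROUND_NO_EXC);   /* -1 */
*/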
  3942. /* Intrinsics vcvtsi2sh, vcvtusi2sh. */
  3943. extern __inline __m128h
  3944. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3945. _mm_cvti32_sh (__m128h __A, int __B)
  3946. {
  3947. return __builtin_ia32_vcvtsi2sh32_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
  3948. }
  3949. extern __inline __m128h
  3950. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3951. _mm_cvtu32_sh (__m128h __A, unsigned int __B)
  3952. {
  3953. return __builtin_ia32_vcvtusi2sh32_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
  3954. }
  3955. #ifdef __OPTIMIZE__
  3956. extern __inline __m128h
  3957. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3958. _mm_cvt_roundi32_sh (__m128h __A, int __B, const int __R)
  3959. {
  3960. return __builtin_ia32_vcvtsi2sh32_round (__A, __B, __R);
  3961. }
  3962. extern __inline __m128h
  3963. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3964. _mm_cvt_roundu32_sh (__m128h __A, unsigned int __B, const int __R)
  3965. {
  3966. return __builtin_ia32_vcvtusi2sh32_round (__A, __B, __R);
  3967. }
  3968. #else
  3969. #define _mm_cvt_roundi32_sh(A, B, C) \
  3970. (__builtin_ia32_vcvtsi2sh32_round ((A), (B), (C)))
  3971. #define _mm_cvt_roundu32_sh(A, B, C) \
  3972. (__builtin_ia32_vcvtusi2sh32_round ((A), (B), (C)))
  3973. #endif /* __OPTIMIZE__ */
  3974. #ifdef __x86_64__
  3975. extern __inline __m128h
  3976. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3977. _mm_cvti64_sh (__m128h __A, long long __B)
  3978. {
  3979. return __builtin_ia32_vcvtsi2sh64_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
  3980. }
  3981. extern __inline __m128h
  3982. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3983. _mm_cvtu64_sh (__m128h __A, unsigned long long __B)
  3984. {
  3985. return __builtin_ia32_vcvtusi2sh64_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
  3986. }
  3987. #ifdef __OPTIMIZE__
  3988. extern __inline __m128h
  3989. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3990. _mm_cvt_roundi64_sh (__m128h __A, long long __B, const int __R)
  3991. {
  3992. return __builtin_ia32_vcvtsi2sh64_round (__A, __B, __R);
  3993. }
  3994. extern __inline __m128h
  3995. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3996. _mm_cvt_roundu64_sh (__m128h __A, unsigned long long __B, const int __R)
  3997. {
  3998. return __builtin_ia32_vcvtusi2sh64_round (__A, __B, __R);
  3999. }
  4000. #else
  4001. #define _mm_cvt_roundi64_sh(A, B, C) \
  4002. (__builtin_ia32_vcvtsi2sh64_round ((A), (B), (C)))
  4003. #define _mm_cvt_roundu64_sh(A, B, C) \
  4004. (__builtin_ia32_vcvtusi2sh64_round ((A), (B), (C)))
  4005. #endif /* __OPTIMIZE__ */
  4006. #endif /* __x86_64__ */
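/* Editorial usage sketch (not part of the original header): integer to
   scalar _Float16.  The low element of the result is the converted value;
   the remaining seven elements are copied from the first operand.

     __m128h upper = _mm_setzero_ph ();
     __m128h v     = _mm_cvti32_sh (upper, 123);
*/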
  4007. /* Intrinsics vcvtph2pd. */
  4008. extern __inline __m512d
  4009. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4010. _mm512_cvtph_pd (__m128h __A)
  4011. {
  4012. return __builtin_ia32_vcvtph2pd512_mask_round (__A,
  4013. _mm512_setzero_pd (),
  4014. (__mmask8) -1,
  4015. _MM_FROUND_CUR_DIRECTION);
  4016. }
  4017. extern __inline __m512d
  4018. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4019. _mm512_mask_cvtph_pd (__m512d __A, __mmask8 __B, __m128h __C)
  4020. {
  4021. return __builtin_ia32_vcvtph2pd512_mask_round (__C, __A, __B,
  4022. _MM_FROUND_CUR_DIRECTION);
  4023. }
  4024. extern __inline __m512d
  4025. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4026. _mm512_maskz_cvtph_pd (__mmask8 __A, __m128h __B)
  4027. {
  4028. return __builtin_ia32_vcvtph2pd512_mask_round (__B,
  4029. _mm512_setzero_pd (),
  4030. __A,
  4031. _MM_FROUND_CUR_DIRECTION);
  4032. }
  4033. #ifdef __OPTIMIZE__
  4034. extern __inline __m512d
  4035. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4036. _mm512_cvt_roundph_pd (__m128h __A, int __B)
  4037. {
  4038. return __builtin_ia32_vcvtph2pd512_mask_round (__A,
  4039. _mm512_setzero_pd (),
  4040. (__mmask8) -1,
  4041. __B);
  4042. }
  4043. extern __inline __m512d
  4044. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4045. _mm512_mask_cvt_roundph_pd (__m512d __A, __mmask8 __B, __m128h __C, int __D)
  4046. {
  4047. return __builtin_ia32_vcvtph2pd512_mask_round (__C, __A, __B, __D);
  4048. }
  4049. extern __inline __m512d
  4050. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4051. _mm512_maskz_cvt_roundph_pd (__mmask8 __A, __m128h __B, int __C)
  4052. {
  4053. return __builtin_ia32_vcvtph2pd512_mask_round (__B,
  4054. _mm512_setzero_pd (),
  4055. __A,
  4056. __C);
  4057. }
  4058. #else
  4059. #define _mm512_cvt_roundph_pd(A, B) \
  4060. (__builtin_ia32_vcvtph2pd512_mask_round ((A), \
  4061. _mm512_setzero_pd (), \
  4062. (__mmask8)-1, \
  4063. (B)))
  4064. #define _mm512_mask_cvt_roundph_pd(A, B, C, D) \
  4065. (__builtin_ia32_vcvtph2pd512_mask_round ((C), (A), (B), (D)))
  4066. #define _mm512_maskz_cvt_roundph_pd(A, B, C) \
  4067. (__builtin_ia32_vcvtph2pd512_mask_round ((B), \
  4068. _mm512_setzero_pd (), \
  4069. (A), \
  4070. (C)))
  4071. #endif /* __OPTIMIZE__ */
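/* Editorial usage sketch (not part of the original header): widen the
   eight lowest _Float16 values to double.  Widening is exact, so the
   round variant only takes _MM_FROUND_NO_EXC rather than a rounding mode.
   _mm_set1_ph is assumed from earlier in this header.

     __m128h h8 = _mm_set1_ph ((_Float16) 1.5f);
     __m512d d8 = _mm512_cvt_roundph_pd (h8, _MM_FROUND_NO_EXC);
*/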
  4072. /* Intrinsics vcvtph2psx. */
  4073. extern __inline __m512
  4074. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4075. _mm512_cvtxph_ps (__m256h __A)
  4076. {
  4077. return __builtin_ia32_vcvtph2psx512_mask_round (__A,
  4078. _mm512_setzero_ps (),
  4079. (__mmask16) -1,
  4080. _MM_FROUND_CUR_DIRECTION);
  4081. }
  4082. extern __inline __m512
  4083. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4084. _mm512_mask_cvtxph_ps (__m512 __A, __mmask16 __B, __m256h __C)
  4085. {
  4086. return __builtin_ia32_vcvtph2psx512_mask_round (__C, __A, __B,
  4087. _MM_FROUND_CUR_DIRECTION);
  4088. }
  4089. extern __inline __m512
  4090. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4091. _mm512_maskz_cvtxph_ps (__mmask16 __A, __m256h __B)
  4092. {
  4093. return __builtin_ia32_vcvtph2psx512_mask_round (__B,
  4094. _mm512_setzero_ps (),
  4095. __A,
  4096. _MM_FROUND_CUR_DIRECTION);
  4097. }
  4098. #ifdef __OPTIMIZE__
  4099. extern __inline __m512
  4100. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4101. _mm512_cvtx_roundph_ps (__m256h __A, int __B)
  4102. {
  4103. return __builtin_ia32_vcvtph2psx512_mask_round (__A,
  4104. _mm512_setzero_ps (),
  4105. (__mmask16) -1,
  4106. __B);
  4107. }
  4108. extern __inline __m512
  4109. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4110. _mm512_mask_cvtx_roundph_ps (__m512 __A, __mmask16 __B, __m256h __C, int __D)
  4111. {
  4112. return __builtin_ia32_vcvtph2psx512_mask_round (__C, __A, __B, __D);
  4113. }
  4114. extern __inline __m512
  4115. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4116. _mm512_maskz_cvtx_roundph_ps (__mmask16 __A, __m256h __B, int __C)
  4117. {
  4118. return __builtin_ia32_vcvtph2psx512_mask_round (__B,
  4119. _mm512_setzero_ps (),
  4120. __A,
  4121. __C);
  4122. }
  4123. #else
  4124. #define _mm512_cvtx_roundph_ps(A, B) \
  4125. (__builtin_ia32_vcvtph2psx512_mask_round ((A), \
  4126. _mm512_setzero_ps (), \
  4127. (__mmask16)-1, \
  4128. (B)))
  4129. #define _mm512_mask_cvtx_roundph_ps(A, B, C, D) \
  4130. (__builtin_ia32_vcvtph2psx512_mask_round ((C), (A), (B), (D)))
  4131. #define _mm512_maskz_cvtx_roundph_ps(A, B, C) \
  4132. (__builtin_ia32_vcvtph2psx512_mask_round ((B), \
  4133. _mm512_setzero_ps (), \
  4134. (A), \
  4135. (C)))
  4136. #endif /* __OPTIMIZE__ */
4137. /* Intrinsics vcvtps2phx. */
  4138. extern __inline __m256h
  4139. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4140. _mm512_cvtxps_ph (__m512 __A)
  4141. {
  4142. return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __A,
  4143. _mm256_setzero_ph (),
  4144. (__mmask16) -1,
  4145. _MM_FROUND_CUR_DIRECTION);
  4146. }
  4147. extern __inline __m256h
  4148. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4149. _mm512_mask_cvtxps_ph (__m256h __A, __mmask16 __B, __m512 __C)
  4150. {
  4151. return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __C,
  4152. __A, __B,
  4153. _MM_FROUND_CUR_DIRECTION);
  4154. }
  4155. extern __inline __m256h
  4156. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4157. _mm512_maskz_cvtxps_ph (__mmask16 __A, __m512 __B)
  4158. {
  4159. return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __B,
  4160. _mm256_setzero_ph (),
  4161. __A,
  4162. _MM_FROUND_CUR_DIRECTION);
  4163. }
  4164. #ifdef __OPTIMIZE__
  4165. extern __inline __m256h
  4166. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4167. _mm512_cvtx_roundps_ph (__m512 __A, int __B)
  4168. {
  4169. return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __A,
  4170. _mm256_setzero_ph (),
  4171. (__mmask16) -1,
  4172. __B);
  4173. }
  4174. extern __inline __m256h
  4175. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4176. _mm512_mask_cvtx_roundps_ph (__m256h __A, __mmask16 __B, __m512 __C, int __D)
  4177. {
  4178. return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __C,
  4179. __A, __B, __D);
  4180. }
  4181. extern __inline __m256h
  4182. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4183. _mm512_maskz_cvtx_roundps_ph (__mmask16 __A, __m512 __B, int __C)
  4184. {
  4185. return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __B,
  4186. _mm256_setzero_ph (),
  4187. __A, __C);
  4188. }
  4189. #else
  4190. #define _mm512_cvtx_roundps_ph(A, B) \
  4191. (__builtin_ia32_vcvtps2phx512_mask_round ((__v16sf)(A), \
  4192. _mm256_setzero_ph (),\
  4193. (__mmask16)-1, (B)))
  4194. #define _mm512_mask_cvtx_roundps_ph(A, B, C, D) \
  4195. (__builtin_ia32_vcvtps2phx512_mask_round ((__v16sf)(C), \
  4196. (A), (B), (D)))
  4197. #define _mm512_maskz_cvtx_roundps_ph(A, B, C) \
  4198. (__builtin_ia32_vcvtps2phx512_mask_round ((__v16sf)(B), \
  4199. _mm256_setzero_ph (),\
  4200. (A), (C)))
  4201. #endif /* __OPTIMIZE__ */
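/* Editorial usage sketch (not part of the original header): narrow 16
   floats to 16 _Float16 values in a __m256h, with explicit rounding since
   the conversion can be inexact.

     __m512  f = _mm512_set1_ps (3.14159f);
     __m256h h = _mm512_cvtx_roundps_ph (f, _MM_FROUND_TO_NEAREST_INT
					    | _MM_FROUND_NO_EXC);
*/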
  4202. /* Intrinsics vcvtpd2ph. */
  4203. extern __inline __m128h
  4204. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4205. _mm512_cvtpd_ph (__m512d __A)
  4206. {
  4207. return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __A,
  4208. _mm_setzero_ph (),
  4209. (__mmask8) -1,
  4210. _MM_FROUND_CUR_DIRECTION);
  4211. }
  4212. extern __inline __m128h
  4213. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4214. _mm512_mask_cvtpd_ph (__m128h __A, __mmask8 __B, __m512d __C)
  4215. {
  4216. return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __C,
  4217. __A, __B,
  4218. _MM_FROUND_CUR_DIRECTION);
  4219. }
  4220. extern __inline __m128h
  4221. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4222. _mm512_maskz_cvtpd_ph (__mmask8 __A, __m512d __B)
  4223. {
  4224. return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __B,
  4225. _mm_setzero_ph (),
  4226. __A,
  4227. _MM_FROUND_CUR_DIRECTION);
  4228. }
  4229. #ifdef __OPTIMIZE__
  4230. extern __inline __m128h
  4231. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4232. _mm512_cvt_roundpd_ph (__m512d __A, int __B)
  4233. {
  4234. return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __A,
  4235. _mm_setzero_ph (),
  4236. (__mmask8) -1,
  4237. __B);
  4238. }
  4239. extern __inline __m128h
  4240. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4241. _mm512_mask_cvt_roundpd_ph (__m128h __A, __mmask8 __B, __m512d __C, int __D)
  4242. {
  4243. return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __C,
  4244. __A, __B, __D);
  4245. }
  4246. extern __inline __m128h
  4247. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4248. _mm512_maskz_cvt_roundpd_ph (__mmask8 __A, __m512d __B, int __C)
  4249. {
  4250. return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __B,
  4251. _mm_setzero_ph (),
  4252. __A, __C);
  4253. }
  4254. #else
  4255. #define _mm512_cvt_roundpd_ph(A, B) \
  4256. (__builtin_ia32_vcvtpd2ph512_mask_round ((__v8df)(A), \
  4257. _mm_setzero_ph (), \
  4258. (__mmask8)-1, (B)))
  4259. #define _mm512_mask_cvt_roundpd_ph(A, B, C, D) \
  4260. (__builtin_ia32_vcvtpd2ph512_mask_round ((__v8df)(C), \
  4261. (A), (B), (D)))
  4262. #define _mm512_maskz_cvt_roundpd_ph(A, B, C) \
  4263. (__builtin_ia32_vcvtpd2ph512_mask_round ((__v8df)(B), \
  4264. _mm_setzero_ph (), \
  4265. (A), (C)))
  4266. #endif /* __OPTIMIZE__ */
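/* Editorial usage sketch (not part of the original header): eight doubles
   narrow to only eight _Float16 values, so the result is a __m128h.

     __m512d d = _mm512_set1_pd (0.125);
     __m128h h = _mm512_cvtpd_ph (d);
*/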
  4267. /* Intrinsics vcvtsh2ss, vcvtsh2sd. */
  4268. extern __inline __m128
  4269. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4270. _mm_cvtsh_ss (__m128 __A, __m128h __B)
  4271. {
  4272. return __builtin_ia32_vcvtsh2ss_mask_round (__B, __A,
  4273. _mm_setzero_ps (),
  4274. (__mmask8) -1,
  4275. _MM_FROUND_CUR_DIRECTION);
  4276. }
  4277. extern __inline __m128
  4278. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4279. _mm_mask_cvtsh_ss (__m128 __A, __mmask8 __B, __m128 __C,
  4280. __m128h __D)
  4281. {
  4282. return __builtin_ia32_vcvtsh2ss_mask_round (__D, __C, __A, __B,
  4283. _MM_FROUND_CUR_DIRECTION);
  4284. }
  4285. extern __inline __m128
  4286. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4287. _mm_maskz_cvtsh_ss (__mmask8 __A, __m128 __B,
  4288. __m128h __C)
  4289. {
  4290. return __builtin_ia32_vcvtsh2ss_mask_round (__C, __B,
  4291. _mm_setzero_ps (),
  4292. __A, _MM_FROUND_CUR_DIRECTION);
  4293. }
  4294. extern __inline __m128d
  4295. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4296. _mm_cvtsh_sd (__m128d __A, __m128h __B)
  4297. {
  4298. return __builtin_ia32_vcvtsh2sd_mask_round (__B, __A,
  4299. _mm_setzero_pd (),
  4300. (__mmask8) -1,
  4301. _MM_FROUND_CUR_DIRECTION);
  4302. }
  4303. extern __inline __m128d
  4304. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4305. _mm_mask_cvtsh_sd (__m128d __A, __mmask8 __B, __m128d __C,
  4306. __m128h __D)
  4307. {
  4308. return __builtin_ia32_vcvtsh2sd_mask_round (__D, __C, __A, __B,
  4309. _MM_FROUND_CUR_DIRECTION);
  4310. }
  4311. extern __inline __m128d
  4312. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4313. _mm_maskz_cvtsh_sd (__mmask8 __A, __m128d __B, __m128h __C)
  4314. {
  4315. return __builtin_ia32_vcvtsh2sd_mask_round (__C, __B,
  4316. _mm_setzero_pd (),
  4317. __A, _MM_FROUND_CUR_DIRECTION);
  4318. }
  4319. #ifdef __OPTIMIZE__
  4320. extern __inline __m128
  4321. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4322. _mm_cvt_roundsh_ss (__m128 __A, __m128h __B, const int __R)
  4323. {
  4324. return __builtin_ia32_vcvtsh2ss_mask_round (__B, __A,
  4325. _mm_setzero_ps (),
  4326. (__mmask8) -1, __R);
  4327. }
  4328. extern __inline __m128
  4329. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4330. _mm_mask_cvt_roundsh_ss (__m128 __A, __mmask8 __B, __m128 __C,
  4331. __m128h __D, const int __R)
  4332. {
  4333. return __builtin_ia32_vcvtsh2ss_mask_round (__D, __C, __A, __B, __R);
  4334. }
  4335. extern __inline __m128
  4336. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4337. _mm_maskz_cvt_roundsh_ss (__mmask8 __A, __m128 __B,
  4338. __m128h __C, const int __R)
  4339. {
  4340. return __builtin_ia32_vcvtsh2ss_mask_round (__C, __B,
  4341. _mm_setzero_ps (),
  4342. __A, __R);
  4343. }
  4344. extern __inline __m128d
  4345. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4346. _mm_cvt_roundsh_sd (__m128d __A, __m128h __B, const int __R)
  4347. {
  4348. return __builtin_ia32_vcvtsh2sd_mask_round (__B, __A,
  4349. _mm_setzero_pd (),
  4350. (__mmask8) -1, __R);
  4351. }
  4352. extern __inline __m128d
  4353. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4354. _mm_mask_cvt_roundsh_sd (__m128d __A, __mmask8 __B, __m128d __C,
  4355. __m128h __D, const int __R)
  4356. {
  4357. return __builtin_ia32_vcvtsh2sd_mask_round (__D, __C, __A, __B, __R);
  4358. }
  4359. extern __inline __m128d
  4360. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4361. _mm_maskz_cvt_roundsh_sd (__mmask8 __A, __m128d __B, __m128h __C, const int __R)
  4362. {
  4363. return __builtin_ia32_vcvtsh2sd_mask_round (__C, __B,
  4364. _mm_setzero_pd (),
  4365. __A, __R);
  4366. }
  4367. #else
  4368. #define _mm_cvt_roundsh_ss(A, B, R) \
  4369. (__builtin_ia32_vcvtsh2ss_mask_round ((B), (A), \
  4370. _mm_setzero_ps (), \
  4371. (__mmask8) -1, (R)))
  4372. #define _mm_mask_cvt_roundsh_ss(A, B, C, D, R) \
  4373. (__builtin_ia32_vcvtsh2ss_mask_round ((D), (C), (A), (B), (R)))
  4374. #define _mm_maskz_cvt_roundsh_ss(A, B, C, R) \
  4375. (__builtin_ia32_vcvtsh2ss_mask_round ((C), (B), \
  4376. _mm_setzero_ps (), \
  4377. (A), (R)))
  4378. #define _mm_cvt_roundsh_sd(A, B, R) \
  4379. (__builtin_ia32_vcvtsh2sd_mask_round ((B), (A), \
  4380. _mm_setzero_pd (), \
  4381. (__mmask8) -1, (R)))
  4382. #define _mm_mask_cvt_roundsh_sd(A, B, C, D, R) \
  4383. (__builtin_ia32_vcvtsh2sd_mask_round ((D), (C), (A), (B), (R)))
  4384. #define _mm_maskz_cvt_roundsh_sd(A, B, C, R) \
  4385. (__builtin_ia32_vcvtsh2sd_mask_round ((C), (B), \
  4386. _mm_setzero_pd (), \
  4387. (A), (R)))
  4388. #endif /* __OPTIMIZE__ */
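/* Editorial usage sketch (not part of the original header): promote the
   low _Float16 of one vector into the low float of another; the upper
   three floats come from the first (__m128) operand.  _mm_set_sh is
   assumed from earlier in this header.

     __m128  upper = _mm_setzero_ps ();
     __m128h h     = _mm_set_sh ((_Float16) 9.0f);
     __m128  s     = _mm_cvtsh_ss (upper, h);   /* low lane == 9.0f */
*/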
  4389. /* Intrinsics vcvtss2sh, vcvtsd2sh. */
  4390. extern __inline __m128h
  4391. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4392. _mm_cvtss_sh (__m128h __A, __m128 __B)
  4393. {
  4394. return __builtin_ia32_vcvtss2sh_mask_round (__B, __A,
  4395. _mm_setzero_ph (),
  4396. (__mmask8) -1,
  4397. _MM_FROUND_CUR_DIRECTION);
  4398. }
  4399. extern __inline __m128h
  4400. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4401. _mm_mask_cvtss_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128 __D)
  4402. {
  4403. return __builtin_ia32_vcvtss2sh_mask_round (__D, __C, __A, __B,
  4404. _MM_FROUND_CUR_DIRECTION);
  4405. }
  4406. extern __inline __m128h
  4407. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4408. _mm_maskz_cvtss_sh (__mmask8 __A, __m128h __B, __m128 __C)
  4409. {
  4410. return __builtin_ia32_vcvtss2sh_mask_round (__C, __B,
  4411. _mm_setzero_ph (),
  4412. __A, _MM_FROUND_CUR_DIRECTION);
  4413. }
  4414. extern __inline __m128h
  4415. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4416. _mm_cvtsd_sh (__m128h __A, __m128d __B)
  4417. {
  4418. return __builtin_ia32_vcvtsd2sh_mask_round (__B, __A,
  4419. _mm_setzero_ph (),
  4420. (__mmask8) -1,
  4421. _MM_FROUND_CUR_DIRECTION);
  4422. }
  4423. extern __inline __m128h
  4424. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4425. _mm_mask_cvtsd_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128d __D)
  4426. {
  4427. return __builtin_ia32_vcvtsd2sh_mask_round (__D, __C, __A, __B,
  4428. _MM_FROUND_CUR_DIRECTION);
  4429. }
  4430. extern __inline __m128h
  4431. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4432. _mm_maskz_cvtsd_sh (__mmask8 __A, __m128h __B, __m128d __C)
  4433. {
  4434. return __builtin_ia32_vcvtsd2sh_mask_round (__C, __B,
  4435. _mm_setzero_ph (),
  4436. __A, _MM_FROUND_CUR_DIRECTION);
  4437. }
  4438. #ifdef __OPTIMIZE__
  4439. extern __inline __m128h
  4440. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4441. _mm_cvt_roundss_sh (__m128h __A, __m128 __B, const int __R)
  4442. {
  4443. return __builtin_ia32_vcvtss2sh_mask_round (__B, __A,
  4444. _mm_setzero_ph (),
  4445. (__mmask8) -1, __R);
  4446. }
  4447. extern __inline __m128h
  4448. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4449. _mm_mask_cvt_roundss_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128 __D,
  4450. const int __R)
  4451. {
  4452. return __builtin_ia32_vcvtss2sh_mask_round (__D, __C, __A, __B, __R);
  4453. }
  4454. extern __inline __m128h
  4455. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4456. _mm_maskz_cvt_roundss_sh (__mmask8 __A, __m128h __B, __m128 __C,
  4457. const int __R)
  4458. {
  4459. return __builtin_ia32_vcvtss2sh_mask_round (__C, __B,
  4460. _mm_setzero_ph (),
  4461. __A, __R);
  4462. }
  4463. extern __inline __m128h
  4464. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4465. _mm_cvt_roundsd_sh (__m128h __A, __m128d __B, const int __R)
  4466. {
  4467. return __builtin_ia32_vcvtsd2sh_mask_round (__B, __A,
  4468. _mm_setzero_ph (),
  4469. (__mmask8) -1, __R);
  4470. }
  4471. extern __inline __m128h
  4472. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4473. _mm_mask_cvt_roundsd_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128d __D,
  4474. const int __R)
  4475. {
  4476. return __builtin_ia32_vcvtsd2sh_mask_round (__D, __C, __A, __B, __R);
  4477. }
  4478. extern __inline __m128h
  4479. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4480. _mm_maskz_cvt_roundsd_sh (__mmask8 __A, __m128h __B, __m128d __C,
  4481. const int __R)
  4482. {
  4483. return __builtin_ia32_vcvtsd2sh_mask_round (__C, __B,
  4484. _mm_setzero_ph (),
  4485. __A, __R);
  4486. }
  4487. #else
  4488. #define _mm_cvt_roundss_sh(A, B, R) \
  4489. (__builtin_ia32_vcvtss2sh_mask_round ((B), (A), \
  4490. _mm_setzero_ph (), \
4491. (__mmask8) -1, (R)))
  4492. #define _mm_mask_cvt_roundss_sh(A, B, C, D, R) \
  4493. (__builtin_ia32_vcvtss2sh_mask_round ((D), (C), (A), (B), (R)))
  4494. #define _mm_maskz_cvt_roundss_sh(A, B, C, R) \
  4495. (__builtin_ia32_vcvtss2sh_mask_round ((C), (B), \
  4496. _mm_setzero_ph (), \
4497. (A), (R)))
  4498. #define _mm_cvt_roundsd_sh(A, B, R) \
  4499. (__builtin_ia32_vcvtsd2sh_mask_round ((B), (A), \
  4500. _mm_setzero_ph (), \
4501. (__mmask8) -1, (R)))
  4502. #define _mm_mask_cvt_roundsd_sh(A, B, C, D, R) \
  4503. (__builtin_ia32_vcvtsd2sh_mask_round ((D), (C), (A), (B), (R)))
  4504. #define _mm_maskz_cvt_roundsd_sh(A, B, C, R) \
  4505. (__builtin_ia32_vcvtsd2sh_mask_round ((C), (B), \
  4506. _mm_setzero_ph (), \
  4507. (A), (R)))
  4508. #endif /* __OPTIMIZE__ */
  4509. /* Intrinsics vfmaddsub[132,213,231]ph. */
  4510. extern __inline __m512h
  4511. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4512. _mm512_fmaddsub_ph (__m512h __A, __m512h __B, __m512h __C)
  4513. {
  4514. return (__m512h)
  4515. __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A,
  4516. (__v32hf) __B,
  4517. (__v32hf) __C,
  4518. (__mmask32) -1,
  4519. _MM_FROUND_CUR_DIRECTION);
  4520. }
  4521. extern __inline __m512h
  4522. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4523. _mm512_mask_fmaddsub_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
  4524. {
  4525. return (__m512h)
  4526. __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A,
  4527. (__v32hf) __B,
  4528. (__v32hf) __C,
  4529. (__mmask32) __U,
  4530. _MM_FROUND_CUR_DIRECTION);
  4531. }
  4532. extern __inline __m512h
  4533. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4534. _mm512_mask3_fmaddsub_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
  4535. {
  4536. return (__m512h)
  4537. __builtin_ia32_vfmaddsubph512_mask3 ((__v32hf) __A,
  4538. (__v32hf) __B,
  4539. (__v32hf) __C,
  4540. (__mmask32) __U,
  4541. _MM_FROUND_CUR_DIRECTION);
  4542. }
  4543. extern __inline __m512h
  4544. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4545. _mm512_maskz_fmaddsub_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
  4546. {
  4547. return (__m512h)
  4548. __builtin_ia32_vfmaddsubph512_maskz ((__v32hf) __A,
  4549. (__v32hf) __B,
  4550. (__v32hf) __C,
  4551. (__mmask32) __U,
  4552. _MM_FROUND_CUR_DIRECTION);
  4553. }
  4554. #ifdef __OPTIMIZE__
  4555. extern __inline __m512h
  4556. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4557. _mm512_fmaddsub_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
  4558. {
  4559. return (__m512h)
  4560. __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A,
  4561. (__v32hf) __B,
  4562. (__v32hf) __C,
  4563. (__mmask32) -1, __R);
  4564. }
  4565. extern __inline __m512h
  4566. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4567. _mm512_mask_fmaddsub_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
  4568. __m512h __C, const int __R)
  4569. {
  4570. return (__m512h)
  4571. __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A,
  4572. (__v32hf) __B,
  4573. (__v32hf) __C,
  4574. (__mmask32) __U, __R);
  4575. }
  4576. extern __inline __m512h
  4577. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4578. _mm512_mask3_fmaddsub_round_ph (__m512h __A, __m512h __B, __m512h __C,
  4579. __mmask32 __U, const int __R)
  4580. {
  4581. return (__m512h)
  4582. __builtin_ia32_vfmaddsubph512_mask3 ((__v32hf) __A,
  4583. (__v32hf) __B,
  4584. (__v32hf) __C,
  4585. (__mmask32) __U, __R);
  4586. }
  4587. extern __inline __m512h
  4588. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4589. _mm512_maskz_fmaddsub_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
  4590. __m512h __C, const int __R)
  4591. {
  4592. return (__m512h)
  4593. __builtin_ia32_vfmaddsubph512_maskz ((__v32hf) __A,
  4594. (__v32hf) __B,
  4595. (__v32hf) __C,
  4596. (__mmask32) __U, __R);
  4597. }
  4598. #else
  4599. #define _mm512_fmaddsub_round_ph(A, B, C, R) \
  4600. ((__m512h)__builtin_ia32_vfmaddsubph512_mask ((A), (B), (C), -1, (R)))
  4601. #define _mm512_mask_fmaddsub_round_ph(A, U, B, C, R) \
  4602. ((__m512h)__builtin_ia32_vfmaddsubph512_mask ((A), (B), (C), (U), (R)))
  4603. #define _mm512_mask3_fmaddsub_round_ph(A, B, C, U, R) \
  4604. ((__m512h)__builtin_ia32_vfmaddsubph512_mask3 ((A), (B), (C), (U), (R)))
  4605. #define _mm512_maskz_fmaddsub_round_ph(U, A, B, C, R) \
  4606. ((__m512h)__builtin_ia32_vfmaddsubph512_maskz ((A), (B), (C), (U), (R)))
  4607. #endif /* __OPTIMIZE__ */
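/* Editorial usage sketch (not part of the original header): fmaddsub
   computes a*b-c in the even-indexed lanes and a*b+c in the odd-indexed
   lanes, the usual building block for interleaved complex arithmetic.
   _mm512_set1_ph is assumed from earlier in this header.

     __m512h a = _mm512_set1_ph ((_Float16) 2.0f);
     __m512h r = _mm512_fmaddsub_ph (a, a, a);   /* even: 2, odd: 6 */
*/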
  4608. /* Intrinsics vfmsubadd[132,213,231]ph. */
  4609. extern __inline __m512h
  4610. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4611. _mm512_fmsubadd_ph (__m512h __A, __m512h __B, __m512h __C)
  4612. {
  4613. return (__m512h)
  4614. __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A,
  4615. (__v32hf) __B,
  4616. (__v32hf) __C,
  4617. (__mmask32) -1,
  4618. _MM_FROUND_CUR_DIRECTION);
  4619. }
  4620. extern __inline __m512h
  4621. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4622. _mm512_mask_fmsubadd_ph (__m512h __A, __mmask32 __U,
  4623. __m512h __B, __m512h __C)
  4624. {
  4625. return (__m512h)
  4626. __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A,
  4627. (__v32hf) __B,
  4628. (__v32hf) __C,
  4629. (__mmask32) __U,
  4630. _MM_FROUND_CUR_DIRECTION);
  4631. }
  4632. extern __inline __m512h
  4633. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4634. _mm512_mask3_fmsubadd_ph (__m512h __A, __m512h __B,
  4635. __m512h __C, __mmask32 __U)
  4636. {
  4637. return (__m512h)
  4638. __builtin_ia32_vfmsubaddph512_mask3 ((__v32hf) __A,
  4639. (__v32hf) __B,
  4640. (__v32hf) __C,
  4641. (__mmask32) __U,
  4642. _MM_FROUND_CUR_DIRECTION);
  4643. }
  4644. extern __inline __m512h
  4645. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4646. _mm512_maskz_fmsubadd_ph (__mmask32 __U, __m512h __A,
  4647. __m512h __B, __m512h __C)
  4648. {
  4649. return (__m512h)
  4650. __builtin_ia32_vfmsubaddph512_maskz ((__v32hf) __A,
  4651. (__v32hf) __B,
  4652. (__v32hf) __C,
  4653. (__mmask32) __U,
  4654. _MM_FROUND_CUR_DIRECTION);
  4655. }
  4656. #ifdef __OPTIMIZE__
  4657. extern __inline __m512h
  4658. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4659. _mm512_fmsubadd_round_ph (__m512h __A, __m512h __B,
  4660. __m512h __C, const int __R)
  4661. {
  4662. return (__m512h)
  4663. __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A,
  4664. (__v32hf) __B,
  4665. (__v32hf) __C,
  4666. (__mmask32) -1, __R);
  4667. }
  4668. extern __inline __m512h
  4669. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4670. _mm512_mask_fmsubadd_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
  4671. __m512h __C, const int __R)
  4672. {
  4673. return (__m512h)
  4674. __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A,
  4675. (__v32hf) __B,
  4676. (__v32hf) __C,
  4677. (__mmask32) __U, __R);
  4678. }
  4679. extern __inline __m512h
  4680. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4681. _mm512_mask3_fmsubadd_round_ph (__m512h __A, __m512h __B, __m512h __C,
  4682. __mmask32 __U, const int __R)
  4683. {
  4684. return (__m512h)
  4685. __builtin_ia32_vfmsubaddph512_mask3 ((__v32hf) __A,
  4686. (__v32hf) __B,
  4687. (__v32hf) __C,
  4688. (__mmask32) __U, __R);
  4689. }
  4690. extern __inline __m512h
  4691. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4692. _mm512_maskz_fmsubadd_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
  4693. __m512h __C, const int __R)
  4694. {
  4695. return (__m512h)
  4696. __builtin_ia32_vfmsubaddph512_maskz ((__v32hf) __A,
  4697. (__v32hf) __B,
  4698. (__v32hf) __C,
  4699. (__mmask32) __U, __R);
  4700. }
  4701. #else
  4702. #define _mm512_fmsubadd_round_ph(A, B, C, R) \
  4703. ((__m512h)__builtin_ia32_vfmsubaddph512_mask ((A), (B), (C), -1, (R)))
  4704. #define _mm512_mask_fmsubadd_round_ph(A, U, B, C, R) \
  4705. ((__m512h)__builtin_ia32_vfmsubaddph512_mask ((A), (B), (C), (U), (R)))
  4706. #define _mm512_mask3_fmsubadd_round_ph(A, B, C, U, R) \
  4707. ((__m512h)__builtin_ia32_vfmsubaddph512_mask3 ((A), (B), (C), (U), (R)))
  4708. #define _mm512_maskz_fmsubadd_round_ph(U, A, B, C, R) \
  4709. ((__m512h)__builtin_ia32_vfmsubaddph512_maskz ((A), (B), (C), (U), (R)))
  4710. #endif /* __OPTIMIZE__ */
  4711. /* Intrinsics vfmadd[132,213,231]ph. */
  4712. extern __inline __m512h
  4713. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4714. _mm512_fmadd_ph (__m512h __A, __m512h __B, __m512h __C)
  4715. {
  4716. return (__m512h)
  4717. __builtin_ia32_vfmaddph512_mask ((__v32hf) __A,
  4718. (__v32hf) __B,
  4719. (__v32hf) __C,
  4720. (__mmask32) -1,
  4721. _MM_FROUND_CUR_DIRECTION);
  4722. }
  4723. extern __inline __m512h
  4724. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4725. _mm512_mask_fmadd_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
  4726. {
  4727. return (__m512h)
  4728. __builtin_ia32_vfmaddph512_mask ((__v32hf) __A,
  4729. (__v32hf) __B,
  4730. (__v32hf) __C,
  4731. (__mmask32) __U,
  4732. _MM_FROUND_CUR_DIRECTION);
  4733. }
  4734. extern __inline __m512h
  4735. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4736. _mm512_mask3_fmadd_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
  4737. {
  4738. return (__m512h)
  4739. __builtin_ia32_vfmaddph512_mask3 ((__v32hf) __A,
  4740. (__v32hf) __B,
  4741. (__v32hf) __C,
  4742. (__mmask32) __U,
  4743. _MM_FROUND_CUR_DIRECTION);
  4744. }
  4745. extern __inline __m512h
  4746. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4747. _mm512_maskz_fmadd_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
  4748. {
  4749. return (__m512h)
  4750. __builtin_ia32_vfmaddph512_maskz ((__v32hf) __A,
  4751. (__v32hf) __B,
  4752. (__v32hf) __C,
  4753. (__mmask32) __U,
  4754. _MM_FROUND_CUR_DIRECTION);
  4755. }
  4756. #ifdef __OPTIMIZE__
  4757. extern __inline __m512h
  4758. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4759. _mm512_fmadd_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
  4760. {
  4761. return (__m512h) __builtin_ia32_vfmaddph512_mask ((__v32hf) __A,
  4762. (__v32hf) __B,
  4763. (__v32hf) __C,
  4764. (__mmask32) -1, __R);
  4765. }
  4766. extern __inline __m512h
  4767. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4768. _mm512_mask_fmadd_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
  4769. __m512h __C, const int __R)
  4770. {
  4771. return (__m512h) __builtin_ia32_vfmaddph512_mask ((__v32hf) __A,
  4772. (__v32hf) __B,
  4773. (__v32hf) __C,
  4774. (__mmask32) __U, __R);
  4775. }
  4776. extern __inline __m512h
  4777. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4778. _mm512_mask3_fmadd_round_ph (__m512h __A, __m512h __B, __m512h __C,
  4779. __mmask32 __U, const int __R)
  4780. {
  4781. return (__m512h) __builtin_ia32_vfmaddph512_mask3 ((__v32hf) __A,
  4782. (__v32hf) __B,
  4783. (__v32hf) __C,
  4784. (__mmask32) __U, __R);
  4785. }
  4786. extern __inline __m512h
  4787. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4788. _mm512_maskz_fmadd_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
  4789. __m512h __C, const int __R)
  4790. {
  4791. return (__m512h) __builtin_ia32_vfmaddph512_maskz ((__v32hf) __A,
  4792. (__v32hf) __B,
  4793. (__v32hf) __C,
  4794. (__mmask32) __U, __R);
  4795. }
  4796. #else
  4797. #define _mm512_fmadd_round_ph(A, B, C, R) \
  4798. ((__m512h)__builtin_ia32_vfmaddph512_mask ((A), (B), (C), -1, (R)))
  4799. #define _mm512_mask_fmadd_round_ph(A, U, B, C, R) \
  4800. ((__m512h)__builtin_ia32_vfmaddph512_mask ((A), (B), (C), (U), (R)))
  4801. #define _mm512_mask3_fmadd_round_ph(A, B, C, U, R) \
  4802. ((__m512h)__builtin_ia32_vfmaddph512_mask3 ((A), (B), (C), (U), (R)))
  4803. #define _mm512_maskz_fmadd_round_ph(U, A, B, C, R) \
  4804. ((__m512h)__builtin_ia32_vfmaddph512_maskz ((A), (B), (C), (U), (R)))
  4805. #endif /* __OPTIMIZE__ */
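/* Editorial usage sketch (not part of the original header): packed fused
   multiply-add with an explicit rounding override; the merge-masked form
   keeps lanes of the first operand where the mask bit is clear.
   _mm512_set1_ph is assumed from earlier in this header.

     __m512h a = _mm512_set1_ph ((_Float16) 1.5f);
     __m512h r = _mm512_fmadd_round_ph (a, a, a,
					_MM_FROUND_TO_ZERO
					| _MM_FROUND_NO_EXC);
*/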
  4806. /* Intrinsics vfnmadd[132,213,231]ph. */
  4807. extern __inline __m512h
  4808. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4809. _mm512_fnmadd_ph (__m512h __A, __m512h __B, __m512h __C)
  4810. {
  4811. return (__m512h)
  4812. __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A,
  4813. (__v32hf) __B,
  4814. (__v32hf) __C,
  4815. (__mmask32) -1,
  4816. _MM_FROUND_CUR_DIRECTION);
  4817. }
  4818. extern __inline __m512h
  4819. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4820. _mm512_mask_fnmadd_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
  4821. {
  4822. return (__m512h)
  4823. __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A,
  4824. (__v32hf) __B,
  4825. (__v32hf) __C,
  4826. (__mmask32) __U,
  4827. _MM_FROUND_CUR_DIRECTION);
  4828. }
  4829. extern __inline __m512h
  4830. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4831. _mm512_mask3_fnmadd_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
  4832. {
  4833. return (__m512h)
  4834. __builtin_ia32_vfnmaddph512_mask3 ((__v32hf) __A,
  4835. (__v32hf) __B,
  4836. (__v32hf) __C,
  4837. (__mmask32) __U,
  4838. _MM_FROUND_CUR_DIRECTION);
  4839. }
  4840. extern __inline __m512h
  4841. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4842. _mm512_maskz_fnmadd_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
  4843. {
  4844. return (__m512h)
  4845. __builtin_ia32_vfnmaddph512_maskz ((__v32hf) __A,
  4846. (__v32hf) __B,
  4847. (__v32hf) __C,
  4848. (__mmask32) __U,
  4849. _MM_FROUND_CUR_DIRECTION);
  4850. }
  4851. #ifdef __OPTIMIZE__
  4852. extern __inline __m512h
  4853. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4854. _mm512_fnmadd_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
  4855. {
  4856. return (__m512h) __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A,
  4857. (__v32hf) __B,
  4858. (__v32hf) __C,
  4859. (__mmask32) -1, __R);
  4860. }
  4861. extern __inline __m512h
  4862. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4863. _mm512_mask_fnmadd_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
  4864. __m512h __C, const int __R)
  4865. {
  4866. return (__m512h) __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A,
  4867. (__v32hf) __B,
  4868. (__v32hf) __C,
  4869. (__mmask32) __U, __R);
  4870. }
  4871. extern __inline __m512h
  4872. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4873. _mm512_mask3_fnmadd_round_ph (__m512h __A, __m512h __B, __m512h __C,
  4874. __mmask32 __U, const int __R)
  4875. {
  4876. return (__m512h) __builtin_ia32_vfnmaddph512_mask3 ((__v32hf) __A,
  4877. (__v32hf) __B,
  4878. (__v32hf) __C,
  4879. (__mmask32) __U, __R);
  4880. }
  4881. extern __inline __m512h
  4882. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4883. _mm512_maskz_fnmadd_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
  4884. __m512h __C, const int __R)
  4885. {
  4886. return (__m512h) __builtin_ia32_vfnmaddph512_maskz ((__v32hf) __A,
  4887. (__v32hf) __B,
  4888. (__v32hf) __C,
  4889. (__mmask32) __U, __R);
  4890. }
  4891. #else
  4892. #define _mm512_fnmadd_round_ph(A, B, C, R) \
  4893. ((__m512h)__builtin_ia32_vfnmaddph512_mask ((A), (B), (C), -1, (R)))
  4894. #define _mm512_mask_fnmadd_round_ph(A, U, B, C, R) \
  4895. ((__m512h)__builtin_ia32_vfnmaddph512_mask ((A), (B), (C), (U), (R)))
  4896. #define _mm512_mask3_fnmadd_round_ph(A, B, C, U, R) \
  4897. ((__m512h)__builtin_ia32_vfnmaddph512_mask3 ((A), (B), (C), (U), (R)))
  4898. #define _mm512_maskz_fnmadd_round_ph(U, A, B, C, R) \
  4899. ((__m512h)__builtin_ia32_vfnmaddph512_maskz ((A), (B), (C), (U), (R)))
  4900. #endif /* __OPTIMIZE__ */
  4901. /* Intrinsics vfmsub[132,213,231]ph. */
  4902. extern __inline __m512h
  4903. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4904. _mm512_fmsub_ph (__m512h __A, __m512h __B, __m512h __C)
  4905. {
  4906. return (__m512h)
  4907. __builtin_ia32_vfmsubph512_mask ((__v32hf) __A,
  4908. (__v32hf) __B,
  4909. (__v32hf) __C,
  4910. (__mmask32) -1,
  4911. _MM_FROUND_CUR_DIRECTION);
  4912. }
  4913. extern __inline __m512h
  4914. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4915. _mm512_mask_fmsub_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
  4916. {
  4917. return (__m512h)
  4918. __builtin_ia32_vfmsubph512_mask ((__v32hf) __A,
  4919. (__v32hf) __B,
  4920. (__v32hf) __C,
  4921. (__mmask32) __U,
  4922. _MM_FROUND_CUR_DIRECTION);
  4923. }
  4924. extern __inline __m512h
  4925. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4926. _mm512_mask3_fmsub_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
  4927. {
  4928. return (__m512h)
  4929. __builtin_ia32_vfmsubph512_mask3 ((__v32hf) __A,
  4930. (__v32hf) __B,
  4931. (__v32hf) __C,
  4932. (__mmask32) __U,
  4933. _MM_FROUND_CUR_DIRECTION);
  4934. }
  4935. extern __inline __m512h
  4936. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4937. _mm512_maskz_fmsub_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
  4938. {
  4939. return (__m512h)
  4940. __builtin_ia32_vfmsubph512_maskz ((__v32hf) __A,
  4941. (__v32hf) __B,
  4942. (__v32hf) __C,
  4943. (__mmask32) __U,
  4944. _MM_FROUND_CUR_DIRECTION);
  4945. }
  4946. #ifdef __OPTIMIZE__
  4947. extern __inline __m512h
  4948. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4949. _mm512_fmsub_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
  4950. {
  4951. return (__m512h) __builtin_ia32_vfmsubph512_mask ((__v32hf) __A,
  4952. (__v32hf) __B,
  4953. (__v32hf) __C,
  4954. (__mmask32) -1, __R);
  4955. }
  4956. extern __inline __m512h
  4957. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4958. _mm512_mask_fmsub_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
  4959. __m512h __C, const int __R)
  4960. {
  4961. return (__m512h) __builtin_ia32_vfmsubph512_mask ((__v32hf) __A,
  4962. (__v32hf) __B,
  4963. (__v32hf) __C,
  4964. (__mmask32) __U, __R);
  4965. }
  4966. extern __inline __m512h
  4967. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4968. _mm512_mask3_fmsub_round_ph (__m512h __A, __m512h __B, __m512h __C,
  4969. __mmask32 __U, const int __R)
  4970. {
  4971. return (__m512h) __builtin_ia32_vfmsubph512_mask3 ((__v32hf) __A,
  4972. (__v32hf) __B,
  4973. (__v32hf) __C,
  4974. (__mmask32) __U, __R);
  4975. }
  4976. extern __inline __m512h
  4977. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4978. _mm512_maskz_fmsub_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
  4979. __m512h __C, const int __R)
  4980. {
  4981. return (__m512h) __builtin_ia32_vfmsubph512_maskz ((__v32hf) __A,
  4982. (__v32hf) __B,
  4983. (__v32hf) __C,
  4984. (__mmask32) __U, __R);
  4985. }
  4986. #else
  4987. #define _mm512_fmsub_round_ph(A, B, C, R) \
  4988. ((__m512h)__builtin_ia32_vfmsubph512_mask ((A), (B), (C), -1, (R)))
  4989. #define _mm512_mask_fmsub_round_ph(A, U, B, C, R) \
  4990. ((__m512h)__builtin_ia32_vfmsubph512_mask ((A), (B), (C), (U), (R)))
  4991. #define _mm512_mask3_fmsub_round_ph(A, B, C, U, R) \
  4992. ((__m512h)__builtin_ia32_vfmsubph512_mask3 ((A), (B), (C), (U), (R)))
  4993. #define _mm512_maskz_fmsub_round_ph(U, A, B, C, R) \
  4994. ((__m512h)__builtin_ia32_vfmsubph512_maskz ((A), (B), (C), (U), (R)))
  4995. #endif /* __OPTIMIZE__ */
  4996. /* Intrinsics vfnmsub[132,213,231]ph. */
  4997. extern __inline __m512h
  4998. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4999. _mm512_fnmsub_ph (__m512h __A, __m512h __B, __m512h __C)
  5000. {
  5001. return (__m512h)
  5002. __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A,
  5003. (__v32hf) __B,
  5004. (__v32hf) __C,
  5005. (__mmask32) -1,
  5006. _MM_FROUND_CUR_DIRECTION);
  5007. }
  5008. extern __inline __m512h
  5009. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5010. _mm512_mask_fnmsub_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
  5011. {
  5012. return (__m512h)
  5013. __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A,
  5014. (__v32hf) __B,
  5015. (__v32hf) __C,
  5016. (__mmask32) __U,
  5017. _MM_FROUND_CUR_DIRECTION);
  5018. }
  5019. extern __inline __m512h
  5020. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5021. _mm512_mask3_fnmsub_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
  5022. {
  5023. return (__m512h)
  5024. __builtin_ia32_vfnmsubph512_mask3 ((__v32hf) __A,
  5025. (__v32hf) __B,
  5026. (__v32hf) __C,
  5027. (__mmask32) __U,
  5028. _MM_FROUND_CUR_DIRECTION);
  5029. }
  5030. extern __inline __m512h
  5031. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5032. _mm512_maskz_fnmsub_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
  5033. {
  5034. return (__m512h)
  5035. __builtin_ia32_vfnmsubph512_maskz ((__v32hf) __A,
  5036. (__v32hf) __B,
  5037. (__v32hf) __C,
  5038. (__mmask32) __U,
  5039. _MM_FROUND_CUR_DIRECTION);
  5040. }
  5041. #ifdef __OPTIMIZE__
  5042. extern __inline __m512h
  5043. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5044. _mm512_fnmsub_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
  5045. {
  5046. return (__m512h) __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A,
  5047. (__v32hf) __B,
  5048. (__v32hf) __C,
  5049. (__mmask32) -1, __R);
  5050. }
  5051. extern __inline __m512h
  5052. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5053. _mm512_mask_fnmsub_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
  5054. __m512h __C, const int __R)
  5055. {
  5056. return (__m512h) __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A,
  5057. (__v32hf) __B,
  5058. (__v32hf) __C,
  5059. (__mmask32) __U, __R);
  5060. }
  5061. extern __inline __m512h
  5062. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5063. _mm512_mask3_fnmsub_round_ph (__m512h __A, __m512h __B, __m512h __C,
  5064. __mmask32 __U, const int __R)
  5065. {
  5066. return (__m512h) __builtin_ia32_vfnmsubph512_mask3 ((__v32hf) __A,
  5067. (__v32hf) __B,
  5068. (__v32hf) __C,
  5069. (__mmask32) __U, __R);
  5070. }
  5071. extern __inline __m512h
  5072. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5073. _mm512_maskz_fnmsub_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
  5074. __m512h __C, const int __R)
  5075. {
  5076. return (__m512h) __builtin_ia32_vfnmsubph512_maskz ((__v32hf) __A,
  5077. (__v32hf) __B,
  5078. (__v32hf) __C,
  5079. (__mmask32) __U, __R);
  5080. }
  5081. #else
  5082. #define _mm512_fnmsub_round_ph(A, B, C, R) \
  5083. ((__m512h)__builtin_ia32_vfnmsubph512_mask ((A), (B), (C), -1, (R)))
  5084. #define _mm512_mask_fnmsub_round_ph(A, U, B, C, R) \
  5085. ((__m512h)__builtin_ia32_vfnmsubph512_mask ((A), (B), (C), (U), (R)))
  5086. #define _mm512_mask3_fnmsub_round_ph(A, B, C, U, R) \
  5087. ((__m512h)__builtin_ia32_vfnmsubph512_mask3 ((A), (B), (C), (U), (R)))
  5088. #define _mm512_maskz_fnmsub_round_ph(U, A, B, C, R) \
  5089. ((__m512h)__builtin_ia32_vfnmsubph512_maskz ((A), (B), (C), (U), (R)))
  5090. #endif /* __OPTIMIZE__ */
  5091. /* Intrinsics vfmadd[132,213,231]sh. */
  5092. extern __inline __m128h
  5093. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5094. _mm_fmadd_sh (__m128h __W, __m128h __A, __m128h __B)
  5095. {
  5096. return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
  5097. (__v8hf) __A,
  5098. (__v8hf) __B,
  5099. (__mmask8) -1,
  5100. _MM_FROUND_CUR_DIRECTION);
  5101. }
  5102. extern __inline __m128h
  5103. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5104. _mm_mask_fmadd_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
  5105. {
  5106. return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
  5107. (__v8hf) __A,
  5108. (__v8hf) __B,
  5109. (__mmask8) __U,
  5110. _MM_FROUND_CUR_DIRECTION);
  5111. }
  5112. extern __inline __m128h
  5113. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5114. _mm_mask3_fmadd_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
  5115. {
  5116. return (__m128h) __builtin_ia32_vfmaddsh3_mask3 ((__v8hf) __W,
  5117. (__v8hf) __A,
  5118. (__v8hf) __B,
  5119. (__mmask8) __U,
  5120. _MM_FROUND_CUR_DIRECTION);
  5121. }
  5122. extern __inline __m128h
  5123. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5124. _mm_maskz_fmadd_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
  5125. {
  5126. return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
  5127. (__v8hf) __A,
  5128. (__v8hf) __B,
  5129. (__mmask8) __U,
  5130. _MM_FROUND_CUR_DIRECTION);
  5131. }
  5132. #ifdef __OPTIMIZE__
  5133. extern __inline __m128h
  5134. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5135. _mm_fmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
  5136. {
  5137. return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
  5138. (__v8hf) __A,
  5139. (__v8hf) __B,
  5140. (__mmask8) -1,
  5141. __R);
  5142. }
  5143. extern __inline __m128h
  5144. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5145. _mm_mask_fmadd_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
  5146. const int __R)
  5147. {
  5148. return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
  5149. (__v8hf) __A,
  5150. (__v8hf) __B,
  5151. (__mmask8) __U, __R);
  5152. }
  5153. extern __inline __m128h
  5154. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5155. _mm_mask3_fmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
  5156. const int __R)
  5157. {
  5158. return (__m128h) __builtin_ia32_vfmaddsh3_mask3 ((__v8hf) __W,
  5159. (__v8hf) __A,
  5160. (__v8hf) __B,
  5161. (__mmask8) __U, __R);
  5162. }
  5163. extern __inline __m128h
  5164. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5165. _mm_maskz_fmadd_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
  5166. __m128h __B, const int __R)
  5167. {
  5168. return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
  5169. (__v8hf) __A,
  5170. (__v8hf) __B,
  5171. (__mmask8) __U, __R);
  5172. }
  5173. #else
  5174. #define _mm_fmadd_round_sh(A, B, C, R) \
  5175. ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), (C), (-1), (R)))
  5176. #define _mm_mask_fmadd_round_sh(A, U, B, C, R) \
  5177. ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), (C), (U), (R)))
  5178. #define _mm_mask3_fmadd_round_sh(A, B, C, U, R) \
  5179. ((__m128h) __builtin_ia32_vfmaddsh3_mask3 ((A), (B), (C), (U), (R)))
  5180. #define _mm_maskz_fmadd_round_sh(U, A, B, C, R) \
  5181. ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), (B), (C), (U), (R)))
  5182. #endif /* __OPTIMIZE__ */
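/* Editorial usage sketch (not part of the original header): scalar fused
   multiply-add on the low _Float16 lane only; the upper seven lanes are
   copied from the first operand.  _mm_set_sh is assumed from earlier in
   this header.

     __m128h w = _mm_set_sh ((_Float16) 2.0f);
     __m128h r = _mm_fmadd_sh (w, w, w);   /* low lane == 6.0 */
*/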
  5183. /* Intrinsics vfnmadd[132,213,231]sh. */
  5184. extern __inline __m128h
  5185. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5186. _mm_fnmadd_sh (__m128h __W, __m128h __A, __m128h __B)
  5187. {
  5188. return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
  5189. (__v8hf) __A,
  5190. (__v8hf) __B,
  5191. (__mmask8) -1,
  5192. _MM_FROUND_CUR_DIRECTION);
  5193. }
  5194. extern __inline __m128h
  5195. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5196. _mm_mask_fnmadd_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
  5197. {
  5198. return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
  5199. (__v8hf) __A,
  5200. (__v8hf) __B,
  5201. (__mmask8) __U,
  5202. _MM_FROUND_CUR_DIRECTION);
  5203. }
  5204. extern __inline __m128h
  5205. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5206. _mm_mask3_fnmadd_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
  5207. {
  5208. return (__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((__v8hf) __W,
  5209. (__v8hf) __A,
  5210. (__v8hf) __B,
  5211. (__mmask8) __U,
  5212. _MM_FROUND_CUR_DIRECTION);
  5213. }
  5214. extern __inline __m128h
  5215. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5216. _mm_maskz_fnmadd_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
  5217. {
  5218. return (__m128h) __builtin_ia32_vfnmaddsh3_maskz ((__v8hf) __W,
  5219. (__v8hf) __A,
  5220. (__v8hf) __B,
  5221. (__mmask8) __U,
  5222. _MM_FROUND_CUR_DIRECTION);
  5223. }
  5224. #ifdef __OPTIMIZE__
  5225. extern __inline __m128h
  5226. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5227. _mm_fnmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
  5228. {
  5229. return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
  5230. (__v8hf) __A,
  5231. (__v8hf) __B,
  5232. (__mmask8) -1,
  5233. __R);
  5234. }
  5235. extern __inline __m128h
  5236. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5237. _mm_mask_fnmadd_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
  5238. const int __R)
  5239. {
  5240. return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
  5241. (__v8hf) __A,
  5242. (__v8hf) __B,
  5243. (__mmask8) __U, __R);
  5244. }
  5245. extern __inline __m128h
  5246. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5247. _mm_mask3_fnmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
  5248. const int __R)
  5249. {
  5250. return (__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((__v8hf) __W,
  5251. (__v8hf) __A,
  5252. (__v8hf) __B,
  5253. (__mmask8) __U, __R);
  5254. }
  5255. extern __inline __m128h
  5256. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5257. _mm_maskz_fnmadd_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
  5258. __m128h __B, const int __R)
  5259. {
  5260. return (__m128h) __builtin_ia32_vfnmaddsh3_maskz ((__v8hf) __W,
  5261. (__v8hf) __A,
  5262. (__v8hf) __B,
  5263. (__mmask8) __U, __R);
  5264. }
  5265. #else
  5266. #define _mm_fnmadd_round_sh(A, B, C, R) \
  5267. ((__m128h) __builtin_ia32_vfnmaddsh3_mask ((A), (B), (C), (-1), (R)))
  5268. #define _mm_mask_fnmadd_round_sh(A, U, B, C, R) \
  5269. ((__m128h) __builtin_ia32_vfnmaddsh3_mask ((A), (B), (C), (U), (R)))
  5270. #define _mm_mask3_fnmadd_round_sh(A, B, C, U, R) \
  5271. ((__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((A), (B), (C), (U), (R)))
  5272. #define _mm_maskz_fnmadd_round_sh(U, A, B, C, R) \
  5273. ((__m128h) __builtin_ia32_vfnmaddsh3_maskz ((A), (B), (C), (U), (R)))
  5274. #endif /* __OPTIMIZE__ */
  5275. /* Intrinsics vfmsub[132,213,231]sh. */
  5276. extern __inline __m128h
  5277. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5278. _mm_fmsub_sh (__m128h __W, __m128h __A, __m128h __B)
  5279. {
  5280. return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
  5281. (__v8hf) __A,
  5282. -(__v8hf) __B,
  5283. (__mmask8) -1,
  5284. _MM_FROUND_CUR_DIRECTION);
  5285. }
  5286. extern __inline __m128h
  5287. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5288. _mm_mask_fmsub_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
  5289. {
  5290. return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
  5291. (__v8hf) __A,
  5292. -(__v8hf) __B,
  5293. (__mmask8) __U,
  5294. _MM_FROUND_CUR_DIRECTION);
  5295. }
  5296. extern __inline __m128h
  5297. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5298. _mm_mask3_fmsub_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
  5299. {
  5300. return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
  5301. (__v8hf) __A,
  5302. (__v8hf) __B,
  5303. (__mmask8) __U,
  5304. _MM_FROUND_CUR_DIRECTION);
  5305. }
  5306. extern __inline __m128h
  5307. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5308. _mm_maskz_fmsub_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
  5309. {
  5310. return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
  5311. (__v8hf) __A,
  5312. -(__v8hf) __B,
  5313. (__mmask8) __U,
  5314. _MM_FROUND_CUR_DIRECTION);
  5315. }
  5316. #ifdef __OPTIMIZE__
  5317. extern __inline __m128h
  5318. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5319. _mm_fmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
  5320. {
  5321. return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
  5322. (__v8hf) __A,
  5323. -(__v8hf) __B,
  5324. (__mmask8) -1,
  5325. __R);
  5326. }
  5327. extern __inline __m128h
  5328. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5329. _mm_mask_fmsub_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
  5330. const int __R)
  5331. {
  5332. return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
  5333. (__v8hf) __A,
  5334. -(__v8hf) __B,
  5335. (__mmask8) __U, __R);
  5336. }
  5337. extern __inline __m128h
  5338. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5339. _mm_mask3_fmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
  5340. const int __R)
  5341. {
  5342. return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
  5343. (__v8hf) __A,
  5344. (__v8hf) __B,
  5345. (__mmask8) __U, __R);
  5346. }
  5347. extern __inline __m128h
  5348. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5349. _mm_maskz_fmsub_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
  5350. __m128h __B, const int __R)
  5351. {
  5352. return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
  5353. (__v8hf) __A,
  5354. -(__v8hf) __B,
  5355. (__mmask8) __U, __R);
  5356. }
  5357. #else
  5358. #define _mm_fmsub_round_sh(A, B, C, R) \
  5359. ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), -(C), (-1), (R)))
  5360. #define _mm_mask_fmsub_round_sh(A, U, B, C, R) \
  5361. ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), -(C), (U), (R)))
  5362. #define _mm_mask3_fmsub_round_sh(A, B, C, U, R) \
  5363. ((__m128h) __builtin_ia32_vfmsubsh3_mask3 ((A), (B), (C), (U), (R)))
  5364. #define _mm_maskz_fmsub_round_sh(U, A, B, C, R) \
  5365. ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), (B), -(C), (U), (R)))
  5366. #endif /* __OPTIMIZE__ */
  5367. /* Intrinsics vfnmsub[132,213,231]sh. */
  5368. extern __inline __m128h
  5369. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5370. _mm_fnmsub_sh (__m128h __W, __m128h __A, __m128h __B)
  5371. {
  5372. return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
  5373. -(__v8hf) __A,
  5374. -(__v8hf) __B,
  5375. (__mmask8) -1,
  5376. _MM_FROUND_CUR_DIRECTION);
  5377. }
  5378. extern __inline __m128h
  5379. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5380. _mm_mask_fnmsub_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
  5381. {
  5382. return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
  5383. -(__v8hf) __A,
  5384. -(__v8hf) __B,
  5385. (__mmask8) __U,
  5386. _MM_FROUND_CUR_DIRECTION);
  5387. }
  5388. extern __inline __m128h
  5389. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5390. _mm_mask3_fnmsub_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
  5391. {
  5392. return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
  5393. -(__v8hf) __A,
  5394. (__v8hf) __B,
  5395. (__mmask8) __U,
  5396. _MM_FROUND_CUR_DIRECTION);
  5397. }
  5398. extern __inline __m128h
  5399. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5400. _mm_maskz_fnmsub_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
  5401. {
  5402. return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
  5403. -(__v8hf) __A,
  5404. -(__v8hf) __B,
  5405. (__mmask8) __U,
  5406. _MM_FROUND_CUR_DIRECTION);
  5407. }
  5408. #ifdef __OPTIMIZE__
  5409. extern __inline __m128h
  5410. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5411. _mm_fnmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
  5412. {
  5413. return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
  5414. -(__v8hf) __A,
  5415. -(__v8hf) __B,
  5416. (__mmask8) -1,
  5417. __R);
  5418. }
  5419. extern __inline __m128h
  5420. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5421. _mm_mask_fnmsub_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
  5422. const int __R)
  5423. {
  5424. return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
  5425. -(__v8hf) __A,
  5426. -(__v8hf) __B,
  5427. (__mmask8) __U, __R);
  5428. }
  5429. extern __inline __m128h
  5430. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5431. _mm_mask3_fnmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
  5432. const int __R)
  5433. {
  5434. return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
  5435. -(__v8hf) __A,
  5436. (__v8hf) __B,
  5437. (__mmask8) __U, __R);
  5438. }
  5439. extern __inline __m128h
  5440. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5441. _mm_maskz_fnmsub_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
  5442. __m128h __B, const int __R)
  5443. {
  5444. return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
  5445. -(__v8hf) __A,
  5446. -(__v8hf) __B,
  5447. (__mmask8) __U, __R);
  5448. }
  5449. #else
  5450. #define _mm_fnmsub_round_sh(A, B, C, R) \
  5451. ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), -(B), -(C), (-1), (R)))
  5452. #define _mm_mask_fnmsub_round_sh(A, U, B, C, R) \
  5453. ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), -(B), -(C), (U), (R)))
  5454. #define _mm_mask3_fnmsub_round_sh(A, B, C, U, R) \
  5455. ((__m128h) __builtin_ia32_vfmsubsh3_mask3 ((A), -(B), (C), (U), (R)))
  5456. #define _mm_maskz_fnmsub_round_sh(U, A, B, C, R) \
  5457. ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), -(B), -(C), (U), (R)))
  5458. #endif /* __OPTIMIZE__ */
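/* Editorial note on the four scalar FMA families above: with a, b and
   c denoting element 0 of the first, second and third vector argument,

     fmadd_sh  computes   a * b + c
     fmsub_sh  computes   a * b - c
     fnmadd_sh computes  -(a * b) + c
     fnmsub_sh computes  -(a * b) - c

   The *_round_* forms additionally take an explicit rounding control;
   a typical value combines a rounding mode with exception suppression,
   for example (a, b, c as in the sketch further above):

     __m128h r = _mm_fmadd_round_sh (a, b, c,
                                     _MM_FROUND_TO_NEAREST_INT
                                     | _MM_FROUND_NO_EXC);  */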
  5459. /* Intrinsics vf[,c]maddcph. */
  5460. extern __inline __m512h
  5461. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5462. _mm512_fcmadd_pch (__m512h __A, __m512h __B, __m512h __C)
  5463. {
  5464. return (__m512h)
  5465. __builtin_ia32_vfcmaddcph512_round ((__v32hf) __A,
  5466. (__v32hf) __B,
  5467. (__v32hf) __C,
  5468. _MM_FROUND_CUR_DIRECTION);
  5469. }
  5470. extern __inline __m512h
  5471. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5472. _mm512_mask_fcmadd_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D)
  5473. {
  5474. return (__m512h)
  5475. __builtin_ia32_vfcmaddcph512_mask_round ((__v32hf) __A,
  5476. (__v32hf) __C,
  5477. (__v32hf) __D, __B,
  5478. _MM_FROUND_CUR_DIRECTION);
  5479. }
  5480. extern __inline __m512h
  5481. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5482. _mm512_mask3_fcmadd_pch (__m512h __A, __m512h __B, __m512h __C, __mmask16 __D)
  5483. {
  5484. return (__m512h)
  5485. __builtin_ia32_vfcmaddcph512_mask3_round ((__v32hf) __A,
  5486. (__v32hf) __B,
  5487. (__v32hf) __C,
  5488. __D, _MM_FROUND_CUR_DIRECTION);
  5489. }
  5490. extern __inline __m512h
  5491. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5492. _mm512_maskz_fcmadd_pch (__mmask16 __A, __m512h __B, __m512h __C, __m512h __D)
  5493. {
  5494. return (__m512h)
  5495. __builtin_ia32_vfcmaddcph512_maskz_round ((__v32hf) __B,
  5496. (__v32hf) __C,
  5497. (__v32hf) __D,
  5498. __A, _MM_FROUND_CUR_DIRECTION);
  5499. }
  5500. extern __inline __m512h
  5501. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5502. _mm512_fmadd_pch (__m512h __A, __m512h __B, __m512h __C)
  5503. {
  5504. return (__m512h)
  5505. __builtin_ia32_vfmaddcph512_round ((__v32hf) __A,
  5506. (__v32hf) __B,
  5507. (__v32hf) __C,
  5508. _MM_FROUND_CUR_DIRECTION);
  5509. }
  5510. extern __inline __m512h
  5511. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5512. _mm512_mask_fmadd_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D)
  5513. {
  5514. return (__m512h)
  5515. __builtin_ia32_vfmaddcph512_mask_round ((__v32hf) __A,
  5516. (__v32hf) __C,
  5517. (__v32hf) __D, __B,
  5518. _MM_FROUND_CUR_DIRECTION);
  5519. }
  5520. extern __inline __m512h
  5521. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5522. _mm512_mask3_fmadd_pch (__m512h __A, __m512h __B, __m512h __C, __mmask16 __D)
  5523. {
  5524. return (__m512h)
  5525. __builtin_ia32_vfmaddcph512_mask3_round ((__v32hf) __A,
  5526. (__v32hf) __B,
  5527. (__v32hf) __C,
  5528. __D, _MM_FROUND_CUR_DIRECTION);
  5529. }
  5530. extern __inline __m512h
  5531. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5532. _mm512_maskz_fmadd_pch (__mmask16 __A, __m512h __B, __m512h __C, __m512h __D)
  5533. {
  5534. return (__m512h)
  5535. __builtin_ia32_vfmaddcph512_maskz_round ((__v32hf) __B,
  5536. (__v32hf) __C,
  5537. (__v32hf) __D,
  5538. __A, _MM_FROUND_CUR_DIRECTION);
  5539. }
  5540. #ifdef __OPTIMIZE__
  5541. extern __inline __m512h
  5542. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5543. _mm512_fcmadd_round_pch (__m512h __A, __m512h __B, __m512h __C, const int __D)
  5544. {
  5545. return (__m512h)
  5546. __builtin_ia32_vfcmaddcph512_round ((__v32hf) __A,
  5547. (__v32hf) __B,
  5548. (__v32hf) __C,
  5549. __D);
  5550. }
  5551. extern __inline __m512h
  5552. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5553. _mm512_mask_fcmadd_round_pch (__m512h __A, __mmask16 __B, __m512h __C,
  5554. __m512h __D, const int __E)
  5555. {
  5556. return (__m512h)
  5557. __builtin_ia32_vfcmaddcph512_mask_round ((__v32hf) __A,
  5558. (__v32hf) __C,
  5559. (__v32hf) __D, __B,
  5560. __E);
  5561. }
  5562. extern __inline __m512h
  5563. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5564. _mm512_mask3_fcmadd_round_pch (__m512h __A, __m512h __B, __m512h __C,
  5565. __mmask16 __D, const int __E)
  5566. {
  5567. return (__m512h)
  5568. __builtin_ia32_vfcmaddcph512_mask3_round ((__v32hf) __A,
  5569. (__v32hf) __B,
  5570. (__v32hf) __C,
  5571. __D, __E);
  5572. }
  5573. extern __inline __m512h
  5574. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5575. _mm512_maskz_fcmadd_round_pch (__mmask16 __A, __m512h __B, __m512h __C,
  5576. __m512h __D, const int __E)
  5577. {
  5578. return (__m512h)
  5579. __builtin_ia32_vfcmaddcph512_maskz_round ((__v32hf) __B,
  5580. (__v32hf) __C,
  5581. (__v32hf) __D,
  5582. __A, __E);
  5583. }
  5584. extern __inline __m512h
  5585. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5586. _mm512_fmadd_round_pch (__m512h __A, __m512h __B, __m512h __C, const int __D)
  5587. {
  5588. return (__m512h)
  5589. __builtin_ia32_vfmaddcph512_round ((__v32hf) __A,
  5590. (__v32hf) __B,
  5591. (__v32hf) __C,
  5592. __D);
  5593. }
  5594. extern __inline __m512h
  5595. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5596. _mm512_mask_fmadd_round_pch (__m512h __A, __mmask16 __B, __m512h __C,
  5597. __m512h __D, const int __E)
  5598. {
  5599. return (__m512h)
  5600. __builtin_ia32_vfmaddcph512_mask_round ((__v32hf) __A,
  5601. (__v32hf) __C,
  5602. (__v32hf) __D, __B,
  5603. __E);
  5604. }
  5605. extern __inline __m512h
  5606. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5607. _mm512_mask3_fmadd_round_pch (__m512h __A, __m512h __B, __m512h __C,
  5608. __mmask16 __D, const int __E)
  5609. {
  5610. return (__m512h)
  5611. __builtin_ia32_vfmaddcph512_mask3_round ((__v32hf) __A,
  5612. (__v32hf) __B,
  5613. (__v32hf) __C,
  5614. __D, __E);
  5615. }
  5616. extern __inline __m512h
  5617. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5618. _mm512_maskz_fmadd_round_pch (__mmask16 __A, __m512h __B, __m512h __C,
  5619. __m512h __D, const int __E)
  5620. {
  5621. return (__m512h)
  5622. __builtin_ia32_vfmaddcph512_maskz_round ((__v32hf) __B,
  5623. (__v32hf) __C,
  5624. (__v32hf) __D,
  5625. __A, __E);
  5626. }
  5627. #else
  5628. #define _mm512_fcmadd_round_pch(A, B, C, D) \
  5629. (__m512h) __builtin_ia32_vfcmaddcph512_round ((A), (B), (C), (D))
  5630. #define _mm512_mask_fcmadd_round_pch(A, B, C, D, E) \
  5631. ((__m512h) \
  5632. __builtin_ia32_vfcmaddcph512_mask_round ((__v32hf) (A), \
  5633. (__v32hf) (C), \
  5634. (__v32hf) (D), \
  5635. (B), (E)))
  5636. #define _mm512_mask3_fcmadd_round_pch(A, B, C, D, E) \
  5637. ((__m512h) \
  5638. __builtin_ia32_vfcmaddcph512_mask3_round ((A), (B), (C), (D), (E)))
  5639. #define _mm512_maskz_fcmadd_round_pch(A, B, C, D, E) \
  5640. (__m512h) \
  5641. __builtin_ia32_vfcmaddcph512_maskz_round ((B), (C), (D), (A), (E))
  5642. #define _mm512_fmadd_round_pch(A, B, C, D) \
  5643. (__m512h) __builtin_ia32_vfmaddcph512_round ((A), (B), (C), (D))
  5644. #define _mm512_mask_fmadd_round_pch(A, B, C, D, E) \
  5645. ((__m512h) \
  5646. __builtin_ia32_vfmaddcph512_mask_round ((__v32hf) (A), \
  5647. (__v32hf) (C), \
  5648. (__v32hf) (D), \
  5649. (B), (E)))
  5650. #define _mm512_mask3_fmadd_round_pch(A, B, C, D, E) \
  5651. (__m512h) \
  5652. __builtin_ia32_vfmaddcph512_mask3_round ((A), (B), (C), (D), (E))
  5653. #define _mm512_maskz_fmadd_round_pch(A, B, C, D, E) \
  5654. (__m512h) \
  5655. __builtin_ia32_vfmaddcph512_maskz_round ((B), (C), (D), (A), (E))
  5656. #endif /* __OPTIMIZE__ */
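/* Usage sketch for the packed complex FMA forms above (editorial
   addition): each __m512h is treated as 16 complex numbers, every
   adjacent pair of _Float16 elements holding the real part (lower
   element) and imaginary part (higher element).  The mask is therefore
   an __mmask16 with one bit per complex element.  _mm512_fmadd_pch
   performs a complex multiply-add; the fcmadd variants conjugate one
   of the multiplicands first.  _mm512_set1_ph is assumed from earlier
   in this header.

     __m512h x   = _mm512_set1_ph ((_Float16) 1.0f);
     __m512h y   = _mm512_set1_ph ((_Float16) 2.0f);
     __m512h acc = _mm512_setzero_ph ();

     __m512h r  = _mm512_fmadd_pch (x, y, acc);
     __m512h rz = _mm512_maskz_fmadd_pch ((__mmask16) 0x00ff, x, y, acc);
                  // roughly: low 8 complex results computed, high 8 zeroed  */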
  5657. /* Intrinsics vf[,c]mulcph. */
  5658. extern __inline __m512h
  5659. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5660. _mm512_fcmul_pch (__m512h __A, __m512h __B)
  5661. {
  5662. return (__m512h)
  5663. __builtin_ia32_vfcmulcph512_round ((__v32hf) __A,
  5664. (__v32hf) __B,
  5665. _MM_FROUND_CUR_DIRECTION);
  5666. }
  5667. extern __inline __m512h
  5668. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5669. _mm512_mask_fcmul_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D)
  5670. {
  5671. return (__m512h)
  5672. __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __C,
  5673. (__v32hf) __D,
  5674. (__v32hf) __A,
  5675. __B, _MM_FROUND_CUR_DIRECTION);
  5676. }
  5677. extern __inline __m512h
  5678. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5679. _mm512_maskz_fcmul_pch (__mmask16 __A, __m512h __B, __m512h __C)
  5680. {
  5681. return (__m512h)
  5682. __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __B,
  5683. (__v32hf) __C,
  5684. _mm512_setzero_ph (),
  5685. __A, _MM_FROUND_CUR_DIRECTION);
  5686. }
  5687. extern __inline __m512h
  5688. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5689. _mm512_fmul_pch (__m512h __A, __m512h __B)
  5690. {
  5691. return (__m512h)
  5692. __builtin_ia32_vfmulcph512_round ((__v32hf) __A,
  5693. (__v32hf) __B,
  5694. _MM_FROUND_CUR_DIRECTION);
  5695. }
  5696. extern __inline __m512h
  5697. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5698. _mm512_mask_fmul_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D)
  5699. {
  5700. return (__m512h)
  5701. __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __C,
  5702. (__v32hf) __D,
  5703. (__v32hf) __A,
  5704. __B, _MM_FROUND_CUR_DIRECTION);
  5705. }
  5706. extern __inline __m512h
  5707. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5708. _mm512_maskz_fmul_pch (__mmask16 __A, __m512h __B, __m512h __C)
  5709. {
  5710. return (__m512h)
  5711. __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __B,
  5712. (__v32hf) __C,
  5713. _mm512_setzero_ph (),
  5714. __A, _MM_FROUND_CUR_DIRECTION);
  5715. }
  5716. #ifdef __OPTIMIZE__
  5717. extern __inline __m512h
  5718. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5719. _mm512_fcmul_round_pch (__m512h __A, __m512h __B, const int __D)
  5720. {
  5721. return (__m512h)
  5722. __builtin_ia32_vfcmulcph512_round ((__v32hf) __A,
  5723. (__v32hf) __B, __D);
  5724. }
  5725. extern __inline __m512h
  5726. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5727. _mm512_mask_fcmul_round_pch (__m512h __A, __mmask16 __B, __m512h __C,
  5728. __m512h __D, const int __E)
  5729. {
  5730. return (__m512h)
  5731. __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __C,
  5732. (__v32hf) __D,
  5733. (__v32hf) __A,
  5734. __B, __E);
  5735. }
  5736. extern __inline __m512h
  5737. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5738. _mm512_maskz_fcmul_round_pch (__mmask16 __A, __m512h __B,
  5739. __m512h __C, const int __E)
  5740. {
  5741. return (__m512h)
  5742. __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __B,
  5743. (__v32hf) __C,
  5744. _mm512_setzero_ph (),
  5745. __A, __E);
  5746. }
  5747. extern __inline __m512h
  5748. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5749. _mm512_fmul_round_pch (__m512h __A, __m512h __B, const int __D)
  5750. {
  5751. return (__m512h)
  5752. __builtin_ia32_vfmulcph512_round ((__v32hf) __A,
  5753. (__v32hf) __B,
  5754. __D);
  5755. }
  5756. extern __inline __m512h
  5757. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5758. _mm512_mask_fmul_round_pch (__m512h __A, __mmask16 __B, __m512h __C,
  5759. __m512h __D, const int __E)
  5760. {
  5761. return (__m512h)
  5762. __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __C,
  5763. (__v32hf) __D,
  5764. (__v32hf) __A,
  5765. __B, __E);
  5766. }
  5767. extern __inline __m512h
  5768. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5769. _mm512_maskz_fmul_round_pch (__mmask16 __A, __m512h __B,
  5770. __m512h __C, const int __E)
  5771. {
  5772. return (__m512h)
  5773. __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __B,
  5774. (__v32hf) __C,
  5775. _mm512_setzero_ph (),
  5776. __A, __E);
  5777. }
  5778. #else
  5779. #define _mm512_fcmul_round_pch(A, B, D) \
  5780. (__m512h) __builtin_ia32_vfcmulcph512_round ((A), (B), (D))
  5781. #define _mm512_mask_fcmul_round_pch(A, B, C, D, E) \
  5782. (__m512h) __builtin_ia32_vfcmulcph512_mask_round ((C), (D), (A), (B), (E))
  5783. #define _mm512_maskz_fcmul_round_pch(A, B, C, E) \
  5784. (__m512h) __builtin_ia32_vfcmulcph512_mask_round ((B), (C), \
  5785. (__v32hf) \
  5786. _mm512_setzero_ph (), \
  5787. (A), (E))
  5788. #define _mm512_fmul_round_pch(A, B, D) \
  5789. (__m512h) __builtin_ia32_vfmulcph512_round ((A), (B), (D))
  5790. #define _mm512_mask_fmul_round_pch(A, B, C, D, E) \
  5791. (__m512h) __builtin_ia32_vfmulcph512_mask_round ((C), (D), (A), (B), (E))
  5792. #define _mm512_maskz_fmul_round_pch(A, B, C, E) \
  5793. (__m512h) __builtin_ia32_vfmulcph512_mask_round ((B), (C), \
  5794. (__v32hf) \
  5795. _mm512_setzero_ph (), \
  5796. (A), (E))
  5797. #endif /* __OPTIMIZE__ */
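/* Usage sketch for the packed complex multiplies above (editorial
   addition): _mm512_fmul_pch multiplies 16 complex _Float16 values
   element-wise; _mm512_fcmul_pch uses the complex conjugate of the
   second operand.  As the definitions above show, the mask forms take
   inactive elements from the first (__A) argument and the maskz forms
   substitute _mm512_setzero_ph ().  x and y as in the previous sketch.

     __m512h p  = _mm512_fmul_pch (x, y);
     __m512h pc = _mm512_fcmul_pch (x, y);
     __m512h pm = _mm512_mask_fmul_pch (p, (__mmask16) 0x000f, x, y);  */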
  5798. /* Intrinsics vf[,c]maddcsh. */
  5799. extern __inline __m128h
  5800. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5801. _mm_mask_fcmadd_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
  5802. {
  5803. return (__m128h)
  5804. __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) __A,
  5805. (__v8hf) __C,
  5806. (__v8hf) __D, __B,
  5807. _MM_FROUND_CUR_DIRECTION);
  5808. }
  5809. extern __inline __m128h
  5810. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5811. _mm_mask3_fcmadd_sch (__m128h __A, __m128h __B, __m128h __C, __mmask8 __D)
  5812. {
  5813. return (__m128h)
  5814. __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) __A,
  5815. (__v8hf) __B,
  5816. (__v8hf) __C, __D,
  5817. _MM_FROUND_CUR_DIRECTION);
  5818. }
  5819. extern __inline __m128h
  5820. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5821. _mm_maskz_fcmadd_sch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D)
  5822. {
  5823. return (__m128h)
  5824. __builtin_ia32_vfcmaddcsh_maskz_round ((__v8hf) __B,
  5825. (__v8hf) __C,
  5826. (__v8hf) __D,
  5827. __A, _MM_FROUND_CUR_DIRECTION);
  5828. }
  5829. extern __inline __m128h
  5830. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5831. _mm_fcmadd_sch (__m128h __A, __m128h __B, __m128h __C)
  5832. {
  5833. return (__m128h)
  5834. __builtin_ia32_vfcmaddcsh_round ((__v8hf) __A,
  5835. (__v8hf) __B,
  5836. (__v8hf) __C,
  5837. _MM_FROUND_CUR_DIRECTION);
  5838. }
  5839. extern __inline __m128h
  5840. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5841. _mm_mask_fmadd_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
  5842. {
  5843. return (__m128h)
  5844. __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) __A,
  5845. (__v8hf) __C,
  5846. (__v8hf) __D, __B,
  5847. _MM_FROUND_CUR_DIRECTION);
  5848. }
  5849. extern __inline __m128h
  5850. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5851. _mm_mask3_fmadd_sch (__m128h __A, __m128h __B, __m128h __C, __mmask8 __D)
  5852. {
  5853. return (__m128h)
  5854. __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) __A,
  5855. (__v8hf) __B,
  5856. (__v8hf) __C, __D,
  5857. _MM_FROUND_CUR_DIRECTION);
  5858. }
  5859. extern __inline __m128h
  5860. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5861. _mm_maskz_fmadd_sch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D)
  5862. {
  5863. return (__m128h)
  5864. __builtin_ia32_vfmaddcsh_maskz_round ((__v8hf) __B,
  5865. (__v8hf) __C,
  5866. (__v8hf) __D,
  5867. __A, _MM_FROUND_CUR_DIRECTION);
  5868. }
  5869. extern __inline __m128h
  5870. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5871. _mm_fmadd_sch (__m128h __A, __m128h __B, __m128h __C)
  5872. {
  5873. return (__m128h)
  5874. __builtin_ia32_vfmaddcsh_round ((__v8hf) __A,
  5875. (__v8hf) __B,
  5876. (__v8hf) __C,
  5877. _MM_FROUND_CUR_DIRECTION);
  5878. }
  5879. #ifdef __OPTIMIZE__
  5880. extern __inline __m128h
  5881. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5882. _mm_mask_fcmadd_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
  5883. __m128h __D, const int __E)
  5884. {
  5885. return (__m128h)
  5886. __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) __A,
  5887. (__v8hf) __C,
  5888. (__v8hf) __D,
  5889. __B, __E);
  5890. }
  5891. extern __inline __m128h
  5892. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5893. _mm_mask3_fcmadd_round_sch (__m128h __A, __m128h __B, __m128h __C,
  5894. __mmask8 __D, const int __E)
  5895. {
  5896. return (__m128h)
  5897. __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) __A,
  5898. (__v8hf) __B,
  5899. (__v8hf) __C,
  5900. __D, __E);
  5901. }
  5902. extern __inline __m128h
  5903. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5904. _mm_maskz_fcmadd_round_sch (__mmask8 __A, __m128h __B, __m128h __C,
  5905. __m128h __D, const int __E)
  5906. {
  5907. return (__m128h)
  5908. __builtin_ia32_vfcmaddcsh_maskz_round ((__v8hf) __B,
  5909. (__v8hf) __C,
  5910. (__v8hf) __D,
  5911. __A, __E);
  5912. }
  5913. extern __inline __m128h
  5914. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5915. _mm_fcmadd_round_sch (__m128h __A, __m128h __B, __m128h __C, const int __D)
  5916. {
  5917. return (__m128h)
  5918. __builtin_ia32_vfcmaddcsh_round ((__v8hf) __A,
  5919. (__v8hf) __B,
  5920. (__v8hf) __C,
  5921. __D);
  5922. }
  5923. extern __inline __m128h
  5924. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5925. _mm_mask_fmadd_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
  5926. __m128h __D, const int __E)
  5927. {
  5928. return (__m128h)
  5929. __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) __A,
  5930. (__v8hf) __C,
  5931. (__v8hf) __D,
  5932. __B, __E);
  5933. }
  5934. extern __inline __m128h
  5935. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5936. _mm_mask3_fmadd_round_sch (__m128h __A, __m128h __B, __m128h __C,
  5937. __mmask8 __D, const int __E)
  5938. {
  5939. return (__m128h)
  5940. __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) __A,
  5941. (__v8hf) __B,
  5942. (__v8hf) __C,
  5943. __D, __E);
  5944. }
  5945. extern __inline __m128h
  5946. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5947. _mm_maskz_fmadd_round_sch (__mmask8 __A, __m128h __B, __m128h __C,
  5948. __m128h __D, const int __E)
  5949. {
  5950. return (__m128h)
  5951. __builtin_ia32_vfmaddcsh_maskz_round ((__v8hf) __B,
  5952. (__v8hf) __C,
  5953. (__v8hf) __D,
  5954. __A, __E);
  5955. }
  5956. extern __inline __m128h
  5957. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5958. _mm_fmadd_round_sch (__m128h __A, __m128h __B, __m128h __C, const int __D)
  5959. {
  5960. return (__m128h)
  5961. __builtin_ia32_vfmaddcsh_round ((__v8hf) __A,
  5962. (__v8hf) __B,
  5963. (__v8hf) __C,
  5964. __D);
  5965. }
  5966. #else
  5967. #define _mm_mask_fcmadd_round_sch(A, B, C, D, E) \
  5968. ((__m128h) \
  5969. __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) (A), \
  5970. (__v8hf) (C), \
  5971. (__v8hf) (D), \
  5972. (B), (E)))
  5973. #define _mm_mask3_fcmadd_round_sch(A, B, C, D, E) \
  5974. ((__m128h) \
  5975. __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) (A), \
  5976. (__v8hf) (B), \
  5977. (__v8hf) (C), \
  5978. (D), (E)))
  5979. #define _mm_maskz_fcmadd_round_sch(A, B, C, D, E) \
  5980. __builtin_ia32_vfcmaddcsh_maskz_round ((B), (C), (D), (A), (E))
  5981. #define _mm_fcmadd_round_sch(A, B, C, D) \
  5982. __builtin_ia32_vfcmaddcsh_round ((A), (B), (C), (D))
  5983. #define _mm_mask_fmadd_round_sch(A, B, C, D, E) \
  5984. ((__m128h) \
  5985. __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) (A), \
  5986. (__v8hf) (C), \
  5987. (__v8hf) (D), \
  5988. (B), (E)))
  5989. #define _mm_mask3_fmadd_round_sch(A, B, C, D, E) \
  5990. ((__m128h) \
  5991. __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) (A), \
  5992. (__v8hf) (B), \
  5993. (__v8hf) (C), \
  5994. (D), (E)))
  5995. #define _mm_maskz_fmadd_round_sch(A, B, C, D, E) \
  5996. __builtin_ia32_vfmaddcsh_maskz_round ((B), (C), (D), (A), (E))
  5997. #define _mm_fmadd_round_sch(A, B, C, D) \
  5998. __builtin_ia32_vfmaddcsh_round ((A), (B), (C), (D))
  5999. #endif /* __OPTIMIZE__ */
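/* Usage sketch for the scalar complex FMA forms above (editorial
   addition): the _sch intrinsics operate on a single complex value in
   the two low _Float16 elements (element 0 = real part, element 1 =
   imaginary part), and only bit 0 of the mask is consulted.  Assumes
   the _mm_set_ph helper defined earlier in this header (arguments are
   given from element 7 down to element 0).

     __m128h ca = _mm_set_ph (0, 0, 0, 0, 0, 0,
                              (_Float16) 2.0f,    // imaginary part
                              (_Float16) 1.0f);   // real part
     __m128h cb = _mm_set_ph (0, 0, 0, 0, 0, 0,
                              (_Float16) 1.0f,
                              (_Float16) 3.0f);
     __m128h cc = _mm_setzero_ph ();

     __m128h r  = _mm_fmadd_sch (ca, cb, cc);    // (1+2i) * (3+1i) + 0
     __m128h rc = _mm_fcmadd_sch (ca, cb, cc);   // conjugated variant  */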
  6000. /* Intrinsics vf[,c]mulcsh. */
  6001. extern __inline __m128h
  6002. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6003. _mm_fcmul_sch (__m128h __A, __m128h __B)
  6004. {
  6005. return (__m128h)
  6006. __builtin_ia32_vfcmulcsh_round ((__v8hf) __A,
  6007. (__v8hf) __B,
  6008. _MM_FROUND_CUR_DIRECTION);
  6009. }
  6010. extern __inline __m128h
  6011. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6012. _mm_mask_fcmul_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
  6013. {
  6014. return (__m128h)
  6015. __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __C,
  6016. (__v8hf) __D,
  6017. (__v8hf) __A,
  6018. __B, _MM_FROUND_CUR_DIRECTION);
  6019. }
  6020. extern __inline __m128h
  6021. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6022. _mm_maskz_fcmul_sch (__mmask8 __A, __m128h __B, __m128h __C)
  6023. {
  6024. return (__m128h)
  6025. __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __B,
  6026. (__v8hf) __C,
  6027. _mm_setzero_ph (),
  6028. __A, _MM_FROUND_CUR_DIRECTION);
  6029. }
  6030. extern __inline __m128h
  6031. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6032. _mm_fmul_sch (__m128h __A, __m128h __B)
  6033. {
  6034. return (__m128h)
  6035. __builtin_ia32_vfmulcsh_round ((__v8hf) __A,
  6036. (__v8hf) __B,
  6037. _MM_FROUND_CUR_DIRECTION);
  6038. }
  6039. extern __inline __m128h
  6040. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6041. _mm_mask_fmul_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
  6042. {
  6043. return (__m128h)
  6044. __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __C,
  6045. (__v8hf) __D,
  6046. (__v8hf) __A,
  6047. __B, _MM_FROUND_CUR_DIRECTION);
  6048. }
  6049. extern __inline __m128h
  6050. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6051. _mm_maskz_fmul_sch (__mmask8 __A, __m128h __B, __m128h __C)
  6052. {
  6053. return (__m128h)
  6054. __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __B,
  6055. (__v8hf) __C,
  6056. _mm_setzero_ph (),
  6057. __A, _MM_FROUND_CUR_DIRECTION);
  6058. }
  6059. #ifdef __OPTIMIZE__
  6060. extern __inline __m128h
  6061. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6062. _mm_fcmul_round_sch (__m128h __A, __m128h __B, const int __D)
  6063. {
  6064. return (__m128h)
  6065. __builtin_ia32_vfcmulcsh_round ((__v8hf) __A,
  6066. (__v8hf) __B,
  6067. __D);
  6068. }
  6069. extern __inline __m128h
  6070. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6071. _mm_mask_fcmul_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
  6072. __m128h __D, const int __E)
  6073. {
  6074. return (__m128h)
  6075. __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __C,
  6076. (__v8hf) __D,
  6077. (__v8hf) __A,
  6078. __B, __E);
  6079. }
  6080. extern __inline __m128h
  6081. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6082. _mm_maskz_fcmul_round_sch (__mmask8 __A, __m128h __B, __m128h __C,
  6083. const int __E)
  6084. {
  6085. return (__m128h)
  6086. __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __B,
  6087. (__v8hf) __C,
  6088. _mm_setzero_ph (),
  6089. __A, __E);
  6090. }
  6091. extern __inline __m128h
  6092. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6093. _mm_fmul_round_sch (__m128h __A, __m128h __B, const int __D)
  6094. {
  6095. return (__m128h)
  6096. __builtin_ia32_vfmulcsh_round ((__v8hf) __A,
  6097. (__v8hf) __B, __D);
  6098. }
  6099. extern __inline __m128h
  6100. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6101. _mm_mask_fmul_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
  6102. __m128h __D, const int __E)
  6103. {
  6104. return (__m128h)
  6105. __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __C,
  6106. (__v8hf) __D,
  6107. (__v8hf) __A,
  6108. __B, __E);
  6109. }
  6110. extern __inline __m128h
  6111. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6112. _mm_maskz_fmul_round_sch (__mmask8 __A, __m128h __B, __m128h __C, const int __E)
  6113. {
  6114. return (__m128h)
  6115. __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __B,
  6116. (__v8hf) __C,
  6117. _mm_setzero_ph (),
  6118. __A, __E);
  6119. }
  6120. #else
  6121. #define _mm_fcmul_round_sch(__A, __B, __D) \
  6122. (__m128h) __builtin_ia32_vfcmulcsh_round ((__v8hf) __A, \
  6123. (__v8hf) __B, __D)
  6124. #define _mm_mask_fcmul_round_sch(__A, __B, __C, __D, __E) \
  6125. (__m128h) __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __C, \
  6126. (__v8hf) __D, \
  6127. (__v8hf) __A, \
  6128. __B, __E)
  6129. #define _mm_maskz_fcmul_round_sch(__A, __B, __C, __E) \
  6130. (__m128h) __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __B, \
  6131. (__v8hf) __C, \
  6132. _mm_setzero_ph (), \
  6133. __A, __E)
  6134. #define _mm_fmul_round_sch(__A, __B, __D) \
  6135. (__m128h) __builtin_ia32_vfmulcsh_round ((__v8hf) __A, \
  6136. (__v8hf) __B, __D)
  6137. #define _mm_mask_fmul_round_sch(__A, __B, __C, __D, __E) \
  6138. (__m128h) __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __C, \
  6139. (__v8hf) __D, \
  6140. (__v8hf) __A, \
  6141. __B, __E)
  6142. #define _mm_maskz_fmul_round_sch(__A, __B, __C, __E) \
  6143. (__m128h) __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __B, \
  6144. (__v8hf) __C, \
  6145. _mm_setzero_ph (), \
  6146. __A, __E)
  6147. #endif /* __OPTIMIZE__ */
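/* Editorial note on the reduction helpers below: _MM512_REDUCE_OP
   builds a horizontal reduction by repeatedly splitting the vector in
   half and combining the halves with the given operation, first from
   512 to 256 to 128 bits via the extract intrinsics, then within the
   remaining 128 bits via __builtin_shuffle, until the result sits in
   element 0 (for + and * the final step combines the last two scalars
   directly).  */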
  6148. #define _MM512_REDUCE_OP(op) \
  6149. __m256h __T1 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
  6150. __m256h __T2 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
  6151. __m256h __T3 = (__T1 op __T2); \
  6152. __m128h __T4 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 0); \
  6153. __m128h __T5 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 1); \
  6154. __m128h __T6 = (__T4 op __T5); \
  6155. __m128h __T7 = (__m128h) __builtin_shuffle ((__m128h)__T6, \
  6156. (__v8hi) { 4, 5, 6, 7, 0, 1, 2, 3 }); \
  6157. __m128h __T8 = (__T6 op __T7); \
  6158. __m128h __T9 = (__m128h) __builtin_shuffle ((__m128h)__T8, \
  6159. (__v8hi) { 2, 3, 0, 1, 4, 5, 6, 7 }); \
  6160. __m128h __T10 = __T8 op __T9; \
  6161. return __T10[0] op __T10[1]
  6162. // TODO reduce
  6163. extern __inline _Float16
  6164. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6165. _mm512_reduce_add_ph (__m512h __A)
  6166. {
  6167. _MM512_REDUCE_OP (+);
  6168. }
  6169. extern __inline _Float16
  6170. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6171. _mm512_reduce_mul_ph (__m512h __A)
  6172. {
  6173. _MM512_REDUCE_OP (*);
  6174. }
  6175. #undef _MM512_REDUCE_OP
  6176. #ifdef __AVX512VL__
  6177. #define _MM512_REDUCE_OP(op) \
  6178. __m256h __T1 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
  6179. __m256h __T2 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
  6180. __m256h __T3 = __builtin_ia32_##op##ph256_mask (__T1, __T2, \
  6181. _mm256_setzero_ph (), (__mmask16) -1); \
  6182. __m128h __T4 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 0); \
  6183. __m128h __T5 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 1); \
  6184. __m128h __T6 = __builtin_ia32_##op##ph128_mask \
  6185. (__T4, __T5, _mm_setzero_ph (),(__mmask8) -1); \
  6186. __m128h __T7 = (__m128h) __builtin_shuffle ((__m128h)__T6, \
  6187. (__v8hi) { 2, 3, 0, 1, 6, 7, 4, 5 }); \
  6188. __m128h __T8 = (__m128h) __builtin_ia32_##op##ph128_mask \
  6189. (__T6, __T7, _mm_setzero_ph (),(__mmask8) -1); \
  6190. __m128h __T9 = (__m128h) __builtin_shuffle ((__m128h)__T8, \
  6191. (__v8hi) { 4, 5 }); \
  6192. __m128h __T10 = __builtin_ia32_##op##ph128_mask \
  6193. (__T8, __T9, _mm_setzero_ph (),(__mmask8) -1); \
  6194. __m128h __T11 = (__m128h) __builtin_shuffle (__T10, \
  6195. (__v8hi) { 1, 0 }); \
  6196. __m128h __T12 = __builtin_ia32_##op##ph128_mask \
  6197. (__T10, __T11, _mm_setzero_ph (),(__mmask8) -1); \
  6198. return __T12[0]
  6199. #else
  6200. #define _MM512_REDUCE_OP(op) \
  6201. __m512h __T1 = (__m512h) __builtin_shuffle ((__m512d) __A, \
  6202. (__v8di) { 4, 5, 6, 7, 0, 0, 0, 0 }); \
  6203. __m512h __T2 = _mm512_##op##_ph (__A, __T1); \
  6204. __m512h __T3 = (__m512h) __builtin_shuffle ((__m512d) __T2, \
  6205. (__v8di) { 2, 3, 0, 0, 0, 0, 0, 0 }); \
  6206. __m512h __T4 = _mm512_##op##_ph (__T2, __T3); \
  6207. __m512h __T5 = (__m512h) __builtin_shuffle ((__m512d) __T4, \
  6208. (__v8di) { 1, 0, 0, 0, 0, 0, 0, 0 }); \
  6209. __m512h __T6 = _mm512_##op##_ph (__T4, __T5); \
  6210. __m512h __T7 = (__m512h) __builtin_shuffle ((__m512) __T6, \
  6211. (__v16si) { 1, 0, 0, 0, 0, 0, 0, 0, \
  6212. 0, 0, 0, 0, 0, 0, 0, 0 }); \
  6213. __m512h __T8 = _mm512_##op##_ph (__T6, __T7); \
  6214. __m512h __T9 = (__m512h) __builtin_shuffle (__T8, \
  6215. (__v32hi) { 1, 0, 0, 0, 0, 0, 0, 0, \
  6216. 0, 0, 0, 0, 0, 0, 0, 0, \
  6217. 0, 0, 0, 0, 0, 0, 0, 0, \
  6218. 0, 0, 0, 0, 0, 0, 0, 0 }); \
  6219. __m512h __T10 = _mm512_##op##_ph (__T8, __T9); \
  6220. return __T10[0]
  6221. #endif
  6222. extern __inline _Float16
  6223. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6224. _mm512_reduce_min_ph (__m512h __A)
  6225. {
  6226. _MM512_REDUCE_OP (min);
  6227. }
  6228. extern __inline _Float16
  6229. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6230. _mm512_reduce_max_ph (__m512h __A)
  6231. {
  6232. _MM512_REDUCE_OP (max);
  6233. }
  6234. #undef _MM512_REDUCE_OP
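/* Usage sketch for the reductions above (editorial addition): each
   returns a single _Float16 that combines all 32 elements of the
   input.  _mm512_set1_ph is assumed from earlier in this header.

     __m512h v   = _mm512_set1_ph ((_Float16) 1.0f);
     _Float16 s  = _mm512_reduce_add_ph (v);   // 32 * 1.0 = 32.0
     _Float16 mx = _mm512_reduce_max_ph (v);   // 1.0  */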
  6235. extern __inline __m512h
  6236. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6237. _mm512_mask_blend_ph (__mmask32 __U, __m512h __A, __m512h __W)
  6238. {
  6239. return (__m512h) __builtin_ia32_movdquhi512_mask ((__v32hi) __W,
  6240. (__v32hi) __A,
  6241. (__mmask32) __U);
  6242. }
  6243. extern __inline __m512h
  6244. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6245. _mm512_permutex2var_ph (__m512h __A, __m512i __I, __m512h __B)
  6246. {
  6247. return (__m512h) __builtin_ia32_vpermi2varhi512_mask ((__v32hi) __A,
  6248. (__v32hi) __I,
  6249. (__v32hi) __B,
  6250. (__mmask32)-1);
  6251. }
  6252. extern __inline __m512h
  6253. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6254. _mm512_permutexvar_ph (__m512i __A, __m512h __B)
  6255. {
  6256. return (__m512h) __builtin_ia32_permvarhi512_mask ((__v32hi) __B,
  6257. (__v32hi) __A,
  6258. (__v32hi)
  6259. (_mm512_setzero_ph ()),
  6260. (__mmask32)-1);
  6261. }
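/* Usage sketch for the blend/permute intrinsics above (editorial
   addition): both permutes index 16-bit lanes, so an index vector
   built with the integer epi16 setters selects individual _Float16
   elements; _mm512_mask_blend_ph picks, per element, from its second
   vector argument where the mask bit is set and from the first
   otherwise.  _mm512_set1_epi16 is assumed from the base AVX-512
   headers.

     __m512h v   = _mm512_set1_ph ((_Float16) 2.0f);
     __m512i idx = _mm512_set1_epi16 (0);          // broadcast element 0
     __m512h b   = _mm512_permutexvar_ph (idx, v);
     __m512h sel = _mm512_mask_blend_ph ((__mmask32) 0xffff, v, b);
                   // elements 0-15 from b, 16-31 from v  */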
  6262. extern __inline __m512h
  6263. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6264. _mm512_set1_pch (_Float16 _Complex __A)
  6265. {
  6266. union
  6267. {
  6268. _Float16 _Complex a;
  6269. float b;
  6270. } u = { .a = __A};
  6271. return (__m512h) _mm512_set1_ps (u.b);
  6272. }
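/* Editorial note: _mm512_set1_pch above type-puns the _Float16 _Complex
   value through a union so the real/imaginary pair can be broadcast as
   a single 32-bit float lane; every complex element of the result ends
   up holding the same value.

     _Float16 _Complex z = (_Float16) 3.0f;   // imaginary part is zero
     __m512h zz = _mm512_set1_pch (z);  */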
6273. // intrinsics below are aliases for f*mul_*ch
  6274. #define _mm512_mul_pch(A, B) _mm512_fmul_pch ((A), (B))
  6275. #define _mm512_mask_mul_pch(W, U, A, B) \
  6276. _mm512_mask_fmul_pch ((W), (U), (A), (B))
  6277. #define _mm512_maskz_mul_pch(U, A, B) _mm512_maskz_fmul_pch ((U), (A), (B))
  6278. #define _mm512_mul_round_pch(A, B, R) _mm512_fmul_round_pch ((A), (B), (R))
  6279. #define _mm512_mask_mul_round_pch(W, U, A, B, R) \
  6280. _mm512_mask_fmul_round_pch ((W), (U), (A), (B), (R))
  6281. #define _mm512_maskz_mul_round_pch(U, A, B, R) \
  6282. _mm512_maskz_fmul_round_pch ((U), (A), (B), (R))
  6283. #define _mm512_cmul_pch(A, B) _mm512_fcmul_pch ((A), (B))
  6284. #define _mm512_mask_cmul_pch(W, U, A, B) \
  6285. _mm512_mask_fcmul_pch ((W), (U), (A), (B))
  6286. #define _mm512_maskz_cmul_pch(U, A, B) _mm512_maskz_fcmul_pch ((U), (A), (B))
  6287. #define _mm512_cmul_round_pch(A, B, R) _mm512_fcmul_round_pch ((A), (B), (R))
  6288. #define _mm512_mask_cmul_round_pch(W, U, A, B, R) \
  6289. _mm512_mask_fcmul_round_pch ((W), (U), (A), (B), (R))
  6290. #define _mm512_maskz_cmul_round_pch(U, A, B, R) \
  6291. _mm512_maskz_fcmul_round_pch ((U), (A), (B), (R))
  6292. #define _mm_mul_sch(A, B) _mm_fmul_sch ((A), (B))
  6293. #define _mm_mask_mul_sch(W, U, A, B) _mm_mask_fmul_sch ((W), (U), (A), (B))
  6294. #define _mm_maskz_mul_sch(U, A, B) _mm_maskz_fmul_sch ((U), (A), (B))
  6295. #define _mm_mul_round_sch(A, B, R) _mm_fmul_round_sch ((A), (B), (R))
  6296. #define _mm_mask_mul_round_sch(W, U, A, B, R) \
  6297. _mm_mask_fmul_round_sch ((W), (U), (A), (B), (R))
  6298. #define _mm_maskz_mul_round_sch(U, A, B, R) \
  6299. _mm_maskz_fmul_round_sch ((U), (A), (B), (R))
  6300. #define _mm_cmul_sch(A, B) _mm_fcmul_sch ((A), (B))
  6301. #define _mm_mask_cmul_sch(W, U, A, B) _mm_mask_fcmul_sch ((W), (U), (A), (B))
  6302. #define _mm_maskz_cmul_sch(U, A, B) _mm_maskz_fcmul_sch ((U), (A), (B))
  6303. #define _mm_cmul_round_sch(A, B, R) _mm_fcmul_round_sch ((A), (B), (R))
  6304. #define _mm_mask_cmul_round_sch(W, U, A, B, R) \
  6305. _mm_mask_fcmul_round_sch ((W), (U), (A), (B), (R))
  6306. #define _mm_maskz_cmul_round_sch(U, A, B, R) \
  6307. _mm_maskz_fcmul_round_sch ((U), (A), (B), (R))
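/* Editorial note: the defines above make the shorter mul/cmul
   spellings interchangeable with the f(c)mul intrinsics, e.g.
   _mm512_mul_pch (x, y) expands to _mm512_fmul_pch (x, y).  */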
  6308. #ifdef __DISABLE_AVX512FP16__
  6309. #undef __DISABLE_AVX512FP16__
  6310. #pragma GCC pop_options
  6311. #endif /* __DISABLE_AVX512FP16__ */
  6312. #endif /* __AVX512FP16INTRIN_H_INCLUDED */