123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766276727682769277027712772277327742775277627772778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588358935903591359235933594359535963597359835993600360136023603360436053606360736083609361036113612361336143615361636173618361936203621362236233624362536263627362836293630363136323633363436353636363736383639364036413642364336443645364636473648364936503651365236533654365536563657365836593660366136623663366436653666366736683669367036713672367336743675367636773678367936803681368236833684368536863687368836893690369136923693369436953696369736983699370037013702370337043705370637073708370937103711371237133714371537163717371837193720372137223723372437253726372737283729373037313732373337343735373637373738373937403741374237433744374537463747374837493750375137523753375437553756375737583759376037613762376337643765376637673768376937703771377237733774377537763777377837793780378137823783378437853786378737883789379037913792379337943795379637973798379938003801380238033804380538063807380838093810381138123813381438153816381738183819382038213822382338243825382638273828382938303831383238333834383538363837383838393840384138423843384438453846384738483849385038513852385338543855385638573858385938603861386238633864386538663867386838693870387138723873387438753876387738783879388038813882388338843885388638873888388938903891389238933894389538963897389838993900390139023903390439053906390739083909391039113912391339143915391639173918391939203921392239233924392539263927392839293930393139323933393439353936393739383939394039413942394339443945394639473948394939503951395239533954395539563957395839593960396139623963396439653966396739683969397039713972397339743975397639773978397939803981398239833984398539863987398839893990399139923993399439953996399739983999400040014002400340044005400640074008400940104011401240134014401540164017401840194020402140224023402440254026402740284029403040314032403340344035403640374038403940404041404240434044404540464047404840494050405140524053405440554056405740584059406040614062406340644065406640674068406940704071407240734074407540764077407840794080408140824083408440854086408740884089409040914092409340944095409640974098409941004101410241034104410541064107410841094110411141124113411441154116411741184119412041214122412341244125412641274128412941304131413241334134413541364137413841394140414141424143414441454146414741484149415041514152415341544155415641574158415941604161416241634164416541664167416841694170417141724173417441754176417741784179418041814182418341844185418641874188418941904191419241934194419541964197419841994200420142024203420442054206420742084209421042114212421342144215421642174218421942204221422242234224422542264227422842294230423142324233423442354236423742384239424042414242424342444245424642474248424942504251425242534254425542564257425842594260426142624263426442654266426742684269427042714272427342744275427642774278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477747784779478047814782478347844785478647874788478947904791479247934794479547964797479847994800480148024803480448054806480748084809481048114812481348144815481648174818481948204821482248234824482548264827482848294830483148324833483448354836483748384839484048414842484348444845484648474848484948504851485248534854485548564857485848594860486148624863486448654866486748684869487048714872487348744875487648774878487948804881488248834884488548864887488848894890489148924893489448954896489748984899490049014902490349044905490649074908490949104911491249134914491549164917491849194920492149224923492449254926492749284929493049314932493349344935493649374938493949404941494249434944494549464947494849494950495149524953495449554956495749584959496049614962496349644965496649674968496949704971497249734974497549764977497849794980498149824983498449854986498749884989499049914992499349944995499649974998499950005001500250035004500550065007500850095010501150125013501450155016501750185019502050215022502350245025502650275028502950305031503250335034503550365037503850395040504150425043504450455046504750485049505050515052505350545055505650575058505950605061506250635064506550665067506850695070507150725073507450755076507750785079508050815082508350845085508650875088508950905091509250935094509550965097509850995100510151025103510451055106510751085109511051115112511351145115511651175118511951205121512251235124512551265127512851295130513151325133513451355136513751385139514051415142514351445145514651475148514951505151515251535154515551565157515851595160516151625163516451655166516751685169517051715172517351745175517651775178517951805181518251835184518551865187518851895190519151925193519451955196519751985199520052015202520352045205520652075208520952105211521252135214521552165217521852195220522152225223522452255226522752285229523052315232523352345235523652375238523952405241524252435244524552465247524852495250525152525253525452555256525752585259526052615262526352645265526652675268526952705271527252735274527552765277527852795280528152825283528452855286528752885289529052915292529352945295529652975298529953005301530253035304530553065307530853095310531153125313531453155316531753185319532053215322532353245325532653275328532953305331533253335334533553365337533853395340534153425343534453455346534753485349535053515352535353545355535653575358535953605361536253635364536553665367536853695370537153725373537453755376537753785379538053815382538353845385538653875388538953905391539253935394539553965397539853995400540154025403540454055406540754085409541054115412541354145415541654175418541954205421542254235424542554265427542854295430543154325433543454355436543754385439544054415442544354445445544654475448544954505451545254535454545554565457545854595460546154625463546454655466546754685469547054715472547354745475547654775478547954805481548254835484548554865487548854895490549154925493549454955496549754985499550055015502550355045505550655075508550955105511551255135514551555165517551855195520552155225523552455255526552755285529553055315532553355345535553655375538553955405541554255435544554555465547554855495550555155525553555455555556555755585559556055615562556355645565556655675568556955705571557255735574557555765577557855795580558155825583558455855586558755885589559055915592559355945595559655975598559956005601560256035604560556065607560856095610561156125613561456155616561756185619562056215622562356245625562656275628562956305631563256335634563556365637563856395640564156425643564456455646564756485649565056515652565356545655565656575658565956605661566256635664566556665667566856695670567156725673567456755676567756785679568056815682568356845685568656875688568956905691569256935694569556965697569856995700570157025703570457055706570757085709571057115712571357145715571657175718571957205721572257235724572557265727572857295730573157325733573457355736573757385739574057415742574357445745574657475748574957505751575257535754575557565757575857595760576157625763576457655766576757685769577057715772577357745775577657775778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627762786279628062816282628362846285628662876288628962906291629262936294629562966297629862996300630163026303630463056306630763086309631063116312631363146315631663176318631963206321632263236324632563266327632863296330633163326333633463356336633763386339634063416342634363446345634663476348634963506351635263536354635563566357635863596360636163626363636463656366636763686369637063716372637363746375637663776378637963806381638263836384638563866387638863896390639163926393639463956396639763986399640064016402640364046405640664076408640964106411641264136414641564166417641864196420642164226423642464256426642764286429643064316432643364346435643664376438643964406441644264436444644564466447644864496450645164526453645464556456645764586459646064616462646364646465646664676468646964706471647264736474647564766477647864796480648164826483648464856486648764886489649064916492649364946495649664976498649965006501650265036504650565066507650865096510651165126513651465156516651765186519652065216522652365246525652665276528652965306531653265336534653565366537653865396540654165426543654465456546654765486549655065516552655365546555655665576558655965606561656265636564656565666567656865696570657165726573657465756576657765786579658065816582658365846585658665876588658965906591659265936594659565966597659865996600660166026603660466056606660766086609661066116612661366146615661666176618661966206621662266236624662566266627662866296630663166326633663466356636663766386639664066416642664366446645664666476648664966506651665266536654665566566657665866596660666166626663666466656666666766686669667066716672667366746675667666776678667966806681668266836684668566866687668866896690669166926693669466956696669766986699670067016702670367046705670667076708670967106711671267136714671567166717671867196720672167226723672467256726672767286729673067316732673367346735673667376738673967406741674267436744674567466747674867496750675167526753675467556756675767586759676067616762676367646765676667676768676967706771677267736774677567766777677867796780678167826783678467856786678767886789679067916792679367946795679667976798679968006801680268036804680568066807680868096810681168126813681468156816681768186819682068216822682368246825682668276828682968306831683268336834683568366837683868396840684168426843684468456846684768486849685068516852685368546855685668576858685968606861686268636864686568666867686868696870687168726873687468756876687768786879688068816882688368846885688668876888688968906891689268936894689568966897689868996900690169026903690469056906690769086909691069116912691369146915691669176918691969206921692269236924692569266927692869296930693169326933693469356936693769386939694069416942694369446945694669476948694969506951695269536954695569566957695869596960696169626963696469656966696769686969697069716972697369746975697669776978697969806981698269836984698569866987698869896990699169926993699469956996699769986999700070017002700370047005700670077008700970107011701270137014701570167017701870197020702170227023702470257026702770287029703070317032703370347035703670377038703970407041704270437044704570467047704870497050705170527053705470557056705770587059706070617062706370647065706670677068706970707071707270737074707570767077707870797080708170827083708470857086708770887089709070917092709370947095709670977098709971007101710271037104710571067107710871097110711171127113711471157116711771187119712071217122712371247125712671277128712971307131713271337134713571367137713871397140714171427143714471457146714771487149715071517152715371547155715671577158715971607161716271637164716571667167716871697170717171727173717471757176717771787179718071817182718371847185718671877188718971907191719271937194719571967197719871997200720172027203720472057206720772087209 |
- /* Copyright (C) 2019-2022 Free Software Foundation, Inc.
- This file is part of GCC.
- GCC is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
- GCC is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
- #ifndef _IMMINTRIN_H_INCLUDED
- #error "Never use <avx512fp16intrin.h> directly; include <immintrin.h> instead."
- #endif
- #ifndef __AVX512FP16INTRIN_H_INCLUDED
- #define __AVX512FP16INTRIN_H_INCLUDED
- #ifndef __AVX512FP16__
- #pragma GCC push_options
- #pragma GCC target("avx512fp16")
- #define __DISABLE_AVX512FP16__
- #endif /* __AVX512FP16__ */
- /* Internal data types for implementing the intrinsics. */
- typedef _Float16 __v8hf __attribute__ ((__vector_size__ (16)));
- typedef _Float16 __v16hf __attribute__ ((__vector_size__ (32)));
- typedef _Float16 __v32hf __attribute__ ((__vector_size__ (64)));
- /* The Intel API is flexible enough that we must allow aliasing with other
- vector types, and their scalar components. */
- typedef _Float16 __m128h __attribute__ ((__vector_size__ (16), __may_alias__));
- typedef _Float16 __m256h __attribute__ ((__vector_size__ (32), __may_alias__));
- typedef _Float16 __m512h __attribute__ ((__vector_size__ (64), __may_alias__));
- /* Unaligned version of the same type. */
- typedef _Float16 __m128h_u __attribute__ ((__vector_size__ (16), \
- __may_alias__, __aligned__ (1)));
- typedef _Float16 __m256h_u __attribute__ ((__vector_size__ (32), \
- __may_alias__, __aligned__ (1)));
- typedef _Float16 __m512h_u __attribute__ ((__vector_size__ (64), \
- __may_alias__, __aligned__ (1)));
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_set_ph (_Float16 __A7, _Float16 __A6, _Float16 __A5,
- _Float16 __A4, _Float16 __A3, _Float16 __A2,
- _Float16 __A1, _Float16 __A0)
- {
- return __extension__ (__m128h)(__v8hf){ __A0, __A1, __A2, __A3,
- __A4, __A5, __A6, __A7 };
- }
- extern __inline __m256h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_set_ph (_Float16 __A15, _Float16 __A14, _Float16 __A13,
- _Float16 __A12, _Float16 __A11, _Float16 __A10,
- _Float16 __A9, _Float16 __A8, _Float16 __A7,
- _Float16 __A6, _Float16 __A5, _Float16 __A4,
- _Float16 __A3, _Float16 __A2, _Float16 __A1,
- _Float16 __A0)
- {
- return __extension__ (__m256h)(__v16hf){ __A0, __A1, __A2, __A3,
- __A4, __A5, __A6, __A7,
- __A8, __A9, __A10, __A11,
- __A12, __A13, __A14, __A15 };
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_set_ph (_Float16 __A31, _Float16 __A30, _Float16 __A29,
- _Float16 __A28, _Float16 __A27, _Float16 __A26,
- _Float16 __A25, _Float16 __A24, _Float16 __A23,
- _Float16 __A22, _Float16 __A21, _Float16 __A20,
- _Float16 __A19, _Float16 __A18, _Float16 __A17,
- _Float16 __A16, _Float16 __A15, _Float16 __A14,
- _Float16 __A13, _Float16 __A12, _Float16 __A11,
- _Float16 __A10, _Float16 __A9, _Float16 __A8,
- _Float16 __A7, _Float16 __A6, _Float16 __A5,
- _Float16 __A4, _Float16 __A3, _Float16 __A2,
- _Float16 __A1, _Float16 __A0)
- {
- return __extension__ (__m512h)(__v32hf){ __A0, __A1, __A2, __A3,
- __A4, __A5, __A6, __A7,
- __A8, __A9, __A10, __A11,
- __A12, __A13, __A14, __A15,
- __A16, __A17, __A18, __A19,
- __A20, __A21, __A22, __A23,
- __A24, __A25, __A26, __A27,
- __A28, __A29, __A30, __A31 };
- }
- /* Create vectors of elements in the reversed order from _mm_set_ph,
- _mm256_set_ph and _mm512_set_ph functions. */
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_setr_ph (_Float16 __A0, _Float16 __A1, _Float16 __A2,
- _Float16 __A3, _Float16 __A4, _Float16 __A5,
- _Float16 __A6, _Float16 __A7)
- {
- return _mm_set_ph (__A7, __A6, __A5, __A4, __A3, __A2, __A1, __A0);
- }
- extern __inline __m256h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_setr_ph (_Float16 __A0, _Float16 __A1, _Float16 __A2,
- _Float16 __A3, _Float16 __A4, _Float16 __A5,
- _Float16 __A6, _Float16 __A7, _Float16 __A8,
- _Float16 __A9, _Float16 __A10, _Float16 __A11,
- _Float16 __A12, _Float16 __A13, _Float16 __A14,
- _Float16 __A15)
- {
- return _mm256_set_ph (__A15, __A14, __A13, __A12, __A11, __A10, __A9,
- __A8, __A7, __A6, __A5, __A4, __A3, __A2, __A1,
- __A0);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_setr_ph (_Float16 __A0, _Float16 __A1, _Float16 __A2,
- _Float16 __A3, _Float16 __A4, _Float16 __A5,
- _Float16 __A6, _Float16 __A7, _Float16 __A8,
- _Float16 __A9, _Float16 __A10, _Float16 __A11,
- _Float16 __A12, _Float16 __A13, _Float16 __A14,
- _Float16 __A15, _Float16 __A16, _Float16 __A17,
- _Float16 __A18, _Float16 __A19, _Float16 __A20,
- _Float16 __A21, _Float16 __A22, _Float16 __A23,
- _Float16 __A24, _Float16 __A25, _Float16 __A26,
- _Float16 __A27, _Float16 __A28, _Float16 __A29,
- _Float16 __A30, _Float16 __A31)
- {
- return _mm512_set_ph (__A31, __A30, __A29, __A28, __A27, __A26, __A25,
- __A24, __A23, __A22, __A21, __A20, __A19, __A18,
- __A17, __A16, __A15, __A14, __A13, __A12, __A11,
- __A10, __A9, __A8, __A7, __A6, __A5, __A4, __A3,
- __A2, __A1, __A0);
- }
- /* Broadcast _Float16 to vector. */
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_set1_ph (_Float16 __A)
- {
- return _mm_set_ph (__A, __A, __A, __A, __A, __A, __A, __A);
- }
- extern __inline __m256h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_set1_ph (_Float16 __A)
- {
- return _mm256_set_ph (__A, __A, __A, __A, __A, __A, __A, __A,
- __A, __A, __A, __A, __A, __A, __A, __A);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_set1_ph (_Float16 __A)
- {
- return _mm512_set_ph (__A, __A, __A, __A, __A, __A, __A, __A,
- __A, __A, __A, __A, __A, __A, __A, __A,
- __A, __A, __A, __A, __A, __A, __A, __A,
- __A, __A, __A, __A, __A, __A, __A, __A);
- }
- /* Create a vector with all zeros. */
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_setzero_ph (void)
- {
- return _mm_set1_ph (0.0f);
- }
- extern __inline __m256h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_setzero_ph (void)
- {
- return _mm256_set1_ph (0.0f);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_setzero_ph (void)
- {
- return _mm512_set1_ph (0.0f);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_undefined_ph (void)
- {
- __m128h __Y = __Y;
- return __Y;
- }
- extern __inline __m256h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_undefined_ph (void)
- {
- __m256h __Y = __Y;
- return __Y;
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_undefined_ph (void)
- {
- __m512h __Y = __Y;
- return __Y;
- }
- extern __inline _Float16
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvtsh_h (__m128h __A)
- {
- return __A[0];
- }
- extern __inline _Float16
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_cvtsh_h (__m256h __A)
- {
- return __A[0];
- }
- extern __inline _Float16
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvtsh_h (__m512h __A)
- {
- return __A[0];
- }
- extern __inline __m512
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_castph_ps (__m512h __a)
- {
- return (__m512) __a;
- }
- extern __inline __m512d
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_castph_pd (__m512h __a)
- {
- return (__m512d) __a;
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_castph_si512 (__m512h __a)
- {
- return (__m512i) __a;
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_castph512_ph128 (__m512h __A)
- {
- union
- {
- __m128h a[4];
- __m512h v;
- } u = { .v = __A };
- return u.a[0];
- }
- extern __inline __m256h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_castph512_ph256 (__m512h __A)
- {
- union
- {
- __m256h a[2];
- __m512h v;
- } u = { .v = __A };
- return u.a[0];
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_castph128_ph512 (__m128h __A)
- {
- union
- {
- __m128h a[4];
- __m512h v;
- } u;
- u.a[0] = __A;
- return u.v;
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_castph256_ph512 (__m256h __A)
- {
- union
- {
- __m256h a[2];
- __m512h v;
- } u;
- u.a[0] = __A;
- return u.v;
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_zextph128_ph512 (__m128h __A)
- {
- return (__m512h) _mm512_insertf32x4 (_mm512_setzero_ps (),
- (__m128) __A, 0);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_zextph256_ph512 (__m256h __A)
- {
- return (__m512h) _mm512_insertf64x4 (_mm512_setzero_pd (),
- (__m256d) __A, 0);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_castps_ph (__m512 __a)
- {
- return (__m512h) __a;
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_castpd_ph (__m512d __a)
- {
- return (__m512h) __a;
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_castsi512_ph (__m512i __a)
- {
- return (__m512h) __a;
- }
- /* Create a vector with element 0 as F and the rest zero. */
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_set_sh (_Float16 __F)
- {
- return _mm_set_ph (0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, __F);
- }
- /* Create a vector with element 0 as *P and the rest zero. */
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_load_sh (void const *__P)
- {
- return _mm_set_ph (0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
- *(_Float16 const *) __P);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_load_ph (void const *__P)
- {
- return *(const __m512h *) __P;
- }
- extern __inline __m256h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_load_ph (void const *__P)
- {
- return *(const __m256h *) __P;
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_load_ph (void const *__P)
- {
- return *(const __m128h *) __P;
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_loadu_ph (void const *__P)
- {
- return *(const __m512h_u *) __P;
- }
- extern __inline __m256h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_loadu_ph (void const *__P)
- {
- return *(const __m256h_u *) __P;
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_loadu_ph (void const *__P)
- {
- return *(const __m128h_u *) __P;
- }
- /* Stores the lower _Float16 value. */
- extern __inline void
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_store_sh (void *__P, __m128h __A)
- {
- *(_Float16 *) __P = ((__v8hf)__A)[0];
- }
- extern __inline void
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_store_ph (void *__P, __m512h __A)
- {
- *(__m512h *) __P = __A;
- }
- extern __inline void
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_store_ph (void *__P, __m256h __A)
- {
- *(__m256h *) __P = __A;
- }
- extern __inline void
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_store_ph (void *__P, __m128h __A)
- {
- *(__m128h *) __P = __A;
- }
- extern __inline void
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_storeu_ph (void *__P, __m512h __A)
- {
- *(__m512h_u *) __P = __A;
- }
- extern __inline void
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_storeu_ph (void *__P, __m256h __A)
- {
- *(__m256h_u *) __P = __A;
- }
- extern __inline void
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_storeu_ph (void *__P, __m128h __A)
- {
- *(__m128h_u *) __P = __A;
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_abs_ph (__m512h __A)
- {
- return (__m512h) _mm512_and_epi32 ( _mm512_set1_epi32 (0x7FFF7FFF),
- (__m512i) __A);
- }
- /* Intrinsics v[add,sub,mul,div]ph. */
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_add_ph (__m512h __A, __m512h __B)
- {
- return (__m512h) ((__v32hf) __A + (__v32hf) __B);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_add_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
- {
- return __builtin_ia32_addph512_mask (__C, __D, __A, __B);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_add_ph (__mmask32 __A, __m512h __B, __m512h __C)
- {
- return __builtin_ia32_addph512_mask (__B, __C,
- _mm512_setzero_ph (), __A);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_sub_ph (__m512h __A, __m512h __B)
- {
- return (__m512h) ((__v32hf) __A - (__v32hf) __B);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_sub_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
- {
- return __builtin_ia32_subph512_mask (__C, __D, __A, __B);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_sub_ph (__mmask32 __A, __m512h __B, __m512h __C)
- {
- return __builtin_ia32_subph512_mask (__B, __C,
- _mm512_setzero_ph (), __A);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mul_ph (__m512h __A, __m512h __B)
- {
- return (__m512h) ((__v32hf) __A * (__v32hf) __B);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_mul_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
- {
- return __builtin_ia32_mulph512_mask (__C, __D, __A, __B);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_mul_ph (__mmask32 __A, __m512h __B, __m512h __C)
- {
- return __builtin_ia32_mulph512_mask (__B, __C,
- _mm512_setzero_ph (), __A);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_div_ph (__m512h __A, __m512h __B)
- {
- return (__m512h) ((__v32hf) __A / (__v32hf) __B);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_div_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
- {
- return __builtin_ia32_divph512_mask (__C, __D, __A, __B);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_div_ph (__mmask32 __A, __m512h __B, __m512h __C)
- {
- return __builtin_ia32_divph512_mask (__B, __C,
- _mm512_setzero_ph (), __A);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_add_round_ph (__m512h __A, __m512h __B, const int __C)
- {
- return __builtin_ia32_addph512_mask_round (__A, __B,
- _mm512_setzero_ph (),
- (__mmask32) -1, __C);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_add_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
- __m512h __D, const int __E)
- {
- return __builtin_ia32_addph512_mask_round (__C, __D, __A, __B, __E);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_add_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
- const int __D)
- {
- return __builtin_ia32_addph512_mask_round (__B, __C,
- _mm512_setzero_ph (),
- __A, __D);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_sub_round_ph (__m512h __A, __m512h __B, const int __C)
- {
- return __builtin_ia32_subph512_mask_round (__A, __B,
- _mm512_setzero_ph (),
- (__mmask32) -1, __C);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_sub_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
- __m512h __D, const int __E)
- {
- return __builtin_ia32_subph512_mask_round (__C, __D, __A, __B, __E);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_sub_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
- const int __D)
- {
- return __builtin_ia32_subph512_mask_round (__B, __C,
- _mm512_setzero_ph (),
- __A, __D);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mul_round_ph (__m512h __A, __m512h __B, const int __C)
- {
- return __builtin_ia32_mulph512_mask_round (__A, __B,
- _mm512_setzero_ph (),
- (__mmask32) -1, __C);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_mul_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
- __m512h __D, const int __E)
- {
- return __builtin_ia32_mulph512_mask_round (__C, __D, __A, __B, __E);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_mul_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
- const int __D)
- {
- return __builtin_ia32_mulph512_mask_round (__B, __C,
- _mm512_setzero_ph (),
- __A, __D);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_div_round_ph (__m512h __A, __m512h __B, const int __C)
- {
- return __builtin_ia32_divph512_mask_round (__A, __B,
- _mm512_setzero_ph (),
- (__mmask32) -1, __C);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_div_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
- __m512h __D, const int __E)
- {
- return __builtin_ia32_divph512_mask_round (__C, __D, __A, __B, __E);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_div_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
- const int __D)
- {
- return __builtin_ia32_divph512_mask_round (__B, __C,
- _mm512_setzero_ph (),
- __A, __D);
- }
- #else
- #define _mm512_add_round_ph(A, B, C) \
- ((__m512h)__builtin_ia32_addph512_mask_round((A), (B), \
- _mm512_setzero_ph (), \
- (__mmask32)-1, (C)))
- #define _mm512_mask_add_round_ph(A, B, C, D, E) \
- ((__m512h)__builtin_ia32_addph512_mask_round((C), (D), (A), (B), (E)))
- #define _mm512_maskz_add_round_ph(A, B, C, D) \
- ((__m512h)__builtin_ia32_addph512_mask_round((B), (C), \
- _mm512_setzero_ph (), \
- (A), (D)))
- #define _mm512_sub_round_ph(A, B, C) \
- ((__m512h)__builtin_ia32_subph512_mask_round((A), (B), \
- _mm512_setzero_ph (), \
- (__mmask32)-1, (C)))
- #define _mm512_mask_sub_round_ph(A, B, C, D, E) \
- ((__m512h)__builtin_ia32_subph512_mask_round((C), (D), (A), (B), (E)))
- #define _mm512_maskz_sub_round_ph(A, B, C, D) \
- ((__m512h)__builtin_ia32_subph512_mask_round((B), (C), \
- _mm512_setzero_ph (), \
- (A), (D)))
- #define _mm512_mul_round_ph(A, B, C) \
- ((__m512h)__builtin_ia32_mulph512_mask_round((A), (B), \
- _mm512_setzero_ph (), \
- (__mmask32)-1, (C)))
- #define _mm512_mask_mul_round_ph(A, B, C, D, E) \
- ((__m512h)__builtin_ia32_mulph512_mask_round((C), (D), (A), (B), (E)))
- #define _mm512_maskz_mul_round_ph(A, B, C, D) \
- ((__m512h)__builtin_ia32_mulph512_mask_round((B), (C), \
- _mm512_setzero_ph (), \
- (A), (D)))
- #define _mm512_div_round_ph(A, B, C) \
- ((__m512h)__builtin_ia32_divph512_mask_round((A), (B), \
- _mm512_setzero_ph (), \
- (__mmask32)-1, (C)))
- #define _mm512_mask_div_round_ph(A, B, C, D, E) \
- ((__m512h)__builtin_ia32_divph512_mask_round((C), (D), (A), (B), (E)))
- #define _mm512_maskz_div_round_ph(A, B, C, D) \
- ((__m512h)__builtin_ia32_divph512_mask_round((B), (C), \
- _mm512_setzero_ph (), \
- (A), (D)))
- #endif /* __OPTIMIZE__ */
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_conj_pch (__m512h __A)
- {
- return (__m512h) _mm512_xor_epi32 ((__m512i) __A, _mm512_set1_epi32 (1<<31));
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_conj_pch (__m512h __W, __mmask16 __U, __m512h __A)
- {
- return (__m512h)
- __builtin_ia32_movaps512_mask ((__v16sf) _mm512_conj_pch (__A),
- (__v16sf) __W,
- (__mmask16) __U);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_conj_pch (__mmask16 __U, __m512h __A)
- {
- return (__m512h)
- __builtin_ia32_movaps512_mask ((__v16sf) _mm512_conj_pch (__A),
- (__v16sf) _mm512_setzero_ps (),
- (__mmask16) __U);
- }
- /* Intrinsics of v[add,sub,mul,div]sh. */
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_add_sh (__m128h __A, __m128h __B)
- {
- __A[0] += __B[0];
- return __A;
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_add_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
- {
- return __builtin_ia32_addsh_mask (__C, __D, __A, __B);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_add_sh (__mmask8 __A, __m128h __B, __m128h __C)
- {
- return __builtin_ia32_addsh_mask (__B, __C, _mm_setzero_ph (),
- __A);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_sub_sh (__m128h __A, __m128h __B)
- {
- __A[0] -= __B[0];
- return __A;
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_sub_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
- {
- return __builtin_ia32_subsh_mask (__C, __D, __A, __B);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_sub_sh (__mmask8 __A, __m128h __B, __m128h __C)
- {
- return __builtin_ia32_subsh_mask (__B, __C, _mm_setzero_ph (),
- __A);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mul_sh (__m128h __A, __m128h __B)
- {
- __A[0] *= __B[0];
- return __A;
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_mul_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
- {
- return __builtin_ia32_mulsh_mask (__C, __D, __A, __B);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_mul_sh (__mmask8 __A, __m128h __B, __m128h __C)
- {
- return __builtin_ia32_mulsh_mask (__B, __C, _mm_setzero_ph (), __A);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_div_sh (__m128h __A, __m128h __B)
- {
- __A[0] /= __B[0];
- return __A;
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_div_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
- {
- return __builtin_ia32_divsh_mask (__C, __D, __A, __B);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_div_sh (__mmask8 __A, __m128h __B, __m128h __C)
- {
- return __builtin_ia32_divsh_mask (__B, __C, _mm_setzero_ph (),
- __A);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_add_round_sh (__m128h __A, __m128h __B, const int __C)
- {
- return __builtin_ia32_addsh_mask_round (__A, __B,
- _mm_setzero_ph (),
- (__mmask8) -1, __C);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_add_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
- __m128h __D, const int __E)
- {
- return __builtin_ia32_addsh_mask_round (__C, __D, __A, __B, __E);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_add_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
- const int __D)
- {
- return __builtin_ia32_addsh_mask_round (__B, __C,
- _mm_setzero_ph (),
- __A, __D);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_sub_round_sh (__m128h __A, __m128h __B, const int __C)
- {
- return __builtin_ia32_subsh_mask_round (__A, __B,
- _mm_setzero_ph (),
- (__mmask8) -1, __C);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_sub_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
- __m128h __D, const int __E)
- {
- return __builtin_ia32_subsh_mask_round (__C, __D, __A, __B, __E);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_sub_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
- const int __D)
- {
- return __builtin_ia32_subsh_mask_round (__B, __C,
- _mm_setzero_ph (),
- __A, __D);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mul_round_sh (__m128h __A, __m128h __B, const int __C)
- {
- return __builtin_ia32_mulsh_mask_round (__A, __B,
- _mm_setzero_ph (),
- (__mmask8) -1, __C);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_mul_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
- __m128h __D, const int __E)
- {
- return __builtin_ia32_mulsh_mask_round (__C, __D, __A, __B, __E);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_mul_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
- const int __D)
- {
- return __builtin_ia32_mulsh_mask_round (__B, __C,
- _mm_setzero_ph (),
- __A, __D);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_div_round_sh (__m128h __A, __m128h __B, const int __C)
- {
- return __builtin_ia32_divsh_mask_round (__A, __B,
- _mm_setzero_ph (),
- (__mmask8) -1, __C);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_div_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
- __m128h __D, const int __E)
- {
- return __builtin_ia32_divsh_mask_round (__C, __D, __A, __B, __E);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_div_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
- const int __D)
- {
- return __builtin_ia32_divsh_mask_round (__B, __C,
- _mm_setzero_ph (),
- __A, __D);
- }
- #else
- #define _mm_add_round_sh(A, B, C) \
- ((__m128h)__builtin_ia32_addsh_mask_round ((A), (B), \
- _mm_setzero_ph (), \
- (__mmask8)-1, (C)))
- #define _mm_mask_add_round_sh(A, B, C, D, E) \
- ((__m128h)__builtin_ia32_addsh_mask_round ((C), (D), (A), (B), (E)))
- #define _mm_maskz_add_round_sh(A, B, C, D) \
- ((__m128h)__builtin_ia32_addsh_mask_round ((B), (C), \
- _mm_setzero_ph (), \
- (A), (D)))
- #define _mm_sub_round_sh(A, B, C) \
- ((__m128h)__builtin_ia32_subsh_mask_round ((A), (B), \
- _mm_setzero_ph (), \
- (__mmask8)-1, (C)))
- #define _mm_mask_sub_round_sh(A, B, C, D, E) \
- ((__m128h)__builtin_ia32_subsh_mask_round ((C), (D), (A), (B), (E)))
- #define _mm_maskz_sub_round_sh(A, B, C, D) \
- ((__m128h)__builtin_ia32_subsh_mask_round ((B), (C), \
- _mm_setzero_ph (), \
- (A), (D)))
- #define _mm_mul_round_sh(A, B, C) \
- ((__m128h)__builtin_ia32_mulsh_mask_round ((A), (B), \
- _mm_setzero_ph (), \
- (__mmask8)-1, (C)))
- #define _mm_mask_mul_round_sh(A, B, C, D, E) \
- ((__m128h)__builtin_ia32_mulsh_mask_round ((C), (D), (A), (B), (E)))
- #define _mm_maskz_mul_round_sh(A, B, C, D) \
- ((__m128h)__builtin_ia32_mulsh_mask_round ((B), (C), \
- _mm_setzero_ph (), \
- (A), (D)))
- #define _mm_div_round_sh(A, B, C) \
- ((__m128h)__builtin_ia32_divsh_mask_round ((A), (B), \
- _mm_setzero_ph (), \
- (__mmask8)-1, (C)))
- #define _mm_mask_div_round_sh(A, B, C, D, E) \
- ((__m128h)__builtin_ia32_divsh_mask_round ((C), (D), (A), (B), (E)))
- #define _mm_maskz_div_round_sh(A, B, C, D) \
- ((__m128h)__builtin_ia32_divsh_mask_round ((B), (C), \
- _mm_setzero_ph (), \
- (A), (D)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsic vmaxph vminph. */
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_max_ph (__m512h __A, __m512h __B)
- {
- return __builtin_ia32_maxph512_mask (__A, __B,
- _mm512_setzero_ph (),
- (__mmask32) -1);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_max_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
- {
- return __builtin_ia32_maxph512_mask (__C, __D, __A, __B);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_max_ph (__mmask32 __A, __m512h __B, __m512h __C)
- {
- return __builtin_ia32_maxph512_mask (__B, __C,
- _mm512_setzero_ph (), __A);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_min_ph (__m512h __A, __m512h __B)
- {
- return __builtin_ia32_minph512_mask (__A, __B,
- _mm512_setzero_ph (),
- (__mmask32) -1);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_min_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
- {
- return __builtin_ia32_minph512_mask (__C, __D, __A, __B);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_min_ph (__mmask32 __A, __m512h __B, __m512h __C)
- {
- return __builtin_ia32_minph512_mask (__B, __C,
- _mm512_setzero_ph (), __A);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_max_round_ph (__m512h __A, __m512h __B, const int __C)
- {
- return __builtin_ia32_maxph512_mask_round (__A, __B,
- _mm512_setzero_ph (),
- (__mmask32) -1, __C);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_max_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
- __m512h __D, const int __E)
- {
- return __builtin_ia32_maxph512_mask_round (__C, __D, __A, __B, __E);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_max_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
- const int __D)
- {
- return __builtin_ia32_maxph512_mask_round (__B, __C,
- _mm512_setzero_ph (),
- __A, __D);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_min_round_ph (__m512h __A, __m512h __B, const int __C)
- {
- return __builtin_ia32_minph512_mask_round (__A, __B,
- _mm512_setzero_ph (),
- (__mmask32) -1, __C);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_min_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
- __m512h __D, const int __E)
- {
- return __builtin_ia32_minph512_mask_round (__C, __D, __A, __B, __E);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_min_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
- const int __D)
- {
- return __builtin_ia32_minph512_mask_round (__B, __C,
- _mm512_setzero_ph (),
- __A, __D);
- }
- #else
- #define _mm512_max_round_ph(A, B, C) \
- (__builtin_ia32_maxph512_mask_round ((A), (B), \
- _mm512_setzero_ph (), \
- (__mmask32)-1, (C)))
- #define _mm512_mask_max_round_ph(A, B, C, D, E) \
- (__builtin_ia32_maxph512_mask_round ((C), (D), (A), (B), (E)))
- #define _mm512_maskz_max_round_ph(A, B, C, D) \
- (__builtin_ia32_maxph512_mask_round ((B), (C), \
- _mm512_setzero_ph (), \
- (A), (D)))
- #define _mm512_min_round_ph(A, B, C) \
- (__builtin_ia32_minph512_mask_round ((A), (B), \
- _mm512_setzero_ph (), \
- (__mmask32)-1, (C)))
- #define _mm512_mask_min_round_ph(A, B, C, D, E) \
- (__builtin_ia32_minph512_mask_round ((C), (D), (A), (B), (E)))
- #define _mm512_maskz_min_round_ph(A, B, C, D) \
- (__builtin_ia32_minph512_mask_round ((B), (C), \
- _mm512_setzero_ph (), \
- (A), (D)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsic vmaxsh vminsh. */
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_max_sh (__m128h __A, __m128h __B)
- {
- __A[0] = __A[0] > __B[0] ? __A[0] : __B[0];
- return __A;
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_max_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
- {
- return __builtin_ia32_maxsh_mask (__C, __D, __A, __B);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_max_sh (__mmask8 __A, __m128h __B, __m128h __C)
- {
- return __builtin_ia32_maxsh_mask (__B, __C, _mm_setzero_ph (),
- __A);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_min_sh (__m128h __A, __m128h __B)
- {
- __A[0] = __A[0] < __B[0] ? __A[0] : __B[0];
- return __A;
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_min_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
- {
- return __builtin_ia32_minsh_mask (__C, __D, __A, __B);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_min_sh (__mmask8 __A, __m128h __B, __m128h __C)
- {
- return __builtin_ia32_minsh_mask (__B, __C, _mm_setzero_ph (),
- __A);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_max_round_sh (__m128h __A, __m128h __B, const int __C)
- {
- return __builtin_ia32_maxsh_mask_round (__A, __B,
- _mm_setzero_ph (),
- (__mmask8) -1, __C);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_max_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
- __m128h __D, const int __E)
- {
- return __builtin_ia32_maxsh_mask_round (__C, __D, __A, __B, __E);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_max_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
- const int __D)
- {
- return __builtin_ia32_maxsh_mask_round (__B, __C,
- _mm_setzero_ph (),
- __A, __D);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_min_round_sh (__m128h __A, __m128h __B, const int __C)
- {
- return __builtin_ia32_minsh_mask_round (__A, __B,
- _mm_setzero_ph (),
- (__mmask8) -1, __C);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_min_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
- __m128h __D, const int __E)
- {
- return __builtin_ia32_minsh_mask_round (__C, __D, __A, __B, __E);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_min_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
- const int __D)
- {
- return __builtin_ia32_minsh_mask_round (__B, __C,
- _mm_setzero_ph (),
- __A, __D);
- }
- #else
- #define _mm_max_round_sh(A, B, C) \
- (__builtin_ia32_maxsh_mask_round ((A), (B), \
- _mm_setzero_ph (), \
- (__mmask8)-1, (C)))
- #define _mm_mask_max_round_sh(A, B, C, D, E) \
- (__builtin_ia32_maxsh_mask_round ((C), (D), (A), (B), (E)))
- #define _mm_maskz_max_round_sh(A, B, C, D) \
- (__builtin_ia32_maxsh_mask_round ((B), (C), \
- _mm_setzero_ph (), \
- (A), (D)))
- #define _mm_min_round_sh(A, B, C) \
- (__builtin_ia32_minsh_mask_round ((A), (B), \
- _mm_setzero_ph (), \
- (__mmask8)-1, (C)))
- #define _mm_mask_min_round_sh(A, B, C, D, E) \
- (__builtin_ia32_minsh_mask_round ((C), (D), (A), (B), (E)))
- #define _mm_maskz_min_round_sh(A, B, C, D) \
- (__builtin_ia32_minsh_mask_round ((B), (C), \
- _mm_setzero_ph (), \
- (A), (D)))
- #endif /* __OPTIMIZE__ */
- /* vcmpph */
- #ifdef __OPTIMIZE
- extern __inline __mmask32
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cmp_ph_mask (__m512h __A, __m512h __B, const int __C)
- {
- return (__mmask32) __builtin_ia32_cmpph512_mask (__A, __B, __C,
- (__mmask32) -1);
- }
- extern __inline __mmask32
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cmp_ph_mask (__mmask32 __A, __m512h __B, __m512h __C,
- const int __D)
- {
- return (__mmask32) __builtin_ia32_cmpph512_mask (__B, __C, __D,
- __A);
- }
- extern __inline __mmask32
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cmp_round_ph_mask (__m512h __A, __m512h __B, const int __C,
- const int __D)
- {
- return (__mmask32) __builtin_ia32_cmpph512_mask_round (__A, __B,
- __C, (__mmask32) -1,
- __D);
- }
- extern __inline __mmask32
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cmp_round_ph_mask (__mmask32 __A, __m512h __B, __m512h __C,
- const int __D, const int __E)
- {
- return (__mmask32) __builtin_ia32_cmpph512_mask_round (__B, __C,
- __D, __A,
- __E);
- }
- #else
- #define _mm512_cmp_ph_mask(A, B, C) \
- (__builtin_ia32_cmpph512_mask ((A), (B), (C), (-1)))
- #define _mm512_mask_cmp_ph_mask(A, B, C, D) \
- (__builtin_ia32_cmpph512_mask ((B), (C), (D), (A)))
- #define _mm512_cmp_round_ph_mask(A, B, C, D) \
- (__builtin_ia32_cmpph512_mask_round ((A), (B), (C), (-1), (D)))
- #define _mm512_mask_cmp_round_ph_mask(A, B, C, D, E) \
- (__builtin_ia32_cmpph512_mask_round ((B), (C), (D), (A), (E)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vcmpsh. */
- #ifdef __OPTIMIZE__
- extern __inline __mmask8
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cmp_sh_mask (__m128h __A, __m128h __B, const int __C)
- {
- return (__mmask8)
- __builtin_ia32_cmpsh_mask_round (__A, __B,
- __C, (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __mmask8
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_cmp_sh_mask (__mmask8 __A, __m128h __B, __m128h __C,
- const int __D)
- {
- return (__mmask8)
- __builtin_ia32_cmpsh_mask_round (__B, __C,
- __D, __A,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __mmask8
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cmp_round_sh_mask (__m128h __A, __m128h __B, const int __C,
- const int __D)
- {
- return (__mmask8) __builtin_ia32_cmpsh_mask_round (__A, __B,
- __C, (__mmask8) -1,
- __D);
- }
- extern __inline __mmask8
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_cmp_round_sh_mask (__mmask8 __A, __m128h __B, __m128h __C,
- const int __D, const int __E)
- {
- return (__mmask8) __builtin_ia32_cmpsh_mask_round (__B, __C,
- __D, __A,
- __E);
- }
- #else
- #define _mm_cmp_sh_mask(A, B, C) \
- (__builtin_ia32_cmpsh_mask_round ((A), (B), (C), (-1), \
- (_MM_FROUND_CUR_DIRECTION)))
- #define _mm_mask_cmp_sh_mask(A, B, C, D) \
- (__builtin_ia32_cmpsh_mask_round ((B), (C), (D), (A), \
- (_MM_FROUND_CUR_DIRECTION)))
- #define _mm_cmp_round_sh_mask(A, B, C, D) \
- (__builtin_ia32_cmpsh_mask_round ((A), (B), (C), (-1), (D)))
- #define _mm_mask_cmp_round_sh_mask(A, B, C, D, E) \
- (__builtin_ia32_cmpsh_mask_round ((B), (C), (D), (A), (E)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vcomish. */
- extern __inline int
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_comieq_sh (__m128h __A, __m128h __B)
- {
- return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_EQ_OS,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline int
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_comilt_sh (__m128h __A, __m128h __B)
- {
- return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LT_OS,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline int
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_comile_sh (__m128h __A, __m128h __B)
- {
- return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LE_OS,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline int
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_comigt_sh (__m128h __A, __m128h __B)
- {
- return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GT_OS,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline int
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_comige_sh (__m128h __A, __m128h __B)
- {
- return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GE_OS,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline int
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_comineq_sh (__m128h __A, __m128h __B)
- {
- return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_NEQ_US,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline int
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_ucomieq_sh (__m128h __A, __m128h __B)
- {
- return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_EQ_OQ,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline int
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_ucomilt_sh (__m128h __A, __m128h __B)
- {
- return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LT_OQ,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline int
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_ucomile_sh (__m128h __A, __m128h __B)
- {
- return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LE_OQ,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline int
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_ucomigt_sh (__m128h __A, __m128h __B)
- {
- return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GT_OQ,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline int
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_ucomige_sh (__m128h __A, __m128h __B)
- {
- return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GE_OQ,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline int
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_ucomineq_sh (__m128h __A, __m128h __B)
- {
- return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_NEQ_UQ,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline int
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_comi_sh (__m128h __A, __m128h __B, const int __P)
- {
- return __builtin_ia32_cmpsh_mask_round (__A, __B, __P,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline int
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_comi_round_sh (__m128h __A, __m128h __B, const int __P, const int __R)
- {
- return __builtin_ia32_cmpsh_mask_round (__A, __B, __P,
- (__mmask8) -1,__R);
- }
- #else
- #define _mm_comi_round_sh(A, B, P, R) \
- (__builtin_ia32_cmpsh_mask_round ((A), (B), (P), (__mmask8) (-1), (R)))
- #define _mm_comi_sh(A, B, P) \
- (__builtin_ia32_cmpsh_mask_round ((A), (B), (P), (__mmask8) (-1), \
- _MM_FROUND_CUR_DIRECTION))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vsqrtph. */
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_sqrt_ph (__m512h __A)
- {
- return __builtin_ia32_sqrtph512_mask_round (__A,
- _mm512_setzero_ph(),
- (__mmask32) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_sqrt_ph (__m512h __A, __mmask32 __B, __m512h __C)
- {
- return __builtin_ia32_sqrtph512_mask_round (__C, __A, __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_sqrt_ph (__mmask32 __A, __m512h __B)
- {
- return __builtin_ia32_sqrtph512_mask_round (__B,
- _mm512_setzero_ph (),
- __A,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_sqrt_round_ph (__m512h __A, const int __B)
- {
- return __builtin_ia32_sqrtph512_mask_round (__A,
- _mm512_setzero_ph(),
- (__mmask32) -1, __B);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_sqrt_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
- const int __D)
- {
- return __builtin_ia32_sqrtph512_mask_round (__C, __A, __B, __D);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_sqrt_round_ph (__mmask32 __A, __m512h __B, const int __C)
- {
- return __builtin_ia32_sqrtph512_mask_round (__B,
- _mm512_setzero_ph (),
- __A, __C);
- }
- #else
- #define _mm512_sqrt_round_ph(A, B) \
- (__builtin_ia32_sqrtph512_mask_round ((A), \
- _mm512_setzero_ph (), \
- (__mmask32)-1, (B)))
- #define _mm512_mask_sqrt_round_ph(A, B, C, D) \
- (__builtin_ia32_sqrtph512_mask_round ((C), (A), (B), (D)))
- #define _mm512_maskz_sqrt_round_ph(A, B, C) \
- (__builtin_ia32_sqrtph512_mask_round ((B), \
- _mm512_setzero_ph (), \
- (A), (C)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vrsqrtph. */
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_rsqrt_ph (__m512h __A)
- {
- return __builtin_ia32_rsqrtph512_mask (__A, _mm512_setzero_ph (),
- (__mmask32) -1);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_rsqrt_ph (__m512h __A, __mmask32 __B, __m512h __C)
- {
- return __builtin_ia32_rsqrtph512_mask (__C, __A, __B);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_rsqrt_ph (__mmask32 __A, __m512h __B)
- {
- return __builtin_ia32_rsqrtph512_mask (__B, _mm512_setzero_ph (),
- __A);
- }
- /* Intrinsics vrsqrtsh. */
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_rsqrt_sh (__m128h __A, __m128h __B)
- {
- return __builtin_ia32_rsqrtsh_mask (__B, __A, _mm_setzero_ph (),
- (__mmask8) -1);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_rsqrt_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
- {
- return __builtin_ia32_rsqrtsh_mask (__D, __C, __A, __B);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_rsqrt_sh (__mmask8 __A, __m128h __B, __m128h __C)
- {
- return __builtin_ia32_rsqrtsh_mask (__C, __B, _mm_setzero_ph (),
- __A);
- }
- /* Intrinsics vsqrtsh. */
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_sqrt_sh (__m128h __A, __m128h __B)
- {
- return __builtin_ia32_sqrtsh_mask_round (__B, __A,
- _mm_setzero_ph (),
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_sqrt_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
- {
- return __builtin_ia32_sqrtsh_mask_round (__D, __C, __A, __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_sqrt_sh (__mmask8 __A, __m128h __B, __m128h __C)
- {
- return __builtin_ia32_sqrtsh_mask_round (__C, __B,
- _mm_setzero_ph (),
- __A, _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_sqrt_round_sh (__m128h __A, __m128h __B, const int __C)
- {
- return __builtin_ia32_sqrtsh_mask_round (__B, __A,
- _mm_setzero_ph (),
- (__mmask8) -1, __C);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_sqrt_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
- __m128h __D, const int __E)
- {
- return __builtin_ia32_sqrtsh_mask_round (__D, __C, __A, __B,
- __E);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_sqrt_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
- const int __D)
- {
- return __builtin_ia32_sqrtsh_mask_round (__C, __B,
- _mm_setzero_ph (),
- __A, __D);
- }
- #else
- #define _mm_sqrt_round_sh(A, B, C) \
- (__builtin_ia32_sqrtsh_mask_round ((B), (A), \
- _mm_setzero_ph (), \
- (__mmask8)-1, (C)))
- #define _mm_mask_sqrt_round_sh(A, B, C, D, E) \
- (__builtin_ia32_sqrtsh_mask_round ((D), (C), (A), (B), (E)))
- #define _mm_maskz_sqrt_round_sh(A, B, C, D) \
- (__builtin_ia32_sqrtsh_mask_round ((C), (B), \
- _mm_setzero_ph (), \
- (A), (D)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vrcpph. */
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_rcp_ph (__m512h __A)
- {
- return __builtin_ia32_rcpph512_mask (__A, _mm512_setzero_ph (),
- (__mmask32) -1);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_rcp_ph (__m512h __A, __mmask32 __B, __m512h __C)
- {
- return __builtin_ia32_rcpph512_mask (__C, __A, __B);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_rcp_ph (__mmask32 __A, __m512h __B)
- {
- return __builtin_ia32_rcpph512_mask (__B, _mm512_setzero_ph (),
- __A);
- }
- /* Intrinsics vrcpsh. */
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_rcp_sh (__m128h __A, __m128h __B)
- {
- return __builtin_ia32_rcpsh_mask (__B, __A, _mm_setzero_ph (),
- (__mmask8) -1);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_rcp_sh (__m128h __A, __mmask32 __B, __m128h __C, __m128h __D)
- {
- return __builtin_ia32_rcpsh_mask (__D, __C, __A, __B);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_rcp_sh (__mmask32 __A, __m128h __B, __m128h __C)
- {
- return __builtin_ia32_rcpsh_mask (__C, __B, _mm_setzero_ph (),
- __A);
- }
- /* Intrinsics vscalefph. */
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_scalef_ph (__m512h __A, __m512h __B)
- {
- return __builtin_ia32_scalefph512_mask_round (__A, __B,
- _mm512_setzero_ph (),
- (__mmask32) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_scalef_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
- {
- return __builtin_ia32_scalefph512_mask_round (__C, __D, __A, __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_scalef_ph (__mmask32 __A, __m512h __B, __m512h __C)
- {
- return __builtin_ia32_scalefph512_mask_round (__B, __C,
- _mm512_setzero_ph (),
- __A,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_scalef_round_ph (__m512h __A, __m512h __B, const int __C)
- {
- return __builtin_ia32_scalefph512_mask_round (__A, __B,
- _mm512_setzero_ph (),
- (__mmask32) -1, __C);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_scalef_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
- __m512h __D, const int __E)
- {
- return __builtin_ia32_scalefph512_mask_round (__C, __D, __A, __B,
- __E);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_scalef_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
- const int __D)
- {
- return __builtin_ia32_scalefph512_mask_round (__B, __C,
- _mm512_setzero_ph (),
- __A, __D);
- }
- #else
- #define _mm512_scalef_round_ph(A, B, C) \
- (__builtin_ia32_scalefph512_mask_round ((A), (B), \
- _mm512_setzero_ph (), \
- (__mmask32)-1, (C)))
- #define _mm512_mask_scalef_round_ph(A, B, C, D, E) \
- (__builtin_ia32_scalefph512_mask_round ((C), (D), (A), (B), (E)))
- #define _mm512_maskz_scalef_round_ph(A, B, C, D) \
- (__builtin_ia32_scalefph512_mask_round ((B), (C), \
- _mm512_setzero_ph (), \
- (A), (D)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vscalefsh. */
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_scalef_sh (__m128h __A, __m128h __B)
- {
- return __builtin_ia32_scalefsh_mask_round (__A, __B,
- _mm_setzero_ph (),
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_scalef_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
- {
- return __builtin_ia32_scalefsh_mask_round (__C, __D, __A, __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_scalef_sh (__mmask8 __A, __m128h __B, __m128h __C)
- {
- return __builtin_ia32_scalefsh_mask_round (__B, __C,
- _mm_setzero_ph (),
- __A,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_scalef_round_sh (__m128h __A, __m128h __B, const int __C)
- {
- return __builtin_ia32_scalefsh_mask_round (__A, __B,
- _mm_setzero_ph (),
- (__mmask8) -1, __C);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_scalef_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
- __m128h __D, const int __E)
- {
- return __builtin_ia32_scalefsh_mask_round (__C, __D, __A, __B,
- __E);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_scalef_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
- const int __D)
- {
- return __builtin_ia32_scalefsh_mask_round (__B, __C,
- _mm_setzero_ph (),
- __A, __D);
- }
- #else
- #define _mm_scalef_round_sh(A, B, C) \
- (__builtin_ia32_scalefsh_mask_round ((A), (B), \
- _mm_setzero_ph (), \
- (__mmask8)-1, (C)))
- #define _mm_mask_scalef_round_sh(A, B, C, D, E) \
- (__builtin_ia32_scalefsh_mask_round ((C), (D), (A), (B), (E)))
- #define _mm_maskz_scalef_round_sh(A, B, C, D) \
- (__builtin_ia32_scalefsh_mask_round ((B), (C), _mm_setzero_ph (), \
- (A), (D)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vreduceph. */
- #ifdef __OPTIMIZE__
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_reduce_ph (__m512h __A, int __B)
- {
- return __builtin_ia32_reduceph512_mask_round (__A, __B,
- _mm512_setzero_ph (),
- (__mmask32) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_reduce_ph (__m512h __A, __mmask32 __B, __m512h __C, int __D)
- {
- return __builtin_ia32_reduceph512_mask_round (__C, __D, __A, __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_reduce_ph (__mmask32 __A, __m512h __B, int __C)
- {
- return __builtin_ia32_reduceph512_mask_round (__B, __C,
- _mm512_setzero_ph (),
- __A,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_reduce_round_ph (__m512h __A, int __B, const int __C)
- {
- return __builtin_ia32_reduceph512_mask_round (__A, __B,
- _mm512_setzero_ph (),
- (__mmask32) -1, __C);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_reduce_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
- int __D, const int __E)
- {
- return __builtin_ia32_reduceph512_mask_round (__C, __D, __A, __B,
- __E);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_reduce_round_ph (__mmask32 __A, __m512h __B, int __C,
- const int __D)
- {
- return __builtin_ia32_reduceph512_mask_round (__B, __C,
- _mm512_setzero_ph (),
- __A, __D);
- }
- #else
- #define _mm512_reduce_ph(A, B) \
- (__builtin_ia32_reduceph512_mask_round ((A), (B), \
- _mm512_setzero_ph (), \
- (__mmask32)-1, \
- _MM_FROUND_CUR_DIRECTION))
- #define _mm512_mask_reduce_ph(A, B, C, D) \
- (__builtin_ia32_reduceph512_mask_round ((C), (D), (A), (B), \
- _MM_FROUND_CUR_DIRECTION))
- #define _mm512_maskz_reduce_ph(A, B, C) \
- (__builtin_ia32_reduceph512_mask_round ((B), (C), \
- _mm512_setzero_ph (), \
- (A), _MM_FROUND_CUR_DIRECTION))
- #define _mm512_reduce_round_ph(A, B, C) \
- (__builtin_ia32_reduceph512_mask_round ((A), (B), \
- _mm512_setzero_ph (), \
- (__mmask32)-1, (C)))
- #define _mm512_mask_reduce_round_ph(A, B, C, D, E) \
- (__builtin_ia32_reduceph512_mask_round ((C), (D), (A), (B), (E)))
- #define _mm512_maskz_reduce_round_ph(A, B, C, D) \
- (__builtin_ia32_reduceph512_mask_round ((B), (C), \
- _mm512_setzero_ph (), \
- (A), (D)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vreducesh. */
- #ifdef __OPTIMIZE__
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_reduce_sh (__m128h __A, __m128h __B, int __C)
- {
- return __builtin_ia32_reducesh_mask_round (__A, __B, __C,
- _mm_setzero_ph (),
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_reduce_sh (__m128h __A, __mmask8 __B, __m128h __C,
- __m128h __D, int __E)
- {
- return __builtin_ia32_reducesh_mask_round (__C, __D, __E, __A, __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_reduce_sh (__mmask8 __A, __m128h __B, __m128h __C, int __D)
- {
- return __builtin_ia32_reducesh_mask_round (__B, __C, __D,
- _mm_setzero_ph (), __A,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_reduce_round_sh (__m128h __A, __m128h __B, int __C, const int __D)
- {
- return __builtin_ia32_reducesh_mask_round (__A, __B, __C,
- _mm_setzero_ph (),
- (__mmask8) -1, __D);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_reduce_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
- __m128h __D, int __E, const int __F)
- {
- return __builtin_ia32_reducesh_mask_round (__C, __D, __E, __A,
- __B, __F);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_reduce_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
- int __D, const int __E)
- {
- return __builtin_ia32_reducesh_mask_round (__B, __C, __D,
- _mm_setzero_ph (),
- __A, __E);
- }
- #else
- #define _mm_reduce_sh(A, B, C) \
- (__builtin_ia32_reducesh_mask_round ((A), (B), (C), \
- _mm_setzero_ph (), \
- (__mmask8)-1, \
- _MM_FROUND_CUR_DIRECTION))
- #define _mm_mask_reduce_sh(A, B, C, D, E) \
- (__builtin_ia32_reducesh_mask_round ((C), (D), (E), (A), (B), \
- _MM_FROUND_CUR_DIRECTION))
- #define _mm_maskz_reduce_sh(A, B, C, D) \
- (__builtin_ia32_reducesh_mask_round ((B), (C), (D), \
- _mm_setzero_ph (), \
- (A), _MM_FROUND_CUR_DIRECTION))
- #define _mm_reduce_round_sh(A, B, C, D) \
- (__builtin_ia32_reducesh_mask_round ((A), (B), (C), \
- _mm_setzero_ph (), \
- (__mmask8)-1, (D)))
- #define _mm_mask_reduce_round_sh(A, B, C, D, E, F) \
- (__builtin_ia32_reducesh_mask_round ((C), (D), (E), (A), (B), (F)))
- #define _mm_maskz_reduce_round_sh(A, B, C, D, E) \
- (__builtin_ia32_reducesh_mask_round ((B), (C), (D), \
- _mm_setzero_ph (), \
- (A), (E)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vrndscaleph. */
- #ifdef __OPTIMIZE__
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_roundscale_ph (__m512h __A, int __B)
- {
- return __builtin_ia32_rndscaleph512_mask_round (__A, __B,
- _mm512_setzero_ph (),
- (__mmask32) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_roundscale_ph (__m512h __A, __mmask32 __B,
- __m512h __C, int __D)
- {
- return __builtin_ia32_rndscaleph512_mask_round (__C, __D, __A, __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_roundscale_ph (__mmask32 __A, __m512h __B, int __C)
- {
- return __builtin_ia32_rndscaleph512_mask_round (__B, __C,
- _mm512_setzero_ph (),
- __A,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_roundscale_round_ph (__m512h __A, int __B, const int __C)
- {
- return __builtin_ia32_rndscaleph512_mask_round (__A, __B,
- _mm512_setzero_ph (),
- (__mmask32) -1,
- __C);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_roundscale_round_ph (__m512h __A, __mmask32 __B,
- __m512h __C, int __D, const int __E)
- {
- return __builtin_ia32_rndscaleph512_mask_round (__C, __D, __A,
- __B, __E);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_roundscale_round_ph (__mmask32 __A, __m512h __B, int __C,
- const int __D)
- {
- return __builtin_ia32_rndscaleph512_mask_round (__B, __C,
- _mm512_setzero_ph (),
- __A, __D);
- }
- #else
- #define _mm512_roundscale_ph(A, B) \
- (__builtin_ia32_rndscaleph512_mask_round ((A), (B), \
- _mm512_setzero_ph (), \
- (__mmask32)-1, \
- _MM_FROUND_CUR_DIRECTION))
- #define _mm512_mask_roundscale_ph(A, B, C, D) \
- (__builtin_ia32_rndscaleph512_mask_round ((C), (D), (A), (B), \
- _MM_FROUND_CUR_DIRECTION))
- #define _mm512_maskz_roundscale_ph(A, B, C) \
- (__builtin_ia32_rndscaleph512_mask_round ((B), (C), \
- _mm512_setzero_ph (), \
- (A), \
- _MM_FROUND_CUR_DIRECTION))
- #define _mm512_roundscale_round_ph(A, B, C) \
- (__builtin_ia32_rndscaleph512_mask_round ((A), (B), \
- _mm512_setzero_ph (), \
- (__mmask32)-1, (C)))
- #define _mm512_mask_roundscale_round_ph(A, B, C, D, E) \
- (__builtin_ia32_rndscaleph512_mask_round ((C), (D), (A), (B), (E)))
- #define _mm512_maskz_roundscale_round_ph(A, B, C, D) \
- (__builtin_ia32_rndscaleph512_mask_round ((B), (C), \
- _mm512_setzero_ph (), \
- (A), (D)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vrndscalesh. */
- #ifdef __OPTIMIZE__
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_roundscale_sh (__m128h __A, __m128h __B, int __C)
- {
- return __builtin_ia32_rndscalesh_mask_round (__A, __B, __C,
- _mm_setzero_ph (),
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_roundscale_sh (__m128h __A, __mmask8 __B, __m128h __C,
- __m128h __D, int __E)
- {
- return __builtin_ia32_rndscalesh_mask_round (__C, __D, __E, __A, __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_roundscale_sh (__mmask8 __A, __m128h __B, __m128h __C, int __D)
- {
- return __builtin_ia32_rndscalesh_mask_round (__B, __C, __D,
- _mm_setzero_ph (), __A,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_roundscale_round_sh (__m128h __A, __m128h __B, int __C, const int __D)
- {
- return __builtin_ia32_rndscalesh_mask_round (__A, __B, __C,
- _mm_setzero_ph (),
- (__mmask8) -1,
- __D);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_roundscale_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
- __m128h __D, int __E, const int __F)
- {
- return __builtin_ia32_rndscalesh_mask_round (__C, __D, __E,
- __A, __B, __F);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_roundscale_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
- int __D, const int __E)
- {
- return __builtin_ia32_rndscalesh_mask_round (__B, __C, __D,
- _mm_setzero_ph (),
- __A, __E);
- }
- #else
- #define _mm_roundscale_sh(A, B, C) \
- (__builtin_ia32_rndscalesh_mask_round ((A), (B), (C), \
- _mm_setzero_ph (), \
- (__mmask8)-1, \
- _MM_FROUND_CUR_DIRECTION))
- #define _mm_mask_roundscale_sh(A, B, C, D, E) \
- (__builtin_ia32_rndscalesh_mask_round ((C), (D), (E), (A), (B), \
- _MM_FROUND_CUR_DIRECTION))
- #define _mm_maskz_roundscale_sh(A, B, C, D) \
- (__builtin_ia32_rndscalesh_mask_round ((B), (C), (D), \
- _mm_setzero_ph (), \
- (A), _MM_FROUND_CUR_DIRECTION))
- #define _mm_roundscale_round_sh(A, B, C, D) \
- (__builtin_ia32_rndscalesh_mask_round ((A), (B), (C), \
- _mm_setzero_ph (), \
- (__mmask8)-1, (D)))
- #define _mm_mask_roundscale_round_sh(A, B, C, D, E, F) \
- (__builtin_ia32_rndscalesh_mask_round ((C), (D), (E), (A), (B), (F)))
- #define _mm_maskz_roundscale_round_sh(A, B, C, D, E) \
- (__builtin_ia32_rndscalesh_mask_round ((B), (C), (D), \
- _mm_setzero_ph (), \
- (A), (E)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vfpclasssh. */
- #ifdef __OPTIMIZE__
- extern __inline __mmask8
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_fpclass_sh_mask (__m128h __A, const int __imm)
- {
- return (__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) __A, __imm,
- (__mmask8) -1);
- }
- extern __inline __mmask8
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_fpclass_sh_mask (__mmask8 __U, __m128h __A, const int __imm)
- {
- return (__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) __A, __imm, __U);
- }
- #else
- #define _mm_fpclass_sh_mask(X, C) \
- ((__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) (__m128h) (X), \
- (int) (C), (__mmask8) (-1))) \
- #define _mm_mask_fpclass_sh_mask(U, X, C) \
- ((__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) (__m128h) (X), \
- (int) (C), (__mmask8) (U)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vfpclassph. */
- #ifdef __OPTIMIZE__
- extern __inline __mmask32
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_fpclass_ph_mask (__mmask32 __U, __m512h __A,
- const int __imm)
- {
- return (__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) __A,
- __imm, __U);
- }
- extern __inline __mmask32
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_fpclass_ph_mask (__m512h __A, const int __imm)
- {
- return (__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) __A,
- __imm,
- (__mmask32) -1);
- }
- #else
- #define _mm512_mask_fpclass_ph_mask(u, x, c) \
- ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \
- (int) (c),(__mmask8)(u)))
- #define _mm512_fpclass_ph_mask(x, c) \
- ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \
- (int) (c),(__mmask8)-1))
- #endif /* __OPIMTIZE__ */
- /* Intrinsics vgetexpph, vgetexpsh. */
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_getexp_sh (__m128h __A, __m128h __B)
- {
- return (__m128h)
- __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, (__v8hf) __B,
- (__v8hf) _mm_setzero_ph (),
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_getexp_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
- {
- return (__m128h)
- __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, (__v8hf) __B,
- (__v8hf) __W, (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_getexp_sh (__mmask8 __U, __m128h __A, __m128h __B)
- {
- return (__m128h)
- __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, (__v8hf) __B,
- (__v8hf) _mm_setzero_ph (),
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_getexp_ph (__m512h __A)
- {
- return (__m512h)
- __builtin_ia32_getexpph512_mask ((__v32hf) __A,
- (__v32hf) _mm512_setzero_ph (),
- (__mmask32) -1, _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_getexp_ph (__m512h __W, __mmask32 __U, __m512h __A)
- {
- return (__m512h)
- __builtin_ia32_getexpph512_mask ((__v32hf) __A, (__v32hf) __W,
- (__mmask32) __U, _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_getexp_ph (__mmask32 __U, __m512h __A)
- {
- return (__m512h)
- __builtin_ia32_getexpph512_mask ((__v32hf) __A,
- (__v32hf) _mm512_setzero_ph (),
- (__mmask32) __U, _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_getexp_round_sh (__m128h __A, __m128h __B, const int __R)
- {
- return (__m128h) __builtin_ia32_getexpsh_mask_round ((__v8hf) __A,
- (__v8hf) __B,
- _mm_setzero_ph (),
- (__mmask8) -1,
- __R);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_getexp_round_sh (__m128h __W, __mmask8 __U, __m128h __A,
- __m128h __B, const int __R)
- {
- return (__m128h) __builtin_ia32_getexpsh_mask_round ((__v8hf) __A,
- (__v8hf) __B,
- (__v8hf) __W,
- (__mmask8) __U, __R);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_getexp_round_sh (__mmask8 __U, __m128h __A, __m128h __B,
- const int __R)
- {
- return (__m128h) __builtin_ia32_getexpsh_mask_round ((__v8hf) __A,
- (__v8hf) __B,
- (__v8hf)
- _mm_setzero_ph (),
- (__mmask8) __U, __R);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_getexp_round_ph (__m512h __A, const int __R)
- {
- return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
- (__v32hf)
- _mm512_setzero_ph (),
- (__mmask32) -1, __R);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_getexp_round_ph (__m512h __W, __mmask32 __U, __m512h __A,
- const int __R)
- {
- return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
- (__v32hf) __W,
- (__mmask32) __U, __R);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_getexp_round_ph (__mmask32 __U, __m512h __A, const int __R)
- {
- return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
- (__v32hf)
- _mm512_setzero_ph (),
- (__mmask32) __U, __R);
- }
- #else
- #define _mm_getexp_round_sh(A, B, R) \
- ((__m128h)__builtin_ia32_getexpsh_mask_round((__v8hf)(__m128h)(A), \
- (__v8hf)(__m128h)(B), \
- (__v8hf)_mm_setzero_ph(), \
- (__mmask8)-1, R))
- #define _mm_mask_getexp_round_sh(W, U, A, B, C) \
- (__m128h)__builtin_ia32_getexpsh_mask_round(A, B, W, U, C)
- #define _mm_maskz_getexp_round_sh(U, A, B, C) \
- (__m128h)__builtin_ia32_getexpsh_mask_round(A, B, \
- (__v8hf)_mm_setzero_ph(), \
- U, C)
- #define _mm512_getexp_round_ph(A, R) \
- ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \
- (__v32hf)_mm512_setzero_ph(), (__mmask32)-1, R))
- #define _mm512_mask_getexp_round_ph(W, U, A, R) \
- ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \
- (__v32hf)(__m512h)(W), (__mmask32)(U), R))
- #define _mm512_maskz_getexp_round_ph(U, A, R) \
- ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \
- (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), R))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vgetmantph, vgetmantsh. */
- #ifdef __OPTIMIZE__
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_getmant_sh (__m128h __A, __m128h __B,
- _MM_MANTISSA_NORM_ENUM __C,
- _MM_MANTISSA_SIGN_ENUM __D)
- {
- return (__m128h)
- __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, (__v8hf) __B,
- (__D << 2) | __C, _mm_setzero_ph (),
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_getmant_sh (__m128h __W, __mmask8 __U, __m128h __A,
- __m128h __B, _MM_MANTISSA_NORM_ENUM __C,
- _MM_MANTISSA_SIGN_ENUM __D)
- {
- return (__m128h)
- __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, (__v8hf) __B,
- (__D << 2) | __C, (__v8hf) __W,
- __U, _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_getmant_sh (__mmask8 __U, __m128h __A, __m128h __B,
- _MM_MANTISSA_NORM_ENUM __C,
- _MM_MANTISSA_SIGN_ENUM __D)
- {
- return (__m128h)
- __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, (__v8hf) __B,
- (__D << 2) | __C,
- (__v8hf) _mm_setzero_ph(),
- __U, _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_getmant_ph (__m512h __A, _MM_MANTISSA_NORM_ENUM __B,
- _MM_MANTISSA_SIGN_ENUM __C)
- {
- return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
- (__C << 2) | __B,
- _mm512_setzero_ph (),
- (__mmask32) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_getmant_ph (__m512h __W, __mmask32 __U, __m512h __A,
- _MM_MANTISSA_NORM_ENUM __B,
- _MM_MANTISSA_SIGN_ENUM __C)
- {
- return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
- (__C << 2) | __B,
- (__v32hf) __W, __U,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_getmant_ph (__mmask32 __U, __m512h __A,
- _MM_MANTISSA_NORM_ENUM __B,
- _MM_MANTISSA_SIGN_ENUM __C)
- {
- return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
- (__C << 2) | __B,
- (__v32hf)
- _mm512_setzero_ph (),
- __U,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_getmant_round_sh (__m128h __A, __m128h __B,
- _MM_MANTISSA_NORM_ENUM __C,
- _MM_MANTISSA_SIGN_ENUM __D, const int __R)
- {
- return (__m128h) __builtin_ia32_getmantsh_mask_round ((__v8hf) __A,
- (__v8hf) __B,
- (__D << 2) | __C,
- _mm_setzero_ph (),
- (__mmask8) -1,
- __R);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_getmant_round_sh (__m128h __W, __mmask8 __U, __m128h __A,
- __m128h __B, _MM_MANTISSA_NORM_ENUM __C,
- _MM_MANTISSA_SIGN_ENUM __D, const int __R)
- {
- return (__m128h) __builtin_ia32_getmantsh_mask_round ((__v8hf) __A,
- (__v8hf) __B,
- (__D << 2) | __C,
- (__v8hf) __W,
- __U, __R);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_getmant_round_sh (__mmask8 __U, __m128h __A, __m128h __B,
- _MM_MANTISSA_NORM_ENUM __C,
- _MM_MANTISSA_SIGN_ENUM __D, const int __R)
- {
- return (__m128h) __builtin_ia32_getmantsh_mask_round ((__v8hf) __A,
- (__v8hf) __B,
- (__D << 2) | __C,
- (__v8hf)
- _mm_setzero_ph(),
- __U, __R);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_getmant_round_ph (__m512h __A, _MM_MANTISSA_NORM_ENUM __B,
- _MM_MANTISSA_SIGN_ENUM __C, const int __R)
- {
- return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
- (__C << 2) | __B,
- _mm512_setzero_ph (),
- (__mmask32) -1, __R);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_getmant_round_ph (__m512h __W, __mmask32 __U, __m512h __A,
- _MM_MANTISSA_NORM_ENUM __B,
- _MM_MANTISSA_SIGN_ENUM __C, const int __R)
- {
- return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
- (__C << 2) | __B,
- (__v32hf) __W, __U,
- __R);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_getmant_round_ph (__mmask32 __U, __m512h __A,
- _MM_MANTISSA_NORM_ENUM __B,
- _MM_MANTISSA_SIGN_ENUM __C, const int __R)
- {
- return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
- (__C << 2) | __B,
- (__v32hf)
- _mm512_setzero_ph (),
- __U, __R);
- }
- #else
- #define _mm512_getmant_ph(X, B, C) \
- ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
- (int)(((C)<<2) | (B)), \
- (__v32hf)(__m512h) \
- _mm512_setzero_ph(), \
- (__mmask32)-1, \
- _MM_FROUND_CUR_DIRECTION))
- #define _mm512_mask_getmant_ph(W, U, X, B, C) \
- ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
- (int)(((C)<<2) | (B)), \
- (__v32hf)(__m512h)(W), \
- (__mmask32)(U), \
- _MM_FROUND_CUR_DIRECTION))
- #define _mm512_maskz_getmant_ph(U, X, B, C) \
- ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
- (int)(((C)<<2) | (B)), \
- (__v32hf)(__m512h) \
- _mm512_setzero_ph(), \
- (__mmask32)(U), \
- _MM_FROUND_CUR_DIRECTION))
- #define _mm_getmant_sh(X, Y, C, D) \
- ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \
- (__v8hf)(__m128h)(Y), \
- (int)(((D)<<2) | (C)), \
- (__v8hf)(__m128h) \
- _mm_setzero_ph (), \
- (__mmask8)-1, \
- _MM_FROUND_CUR_DIRECTION))
- #define _mm_mask_getmant_sh(W, U, X, Y, C, D) \
- ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \
- (__v8hf)(__m128h)(Y), \
- (int)(((D)<<2) | (C)), \
- (__v8hf)(__m128h)(W), \
- (__mmask8)(U), \
- _MM_FROUND_CUR_DIRECTION))
- #define _mm_maskz_getmant_sh(U, X, Y, C, D) \
- ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \
- (__v8hf)(__m128h)(Y), \
- (int)(((D)<<2) | (C)), \
- (__v8hf)(__m128h) \
- _mm_setzero_ph(), \
- (__mmask8)(U), \
- _MM_FROUND_CUR_DIRECTION))
- #define _mm512_getmant_round_ph(X, B, C, R) \
- ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
- (int)(((C)<<2) | (B)), \
- (__v32hf)(__m512h) \
- _mm512_setzero_ph(), \
- (__mmask32)-1, \
- (R)))
- #define _mm512_mask_getmant_round_ph(W, U, X, B, C, R) \
- ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
- (int)(((C)<<2) | (B)), \
- (__v32hf)(__m512h)(W), \
- (__mmask32)(U), \
- (R)))
- #define _mm512_maskz_getmant_round_ph(U, X, B, C, R) \
- ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
- (int)(((C)<<2) | (B)), \
- (__v32hf)(__m512h) \
- _mm512_setzero_ph(), \
- (__mmask32)(U), \
- (R)))
- #define _mm_getmant_round_sh(X, Y, C, D, R) \
- ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \
- (__v8hf)(__m128h)(Y), \
- (int)(((D)<<2) | (C)), \
- (__v8hf)(__m128h) \
- _mm_setzero_ph (), \
- (__mmask8)-1, \
- (R)))
- #define _mm_mask_getmant_round_sh(W, U, X, Y, C, D, R) \
- ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \
- (__v8hf)(__m128h)(Y), \
- (int)(((D)<<2) | (C)), \
- (__v8hf)(__m128h)(W), \
- (__mmask8)(U), \
- (R)))
- #define _mm_maskz_getmant_round_sh(U, X, Y, C, D, R) \
- ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \
- (__v8hf)(__m128h)(Y), \
- (int)(((D)<<2) | (C)), \
- (__v8hf)(__m128h) \
- _mm_setzero_ph(), \
- (__mmask8)(U), \
- (R)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vmovw. */
- extern __inline __m128i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvtsi16_si128 (short __A)
- {
- return _mm_set_epi16 (0, 0, 0, 0, 0, 0, 0, __A);
- }
- extern __inline short
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvtsi128_si16 (__m128i __A)
- {
- return __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, 0);
- }
- /* Intrinsics vmovsh. */
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_load_sh (__m128h __A, __mmask8 __B, _Float16 const* __C)
- {
- return __builtin_ia32_loadsh_mask (__C, __A, __B);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_load_sh (__mmask8 __A, _Float16 const* __B)
- {
- return __builtin_ia32_loadsh_mask (__B, _mm_setzero_ph (), __A);
- }
- extern __inline void
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_store_sh (_Float16 const* __A, __mmask8 __B, __m128h __C)
- {
- __builtin_ia32_storesh_mask (__A, __C, __B);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_move_sh (__m128h __A, __m128h __B)
- {
- __A[0] = __B[0];
- return __A;
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_move_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
- {
- return __builtin_ia32_vmovsh_mask (__C, __D, __A, __B);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_move_sh (__mmask8 __A, __m128h __B, __m128h __C)
- {
- return __builtin_ia32_vmovsh_mask (__B, __C, _mm_setzero_ph (), __A);
- }
- /* Intrinsics vcvtph2dq. */
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvtph_epi32 (__m256h __A)
- {
- return (__m512i)
- __builtin_ia32_vcvtph2dq512_mask_round (__A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvtph_epi32 (__m512i __A, __mmask16 __B, __m256h __C)
- {
- return (__m512i)
- __builtin_ia32_vcvtph2dq512_mask_round (__C,
- (__v16si) __A,
- __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvtph_epi32 (__mmask16 __A, __m256h __B)
- {
- return (__m512i)
- __builtin_ia32_vcvtph2dq512_mask_round (__B,
- (__v16si)
- _mm512_setzero_si512 (),
- __A,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvt_roundph_epi32 (__m256h __A, int __B)
- {
- return (__m512i)
- __builtin_ia32_vcvtph2dq512_mask_round (__A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) -1,
- __B);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvt_roundph_epi32 (__m512i __A, __mmask16 __B, __m256h __C, int __D)
- {
- return (__m512i)
- __builtin_ia32_vcvtph2dq512_mask_round (__C,
- (__v16si) __A,
- __B,
- __D);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvt_roundph_epi32 (__mmask16 __A, __m256h __B, int __C)
- {
- return (__m512i)
- __builtin_ia32_vcvtph2dq512_mask_round (__B,
- (__v16si)
- _mm512_setzero_si512 (),
- __A,
- __C);
- }
- #else
- #define _mm512_cvt_roundph_epi32(A, B) \
- ((__m512i) \
- __builtin_ia32_vcvtph2dq512_mask_round ((A), \
- (__v16si) \
- _mm512_setzero_si512 (), \
- (__mmask16)-1, \
- (B)))
- #define _mm512_mask_cvt_roundph_epi32(A, B, C, D) \
- ((__m512i) \
- __builtin_ia32_vcvtph2dq512_mask_round ((C), (__v16si)(A), (B), (D)))
- #define _mm512_maskz_cvt_roundph_epi32(A, B, C) \
- ((__m512i) \
- __builtin_ia32_vcvtph2dq512_mask_round ((B), \
- (__v16si) \
- _mm512_setzero_si512 (), \
- (A), \
- (C)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vcvtph2udq. */
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvtph_epu32 (__m256h __A)
- {
- return (__m512i)
- __builtin_ia32_vcvtph2udq512_mask_round (__A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvtph_epu32 (__m512i __A, __mmask16 __B, __m256h __C)
- {
- return (__m512i)
- __builtin_ia32_vcvtph2udq512_mask_round (__C,
- (__v16si) __A,
- __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvtph_epu32 (__mmask16 __A, __m256h __B)
- {
- return (__m512i)
- __builtin_ia32_vcvtph2udq512_mask_round (__B,
- (__v16si)
- _mm512_setzero_si512 (),
- __A,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvt_roundph_epu32 (__m256h __A, int __B)
- {
- return (__m512i)
- __builtin_ia32_vcvtph2udq512_mask_round (__A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) -1,
- __B);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvt_roundph_epu32 (__m512i __A, __mmask16 __B, __m256h __C, int __D)
- {
- return (__m512i)
- __builtin_ia32_vcvtph2udq512_mask_round (__C,
- (__v16si) __A,
- __B,
- __D);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvt_roundph_epu32 (__mmask16 __A, __m256h __B, int __C)
- {
- return (__m512i)
- __builtin_ia32_vcvtph2udq512_mask_round (__B,
- (__v16si)
- _mm512_setzero_si512 (),
- __A,
- __C);
- }
- #else
- #define _mm512_cvt_roundph_epu32(A, B) \
- ((__m512i) \
- __builtin_ia32_vcvtph2udq512_mask_round ((A), \
- (__v16si) \
- _mm512_setzero_si512 (), \
- (__mmask16)-1, \
- (B)))
- #define _mm512_mask_cvt_roundph_epu32(A, B, C, D) \
- ((__m512i) \
- __builtin_ia32_vcvtph2udq512_mask_round ((C), (__v16si)(A), (B), (D)))
- #define _mm512_maskz_cvt_roundph_epu32(A, B, C) \
- ((__m512i) \
- __builtin_ia32_vcvtph2udq512_mask_round ((B), \
- (__v16si) \
- _mm512_setzero_si512 (), \
- (A), \
- (C)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vcvttph2dq. */
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvttph_epi32 (__m256h __A)
- {
- return (__m512i)
- __builtin_ia32_vcvttph2dq512_mask_round (__A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvttph_epi32 (__m512i __A, __mmask16 __B, __m256h __C)
- {
- return (__m512i)
- __builtin_ia32_vcvttph2dq512_mask_round (__C,
- (__v16si) __A,
- __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvttph_epi32 (__mmask16 __A, __m256h __B)
- {
- return (__m512i)
- __builtin_ia32_vcvttph2dq512_mask_round (__B,
- (__v16si)
- _mm512_setzero_si512 (),
- __A,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvtt_roundph_epi32 (__m256h __A, int __B)
- {
- return (__m512i)
- __builtin_ia32_vcvttph2dq512_mask_round (__A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) -1,
- __B);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvtt_roundph_epi32 (__m512i __A, __mmask16 __B,
- __m256h __C, int __D)
- {
- return (__m512i)
- __builtin_ia32_vcvttph2dq512_mask_round (__C,
- (__v16si) __A,
- __B,
- __D);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvtt_roundph_epi32 (__mmask16 __A, __m256h __B, int __C)
- {
- return (__m512i)
- __builtin_ia32_vcvttph2dq512_mask_round (__B,
- (__v16si)
- _mm512_setzero_si512 (),
- __A,
- __C);
- }
- #else
- #define _mm512_cvtt_roundph_epi32(A, B) \
- ((__m512i) \
- __builtin_ia32_vcvttph2dq512_mask_round ((A), \
- (__v16si) \
- (_mm512_setzero_si512 ()), \
- (__mmask16)(-1), (B)))
- #define _mm512_mask_cvtt_roundph_epi32(A, B, C, D) \
- ((__m512i) \
- __builtin_ia32_vcvttph2dq512_mask_round ((C), \
- (__v16si)(A), \
- (B), \
- (D)))
- #define _mm512_maskz_cvtt_roundph_epi32(A, B, C) \
- ((__m512i) \
- __builtin_ia32_vcvttph2dq512_mask_round ((B), \
- (__v16si) \
- _mm512_setzero_si512 (), \
- (A), \
- (C)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vcvttph2udq. */
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvttph_epu32 (__m256h __A)
- {
- return (__m512i)
- __builtin_ia32_vcvttph2udq512_mask_round (__A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvttph_epu32 (__m512i __A, __mmask16 __B, __m256h __C)
- {
- return (__m512i)
- __builtin_ia32_vcvttph2udq512_mask_round (__C,
- (__v16si) __A,
- __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvttph_epu32 (__mmask16 __A, __m256h __B)
- {
- return (__m512i)
- __builtin_ia32_vcvttph2udq512_mask_round (__B,
- (__v16si)
- _mm512_setzero_si512 (),
- __A,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvtt_roundph_epu32 (__m256h __A, int __B)
- {
- return (__m512i)
- __builtin_ia32_vcvttph2udq512_mask_round (__A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) -1,
- __B);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvtt_roundph_epu32 (__m512i __A, __mmask16 __B,
- __m256h __C, int __D)
- {
- return (__m512i)
- __builtin_ia32_vcvttph2udq512_mask_round (__C,
- (__v16si) __A,
- __B,
- __D);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvtt_roundph_epu32 (__mmask16 __A, __m256h __B, int __C)
- {
- return (__m512i)
- __builtin_ia32_vcvttph2udq512_mask_round (__B,
- (__v16si)
- _mm512_setzero_si512 (),
- __A,
- __C);
- }
- #else
- #define _mm512_cvtt_roundph_epu32(A, B) \
- ((__m512i) \
- __builtin_ia32_vcvttph2udq512_mask_round ((A), \
- (__v16si) \
- _mm512_setzero_si512 (), \
- (__mmask16)-1, \
- (B)))
- #define _mm512_mask_cvtt_roundph_epu32(A, B, C, D) \
- ((__m512i) \
- __builtin_ia32_vcvttph2udq512_mask_round ((C), \
- (__v16si)(A), \
- (B), \
- (D)))
- #define _mm512_maskz_cvtt_roundph_epu32(A, B, C) \
- ((__m512i) \
- __builtin_ia32_vcvttph2udq512_mask_round ((B), \
- (__v16si) \
- _mm512_setzero_si512 (), \
- (A), \
- (C)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vcvtdq2ph. */
- extern __inline __m256h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvtepi32_ph (__m512i __A)
- {
- return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __A,
- _mm256_setzero_ph (),
- (__mmask16) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m256h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvtepi32_ph (__m256h __A, __mmask16 __B, __m512i __C)
- {
- return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __C,
- __A,
- __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m256h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvtepi32_ph (__mmask16 __A, __m512i __B)
- {
- return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __B,
- _mm256_setzero_ph (),
- __A,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m256h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvt_roundepi32_ph (__m512i __A, int __B)
- {
- return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __A,
- _mm256_setzero_ph (),
- (__mmask16) -1,
- __B);
- }
- extern __inline __m256h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvt_roundepi32_ph (__m256h __A, __mmask16 __B, __m512i __C, int __D)
- {
- return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __C,
- __A,
- __B,
- __D);
- }
- extern __inline __m256h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvt_roundepi32_ph (__mmask16 __A, __m512i __B, int __C)
- {
- return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __B,
- _mm256_setzero_ph (),
- __A,
- __C);
- }
- #else
- #define _mm512_cvt_roundepi32_ph(A, B) \
- (__builtin_ia32_vcvtdq2ph512_mask_round ((__v16si)(A), \
- _mm256_setzero_ph (), \
- (__mmask16)-1, \
- (B)))
- #define _mm512_mask_cvt_roundepi32_ph(A, B, C, D) \
- (__builtin_ia32_vcvtdq2ph512_mask_round ((__v16si)(C), \
- (A), \
- (B), \
- (D)))
- #define _mm512_maskz_cvt_roundepi32_ph(A, B, C) \
- (__builtin_ia32_vcvtdq2ph512_mask_round ((__v16si)(B), \
- _mm256_setzero_ph (), \
- (A), \
- (C)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vcvtudq2ph. */
- extern __inline __m256h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvtepu32_ph (__m512i __A)
- {
- return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __A,
- _mm256_setzero_ph (),
- (__mmask16) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m256h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvtepu32_ph (__m256h __A, __mmask16 __B, __m512i __C)
- {
- return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __C,
- __A,
- __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m256h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvtepu32_ph (__mmask16 __A, __m512i __B)
- {
- return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __B,
- _mm256_setzero_ph (),
- __A,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m256h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvt_roundepu32_ph (__m512i __A, int __B)
- {
- return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __A,
- _mm256_setzero_ph (),
- (__mmask16) -1,
- __B);
- }
- extern __inline __m256h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvt_roundepu32_ph (__m256h __A, __mmask16 __B, __m512i __C, int __D)
- {
- return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __C,
- __A,
- __B,
- __D);
- }
- extern __inline __m256h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvt_roundepu32_ph (__mmask16 __A, __m512i __B, int __C)
- {
- return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __B,
- _mm256_setzero_ph (),
- __A,
- __C);
- }
- #else
- #define _mm512_cvt_roundepu32_ph(A, B) \
- (__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si)(A), \
- _mm256_setzero_ph (), \
- (__mmask16)-1, \
- B))
- #define _mm512_mask_cvt_roundepu32_ph(A, B, C, D) \
- (__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si)C, \
- A, \
- B, \
- D))
- #define _mm512_maskz_cvt_roundepu32_ph(A, B, C) \
- (__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si)B, \
- _mm256_setzero_ph (), \
- A, \
- C))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vcvtph2qq. */
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvtph_epi64 (__m128h __A)
- {
- return __builtin_ia32_vcvtph2qq512_mask_round (__A,
- _mm512_setzero_si512 (),
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvtph_epi64 (__m512i __A, __mmask8 __B, __m128h __C)
- {
- return __builtin_ia32_vcvtph2qq512_mask_round (__C, __A, __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvtph_epi64 (__mmask8 __A, __m128h __B)
- {
- return __builtin_ia32_vcvtph2qq512_mask_round (__B,
- _mm512_setzero_si512 (),
- __A,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvt_roundph_epi64 (__m128h __A, int __B)
- {
- return __builtin_ia32_vcvtph2qq512_mask_round (__A,
- _mm512_setzero_si512 (),
- (__mmask8) -1,
- __B);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvt_roundph_epi64 (__m512i __A, __mmask8 __B, __m128h __C, int __D)
- {
- return __builtin_ia32_vcvtph2qq512_mask_round (__C, __A, __B, __D);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvt_roundph_epi64 (__mmask8 __A, __m128h __B, int __C)
- {
- return __builtin_ia32_vcvtph2qq512_mask_round (__B,
- _mm512_setzero_si512 (),
- __A,
- __C);
- }
- #else
- #define _mm512_cvt_roundph_epi64(A, B) \
- (__builtin_ia32_vcvtph2qq512_mask_round ((A), \
- _mm512_setzero_si512 (), \
- (__mmask8)-1, \
- (B)))
- #define _mm512_mask_cvt_roundph_epi64(A, B, C, D) \
- (__builtin_ia32_vcvtph2qq512_mask_round ((C), (A), (B), (D)))
- #define _mm512_maskz_cvt_roundph_epi64(A, B, C) \
- (__builtin_ia32_vcvtph2qq512_mask_round ((B), \
- _mm512_setzero_si512 (), \
- (A), \
- (C)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vcvtph2uqq. */
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvtph_epu64 (__m128h __A)
- {
- return __builtin_ia32_vcvtph2uqq512_mask_round (__A,
- _mm512_setzero_si512 (),
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvtph_epu64 (__m512i __A, __mmask8 __B, __m128h __C)
- {
- return __builtin_ia32_vcvtph2uqq512_mask_round (__C, __A, __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvtph_epu64 (__mmask8 __A, __m128h __B)
- {
- return __builtin_ia32_vcvtph2uqq512_mask_round (__B,
- _mm512_setzero_si512 (),
- __A,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvt_roundph_epu64 (__m128h __A, int __B)
- {
- return __builtin_ia32_vcvtph2uqq512_mask_round (__A,
- _mm512_setzero_si512 (),
- (__mmask8) -1,
- __B);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvt_roundph_epu64 (__m512i __A, __mmask8 __B, __m128h __C, int __D)
- {
- return __builtin_ia32_vcvtph2uqq512_mask_round (__C, __A, __B, __D);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvt_roundph_epu64 (__mmask8 __A, __m128h __B, int __C)
- {
- return __builtin_ia32_vcvtph2uqq512_mask_round (__B,
- _mm512_setzero_si512 (),
- __A,
- __C);
- }
- #else
- #define _mm512_cvt_roundph_epu64(A, B) \
- (__builtin_ia32_vcvtph2uqq512_mask_round ((A), \
- _mm512_setzero_si512 (), \
- (__mmask8)-1, \
- (B)))
- #define _mm512_mask_cvt_roundph_epu64(A, B, C, D) \
- (__builtin_ia32_vcvtph2uqq512_mask_round ((C), (A), (B), (D)))
- #define _mm512_maskz_cvt_roundph_epu64(A, B, C) \
- (__builtin_ia32_vcvtph2uqq512_mask_round ((B), \
- _mm512_setzero_si512 (), \
- (A), \
- (C)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vcvttph2qq. */
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvttph_epi64 (__m128h __A)
- {
- return __builtin_ia32_vcvttph2qq512_mask_round (__A,
- _mm512_setzero_si512 (),
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvttph_epi64 (__m512i __A, __mmask8 __B, __m128h __C)
- {
- return __builtin_ia32_vcvttph2qq512_mask_round (__C, __A, __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvttph_epi64 (__mmask8 __A, __m128h __B)
- {
- return __builtin_ia32_vcvttph2qq512_mask_round (__B,
- _mm512_setzero_si512 (),
- __A,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvtt_roundph_epi64 (__m128h __A, int __B)
- {
- return __builtin_ia32_vcvttph2qq512_mask_round (__A,
- _mm512_setzero_si512 (),
- (__mmask8) -1,
- __B);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvtt_roundph_epi64 (__m512i __A, __mmask8 __B, __m128h __C, int __D)
- {
- return __builtin_ia32_vcvttph2qq512_mask_round (__C, __A, __B, __D);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvtt_roundph_epi64 (__mmask8 __A, __m128h __B, int __C)
- {
- return __builtin_ia32_vcvttph2qq512_mask_round (__B,
- _mm512_setzero_si512 (),
- __A,
- __C);
- }
- #else
- #define _mm512_cvtt_roundph_epi64(A, B) \
- (__builtin_ia32_vcvttph2qq512_mask_round ((A), \
- _mm512_setzero_si512 (), \
- (__mmask8)-1, \
- (B)))
- #define _mm512_mask_cvtt_roundph_epi64(A, B, C, D) \
- __builtin_ia32_vcvttph2qq512_mask_round ((C), (A), (B), (D))
- #define _mm512_maskz_cvtt_roundph_epi64(A, B, C) \
- (__builtin_ia32_vcvttph2qq512_mask_round ((B), \
- _mm512_setzero_si512 (), \
- (A), \
- (C)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vcvttph2uqq. */
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvttph_epu64 (__m128h __A)
- {
- return __builtin_ia32_vcvttph2uqq512_mask_round (__A,
- _mm512_setzero_si512 (),
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvttph_epu64 (__m512i __A, __mmask8 __B, __m128h __C)
- {
- return __builtin_ia32_vcvttph2uqq512_mask_round (__C, __A, __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvttph_epu64 (__mmask8 __A, __m128h __B)
- {
- return __builtin_ia32_vcvttph2uqq512_mask_round (__B,
- _mm512_setzero_si512 (),
- __A,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvtt_roundph_epu64 (__m128h __A, int __B)
- {
- return __builtin_ia32_vcvttph2uqq512_mask_round (__A,
- _mm512_setzero_si512 (),
- (__mmask8) -1,
- __B);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvtt_roundph_epu64 (__m512i __A, __mmask8 __B, __m128h __C, int __D)
- {
- return __builtin_ia32_vcvttph2uqq512_mask_round (__C, __A, __B, __D);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvtt_roundph_epu64 (__mmask8 __A, __m128h __B, int __C)
- {
- return __builtin_ia32_vcvttph2uqq512_mask_round (__B,
- _mm512_setzero_si512 (),
- __A,
- __C);
- }
- #else
- #define _mm512_cvtt_roundph_epu64(A, B) \
- (__builtin_ia32_vcvttph2uqq512_mask_round ((A), \
- _mm512_setzero_si512 (), \
- (__mmask8)-1, \
- (B)))
- #define _mm512_mask_cvtt_roundph_epu64(A, B, C, D) \
- __builtin_ia32_vcvttph2uqq512_mask_round ((C), (A), (B), (D))
- #define _mm512_maskz_cvtt_roundph_epu64(A, B, C) \
- (__builtin_ia32_vcvttph2uqq512_mask_round ((B), \
- _mm512_setzero_si512 (), \
- (A), \
- (C)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vcvtqq2ph. */
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvtepi64_ph (__m512i __A)
- {
- return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __A,
- _mm_setzero_ph (),
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvtepi64_ph (__m128h __A, __mmask8 __B, __m512i __C)
- {
- return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __C,
- __A,
- __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvtepi64_ph (__mmask8 __A, __m512i __B)
- {
- return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __B,
- _mm_setzero_ph (),
- __A,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvt_roundepi64_ph (__m512i __A, int __B)
- {
- return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __A,
- _mm_setzero_ph (),
- (__mmask8) -1,
- __B);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvt_roundepi64_ph (__m128h __A, __mmask8 __B, __m512i __C, int __D)
- {
- return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __C,
- __A,
- __B,
- __D);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvt_roundepi64_ph (__mmask8 __A, __m512i __B, int __C)
- {
- return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __B,
- _mm_setzero_ph (),
- __A,
- __C);
- }
- #else
- #define _mm512_cvt_roundepi64_ph(A, B) \
- (__builtin_ia32_vcvtqq2ph512_mask_round ((__v8di)(A), \
- _mm_setzero_ph (), \
- (__mmask8)-1, \
- (B)))
- #define _mm512_mask_cvt_roundepi64_ph(A, B, C, D) \
- (__builtin_ia32_vcvtqq2ph512_mask_round ((__v8di)(C), (A), (B), (D)))
- #define _mm512_maskz_cvt_roundepi64_ph(A, B, C) \
- (__builtin_ia32_vcvtqq2ph512_mask_round ((__v8di)(B), \
- _mm_setzero_ph (), \
- (A), \
- (C)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vcvtuqq2ph. */
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvtepu64_ph (__m512i __A)
- {
- return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __A,
- _mm_setzero_ph (),
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvtepu64_ph (__m128h __A, __mmask8 __B, __m512i __C)
- {
- return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __C,
- __A,
- __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvtepu64_ph (__mmask8 __A, __m512i __B)
- {
- return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __B,
- _mm_setzero_ph (),
- __A,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvt_roundepu64_ph (__m512i __A, int __B)
- {
- return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __A,
- _mm_setzero_ph (),
- (__mmask8) -1,
- __B);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvt_roundepu64_ph (__m128h __A, __mmask8 __B, __m512i __C, int __D)
- {
- return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __C,
- __A,
- __B,
- __D);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvt_roundepu64_ph (__mmask8 __A, __m512i __B, int __C)
- {
- return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __B,
- _mm_setzero_ph (),
- __A,
- __C);
- }
- #else
- #define _mm512_cvt_roundepu64_ph(A, B) \
- (__builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di)(A), \
- _mm_setzero_ph (), \
- (__mmask8)-1, \
- (B)))
- #define _mm512_mask_cvt_roundepu64_ph(A, B, C, D) \
- (__builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di)(C), (A), (B), (D)))
- #define _mm512_maskz_cvt_roundepu64_ph(A, B, C) \
- (__builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di)(B), \
- _mm_setzero_ph (), \
- (A), \
- (C)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vcvtph2w. */
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvtph_epi16 (__m512h __A)
- {
- return (__m512i)
- __builtin_ia32_vcvtph2w512_mask_round (__A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvtph_epi16 (__m512i __A, __mmask32 __B, __m512h __C)
- {
- return (__m512i)
- __builtin_ia32_vcvtph2w512_mask_round (__C,
- (__v32hi) __A,
- __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvtph_epi16 (__mmask32 __A, __m512h __B)
- {
- return (__m512i)
- __builtin_ia32_vcvtph2w512_mask_round (__B,
- (__v32hi)
- _mm512_setzero_si512 (),
- __A,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvt_roundph_epi16 (__m512h __A, int __B)
- {
- return (__m512i)
- __builtin_ia32_vcvtph2w512_mask_round (__A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) -1,
- __B);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvt_roundph_epi16 (__m512i __A, __mmask32 __B, __m512h __C, int __D)
- {
- return (__m512i)
- __builtin_ia32_vcvtph2w512_mask_round (__C,
- (__v32hi) __A,
- __B,
- __D);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvt_roundph_epi16 (__mmask32 __A, __m512h __B, int __C)
- {
- return (__m512i)
- __builtin_ia32_vcvtph2w512_mask_round (__B,
- (__v32hi)
- _mm512_setzero_si512 (),
- __A,
- __C);
- }
- #else
- #define _mm512_cvt_roundph_epi16(A, B) \
- ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((A), \
- (__v32hi) \
- _mm512_setzero_si512 (), \
- (__mmask32)-1, \
- (B)))
- #define _mm512_mask_cvt_roundph_epi16(A, B, C, D) \
- ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((C), \
- (__v32hi)(A), \
- (B), \
- (D)))
- #define _mm512_maskz_cvt_roundph_epi16(A, B, C) \
- ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((B), \
- (__v32hi) \
- _mm512_setzero_si512 (), \
- (A), \
- (C)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vcvtph2uw. */
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvtph_epu16 (__m512h __A)
- {
- return (__m512i)
- __builtin_ia32_vcvtph2uw512_mask_round (__A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvtph_epu16 (__m512i __A, __mmask32 __B, __m512h __C)
- {
- return (__m512i)
- __builtin_ia32_vcvtph2uw512_mask_round (__C, (__v32hi) __A, __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvtph_epu16 (__mmask32 __A, __m512h __B)
- {
- return (__m512i)
- __builtin_ia32_vcvtph2uw512_mask_round (__B,
- (__v32hi)
- _mm512_setzero_si512 (),
- __A,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvt_roundph_epu16 (__m512h __A, int __B)
- {
- return (__m512i)
- __builtin_ia32_vcvtph2uw512_mask_round (__A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) -1,
- __B);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvt_roundph_epu16 (__m512i __A, __mmask32 __B, __m512h __C, int __D)
- {
- return (__m512i)
- __builtin_ia32_vcvtph2uw512_mask_round (__C, (__v32hi) __A, __B, __D);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvt_roundph_epu16 (__mmask32 __A, __m512h __B, int __C)
- {
- return (__m512i)
- __builtin_ia32_vcvtph2uw512_mask_round (__B,
- (__v32hi)
- _mm512_setzero_si512 (),
- __A,
- __C);
- }
- #else
- #define _mm512_cvt_roundph_epu16(A, B) \
- ((__m512i) \
- __builtin_ia32_vcvtph2uw512_mask_round ((A), \
- (__v32hi) \
- _mm512_setzero_si512 (), \
- (__mmask32)-1, (B)))
- #define _mm512_mask_cvt_roundph_epu16(A, B, C, D) \
- ((__m512i) \
- __builtin_ia32_vcvtph2uw512_mask_round ((C), (__v32hi)(A), (B), (D)))
- #define _mm512_maskz_cvt_roundph_epu16(A, B, C) \
- ((__m512i) \
- __builtin_ia32_vcvtph2uw512_mask_round ((B), \
- (__v32hi) \
- _mm512_setzero_si512 (), \
- (A), \
- (C)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vcvttph2w. */
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvttph_epi16 (__m512h __A)
- {
- return (__m512i)
- __builtin_ia32_vcvttph2w512_mask_round (__A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvttph_epi16 (__m512i __A, __mmask32 __B, __m512h __C)
- {
- return (__m512i)
- __builtin_ia32_vcvttph2w512_mask_round (__C,
- (__v32hi) __A,
- __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvttph_epi16 (__mmask32 __A, __m512h __B)
- {
- return (__m512i)
- __builtin_ia32_vcvttph2w512_mask_round (__B,
- (__v32hi)
- _mm512_setzero_si512 (),
- __A,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvtt_roundph_epi16 (__m512h __A, int __B)
- {
- return (__m512i)
- __builtin_ia32_vcvttph2w512_mask_round (__A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) -1,
- __B);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvtt_roundph_epi16 (__m512i __A, __mmask32 __B,
- __m512h __C, int __D)
- {
- return (__m512i)
- __builtin_ia32_vcvttph2w512_mask_round (__C,
- (__v32hi) __A,
- __B,
- __D);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvtt_roundph_epi16 (__mmask32 __A, __m512h __B, int __C)
- {
- return (__m512i)
- __builtin_ia32_vcvttph2w512_mask_round (__B,
- (__v32hi)
- _mm512_setzero_si512 (),
- __A,
- __C);
- }
- #else
- #define _mm512_cvtt_roundph_epi16(A, B) \
- ((__m512i) \
- __builtin_ia32_vcvttph2w512_mask_round ((A), \
- (__v32hi) \
- _mm512_setzero_si512 (), \
- (__mmask32)-1, \
- (B)))
- #define _mm512_mask_cvtt_roundph_epi16(A, B, C, D) \
- ((__m512i) \
- __builtin_ia32_vcvttph2w512_mask_round ((C), \
- (__v32hi)(A), \
- (B), \
- (D)))
- #define _mm512_maskz_cvtt_roundph_epi16(A, B, C) \
- ((__m512i) \
- __builtin_ia32_vcvttph2w512_mask_round ((B), \
- (__v32hi) \
- _mm512_setzero_si512 (), \
- (A), \
- (C)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vcvttph2uw. */
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvttph_epu16 (__m512h __A)
- {
- return (__m512i)
- __builtin_ia32_vcvttph2uw512_mask_round (__A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvttph_epu16 (__m512i __A, __mmask32 __B, __m512h __C)
- {
- return (__m512i)
- __builtin_ia32_vcvttph2uw512_mask_round (__C,
- (__v32hi) __A,
- __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvttph_epu16 (__mmask32 __A, __m512h __B)
- {
- return (__m512i)
- __builtin_ia32_vcvttph2uw512_mask_round (__B,
- (__v32hi)
- _mm512_setzero_si512 (),
- __A,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvtt_roundph_epu16 (__m512h __A, int __B)
- {
- return (__m512i)
- __builtin_ia32_vcvttph2uw512_mask_round (__A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) -1,
- __B);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvtt_roundph_epu16 (__m512i __A, __mmask32 __B,
- __m512h __C, int __D)
- {
- return (__m512i)
- __builtin_ia32_vcvttph2uw512_mask_round (__C,
- (__v32hi) __A,
- __B,
- __D);
- }
- extern __inline __m512i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvtt_roundph_epu16 (__mmask32 __A, __m512h __B, int __C)
- {
- return (__m512i)
- __builtin_ia32_vcvttph2uw512_mask_round (__B,
- (__v32hi)
- _mm512_setzero_si512 (),
- __A,
- __C);
- }
- #else
- #define _mm512_cvtt_roundph_epu16(A, B) \
- ((__m512i) \
- __builtin_ia32_vcvttph2uw512_mask_round ((A), \
- (__v32hi) \
- _mm512_setzero_si512 (), \
- (__mmask32)-1, \
- (B)))
- #define _mm512_mask_cvtt_roundph_epu16(A, B, C, D) \
- ((__m512i) \
- __builtin_ia32_vcvttph2uw512_mask_round ((C), \
- (__v32hi)(A), \
- (B), \
- (D)))
- #define _mm512_maskz_cvtt_roundph_epu16(A, B, C) \
- ((__m512i) \
- __builtin_ia32_vcvttph2uw512_mask_round ((B), \
- (__v32hi) \
- _mm512_setzero_si512 (), \
- (A), \
- (C)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vcvtw2ph. */
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvtepi16_ph (__m512i __A)
- {
- return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __A,
- _mm512_setzero_ph (),
- (__mmask32) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvtepi16_ph (__m512h __A, __mmask32 __B, __m512i __C)
- {
- return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __C,
- __A,
- __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvtepi16_ph (__mmask32 __A, __m512i __B)
- {
- return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __B,
- _mm512_setzero_ph (),
- __A,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvt_roundepi16_ph (__m512i __A, int __B)
- {
- return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __A,
- _mm512_setzero_ph (),
- (__mmask32) -1,
- __B);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvt_roundepi16_ph (__m512h __A, __mmask32 __B, __m512i __C, int __D)
- {
- return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __C,
- __A,
- __B,
- __D);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvt_roundepi16_ph (__mmask32 __A, __m512i __B, int __C)
- {
- return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __B,
- _mm512_setzero_ph (),
- __A,
- __C);
- }
- #else
- #define _mm512_cvt_roundepi16_ph(A, B) \
- (__builtin_ia32_vcvtw2ph512_mask_round ((__v32hi)(A), \
- _mm512_setzero_ph (), \
- (__mmask32)-1, \
- (B)))
- #define _mm512_mask_cvt_roundepi16_ph(A, B, C, D) \
- (__builtin_ia32_vcvtw2ph512_mask_round ((__v32hi)(C), \
- (A), \
- (B), \
- (D)))
- #define _mm512_maskz_cvt_roundepi16_ph(A, B, C) \
- (__builtin_ia32_vcvtw2ph512_mask_round ((__v32hi)(B), \
- _mm512_setzero_ph (), \
- (A), \
- (C)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vcvtuw2ph. */
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvtepu16_ph (__m512i __A)
- {
- return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __A,
- _mm512_setzero_ph (),
- (__mmask32) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvtepu16_ph (__m512h __A, __mmask32 __B, __m512i __C)
- {
- return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __C,
- __A,
- __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvtepu16_ph (__mmask32 __A, __m512i __B)
- {
- return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __B,
- _mm512_setzero_ph (),
- __A,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvt_roundepu16_ph (__m512i __A, int __B)
- {
- return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __A,
- _mm512_setzero_ph (),
- (__mmask32) -1,
- __B);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvt_roundepu16_ph (__m512h __A, __mmask32 __B, __m512i __C, int __D)
- {
- return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __C,
- __A,
- __B,
- __D);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvt_roundepu16_ph (__mmask32 __A, __m512i __B, int __C)
- {
- return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __B,
- _mm512_setzero_ph (),
- __A,
- __C);
- }
- #else
- #define _mm512_cvt_roundepu16_ph(A, B) \
- (__builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi)(A), \
- _mm512_setzero_ph (), \
- (__mmask32)-1, \
- (B)))
- #define _mm512_mask_cvt_roundepu16_ph(A, B, C, D) \
- (__builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi)(C), \
- (A), \
- (B), \
- (D)))
- #define _mm512_maskz_cvt_roundepu16_ph(A, B, C) \
- (__builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi)(B), \
- _mm512_setzero_ph (), \
- (A), \
- (C)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vcvtsh2si, vcvtsh2us. */
- extern __inline int
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvtsh_i32 (__m128h __A)
- {
- return (int) __builtin_ia32_vcvtsh2si32_round (__A, _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline unsigned
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvtsh_u32 (__m128h __A)
- {
- return (int) __builtin_ia32_vcvtsh2usi32_round (__A,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline int
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvt_roundsh_i32 (__m128h __A, const int __R)
- {
- return (int) __builtin_ia32_vcvtsh2si32_round (__A, __R);
- }
- extern __inline unsigned
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvt_roundsh_u32 (__m128h __A, const int __R)
- {
- return (int) __builtin_ia32_vcvtsh2usi32_round (__A, __R);
- }
- #else
- #define _mm_cvt_roundsh_i32(A, B) \
- ((int)__builtin_ia32_vcvtsh2si32_round ((A), (B)))
- #define _mm_cvt_roundsh_u32(A, B) \
- ((int)__builtin_ia32_vcvtsh2usi32_round ((A), (B)))
- #endif /* __OPTIMIZE__ */
- #ifdef __x86_64__
- extern __inline long long
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvtsh_i64 (__m128h __A)
- {
- return (long long)
- __builtin_ia32_vcvtsh2si64_round (__A, _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline unsigned long long
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvtsh_u64 (__m128h __A)
- {
- return (long long)
- __builtin_ia32_vcvtsh2usi64_round (__A, _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline long long
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvt_roundsh_i64 (__m128h __A, const int __R)
- {
- return (long long) __builtin_ia32_vcvtsh2si64_round (__A, __R);
- }
- extern __inline unsigned long long
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvt_roundsh_u64 (__m128h __A, const int __R)
- {
- return (long long) __builtin_ia32_vcvtsh2usi64_round (__A, __R);
- }
- #else
- #define _mm_cvt_roundsh_i64(A, B) \
- ((long long)__builtin_ia32_vcvtsh2si64_round ((A), (B)))
- #define _mm_cvt_roundsh_u64(A, B) \
- ((long long)__builtin_ia32_vcvtsh2usi64_round ((A), (B)))
- #endif /* __OPTIMIZE__ */
- #endif /* __x86_64__ */
- /* Intrinsics vcvttsh2si, vcvttsh2us. */
- extern __inline int
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvttsh_i32 (__m128h __A)
- {
- return (int)
- __builtin_ia32_vcvttsh2si32_round (__A, _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline unsigned
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvttsh_u32 (__m128h __A)
- {
- return (int)
- __builtin_ia32_vcvttsh2usi32_round (__A, _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline int
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvtt_roundsh_i32 (__m128h __A, const int __R)
- {
- return (int) __builtin_ia32_vcvttsh2si32_round (__A, __R);
- }
- extern __inline unsigned
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvtt_roundsh_u32 (__m128h __A, const int __R)
- {
- return (int) __builtin_ia32_vcvttsh2usi32_round (__A, __R);
- }
- #else
- #define _mm_cvtt_roundsh_i32(A, B) \
- ((int)__builtin_ia32_vcvttsh2si32_round ((A), (B)))
- #define _mm_cvtt_roundsh_u32(A, B) \
- ((int)__builtin_ia32_vcvttsh2usi32_round ((A), (B)))
- #endif /* __OPTIMIZE__ */
- #ifdef __x86_64__
- extern __inline long long
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvttsh_i64 (__m128h __A)
- {
- return (long long)
- __builtin_ia32_vcvttsh2si64_round (__A, _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline unsigned long long
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvttsh_u64 (__m128h __A)
- {
- return (long long)
- __builtin_ia32_vcvttsh2usi64_round (__A, _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline long long
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvtt_roundsh_i64 (__m128h __A, const int __R)
- {
- return (long long) __builtin_ia32_vcvttsh2si64_round (__A, __R);
- }
- extern __inline unsigned long long
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvtt_roundsh_u64 (__m128h __A, const int __R)
- {
- return (long long) __builtin_ia32_vcvttsh2usi64_round (__A, __R);
- }
- #else
- #define _mm_cvtt_roundsh_i64(A, B) \
- ((long long)__builtin_ia32_vcvttsh2si64_round ((A), (B)))
- #define _mm_cvtt_roundsh_u64(A, B) \
- ((long long)__builtin_ia32_vcvttsh2usi64_round ((A), (B)))
- #endif /* __OPTIMIZE__ */
- #endif /* __x86_64__ */
- /* Intrinsics vcvtsi2sh, vcvtusi2sh. */
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvti32_sh (__m128h __A, int __B)
- {
- return __builtin_ia32_vcvtsi2sh32_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvtu32_sh (__m128h __A, unsigned int __B)
- {
- return __builtin_ia32_vcvtusi2sh32_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvt_roundi32_sh (__m128h __A, int __B, const int __R)
- {
- return __builtin_ia32_vcvtsi2sh32_round (__A, __B, __R);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvt_roundu32_sh (__m128h __A, unsigned int __B, const int __R)
- {
- return __builtin_ia32_vcvtusi2sh32_round (__A, __B, __R);
- }
- #else
- #define _mm_cvt_roundi32_sh(A, B, C) \
- (__builtin_ia32_vcvtsi2sh32_round ((A), (B), (C)))
- #define _mm_cvt_roundu32_sh(A, B, C) \
- (__builtin_ia32_vcvtusi2sh32_round ((A), (B), (C)))
- #endif /* __OPTIMIZE__ */
- #ifdef __x86_64__
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvti64_sh (__m128h __A, long long __B)
- {
- return __builtin_ia32_vcvtsi2sh64_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvtu64_sh (__m128h __A, unsigned long long __B)
- {
- return __builtin_ia32_vcvtusi2sh64_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvt_roundi64_sh (__m128h __A, long long __B, const int __R)
- {
- return __builtin_ia32_vcvtsi2sh64_round (__A, __B, __R);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvt_roundu64_sh (__m128h __A, unsigned long long __B, const int __R)
- {
- return __builtin_ia32_vcvtusi2sh64_round (__A, __B, __R);
- }
- #else
- #define _mm_cvt_roundi64_sh(A, B, C) \
- (__builtin_ia32_vcvtsi2sh64_round ((A), (B), (C)))
- #define _mm_cvt_roundu64_sh(A, B, C) \
- (__builtin_ia32_vcvtusi2sh64_round ((A), (B), (C)))
- #endif /* __OPTIMIZE__ */
- #endif /* __x86_64__ */
- /* Intrinsics vcvtph2pd. */
- extern __inline __m512d
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvtph_pd (__m128h __A)
- {
- return __builtin_ia32_vcvtph2pd512_mask_round (__A,
- _mm512_setzero_pd (),
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512d
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvtph_pd (__m512d __A, __mmask8 __B, __m128h __C)
- {
- return __builtin_ia32_vcvtph2pd512_mask_round (__C, __A, __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512d
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvtph_pd (__mmask8 __A, __m128h __B)
- {
- return __builtin_ia32_vcvtph2pd512_mask_round (__B,
- _mm512_setzero_pd (),
- __A,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m512d
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvt_roundph_pd (__m128h __A, int __B)
- {
- return __builtin_ia32_vcvtph2pd512_mask_round (__A,
- _mm512_setzero_pd (),
- (__mmask8) -1,
- __B);
- }
- extern __inline __m512d
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvt_roundph_pd (__m512d __A, __mmask8 __B, __m128h __C, int __D)
- {
- return __builtin_ia32_vcvtph2pd512_mask_round (__C, __A, __B, __D);
- }
- extern __inline __m512d
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvt_roundph_pd (__mmask8 __A, __m128h __B, int __C)
- {
- return __builtin_ia32_vcvtph2pd512_mask_round (__B,
- _mm512_setzero_pd (),
- __A,
- __C);
- }
- #else
- #define _mm512_cvt_roundph_pd(A, B) \
- (__builtin_ia32_vcvtph2pd512_mask_round ((A), \
- _mm512_setzero_pd (), \
- (__mmask8)-1, \
- (B)))
- #define _mm512_mask_cvt_roundph_pd(A, B, C, D) \
- (__builtin_ia32_vcvtph2pd512_mask_round ((C), (A), (B), (D)))
- #define _mm512_maskz_cvt_roundph_pd(A, B, C) \
- (__builtin_ia32_vcvtph2pd512_mask_round ((B), \
- _mm512_setzero_pd (), \
- (A), \
- (C)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vcvtph2psx. */
- extern __inline __m512
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvtxph_ps (__m256h __A)
- {
- return __builtin_ia32_vcvtph2psx512_mask_round (__A,
- _mm512_setzero_ps (),
- (__mmask16) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvtxph_ps (__m512 __A, __mmask16 __B, __m256h __C)
- {
- return __builtin_ia32_vcvtph2psx512_mask_round (__C, __A, __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvtxph_ps (__mmask16 __A, __m256h __B)
- {
- return __builtin_ia32_vcvtph2psx512_mask_round (__B,
- _mm512_setzero_ps (),
- __A,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m512
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvtx_roundph_ps (__m256h __A, int __B)
- {
- return __builtin_ia32_vcvtph2psx512_mask_round (__A,
- _mm512_setzero_ps (),
- (__mmask16) -1,
- __B);
- }
- extern __inline __m512
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvtx_roundph_ps (__m512 __A, __mmask16 __B, __m256h __C, int __D)
- {
- return __builtin_ia32_vcvtph2psx512_mask_round (__C, __A, __B, __D);
- }
- extern __inline __m512
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvtx_roundph_ps (__mmask16 __A, __m256h __B, int __C)
- {
- return __builtin_ia32_vcvtph2psx512_mask_round (__B,
- _mm512_setzero_ps (),
- __A,
- __C);
- }
- #else
- #define _mm512_cvtx_roundph_ps(A, B) \
- (__builtin_ia32_vcvtph2psx512_mask_round ((A), \
- _mm512_setzero_ps (), \
- (__mmask16)-1, \
- (B)))
- #define _mm512_mask_cvtx_roundph_ps(A, B, C, D) \
- (__builtin_ia32_vcvtph2psx512_mask_round ((C), (A), (B), (D)))
- #define _mm512_maskz_cvtx_roundph_ps(A, B, C) \
- (__builtin_ia32_vcvtph2psx512_mask_round ((B), \
- _mm512_setzero_ps (), \
- (A), \
- (C)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vcvtps2ph. */
- extern __inline __m256h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvtxps_ph (__m512 __A)
- {
- return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __A,
- _mm256_setzero_ph (),
- (__mmask16) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m256h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvtxps_ph (__m256h __A, __mmask16 __B, __m512 __C)
- {
- return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __C,
- __A, __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m256h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvtxps_ph (__mmask16 __A, __m512 __B)
- {
- return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __B,
- _mm256_setzero_ph (),
- __A,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m256h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvtx_roundps_ph (__m512 __A, int __B)
- {
- return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __A,
- _mm256_setzero_ph (),
- (__mmask16) -1,
- __B);
- }
- extern __inline __m256h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvtx_roundps_ph (__m256h __A, __mmask16 __B, __m512 __C, int __D)
- {
- return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __C,
- __A, __B, __D);
- }
- extern __inline __m256h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvtx_roundps_ph (__mmask16 __A, __m512 __B, int __C)
- {
- return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __B,
- _mm256_setzero_ph (),
- __A, __C);
- }
- #else
- #define _mm512_cvtx_roundps_ph(A, B) \
- (__builtin_ia32_vcvtps2phx512_mask_round ((__v16sf)(A), \
- _mm256_setzero_ph (),\
- (__mmask16)-1, (B)))
- #define _mm512_mask_cvtx_roundps_ph(A, B, C, D) \
- (__builtin_ia32_vcvtps2phx512_mask_round ((__v16sf)(C), \
- (A), (B), (D)))
- #define _mm512_maskz_cvtx_roundps_ph(A, B, C) \
- (__builtin_ia32_vcvtps2phx512_mask_round ((__v16sf)(B), \
- _mm256_setzero_ph (),\
- (A), (C)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vcvtpd2ph. */
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvtpd_ph (__m512d __A)
- {
- return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __A,
- _mm_setzero_ph (),
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvtpd_ph (__m128h __A, __mmask8 __B, __m512d __C)
- {
- return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __C,
- __A, __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvtpd_ph (__mmask8 __A, __m512d __B)
- {
- return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __B,
- _mm_setzero_ph (),
- __A,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_cvt_roundpd_ph (__m512d __A, int __B)
- {
- return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __A,
- _mm_setzero_ph (),
- (__mmask8) -1,
- __B);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_cvt_roundpd_ph (__m128h __A, __mmask8 __B, __m512d __C, int __D)
- {
- return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __C,
- __A, __B, __D);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_cvt_roundpd_ph (__mmask8 __A, __m512d __B, int __C)
- {
- return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __B,
- _mm_setzero_ph (),
- __A, __C);
- }
- #else
- #define _mm512_cvt_roundpd_ph(A, B) \
- (__builtin_ia32_vcvtpd2ph512_mask_round ((__v8df)(A), \
- _mm_setzero_ph (), \
- (__mmask8)-1, (B)))
- #define _mm512_mask_cvt_roundpd_ph(A, B, C, D) \
- (__builtin_ia32_vcvtpd2ph512_mask_round ((__v8df)(C), \
- (A), (B), (D)))
- #define _mm512_maskz_cvt_roundpd_ph(A, B, C) \
- (__builtin_ia32_vcvtpd2ph512_mask_round ((__v8df)(B), \
- _mm_setzero_ph (), \
- (A), (C)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vcvtsh2ss, vcvtsh2sd. */
- extern __inline __m128
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvtsh_ss (__m128 __A, __m128h __B)
- {
- return __builtin_ia32_vcvtsh2ss_mask_round (__B, __A,
- _mm_setzero_ps (),
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_cvtsh_ss (__m128 __A, __mmask8 __B, __m128 __C,
- __m128h __D)
- {
- return __builtin_ia32_vcvtsh2ss_mask_round (__D, __C, __A, __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_cvtsh_ss (__mmask8 __A, __m128 __B,
- __m128h __C)
- {
- return __builtin_ia32_vcvtsh2ss_mask_round (__C, __B,
- _mm_setzero_ps (),
- __A, _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128d
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvtsh_sd (__m128d __A, __m128h __B)
- {
- return __builtin_ia32_vcvtsh2sd_mask_round (__B, __A,
- _mm_setzero_pd (),
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128d
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_cvtsh_sd (__m128d __A, __mmask8 __B, __m128d __C,
- __m128h __D)
- {
- return __builtin_ia32_vcvtsh2sd_mask_round (__D, __C, __A, __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128d
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_cvtsh_sd (__mmask8 __A, __m128d __B, __m128h __C)
- {
- return __builtin_ia32_vcvtsh2sd_mask_round (__C, __B,
- _mm_setzero_pd (),
- __A, _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m128
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvt_roundsh_ss (__m128 __A, __m128h __B, const int __R)
- {
- return __builtin_ia32_vcvtsh2ss_mask_round (__B, __A,
- _mm_setzero_ps (),
- (__mmask8) -1, __R);
- }
- extern __inline __m128
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_cvt_roundsh_ss (__m128 __A, __mmask8 __B, __m128 __C,
- __m128h __D, const int __R)
- {
- return __builtin_ia32_vcvtsh2ss_mask_round (__D, __C, __A, __B, __R);
- }
- extern __inline __m128
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_cvt_roundsh_ss (__mmask8 __A, __m128 __B,
- __m128h __C, const int __R)
- {
- return __builtin_ia32_vcvtsh2ss_mask_round (__C, __B,
- _mm_setzero_ps (),
- __A, __R);
- }
- extern __inline __m128d
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvt_roundsh_sd (__m128d __A, __m128h __B, const int __R)
- {
- return __builtin_ia32_vcvtsh2sd_mask_round (__B, __A,
- _mm_setzero_pd (),
- (__mmask8) -1, __R);
- }
- extern __inline __m128d
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_cvt_roundsh_sd (__m128d __A, __mmask8 __B, __m128d __C,
- __m128h __D, const int __R)
- {
- return __builtin_ia32_vcvtsh2sd_mask_round (__D, __C, __A, __B, __R);
- }
- extern __inline __m128d
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_cvt_roundsh_sd (__mmask8 __A, __m128d __B, __m128h __C, const int __R)
- {
- return __builtin_ia32_vcvtsh2sd_mask_round (__C, __B,
- _mm_setzero_pd (),
- __A, __R);
- }
- #else
- #define _mm_cvt_roundsh_ss(A, B, R) \
- (__builtin_ia32_vcvtsh2ss_mask_round ((B), (A), \
- _mm_setzero_ps (), \
- (__mmask8) -1, (R)))
- #define _mm_mask_cvt_roundsh_ss(A, B, C, D, R) \
- (__builtin_ia32_vcvtsh2ss_mask_round ((D), (C), (A), (B), (R)))
- #define _mm_maskz_cvt_roundsh_ss(A, B, C, R) \
- (__builtin_ia32_vcvtsh2ss_mask_round ((C), (B), \
- _mm_setzero_ps (), \
- (A), (R)))
- #define _mm_cvt_roundsh_sd(A, B, R) \
- (__builtin_ia32_vcvtsh2sd_mask_round ((B), (A), \
- _mm_setzero_pd (), \
- (__mmask8) -1, (R)))
- #define _mm_mask_cvt_roundsh_sd(A, B, C, D, R) \
- (__builtin_ia32_vcvtsh2sd_mask_round ((D), (C), (A), (B), (R)))
- #define _mm_maskz_cvt_roundsh_sd(A, B, C, R) \
- (__builtin_ia32_vcvtsh2sd_mask_round ((C), (B), \
- _mm_setzero_pd (), \
- (A), (R)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vcvtss2sh, vcvtsd2sh. */
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvtss_sh (__m128h __A, __m128 __B)
- {
- return __builtin_ia32_vcvtss2sh_mask_round (__B, __A,
- _mm_setzero_ph (),
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_cvtss_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128 __D)
- {
- return __builtin_ia32_vcvtss2sh_mask_round (__D, __C, __A, __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_cvtss_sh (__mmask8 __A, __m128h __B, __m128 __C)
- {
- return __builtin_ia32_vcvtss2sh_mask_round (__C, __B,
- _mm_setzero_ph (),
- __A, _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvtsd_sh (__m128h __A, __m128d __B)
- {
- return __builtin_ia32_vcvtsd2sh_mask_round (__B, __A,
- _mm_setzero_ph (),
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_cvtsd_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128d __D)
- {
- return __builtin_ia32_vcvtsd2sh_mask_round (__D, __C, __A, __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_cvtsd_sh (__mmask8 __A, __m128h __B, __m128d __C)
- {
- return __builtin_ia32_vcvtsd2sh_mask_round (__C, __B,
- _mm_setzero_ph (),
- __A, _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvt_roundss_sh (__m128h __A, __m128 __B, const int __R)
- {
- return __builtin_ia32_vcvtss2sh_mask_round (__B, __A,
- _mm_setzero_ph (),
- (__mmask8) -1, __R);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_cvt_roundss_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128 __D,
- const int __R)
- {
- return __builtin_ia32_vcvtss2sh_mask_round (__D, __C, __A, __B, __R);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_cvt_roundss_sh (__mmask8 __A, __m128h __B, __m128 __C,
- const int __R)
- {
- return __builtin_ia32_vcvtss2sh_mask_round (__C, __B,
- _mm_setzero_ph (),
- __A, __R);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvt_roundsd_sh (__m128h __A, __m128d __B, const int __R)
- {
- return __builtin_ia32_vcvtsd2sh_mask_round (__B, __A,
- _mm_setzero_ph (),
- (__mmask8) -1, __R);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_cvt_roundsd_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128d __D,
- const int __R)
- {
- return __builtin_ia32_vcvtsd2sh_mask_round (__D, __C, __A, __B, __R);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_cvt_roundsd_sh (__mmask8 __A, __m128h __B, __m128d __C,
- const int __R)
- {
- return __builtin_ia32_vcvtsd2sh_mask_round (__C, __B,
- _mm_setzero_ph (),
- __A, __R);
- }
- #else
- #define _mm_cvt_roundss_sh(A, B, R) \
- (__builtin_ia32_vcvtss2sh_mask_round ((B), (A), \
- _mm_setzero_ph (), \
- (__mmask8) -1, R))
- #define _mm_mask_cvt_roundss_sh(A, B, C, D, R) \
- (__builtin_ia32_vcvtss2sh_mask_round ((D), (C), (A), (B), (R)))
- #define _mm_maskz_cvt_roundss_sh(A, B, C, R) \
- (__builtin_ia32_vcvtss2sh_mask_round ((C), (B), \
- _mm_setzero_ph (), \
- A, R))
- #define _mm_cvt_roundsd_sh(A, B, R) \
- (__builtin_ia32_vcvtsd2sh_mask_round ((B), (A), \
- _mm_setzero_ph (), \
- (__mmask8) -1, R))
- #define _mm_mask_cvt_roundsd_sh(A, B, C, D, R) \
- (__builtin_ia32_vcvtsd2sh_mask_round ((D), (C), (A), (B), (R)))
- #define _mm_maskz_cvt_roundsd_sh(A, B, C, R) \
- (__builtin_ia32_vcvtsd2sh_mask_round ((C), (B), \
- _mm_setzero_ph (), \
- (A), (R)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vfmaddsub[132,213,231]ph. */
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_fmaddsub_ph (__m512h __A, __m512h __B, __m512h __C)
- {
- return (__m512h)
- __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_fmaddsub_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
- {
- return (__m512h)
- __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) __U,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask3_fmaddsub_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
- {
- return (__m512h)
- __builtin_ia32_vfmaddsubph512_mask3 ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) __U,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_fmaddsub_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
- {
- return (__m512h)
- __builtin_ia32_vfmaddsubph512_maskz ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) __U,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_fmaddsub_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
- {
- return (__m512h)
- __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) -1, __R);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_fmaddsub_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
- __m512h __C, const int __R)
- {
- return (__m512h)
- __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) __U, __R);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask3_fmaddsub_round_ph (__m512h __A, __m512h __B, __m512h __C,
- __mmask32 __U, const int __R)
- {
- return (__m512h)
- __builtin_ia32_vfmaddsubph512_mask3 ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) __U, __R);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_fmaddsub_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
- __m512h __C, const int __R)
- {
- return (__m512h)
- __builtin_ia32_vfmaddsubph512_maskz ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) __U, __R);
- }
- #else
- #define _mm512_fmaddsub_round_ph(A, B, C, R) \
- ((__m512h)__builtin_ia32_vfmaddsubph512_mask ((A), (B), (C), -1, (R)))
- #define _mm512_mask_fmaddsub_round_ph(A, U, B, C, R) \
- ((__m512h)__builtin_ia32_vfmaddsubph512_mask ((A), (B), (C), (U), (R)))
- #define _mm512_mask3_fmaddsub_round_ph(A, B, C, U, R) \
- ((__m512h)__builtin_ia32_vfmaddsubph512_mask3 ((A), (B), (C), (U), (R)))
- #define _mm512_maskz_fmaddsub_round_ph(U, A, B, C, R) \
- ((__m512h)__builtin_ia32_vfmaddsubph512_maskz ((A), (B), (C), (U), (R)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vfmsubadd[132,213,231]ph. */
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_fmsubadd_ph (__m512h __A, __m512h __B, __m512h __C)
- {
- return (__m512h)
- __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_fmsubadd_ph (__m512h __A, __mmask32 __U,
- __m512h __B, __m512h __C)
- {
- return (__m512h)
- __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) __U,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask3_fmsubadd_ph (__m512h __A, __m512h __B,
- __m512h __C, __mmask32 __U)
- {
- return (__m512h)
- __builtin_ia32_vfmsubaddph512_mask3 ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) __U,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_fmsubadd_ph (__mmask32 __U, __m512h __A,
- __m512h __B, __m512h __C)
- {
- return (__m512h)
- __builtin_ia32_vfmsubaddph512_maskz ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) __U,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_fmsubadd_round_ph (__m512h __A, __m512h __B,
- __m512h __C, const int __R)
- {
- return (__m512h)
- __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) -1, __R);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_fmsubadd_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
- __m512h __C, const int __R)
- {
- return (__m512h)
- __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) __U, __R);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask3_fmsubadd_round_ph (__m512h __A, __m512h __B, __m512h __C,
- __mmask32 __U, const int __R)
- {
- return (__m512h)
- __builtin_ia32_vfmsubaddph512_mask3 ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) __U, __R);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_fmsubadd_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
- __m512h __C, const int __R)
- {
- return (__m512h)
- __builtin_ia32_vfmsubaddph512_maskz ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) __U, __R);
- }
- #else
- #define _mm512_fmsubadd_round_ph(A, B, C, R) \
- ((__m512h)__builtin_ia32_vfmsubaddph512_mask ((A), (B), (C), -1, (R)))
- #define _mm512_mask_fmsubadd_round_ph(A, U, B, C, R) \
- ((__m512h)__builtin_ia32_vfmsubaddph512_mask ((A), (B), (C), (U), (R)))
- #define _mm512_mask3_fmsubadd_round_ph(A, B, C, U, R) \
- ((__m512h)__builtin_ia32_vfmsubaddph512_mask3 ((A), (B), (C), (U), (R)))
- #define _mm512_maskz_fmsubadd_round_ph(U, A, B, C, R) \
- ((__m512h)__builtin_ia32_vfmsubaddph512_maskz ((A), (B), (C), (U), (R)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vfmadd[132,213,231]ph. */
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_fmadd_ph (__m512h __A, __m512h __B, __m512h __C)
- {
- return (__m512h)
- __builtin_ia32_vfmaddph512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_fmadd_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
- {
- return (__m512h)
- __builtin_ia32_vfmaddph512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) __U,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask3_fmadd_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
- {
- return (__m512h)
- __builtin_ia32_vfmaddph512_mask3 ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) __U,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_fmadd_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
- {
- return (__m512h)
- __builtin_ia32_vfmaddph512_maskz ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) __U,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_fmadd_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
- {
- return (__m512h) __builtin_ia32_vfmaddph512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) -1, __R);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_fmadd_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
- __m512h __C, const int __R)
- {
- return (__m512h) __builtin_ia32_vfmaddph512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) __U, __R);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask3_fmadd_round_ph (__m512h __A, __m512h __B, __m512h __C,
- __mmask32 __U, const int __R)
- {
- return (__m512h) __builtin_ia32_vfmaddph512_mask3 ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) __U, __R);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_fmadd_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
- __m512h __C, const int __R)
- {
- return (__m512h) __builtin_ia32_vfmaddph512_maskz ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) __U, __R);
- }
- #else
- #define _mm512_fmadd_round_ph(A, B, C, R) \
- ((__m512h)__builtin_ia32_vfmaddph512_mask ((A), (B), (C), -1, (R)))
- #define _mm512_mask_fmadd_round_ph(A, U, B, C, R) \
- ((__m512h)__builtin_ia32_vfmaddph512_mask ((A), (B), (C), (U), (R)))
- #define _mm512_mask3_fmadd_round_ph(A, B, C, U, R) \
- ((__m512h)__builtin_ia32_vfmaddph512_mask3 ((A), (B), (C), (U), (R)))
- #define _mm512_maskz_fmadd_round_ph(U, A, B, C, R) \
- ((__m512h)__builtin_ia32_vfmaddph512_maskz ((A), (B), (C), (U), (R)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vfnmadd[132,213,231]ph. */
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_fnmadd_ph (__m512h __A, __m512h __B, __m512h __C)
- {
- return (__m512h)
- __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_fnmadd_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
- {
- return (__m512h)
- __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) __U,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask3_fnmadd_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
- {
- return (__m512h)
- __builtin_ia32_vfnmaddph512_mask3 ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) __U,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_fnmadd_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
- {
- return (__m512h)
- __builtin_ia32_vfnmaddph512_maskz ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) __U,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_fnmadd_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
- {
- return (__m512h) __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) -1, __R);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_fnmadd_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
- __m512h __C, const int __R)
- {
- return (__m512h) __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) __U, __R);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask3_fnmadd_round_ph (__m512h __A, __m512h __B, __m512h __C,
- __mmask32 __U, const int __R)
- {
- return (__m512h) __builtin_ia32_vfnmaddph512_mask3 ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) __U, __R);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_fnmadd_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
- __m512h __C, const int __R)
- {
- return (__m512h) __builtin_ia32_vfnmaddph512_maskz ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) __U, __R);
- }
- #else
- #define _mm512_fnmadd_round_ph(A, B, C, R) \
- ((__m512h)__builtin_ia32_vfnmaddph512_mask ((A), (B), (C), -1, (R)))
- #define _mm512_mask_fnmadd_round_ph(A, U, B, C, R) \
- ((__m512h)__builtin_ia32_vfnmaddph512_mask ((A), (B), (C), (U), (R)))
- #define _mm512_mask3_fnmadd_round_ph(A, B, C, U, R) \
- ((__m512h)__builtin_ia32_vfnmaddph512_mask3 ((A), (B), (C), (U), (R)))
- #define _mm512_maskz_fnmadd_round_ph(U, A, B, C, R) \
- ((__m512h)__builtin_ia32_vfnmaddph512_maskz ((A), (B), (C), (U), (R)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vfmsub[132,213,231]ph. */
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_fmsub_ph (__m512h __A, __m512h __B, __m512h __C)
- {
- return (__m512h)
- __builtin_ia32_vfmsubph512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_fmsub_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
- {
- return (__m512h)
- __builtin_ia32_vfmsubph512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) __U,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask3_fmsub_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
- {
- return (__m512h)
- __builtin_ia32_vfmsubph512_mask3 ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) __U,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_fmsub_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
- {
- return (__m512h)
- __builtin_ia32_vfmsubph512_maskz ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) __U,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_fmsub_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
- {
- return (__m512h) __builtin_ia32_vfmsubph512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) -1, __R);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_fmsub_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
- __m512h __C, const int __R)
- {
- return (__m512h) __builtin_ia32_vfmsubph512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) __U, __R);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask3_fmsub_round_ph (__m512h __A, __m512h __B, __m512h __C,
- __mmask32 __U, const int __R)
- {
- return (__m512h) __builtin_ia32_vfmsubph512_mask3 ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) __U, __R);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_fmsub_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
- __m512h __C, const int __R)
- {
- return (__m512h) __builtin_ia32_vfmsubph512_maskz ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) __U, __R);
- }
- #else
- #define _mm512_fmsub_round_ph(A, B, C, R) \
- ((__m512h)__builtin_ia32_vfmsubph512_mask ((A), (B), (C), -1, (R)))
- #define _mm512_mask_fmsub_round_ph(A, U, B, C, R) \
- ((__m512h)__builtin_ia32_vfmsubph512_mask ((A), (B), (C), (U), (R)))
- #define _mm512_mask3_fmsub_round_ph(A, B, C, U, R) \
- ((__m512h)__builtin_ia32_vfmsubph512_mask3 ((A), (B), (C), (U), (R)))
- #define _mm512_maskz_fmsub_round_ph(U, A, B, C, R) \
- ((__m512h)__builtin_ia32_vfmsubph512_maskz ((A), (B), (C), (U), (R)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vfnmsub[132,213,231]ph. */
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_fnmsub_ph (__m512h __A, __m512h __B, __m512h __C)
- {
- return (__m512h)
- __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_fnmsub_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
- {
- return (__m512h)
- __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) __U,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask3_fnmsub_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
- {
- return (__m512h)
- __builtin_ia32_vfnmsubph512_mask3 ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) __U,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_fnmsub_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
- {
- return (__m512h)
- __builtin_ia32_vfnmsubph512_maskz ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) __U,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_fnmsub_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
- {
- return (__m512h) __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) -1, __R);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_fnmsub_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
- __m512h __C, const int __R)
- {
- return (__m512h) __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) __U, __R);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask3_fnmsub_round_ph (__m512h __A, __m512h __B, __m512h __C,
- __mmask32 __U, const int __R)
- {
- return (__m512h) __builtin_ia32_vfnmsubph512_mask3 ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) __U, __R);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_fnmsub_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
- __m512h __C, const int __R)
- {
- return (__m512h) __builtin_ia32_vfnmsubph512_maskz ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- (__mmask32) __U, __R);
- }
- #else
- #define _mm512_fnmsub_round_ph(A, B, C, R) \
- ((__m512h)__builtin_ia32_vfnmsubph512_mask ((A), (B), (C), -1, (R)))
- #define _mm512_mask_fnmsub_round_ph(A, U, B, C, R) \
- ((__m512h)__builtin_ia32_vfnmsubph512_mask ((A), (B), (C), (U), (R)))
- #define _mm512_mask3_fnmsub_round_ph(A, B, C, U, R) \
- ((__m512h)__builtin_ia32_vfnmsubph512_mask3 ((A), (B), (C), (U), (R)))
- #define _mm512_maskz_fnmsub_round_ph(U, A, B, C, R) \
- ((__m512h)__builtin_ia32_vfnmsubph512_maskz ((A), (B), (C), (U), (R)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vfmadd[132,213,231]sh. */
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_fmadd_sh (__m128h __W, __m128h __A, __m128h __B)
- {
- return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
- (__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_fmadd_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
- {
- return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
- (__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask3_fmadd_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
- {
- return (__m128h) __builtin_ia32_vfmaddsh3_mask3 ((__v8hf) __W,
- (__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_fmadd_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
- {
- return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
- (__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_fmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
- {
- return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
- (__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) -1,
- __R);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_fmadd_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
- const int __R)
- {
- return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
- (__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) __U, __R);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask3_fmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
- const int __R)
- {
- return (__m128h) __builtin_ia32_vfmaddsh3_mask3 ((__v8hf) __W,
- (__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) __U, __R);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_fmadd_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
- __m128h __B, const int __R)
- {
- return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
- (__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) __U, __R);
- }
- #else
- #define _mm_fmadd_round_sh(A, B, C, R) \
- ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), (C), (-1), (R)))
- #define _mm_mask_fmadd_round_sh(A, U, B, C, R) \
- ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), (C), (U), (R)))
- #define _mm_mask3_fmadd_round_sh(A, B, C, U, R) \
- ((__m128h) __builtin_ia32_vfmaddsh3_mask3 ((A), (B), (C), (U), (R)))
- #define _mm_maskz_fmadd_round_sh(U, A, B, C, R) \
- ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), (B), (C), (U), (R)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vfnmadd[132,213,231]sh. */
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_fnmadd_sh (__m128h __W, __m128h __A, __m128h __B)
- {
- return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
- (__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_fnmadd_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
- {
- return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
- (__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask3_fnmadd_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
- {
- return (__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((__v8hf) __W,
- (__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_fnmadd_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
- {
- return (__m128h) __builtin_ia32_vfnmaddsh3_maskz ((__v8hf) __W,
- (__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_fnmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
- {
- return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
- (__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) -1,
- __R);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_fnmadd_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
- const int __R)
- {
- return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
- (__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) __U, __R);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask3_fnmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
- const int __R)
- {
- return (__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((__v8hf) __W,
- (__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) __U, __R);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_fnmadd_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
- __m128h __B, const int __R)
- {
- return (__m128h) __builtin_ia32_vfnmaddsh3_maskz ((__v8hf) __W,
- (__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) __U, __R);
- }
- #else
- #define _mm_fnmadd_round_sh(A, B, C, R) \
- ((__m128h) __builtin_ia32_vfnmaddsh3_mask ((A), (B), (C), (-1), (R)))
- #define _mm_mask_fnmadd_round_sh(A, U, B, C, R) \
- ((__m128h) __builtin_ia32_vfnmaddsh3_mask ((A), (B), (C), (U), (R)))
- #define _mm_mask3_fnmadd_round_sh(A, B, C, U, R) \
- ((__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((A), (B), (C), (U), (R)))
- #define _mm_maskz_fnmadd_round_sh(U, A, B, C, R) \
- ((__m128h) __builtin_ia32_vfnmaddsh3_maskz ((A), (B), (C), (U), (R)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vfmsub[132,213,231]sh. */
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_fmsub_sh (__m128h __W, __m128h __A, __m128h __B)
- {
- return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
- (__v8hf) __A,
- -(__v8hf) __B,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_fmsub_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
- {
- return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
- (__v8hf) __A,
- -(__v8hf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask3_fmsub_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
- {
- return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
- (__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_fmsub_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
- {
- return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
- (__v8hf) __A,
- -(__v8hf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_fmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
- {
- return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
- (__v8hf) __A,
- -(__v8hf) __B,
- (__mmask8) -1,
- __R);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_fmsub_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
- const int __R)
- {
- return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
- (__v8hf) __A,
- -(__v8hf) __B,
- (__mmask8) __U, __R);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask3_fmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
- const int __R)
- {
- return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
- (__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) __U, __R);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_fmsub_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
- __m128h __B, const int __R)
- {
- return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
- (__v8hf) __A,
- -(__v8hf) __B,
- (__mmask8) __U, __R);
- }
- #else
- #define _mm_fmsub_round_sh(A, B, C, R) \
- ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), -(C), (-1), (R)))
- #define _mm_mask_fmsub_round_sh(A, U, B, C, R) \
- ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), -(C), (U), (R)))
- #define _mm_mask3_fmsub_round_sh(A, B, C, U, R) \
- ((__m128h) __builtin_ia32_vfmsubsh3_mask3 ((A), (B), (C), (U), (R)))
- #define _mm_maskz_fmsub_round_sh(U, A, B, C, R) \
- ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), (B), -(C), (U), (R)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vfnmsub[132,213,231]sh. */
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_fnmsub_sh (__m128h __W, __m128h __A, __m128h __B)
- {
- return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
- -(__v8hf) __A,
- -(__v8hf) __B,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_fnmsub_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
- {
- return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
- -(__v8hf) __A,
- -(__v8hf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask3_fnmsub_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
- {
- return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
- -(__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_fnmsub_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
- {
- return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
- -(__v8hf) __A,
- -(__v8hf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_fnmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
- {
- return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
- -(__v8hf) __A,
- -(__v8hf) __B,
- (__mmask8) -1,
- __R);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_fnmsub_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
- const int __R)
- {
- return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
- -(__v8hf) __A,
- -(__v8hf) __B,
- (__mmask8) __U, __R);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask3_fnmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
- const int __R)
- {
- return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
- -(__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) __U, __R);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_fnmsub_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
- __m128h __B, const int __R)
- {
- return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
- -(__v8hf) __A,
- -(__v8hf) __B,
- (__mmask8) __U, __R);
- }
- #else
- #define _mm_fnmsub_round_sh(A, B, C, R) \
- ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), -(B), -(C), (-1), (R)))
- #define _mm_mask_fnmsub_round_sh(A, U, B, C, R) \
- ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), -(B), -(C), (U), (R)))
- #define _mm_mask3_fnmsub_round_sh(A, B, C, U, R) \
- ((__m128h) __builtin_ia32_vfmsubsh3_mask3 ((A), -(B), (C), (U), (R)))
- #define _mm_maskz_fnmsub_round_sh(U, A, B, C, R) \
- ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), -(B), -(C), (U), (R)))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vf[,c]maddcph. */
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_fcmadd_pch (__m512h __A, __m512h __B, __m512h __C)
- {
- return (__m512h)
- __builtin_ia32_vfcmaddcph512_round ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_fcmadd_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D)
- {
- return (__m512h)
- __builtin_ia32_vfcmaddcph512_mask_round ((__v32hf) __A,
- (__v32hf) __C,
- (__v32hf) __D, __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask3_fcmadd_pch (__m512h __A, __m512h __B, __m512h __C, __mmask16 __D)
- {
- return (__m512h)
- __builtin_ia32_vfcmaddcph512_mask3_round ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- __D, _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_fcmadd_pch (__mmask16 __A, __m512h __B, __m512h __C, __m512h __D)
- {
- return (__m512h)
- __builtin_ia32_vfcmaddcph512_maskz_round ((__v32hf) __B,
- (__v32hf) __C,
- (__v32hf) __D,
- __A, _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_fmadd_pch (__m512h __A, __m512h __B, __m512h __C)
- {
- return (__m512h)
- __builtin_ia32_vfmaddcph512_round ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_fmadd_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D)
- {
- return (__m512h)
- __builtin_ia32_vfmaddcph512_mask_round ((__v32hf) __A,
- (__v32hf) __C,
- (__v32hf) __D, __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask3_fmadd_pch (__m512h __A, __m512h __B, __m512h __C, __mmask16 __D)
- {
- return (__m512h)
- __builtin_ia32_vfmaddcph512_mask3_round ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- __D, _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_fmadd_pch (__mmask16 __A, __m512h __B, __m512h __C, __m512h __D)
- {
- return (__m512h)
- __builtin_ia32_vfmaddcph512_maskz_round ((__v32hf) __B,
- (__v32hf) __C,
- (__v32hf) __D,
- __A, _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_fcmadd_round_pch (__m512h __A, __m512h __B, __m512h __C, const int __D)
- {
- return (__m512h)
- __builtin_ia32_vfcmaddcph512_round ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- __D);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_fcmadd_round_pch (__m512h __A, __mmask16 __B, __m512h __C,
- __m512h __D, const int __E)
- {
- return (__m512h)
- __builtin_ia32_vfcmaddcph512_mask_round ((__v32hf) __A,
- (__v32hf) __C,
- (__v32hf) __D, __B,
- __E);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask3_fcmadd_round_pch (__m512h __A, __m512h __B, __m512h __C,
- __mmask16 __D, const int __E)
- {
- return (__m512h)
- __builtin_ia32_vfcmaddcph512_mask3_round ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- __D, __E);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_fcmadd_round_pch (__mmask16 __A, __m512h __B, __m512h __C,
- __m512h __D, const int __E)
- {
- return (__m512h)
- __builtin_ia32_vfcmaddcph512_maskz_round ((__v32hf) __B,
- (__v32hf) __C,
- (__v32hf) __D,
- __A, __E);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_fmadd_round_pch (__m512h __A, __m512h __B, __m512h __C, const int __D)
- {
- return (__m512h)
- __builtin_ia32_vfmaddcph512_round ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- __D);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_fmadd_round_pch (__m512h __A, __mmask16 __B, __m512h __C,
- __m512h __D, const int __E)
- {
- return (__m512h)
- __builtin_ia32_vfmaddcph512_mask_round ((__v32hf) __A,
- (__v32hf) __C,
- (__v32hf) __D, __B,
- __E);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask3_fmadd_round_pch (__m512h __A, __m512h __B, __m512h __C,
- __mmask16 __D, const int __E)
- {
- return (__m512h)
- __builtin_ia32_vfmaddcph512_mask3_round ((__v32hf) __A,
- (__v32hf) __B,
- (__v32hf) __C,
- __D, __E);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_fmadd_round_pch (__mmask16 __A, __m512h __B, __m512h __C,
- __m512h __D, const int __E)
- {
- return (__m512h)
- __builtin_ia32_vfmaddcph512_maskz_round ((__v32hf) __B,
- (__v32hf) __C,
- (__v32hf) __D,
- __A, __E);
- }
- #else
- #define _mm512_fcmadd_round_pch(A, B, C, D) \
- (__m512h) __builtin_ia32_vfcmaddcph512_round ((A), (B), (C), (D))
- #define _mm512_mask_fcmadd_round_pch(A, B, C, D, E) \
- ((__m512h) \
- __builtin_ia32_vfcmaddcph512_mask_round ((__v32hf) (A), \
- (__v32hf) (C), \
- (__v32hf) (D), \
- (B), (E)))
- #define _mm512_mask3_fcmadd_round_pch(A, B, C, D, E) \
- ((__m512h) \
- __builtin_ia32_vfcmaddcph512_mask3_round ((A), (B), (C), (D), (E)))
- #define _mm512_maskz_fcmadd_round_pch(A, B, C, D, E) \
- (__m512h) \
- __builtin_ia32_vfcmaddcph512_maskz_round ((B), (C), (D), (A), (E))
- #define _mm512_fmadd_round_pch(A, B, C, D) \
- (__m512h) __builtin_ia32_vfmaddcph512_round ((A), (B), (C), (D))
- #define _mm512_mask_fmadd_round_pch(A, B, C, D, E) \
- ((__m512h) \
- __builtin_ia32_vfmaddcph512_mask_round ((__v32hf) (A), \
- (__v32hf) (C), \
- (__v32hf) (D), \
- (B), (E)))
- #define _mm512_mask3_fmadd_round_pch(A, B, C, D, E) \
- (__m512h) \
- __builtin_ia32_vfmaddcph512_mask3_round ((A), (B), (C), (D), (E))
- #define _mm512_maskz_fmadd_round_pch(A, B, C, D, E) \
- (__m512h) \
- __builtin_ia32_vfmaddcph512_maskz_round ((B), (C), (D), (A), (E))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vf[,c]mulcph. */
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_fcmul_pch (__m512h __A, __m512h __B)
- {
- return (__m512h)
- __builtin_ia32_vfcmulcph512_round ((__v32hf) __A,
- (__v32hf) __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_fcmul_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D)
- {
- return (__m512h)
- __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __C,
- (__v32hf) __D,
- (__v32hf) __A,
- __B, _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_fcmul_pch (__mmask16 __A, __m512h __B, __m512h __C)
- {
- return (__m512h)
- __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __B,
- (__v32hf) __C,
- _mm512_setzero_ph (),
- __A, _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_fmul_pch (__m512h __A, __m512h __B)
- {
- return (__m512h)
- __builtin_ia32_vfmulcph512_round ((__v32hf) __A,
- (__v32hf) __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_fmul_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D)
- {
- return (__m512h)
- __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __C,
- (__v32hf) __D,
- (__v32hf) __A,
- __B, _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_fmul_pch (__mmask16 __A, __m512h __B, __m512h __C)
- {
- return (__m512h)
- __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __B,
- (__v32hf) __C,
- _mm512_setzero_ph (),
- __A, _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_fcmul_round_pch (__m512h __A, __m512h __B, const int __D)
- {
- return (__m512h)
- __builtin_ia32_vfcmulcph512_round ((__v32hf) __A,
- (__v32hf) __B, __D);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_fcmul_round_pch (__m512h __A, __mmask16 __B, __m512h __C,
- __m512h __D, const int __E)
- {
- return (__m512h)
- __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __C,
- (__v32hf) __D,
- (__v32hf) __A,
- __B, __E);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_fcmul_round_pch (__mmask16 __A, __m512h __B,
- __m512h __C, const int __E)
- {
- return (__m512h)
- __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __B,
- (__v32hf) __C,
- _mm512_setzero_ph (),
- __A, __E);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_fmul_round_pch (__m512h __A, __m512h __B, const int __D)
- {
- return (__m512h)
- __builtin_ia32_vfmulcph512_round ((__v32hf) __A,
- (__v32hf) __B,
- __D);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_fmul_round_pch (__m512h __A, __mmask16 __B, __m512h __C,
- __m512h __D, const int __E)
- {
- return (__m512h)
- __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __C,
- (__v32hf) __D,
- (__v32hf) __A,
- __B, __E);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_maskz_fmul_round_pch (__mmask16 __A, __m512h __B,
- __m512h __C, const int __E)
- {
- return (__m512h)
- __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __B,
- (__v32hf) __C,
- _mm512_setzero_ph (),
- __A, __E);
- }
- #else
- #define _mm512_fcmul_round_pch(A, B, D) \
- (__m512h) __builtin_ia32_vfcmulcph512_round ((A), (B), (D))
- #define _mm512_mask_fcmul_round_pch(A, B, C, D, E) \
- (__m512h) __builtin_ia32_vfcmulcph512_mask_round ((C), (D), (A), (B), (E))
- #define _mm512_maskz_fcmul_round_pch(A, B, C, E) \
- (__m512h) __builtin_ia32_vfcmulcph512_mask_round ((B), (C), \
- (__v32hf) \
- _mm512_setzero_ph (), \
- (A), (E))
- #define _mm512_fmul_round_pch(A, B, D) \
- (__m512h) __builtin_ia32_vfmulcph512_round ((A), (B), (D))
- #define _mm512_mask_fmul_round_pch(A, B, C, D, E) \
- (__m512h) __builtin_ia32_vfmulcph512_mask_round ((C), (D), (A), (B), (E))
- #define _mm512_maskz_fmul_round_pch(A, B, C, E) \
- (__m512h) __builtin_ia32_vfmulcph512_mask_round ((B), (C), \
- (__v32hf) \
- _mm512_setzero_ph (), \
- (A), (E))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vf[,c]maddcsh. */
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_fcmadd_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
- {
- return (__m128h)
- __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) __A,
- (__v8hf) __C,
- (__v8hf) __D, __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask3_fcmadd_sch (__m128h __A, __m128h __B, __m128h __C, __mmask8 __D)
- {
- return (__m128h)
- __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) __A,
- (__v8hf) __B,
- (__v8hf) __C, __D,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_fcmadd_sch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D)
- {
- return (__m128h)
- __builtin_ia32_vfcmaddcsh_maskz_round ((__v8hf) __B,
- (__v8hf) __C,
- (__v8hf) __D,
- __A, _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_fcmadd_sch (__m128h __A, __m128h __B, __m128h __C)
- {
- return (__m128h)
- __builtin_ia32_vfcmaddcsh_round ((__v8hf) __A,
- (__v8hf) __B,
- (__v8hf) __C,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_fmadd_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
- {
- return (__m128h)
- __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) __A,
- (__v8hf) __C,
- (__v8hf) __D, __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask3_fmadd_sch (__m128h __A, __m128h __B, __m128h __C, __mmask8 __D)
- {
- return (__m128h)
- __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) __A,
- (__v8hf) __B,
- (__v8hf) __C, __D,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_fmadd_sch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D)
- {
- return (__m128h)
- __builtin_ia32_vfmaddcsh_maskz_round ((__v8hf) __B,
- (__v8hf) __C,
- (__v8hf) __D,
- __A, _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_fmadd_sch (__m128h __A, __m128h __B, __m128h __C)
- {
- return (__m128h)
- __builtin_ia32_vfmaddcsh_round ((__v8hf) __A,
- (__v8hf) __B,
- (__v8hf) __C,
- _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_fcmadd_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
- __m128h __D, const int __E)
- {
- return (__m128h)
- __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) __A,
- (__v8hf) __C,
- (__v8hf) __D,
- __B, __E);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask3_fcmadd_round_sch (__m128h __A, __m128h __B, __m128h __C,
- __mmask8 __D, const int __E)
- {
- return (__m128h)
- __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) __A,
- (__v8hf) __B,
- (__v8hf) __C,
- __D, __E);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_fcmadd_round_sch (__mmask8 __A, __m128h __B, __m128h __C,
- __m128h __D, const int __E)
- {
- return (__m128h)
- __builtin_ia32_vfcmaddcsh_maskz_round ((__v8hf) __B,
- (__v8hf) __C,
- (__v8hf) __D,
- __A, __E);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_fcmadd_round_sch (__m128h __A, __m128h __B, __m128h __C, const int __D)
- {
- return (__m128h)
- __builtin_ia32_vfcmaddcsh_round ((__v8hf) __A,
- (__v8hf) __B,
- (__v8hf) __C,
- __D);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_fmadd_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
- __m128h __D, const int __E)
- {
- return (__m128h)
- __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) __A,
- (__v8hf) __C,
- (__v8hf) __D,
- __B, __E);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask3_fmadd_round_sch (__m128h __A, __m128h __B, __m128h __C,
- __mmask8 __D, const int __E)
- {
- return (__m128h)
- __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) __A,
- (__v8hf) __B,
- (__v8hf) __C,
- __D, __E);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_fmadd_round_sch (__mmask8 __A, __m128h __B, __m128h __C,
- __m128h __D, const int __E)
- {
- return (__m128h)
- __builtin_ia32_vfmaddcsh_maskz_round ((__v8hf) __B,
- (__v8hf) __C,
- (__v8hf) __D,
- __A, __E);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_fmadd_round_sch (__m128h __A, __m128h __B, __m128h __C, const int __D)
- {
- return (__m128h)
- __builtin_ia32_vfmaddcsh_round ((__v8hf) __A,
- (__v8hf) __B,
- (__v8hf) __C,
- __D);
- }
- #else
- #define _mm_mask_fcmadd_round_sch(A, B, C, D, E) \
- ((__m128h) \
- __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) (A), \
- (__v8hf) (C), \
- (__v8hf) (D), \
- (B), (E)))
- #define _mm_mask3_fcmadd_round_sch(A, B, C, D, E) \
- ((__m128h) \
- __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) (A), \
- (__v8hf) (B), \
- (__v8hf) (C), \
- (D), (E)))
- #define _mm_maskz_fcmadd_round_sch(A, B, C, D, E) \
- __builtin_ia32_vfcmaddcsh_maskz_round ((B), (C), (D), (A), (E))
- #define _mm_fcmadd_round_sch(A, B, C, D) \
- __builtin_ia32_vfcmaddcsh_round ((A), (B), (C), (D))
- #define _mm_mask_fmadd_round_sch(A, B, C, D, E) \
- ((__m128h) \
- __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) (A), \
- (__v8hf) (C), \
- (__v8hf) (D), \
- (B), (E)))
- #define _mm_mask3_fmadd_round_sch(A, B, C, D, E) \
- ((__m128h) \
- __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) (A), \
- (__v8hf) (B), \
- (__v8hf) (C), \
- (D), (E)))
- #define _mm_maskz_fmadd_round_sch(A, B, C, D, E) \
- __builtin_ia32_vfmaddcsh_maskz_round ((B), (C), (D), (A), (E))
- #define _mm_fmadd_round_sch(A, B, C, D) \
- __builtin_ia32_vfmaddcsh_round ((A), (B), (C), (D))
- #endif /* __OPTIMIZE__ */
- /* Intrinsics vf[,c]mulcsh. */
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_fcmul_sch (__m128h __A, __m128h __B)
- {
- return (__m128h)
- __builtin_ia32_vfcmulcsh_round ((__v8hf) __A,
- (__v8hf) __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_fcmul_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
- {
- return (__m128h)
- __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __C,
- (__v8hf) __D,
- (__v8hf) __A,
- __B, _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_fcmul_sch (__mmask8 __A, __m128h __B, __m128h __C)
- {
- return (__m128h)
- __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __B,
- (__v8hf) __C,
- _mm_setzero_ph (),
- __A, _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_fmul_sch (__m128h __A, __m128h __B)
- {
- return (__m128h)
- __builtin_ia32_vfmulcsh_round ((__v8hf) __A,
- (__v8hf) __B,
- _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_fmul_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
- {
- return (__m128h)
- __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __C,
- (__v8hf) __D,
- (__v8hf) __A,
- __B, _MM_FROUND_CUR_DIRECTION);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_fmul_sch (__mmask8 __A, __m128h __B, __m128h __C)
- {
- return (__m128h)
- __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __B,
- (__v8hf) __C,
- _mm_setzero_ph (),
- __A, _MM_FROUND_CUR_DIRECTION);
- }
- #ifdef __OPTIMIZE__
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_fcmul_round_sch (__m128h __A, __m128h __B, const int __D)
- {
- return (__m128h)
- __builtin_ia32_vfcmulcsh_round ((__v8hf) __A,
- (__v8hf) __B,
- __D);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_fcmul_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
- __m128h __D, const int __E)
- {
- return (__m128h)
- __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __C,
- (__v8hf) __D,
- (__v8hf) __A,
- __B, __E);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_fcmul_round_sch (__mmask8 __A, __m128h __B, __m128h __C,
- const int __E)
- {
- return (__m128h)
- __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __B,
- (__v8hf) __C,
- _mm_setzero_ph (),
- __A, __E);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_fmul_round_sch (__m128h __A, __m128h __B, const int __D)
- {
- return (__m128h)
- __builtin_ia32_vfmulcsh_round ((__v8hf) __A,
- (__v8hf) __B, __D);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_fmul_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
- __m128h __D, const int __E)
- {
- return (__m128h)
- __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __C,
- (__v8hf) __D,
- (__v8hf) __A,
- __B, __E);
- }
- extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskz_fmul_round_sch (__mmask8 __A, __m128h __B, __m128h __C, const int __E)
- {
- return (__m128h)
- __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __B,
- (__v8hf) __C,
- _mm_setzero_ph (),
- __A, __E);
- }
- #else
- #define _mm_fcmul_round_sch(__A, __B, __D) \
- (__m128h) __builtin_ia32_vfcmulcsh_round ((__v8hf) __A, \
- (__v8hf) __B, __D)
- #define _mm_mask_fcmul_round_sch(__A, __B, __C, __D, __E) \
- (__m128h) __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __C, \
- (__v8hf) __D, \
- (__v8hf) __A, \
- __B, __E)
- #define _mm_maskz_fcmul_round_sch(__A, __B, __C, __E) \
- (__m128h) __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __B, \
- (__v8hf) __C, \
- _mm_setzero_ph (), \
- __A, __E)
- #define _mm_fmul_round_sch(__A, __B, __D) \
- (__m128h) __builtin_ia32_vfmulcsh_round ((__v8hf) __A, \
- (__v8hf) __B, __D)
- #define _mm_mask_fmul_round_sch(__A, __B, __C, __D, __E) \
- (__m128h) __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __C, \
- (__v8hf) __D, \
- (__v8hf) __A, \
- __B, __E)
- #define _mm_maskz_fmul_round_sch(__A, __B, __C, __E) \
- (__m128h) __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __B, \
- (__v8hf) __C, \
- _mm_setzero_ph (), \
- __A, __E)
- #endif /* __OPTIMIZE__ */
- #define _MM512_REDUCE_OP(op) \
- __m256h __T1 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
- __m256h __T2 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
- __m256h __T3 = (__T1 op __T2); \
- __m128h __T4 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 0); \
- __m128h __T5 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 1); \
- __m128h __T6 = (__T4 op __T5); \
- __m128h __T7 = (__m128h) __builtin_shuffle ((__m128h)__T6, \
- (__v8hi) { 4, 5, 6, 7, 0, 1, 2, 3 }); \
- __m128h __T8 = (__T6 op __T7); \
- __m128h __T9 = (__m128h) __builtin_shuffle ((__m128h)__T8, \
- (__v8hi) { 2, 3, 0, 1, 4, 5, 6, 7 }); \
- __m128h __T10 = __T8 op __T9; \
- return __T10[0] op __T10[1]
- // TODO reduce
- extern __inline _Float16
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_reduce_add_ph (__m512h __A)
- {
- _MM512_REDUCE_OP (+);
- }
- extern __inline _Float16
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_reduce_mul_ph (__m512h __A)
- {
- _MM512_REDUCE_OP (*);
- }
- #undef _MM512_REDUCE_OP
- #ifdef __AVX512VL__
- #define _MM512_REDUCE_OP(op) \
- __m256h __T1 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
- __m256h __T2 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
- __m256h __T3 = __builtin_ia32_##op##ph256_mask (__T1, __T2, \
- _mm256_setzero_ph (), (__mmask16) -1); \
- __m128h __T4 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 0); \
- __m128h __T5 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 1); \
- __m128h __T6 = __builtin_ia32_##op##ph128_mask \
- (__T4, __T5, _mm_setzero_ph (),(__mmask8) -1); \
- __m128h __T7 = (__m128h) __builtin_shuffle ((__m128h)__T6, \
- (__v8hi) { 2, 3, 0, 1, 6, 7, 4, 5 }); \
- __m128h __T8 = (__m128h) __builtin_ia32_##op##ph128_mask \
- (__T6, __T7, _mm_setzero_ph (),(__mmask8) -1); \
- __m128h __T9 = (__m128h) __builtin_shuffle ((__m128h)__T8, \
- (__v8hi) { 4, 5 }); \
- __m128h __T10 = __builtin_ia32_##op##ph128_mask \
- (__T8, __T9, _mm_setzero_ph (),(__mmask8) -1); \
- __m128h __T11 = (__m128h) __builtin_shuffle (__T10, \
- (__v8hi) { 1, 0 }); \
- __m128h __T12 = __builtin_ia32_##op##ph128_mask \
- (__T10, __T11, _mm_setzero_ph (),(__mmask8) -1); \
- return __T12[0]
- #else
- #define _MM512_REDUCE_OP(op) \
- __m512h __T1 = (__m512h) __builtin_shuffle ((__m512d) __A, \
- (__v8di) { 4, 5, 6, 7, 0, 0, 0, 0 }); \
- __m512h __T2 = _mm512_##op##_ph (__A, __T1); \
- __m512h __T3 = (__m512h) __builtin_shuffle ((__m512d) __T2, \
- (__v8di) { 2, 3, 0, 0, 0, 0, 0, 0 }); \
- __m512h __T4 = _mm512_##op##_ph (__T2, __T3); \
- __m512h __T5 = (__m512h) __builtin_shuffle ((__m512d) __T4, \
- (__v8di) { 1, 0, 0, 0, 0, 0, 0, 0 }); \
- __m512h __T6 = _mm512_##op##_ph (__T4, __T5); \
- __m512h __T7 = (__m512h) __builtin_shuffle ((__m512) __T6, \
- (__v16si) { 1, 0, 0, 0, 0, 0, 0, 0, \
- 0, 0, 0, 0, 0, 0, 0, 0 }); \
- __m512h __T8 = _mm512_##op##_ph (__T6, __T7); \
- __m512h __T9 = (__m512h) __builtin_shuffle (__T8, \
- (__v32hi) { 1, 0, 0, 0, 0, 0, 0, 0, \
- 0, 0, 0, 0, 0, 0, 0, 0, \
- 0, 0, 0, 0, 0, 0, 0, 0, \
- 0, 0, 0, 0, 0, 0, 0, 0 }); \
- __m512h __T10 = _mm512_##op##_ph (__T8, __T9); \
- return __T10[0]
- #endif
- extern __inline _Float16
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_reduce_min_ph (__m512h __A)
- {
- _MM512_REDUCE_OP (min);
- }
- extern __inline _Float16
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_reduce_max_ph (__m512h __A)
- {
- _MM512_REDUCE_OP (max);
- }
- #undef _MM512_REDUCE_OP
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_mask_blend_ph (__mmask32 __U, __m512h __A, __m512h __W)
- {
- return (__m512h) __builtin_ia32_movdquhi512_mask ((__v32hi) __W,
- (__v32hi) __A,
- (__mmask32) __U);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_permutex2var_ph (__m512h __A, __m512i __I, __m512h __B)
- {
- return (__m512h) __builtin_ia32_vpermi2varhi512_mask ((__v32hi) __A,
- (__v32hi) __I,
- (__v32hi) __B,
- (__mmask32)-1);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_permutexvar_ph (__m512i __A, __m512h __B)
- {
- return (__m512h) __builtin_ia32_permvarhi512_mask ((__v32hi) __B,
- (__v32hi) __A,
- (__v32hi)
- (_mm512_setzero_ph ()),
- (__mmask32)-1);
- }
- extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm512_set1_pch (_Float16 _Complex __A)
- {
- union
- {
- _Float16 _Complex a;
- float b;
- } u = { .a = __A};
- return (__m512h) _mm512_set1_ps (u.b);
- }
- // intrinsics below are alias for f*mul_*ch
- #define _mm512_mul_pch(A, B) _mm512_fmul_pch ((A), (B))
- #define _mm512_mask_mul_pch(W, U, A, B) \
- _mm512_mask_fmul_pch ((W), (U), (A), (B))
- #define _mm512_maskz_mul_pch(U, A, B) _mm512_maskz_fmul_pch ((U), (A), (B))
- #define _mm512_mul_round_pch(A, B, R) _mm512_fmul_round_pch ((A), (B), (R))
- #define _mm512_mask_mul_round_pch(W, U, A, B, R) \
- _mm512_mask_fmul_round_pch ((W), (U), (A), (B), (R))
- #define _mm512_maskz_mul_round_pch(U, A, B, R) \
- _mm512_maskz_fmul_round_pch ((U), (A), (B), (R))
- #define _mm512_cmul_pch(A, B) _mm512_fcmul_pch ((A), (B))
- #define _mm512_mask_cmul_pch(W, U, A, B) \
- _mm512_mask_fcmul_pch ((W), (U), (A), (B))
- #define _mm512_maskz_cmul_pch(U, A, B) _mm512_maskz_fcmul_pch ((U), (A), (B))
- #define _mm512_cmul_round_pch(A, B, R) _mm512_fcmul_round_pch ((A), (B), (R))
- #define _mm512_mask_cmul_round_pch(W, U, A, B, R) \
- _mm512_mask_fcmul_round_pch ((W), (U), (A), (B), (R))
- #define _mm512_maskz_cmul_round_pch(U, A, B, R) \
- _mm512_maskz_fcmul_round_pch ((U), (A), (B), (R))
- #define _mm_mul_sch(A, B) _mm_fmul_sch ((A), (B))
- #define _mm_mask_mul_sch(W, U, A, B) _mm_mask_fmul_sch ((W), (U), (A), (B))
- #define _mm_maskz_mul_sch(U, A, B) _mm_maskz_fmul_sch ((U), (A), (B))
- #define _mm_mul_round_sch(A, B, R) _mm_fmul_round_sch ((A), (B), (R))
- #define _mm_mask_mul_round_sch(W, U, A, B, R) \
- _mm_mask_fmul_round_sch ((W), (U), (A), (B), (R))
- #define _mm_maskz_mul_round_sch(U, A, B, R) \
- _mm_maskz_fmul_round_sch ((U), (A), (B), (R))
- #define _mm_cmul_sch(A, B) _mm_fcmul_sch ((A), (B))
- #define _mm_mask_cmul_sch(W, U, A, B) _mm_mask_fcmul_sch ((W), (U), (A), (B))
- #define _mm_maskz_cmul_sch(U, A, B) _mm_maskz_fcmul_sch ((U), (A), (B))
- #define _mm_cmul_round_sch(A, B, R) _mm_fcmul_round_sch ((A), (B), (R))
- #define _mm_mask_cmul_round_sch(W, U, A, B, R) \
- _mm_mask_fcmul_round_sch ((W), (U), (A), (B), (R))
- #define _mm_maskz_cmul_round_sch(U, A, B, R) \
- _mm_maskz_fcmul_round_sch ((U), (A), (B), (R))
- #ifdef __DISABLE_AVX512FP16__
- #undef __DISABLE_AVX512FP16__
- #pragma GCC pop_options
- #endif /* __DISABLE_AVX512FP16__ */
- #endif /* __AVX512FP16INTRIN_H_INCLUDED */
|