avx512fintrin.h (499 KB, 14,308 lines)

430914310143111431214313143141431514316143171431814319143201432114322143231432414325143261432714328143291433014331143321433314334143351433614337143381433914340143411434214343143441434514346143471434814349143501435114352143531435414355143561435714358143591436014361143621436314364143651436614367143681436914370143711437214373143741437514376143771437814379143801438114382143831438414385143861438714388143891439014391143921439314394143951439614397143981439914400144011440214403144041440514406144071440814409144101441114412144131441414415144161441714418144191442014421144221442314424144251442614427144281442914430144311443214433144341443514436144371443814439144401444114442144431444414445144461444714448144491445014451144521445314454144551445614457144581445914460144611446214463144641446514466144671446814469144701447114472144731447414475144761447714478144791448014481144821448314484144851448614487144881448914490144911449214493144941449514496144971449814499145001450114502145031450414505145061450714508145091451014511145121451314514145151451614517145181451914520145211452214523145241452514526145271452814529145301453114532145331453414535145361453714538145391454014541145421454314544145451454614547145481454914550145511455214553145541455514556145571455814559145601456114562145631456414565145661456714568145691457014571145721457314574145751457614577145781457914580145811458214583145841458514586145871458814589145901459114592145931459414595145961459714598145991460014601146021460314604146051460614607146081460914610146111461214613146141461514616146171461814619146201462114622146231462414625146261462714628146291463014631146321463314634146351463614637146381463914640146411464214643146441464514646146471464814649146501465114652146531465414655146561465714658146591466014661146621466314664146651466614667146681466914670146711467214673146741467514676146771467814679146801468114682146831468414685146861468714688146891469014691146921469314694146951469614697146981469914700147011470214703147041470514706147071470814709147101471114712147131471414715147161471714718147191472014721147221472314724147251472614727147281472914730147311473214733147341473514736147371473814739147401474114742147431474414745147461474714748147491475014751147521475314754147551475614757147581475914760147611476214763147641476514766147671476814769147701477114772147731477414775147761477714778147791478014781147821478314784147851478614787147881478914790147911479214793147941479514796147971479814799148001480114802148031480414805148061480714808148091481014811148121481314814148151481614817148181481914820148211482214823148241482514826148271482814829148301483114832148331483414835148361483714838148391484014841148421484314844148451484614847148481484914850148511485214853148541485514856148571485814859148601486114862148631486414865148661486714868148691487014871148721487314874148751487614877148781487914880148811488214883148841488514886148871488814889148901489114892148931489414895148961489714898148991490014901149021490314904149051490614907149081490914910149111491214913149141491514916149171491814919149201492114922149231492414925149261492714928149291493014931149321493314934149351493614937149381493914940149411494214943149441494514946149471494814949149501495114952149531495414955149561495714958149591496014961149621496314964149651496614967149681496914970149711497214973149741497514976149771497814979149801498114982149831498414985149861498714988149891499014991149921499314994149951499614997149981499915000150011500215003150041500515006150071500815009150101501115012150131501415015150161501715018150191
502015021150221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211542215423154241542515426154271542815429154301543115432154331543415435154361543715438154391544015441154421544315444154451544615447154481544915450154511545215453154541545515456154571545815459154601546115462154631546415465154661546715468154691547015471154721547315474154751547615477154781547915480154811548215483154841548515486154871548815489154901549115492154931549415495154961549715498154991550015501155021550315504155051550615507155081550915510155111551215513155141551515516155171551815519155201552115522155231552415525155261552715528155291553015531155321553315534155351553615537155381553915540155411554215543155441554515546155471554815549155501555115552155531555415555155561555715558155591556015561155621556315564155651556615567155681556915570155711557215573155741557515576155771557815579155801558115582155831558415585155861558715588155891559015591155921559315594155951559615597155981559915600156011560215603156041560515606156071560815609156101561115612156131561415615156161561715618156191562015621156221562315624156251562615627156281562915630156311563215633156341563515636156371563815639156401564115642156431564415645156461564715648156491565015651156521565315654156551565615657156581565915660156611566215663156641566515666156671566815669156701567115672156731567415675156761567715678156791568015681156821568315684156851568615687156881568915690156911569215693156941569515696156971569815699157001570115702157031570415705157061570715708157091571015711157121571315714157151571615717157181571915720157211572215723157241572515726157271572815729157301
57311573215733157341573515736157371573815739157401574115742157431574415745157461574715748157491575015751157521575315754157551575615757157581575915760157611576215763157641576515766157671576815769157701577115772157731577415775157761577715778157791578015781157821578315784157851578615787157881578915790157911579215793157941579515796157971579815799158001580115802158031580415805158061580715808158091581015811158121581315814158151581615817158181581915820158211582215823158241582515826158271582815829158301583115832158331583415835158361583715838158391584015841158421584315844158451584615847158481584915850158511585215853158541585515856158571585815859158601586115862158631586415865158661586715868158691587015871158721587315874158751587615877158781587915880158811588215883158841588515886158871588815889158901589115892158931589415895158961589715898158991590015901159021590315904159051590615907159081590915910159111591215913159141591515916159171591815919159201592115922159231592415925159261592715928159291593015931159321593315934159351593615937159381593915940159411594215943159441594515946159471594815949
  1. /* Copyright (C) 2013-2019 Free Software Foundation, Inc.
  2. This file is part of GCC.
  3. GCC is free software; you can redistribute it and/or modify
  4. it under the terms of the GNU General Public License as published by
  5. the Free Software Foundation; either version 3, or (at your option)
  6. any later version.
  7. GCC is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU General Public License for more details.
  11. Under Section 7 of GPL version 3, you are granted additional
  12. permissions described in the GCC Runtime Library Exception, version
  13. 3.1, as published by the Free Software Foundation.
  14. You should have received a copy of the GNU General Public License and
  15. a copy of the GCC Runtime Library Exception along with this program;
  16. see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
  17. <http://www.gnu.org/licenses/>. */
  18. #ifndef _IMMINTRIN_H_INCLUDED
  19. #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
  20. #endif
  21. #ifndef _AVX512FINTRIN_H_INCLUDED
  22. #define _AVX512FINTRIN_H_INCLUDED
  23. #ifndef __AVX512F__
  24. #pragma GCC push_options
  25. #pragma GCC target("avx512f")
  26. #define __DISABLE_AVX512F__
  27. #endif /* __AVX512F__ */
  28. /* Internal data types for implementing the intrinsics. */
  29. typedef double __v8df __attribute__ ((__vector_size__ (64)));
  30. typedef float __v16sf __attribute__ ((__vector_size__ (64)));
  31. typedef long long __v8di __attribute__ ((__vector_size__ (64)));
  32. typedef unsigned long long __v8du __attribute__ ((__vector_size__ (64)));
  33. typedef int __v16si __attribute__ ((__vector_size__ (64)));
  34. typedef unsigned int __v16su __attribute__ ((__vector_size__ (64)));
  35. typedef short __v32hi __attribute__ ((__vector_size__ (64)));
  36. typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64)));
  37. typedef char __v64qi __attribute__ ((__vector_size__ (64)));
  38. typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64)));
  39. /* The Intel API is flexible enough that we must allow aliasing with other
  40. vector types, and their scalar components. */
  41. typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
  42. typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
  43. typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
  44. /* Unaligned version of the same type. */
  45. typedef float __m512_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
  46. typedef long long __m512i_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
  47. typedef double __m512d_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
  48. typedef unsigned char __mmask8;
  49. typedef unsigned short __mmask16;
  50. extern __inline __mmask16
  51. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  52. _mm512_int2mask (int __M)
  53. {
  54. return (__mmask16) __M;
  55. }
  56. extern __inline int
  57. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  58. _mm512_mask2int (__mmask16 __M)
  59. {
  60. return (int) __M;
  61. }
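/* Usage sketch (illustrative only; the example_* names in these sketches
   are not part of the Intel API): a __mmask16 carries one bit per 32-bit
   element of a 512-bit vector, and _mm512_int2mask/_mm512_mask2int merely
   reinterpret an int as such a mask and back.  */
static __inline__ int
example_mask_round_trip (int bits)
{
  __mmask16 m = _mm512_int2mask (bits);  /* keeps the low 16 bits */
  return _mm512_mask2int (m);            /* equals bits & 0xFFFF */
}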
  62. extern __inline __m512i
  63. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  64. _mm512_set_epi64 (long long __A, long long __B, long long __C,
  65. long long __D, long long __E, long long __F,
  66. long long __G, long long __H)
  67. {
  68. return __extension__ (__m512i) (__v8di)
  69. { __H, __G, __F, __E, __D, __C, __B, __A };
  70. }
  71. /* Create the vector [A B C D E F G H I J K L M N O P]. */
  72. extern __inline __m512i
  73. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  74. _mm512_set_epi32 (int __A, int __B, int __C, int __D,
  75. int __E, int __F, int __G, int __H,
  76. int __I, int __J, int __K, int __L,
  77. int __M, int __N, int __O, int __P)
  78. {
  79. return __extension__ (__m512i)(__v16si)
  80. { __P, __O, __N, __M, __L, __K, __J, __I,
  81. __H, __G, __F, __E, __D, __C, __B, __A };
  82. }
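/* Usage sketch (illustrative only; example_* is not part of the Intel
   API): the arguments of _mm512_set_epi32 are listed from the highest
   element down, so the last argument becomes element 0 (the lowest lane,
   first in memory).  The helper below builds the vector whose element i
   holds the value i.  */
static __inline__ __m512i
example_iota_epi32 (void)
{
  return _mm512_set_epi32 (15, 14, 13, 12, 11, 10, 9, 8,
                           7, 6, 5, 4, 3, 2, 1, 0);
}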
  83. extern __inline __m512i
  84. __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  85. _mm512_set_epi16 (short __q31, short __q30, short __q29, short __q28,
  86. short __q27, short __q26, short __q25, short __q24,
  87. short __q23, short __q22, short __q21, short __q20,
  88. short __q19, short __q18, short __q17, short __q16,
  89. short __q15, short __q14, short __q13, short __q12,
  90. short __q11, short __q10, short __q09, short __q08,
  91. short __q07, short __q06, short __q05, short __q04,
  92. short __q03, short __q02, short __q01, short __q00)
  93. {
  94. return __extension__ (__m512i)(__v32hi){
  95. __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
  96. __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15,
  97. __q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23,
  98. __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31
  99. };
  100. }
  101. extern __inline __m512i
  102. __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  103. _mm512_set_epi8 (char __q63, char __q62, char __q61, char __q60,
  104. char __q59, char __q58, char __q57, char __q56,
  105. char __q55, char __q54, char __q53, char __q52,
  106. char __q51, char __q50, char __q49, char __q48,
  107. char __q47, char __q46, char __q45, char __q44,
  108. char __q43, char __q42, char __q41, char __q40,
  109. char __q39, char __q38, char __q37, char __q36,
  110. char __q35, char __q34, char __q33, char __q32,
  111. char __q31, char __q30, char __q29, char __q28,
  112. char __q27, char __q26, char __q25, char __q24,
  113. char __q23, char __q22, char __q21, char __q20,
  114. char __q19, char __q18, char __q17, char __q16,
  115. char __q15, char __q14, char __q13, char __q12,
  116. char __q11, char __q10, char __q09, char __q08,
  117. char __q07, char __q06, char __q05, char __q04,
  118. char __q03, char __q02, char __q01, char __q00)
  119. {
  120. return __extension__ (__m512i)(__v64qi){
  121. __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
  122. __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15,
  123. __q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23,
  124. __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31,
  125. __q32, __q33, __q34, __q35, __q36, __q37, __q38, __q39,
  126. __q40, __q41, __q42, __q43, __q44, __q45, __q46, __q47,
  127. __q48, __q49, __q50, __q51, __q52, __q53, __q54, __q55,
  128. __q56, __q57, __q58, __q59, __q60, __q61, __q62, __q63
  129. };
  130. }
  131. extern __inline __m512d
  132. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  133. _mm512_set_pd (double __A, double __B, double __C, double __D,
  134. double __E, double __F, double __G, double __H)
  135. {
  136. return __extension__ (__m512d)
  137. { __H, __G, __F, __E, __D, __C, __B, __A };
  138. }
  139. extern __inline __m512
  140. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  141. _mm512_set_ps (float __A, float __B, float __C, float __D,
  142. float __E, float __F, float __G, float __H,
  143. float __I, float __J, float __K, float __L,
  144. float __M, float __N, float __O, float __P)
  145. {
  146. return __extension__ (__m512)
  147. { __P, __O, __N, __M, __L, __K, __J, __I,
  148. __H, __G, __F, __E, __D, __C, __B, __A };
  149. }
  150. #define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \
  151. _mm512_set_epi64(e7,e6,e5,e4,e3,e2,e1,e0)
  152. #define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \
  153. e8,e9,e10,e11,e12,e13,e14,e15) \
  154. _mm512_set_epi32(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)
  155. #define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \
  156. _mm512_set_pd(e7,e6,e5,e4,e3,e2,e1,e0)
  157. #define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
  158. _mm512_set_ps(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)
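/* Usage sketch (illustrative only; example_* is not part of the Intel
   API): the _mm512_setr_* macros take elements in memory (index) order
   and simply reverse the argument list before calling the corresponding
   _mm512_set_* function, so the two vectors below are identical.  */
static __inline__ __m512
example_setr_equivalence (void)
{
  __m512 a = _mm512_set_ps (16.0f, 15.0f, 14.0f, 13.0f, 12.0f, 11.0f,
                            10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f,
                            4.0f, 3.0f, 2.0f, 1.0f);
  __m512 b = _mm512_setr_ps (1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f,
                             7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f,
                             13.0f, 14.0f, 15.0f, 16.0f);
  (void) a;   /* a and b hold the same elements */
  return b;
}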
  159. extern __inline __m512
  160. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  161. _mm512_undefined_ps (void)
  162. {
163. __m512 __Y = __Y;  /* self-initialization avoids an uninitialized-variable warning; the value is intentionally undefined */
  164. return __Y;
  165. }
  166. #define _mm512_undefined _mm512_undefined_ps
  167. extern __inline __m512d
  168. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  169. _mm512_undefined_pd (void)
  170. {
  171. __m512d __Y = __Y;
  172. return __Y;
  173. }
  174. extern __inline __m512i
  175. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  176. _mm512_undefined_epi32 (void)
  177. {
  178. __m512i __Y = __Y;
  179. return __Y;
  180. }
  181. #define _mm512_undefined_si512 _mm512_undefined_epi32
  182. extern __inline __m512i
  183. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  184. _mm512_set1_epi8 (char __A)
  185. {
  186. return __extension__ (__m512i)(__v64qi)
  187. { __A, __A, __A, __A, __A, __A, __A, __A,
  188. __A, __A, __A, __A, __A, __A, __A, __A,
  189. __A, __A, __A, __A, __A, __A, __A, __A,
  190. __A, __A, __A, __A, __A, __A, __A, __A,
  191. __A, __A, __A, __A, __A, __A, __A, __A,
  192. __A, __A, __A, __A, __A, __A, __A, __A,
  193. __A, __A, __A, __A, __A, __A, __A, __A,
  194. __A, __A, __A, __A, __A, __A, __A, __A };
  195. }
  196. extern __inline __m512i
  197. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  198. _mm512_set1_epi16 (short __A)
  199. {
  200. return __extension__ (__m512i)(__v32hi)
  201. { __A, __A, __A, __A, __A, __A, __A, __A,
  202. __A, __A, __A, __A, __A, __A, __A, __A,
  203. __A, __A, __A, __A, __A, __A, __A, __A,
  204. __A, __A, __A, __A, __A, __A, __A, __A };
  205. }
  206. extern __inline __m512d
  207. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  208. _mm512_set1_pd (double __A)
  209. {
  210. return (__m512d) __builtin_ia32_broadcastsd512 (__extension__
  211. (__v2df) { __A, },
  212. (__v8df)
  213. _mm512_undefined_pd (),
  214. (__mmask8) -1);
  215. }
  216. extern __inline __m512
  217. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  218. _mm512_set1_ps (float __A)
  219. {
  220. return (__m512) __builtin_ia32_broadcastss512 (__extension__
  221. (__v4sf) { __A, },
  222. (__v16sf)
  223. _mm512_undefined_ps (),
  224. (__mmask16) -1);
  225. }
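/* Usage sketch (illustrative only; example_* is not part of the Intel
   API): _mm512_set1_ps/_mm512_set1_pd broadcast one scalar into every
   element (16 floats or 8 doubles), a common way to build constants for
   element-wise arithmetic.  */
static __inline__ __m512
example_broadcast_half (void)
{
  return _mm512_set1_ps (0.5f);  /* all 16 elements equal 0.5 */
}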
  226. /* Create the vector [A B C D A B C D A B C D A B C D]. */
  227. extern __inline __m512i
  228. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  229. _mm512_set4_epi32 (int __A, int __B, int __C, int __D)
  230. {
  231. return __extension__ (__m512i)(__v16si)
  232. { __D, __C, __B, __A, __D, __C, __B, __A,
  233. __D, __C, __B, __A, __D, __C, __B, __A };
  234. }
  235. extern __inline __m512i
  236. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  237. _mm512_set4_epi64 (long long __A, long long __B, long long __C,
  238. long long __D)
  239. {
  240. return __extension__ (__m512i) (__v8di)
  241. { __D, __C, __B, __A, __D, __C, __B, __A };
  242. }
  243. extern __inline __m512d
  244. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  245. _mm512_set4_pd (double __A, double __B, double __C, double __D)
  246. {
  247. return __extension__ (__m512d)
  248. { __D, __C, __B, __A, __D, __C, __B, __A };
  249. }
  250. extern __inline __m512
  251. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  252. _mm512_set4_ps (float __A, float __B, float __C, float __D)
  253. {
  254. return __extension__ (__m512)
  255. { __D, __C, __B, __A, __D, __C, __B, __A,
  256. __D, __C, __B, __A, __D, __C, __B, __A };
  257. }
  258. #define _mm512_setr4_epi64(e0,e1,e2,e3) \
  259. _mm512_set4_epi64(e3,e2,e1,e0)
  260. #define _mm512_setr4_epi32(e0,e1,e2,e3) \
  261. _mm512_set4_epi32(e3,e2,e1,e0)
  262. #define _mm512_setr4_pd(e0,e1,e2,e3) \
  263. _mm512_set4_pd(e3,e2,e1,e0)
  264. #define _mm512_setr4_ps(e0,e1,e2,e3) \
  265. _mm512_set4_ps(e3,e2,e1,e0)
  266. extern __inline __m512
  267. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  268. _mm512_setzero_ps (void)
  269. {
  270. return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
  271. 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
  272. }
  273. extern __inline __m512
  274. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  275. _mm512_setzero (void)
  276. {
  277. return _mm512_setzero_ps ();
  278. }
  279. extern __inline __m512d
  280. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  281. _mm512_setzero_pd (void)
  282. {
  283. return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
  284. }
  285. extern __inline __m512i
  286. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  287. _mm512_setzero_epi32 (void)
  288. {
  289. return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
  290. }
  291. extern __inline __m512i
  292. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  293. _mm512_setzero_si512 (void)
  294. {
  295. return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
  296. }
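/* Usage sketch (illustrative only; example_* is not part of the Intel
   API): the setzero helpers return an all-zero vector of the requested
   type, and _mm512_setzero () is simply an alias for the single-precision
   form.  */
static __inline__ __m512d
example_zero_accumulator (void)
{
  return _mm512_setzero_pd ();  /* eight 0.0 elements */
}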
  297. extern __inline __m512d
  298. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  299. _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
  300. {
  301. return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
  302. (__v8df) __W,
  303. (__mmask8) __U);
  304. }
  305. extern __inline __m512d
  306. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  307. _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
  308. {
  309. return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
  310. (__v8df)
  311. _mm512_setzero_pd (),
  312. (__mmask8) __U);
  313. }
  314. extern __inline __m512
  315. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  316. _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
  317. {
  318. return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
  319. (__v16sf) __W,
  320. (__mmask16) __U);
  321. }
  322. extern __inline __m512
  323. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  324. _mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
  325. {
  326. return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
  327. (__v16sf)
  328. _mm512_setzero_ps (),
  329. (__mmask16) __U);
  330. }
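/* Usage sketch (illustrative only; example_* is not part of the Intel
   API): with merge masking (_mm512_mask_mov_ps) element i of the result
   is taken from __A when bit i of the mask is set and from __W otherwise;
   the zero-masking form (_mm512_maskz_mov_ps) puts 0.0 in the unselected
   elements instead.  */
static __inline__ __m512
example_blend_low_half (__m512 w, __m512 a)
{
  __mmask16 low8 = (__mmask16) 0x00FF;     /* select elements 0..7 */
  return _mm512_mask_mov_ps (w, low8, a);  /* 0..7 from a, 8..15 from w */
}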
  331. extern __inline __m512d
  332. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  333. _mm512_load_pd (void const *__P)
  334. {
  335. return *(__m512d *) __P;
  336. }
  337. extern __inline __m512d
  338. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  339. _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
  340. {
  341. return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
  342. (__v8df) __W,
  343. (__mmask8) __U);
  344. }
  345. extern __inline __m512d
  346. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  347. _mm512_maskz_load_pd (__mmask8 __U, void const *__P)
  348. {
  349. return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
  350. (__v8df)
  351. _mm512_setzero_pd (),
  352. (__mmask8) __U);
  353. }
  354. extern __inline void
  355. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  356. _mm512_store_pd (void *__P, __m512d __A)
  357. {
  358. *(__m512d *) __P = __A;
  359. }
  360. extern __inline void
  361. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  362. _mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
  363. {
  364. __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
  365. (__mmask8) __U);
  366. }
  367. extern __inline __m512
  368. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  369. _mm512_load_ps (void const *__P)
  370. {
  371. return *(__m512 *) __P;
  372. }
  373. extern __inline __m512
  374. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  375. _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
  376. {
  377. return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
  378. (__v16sf) __W,
  379. (__mmask16) __U);
  380. }
  381. extern __inline __m512
  382. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  383. _mm512_maskz_load_ps (__mmask16 __U, void const *__P)
  384. {
  385. return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
  386. (__v16sf)
  387. _mm512_setzero_ps (),
  388. (__mmask16) __U);
  389. }
  390. extern __inline void
  391. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  392. _mm512_store_ps (void *__P, __m512 __A)
  393. {
  394. *(__m512 *) __P = __A;
  395. }
  396. extern __inline void
  397. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  398. _mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
  399. {
  400. __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
  401. (__mmask16) __U);
  402. }
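/* Usage sketch (illustrative only; example_* is not part of the Intel
   API, and the 64-byte alignment of both pointers is an assumption of the
   example): _mm512_load_ps/_mm512_store_ps are the aligned forms, and a
   masked store writes only the selected elements, leaving the rest of the
   destination memory untouched.  */
static __inline__ void
example_copy_even_floats (float *dst, const float *src)
{
  __m512 v = _mm512_load_ps (src);                    /* 16 floats, aligned load */
  _mm512_mask_store_ps (dst, (__mmask16) 0x5555, v);  /* even indices only */
}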
  403. extern __inline __m512i
  404. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  405. _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
  406. {
  407. return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
  408. (__v8di) __W,
  409. (__mmask8) __U);
  410. }
  411. extern __inline __m512i
  412. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  413. _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
  414. {
  415. return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
  416. (__v8di)
  417. _mm512_setzero_si512 (),
  418. (__mmask8) __U);
  419. }
  420. extern __inline __m512i
  421. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  422. _mm512_load_epi64 (void const *__P)
  423. {
  424. return *(__m512i *) __P;
  425. }
  426. extern __inline __m512i
  427. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  428. _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
  429. {
  430. return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
  431. (__v8di) __W,
  432. (__mmask8) __U);
  433. }
  434. extern __inline __m512i
  435. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  436. _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
  437. {
  438. return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
  439. (__v8di)
  440. _mm512_setzero_si512 (),
  441. (__mmask8) __U);
  442. }
  443. extern __inline void
  444. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  445. _mm512_store_epi64 (void *__P, __m512i __A)
  446. {
  447. *(__m512i *) __P = __A;
  448. }
  449. extern __inline void
  450. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  451. _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
  452. {
  453. __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
  454. (__mmask8) __U);
  455. }
  456. extern __inline __m512i
  457. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  458. _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
  459. {
  460. return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
  461. (__v16si) __W,
  462. (__mmask16) __U);
  463. }
  464. extern __inline __m512i
  465. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  466. _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
  467. {
  468. return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
  469. (__v16si)
  470. _mm512_setzero_si512 (),
  471. (__mmask16) __U);
  472. }
  473. extern __inline __m512i
  474. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  475. _mm512_load_si512 (void const *__P)
  476. {
  477. return *(__m512i *) __P;
  478. }
  479. extern __inline __m512i
  480. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  481. _mm512_load_epi32 (void const *__P)
  482. {
  483. return *(__m512i *) __P;
  484. }
  485. extern __inline __m512i
  486. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  487. _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
  488. {
  489. return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
  490. (__v16si) __W,
  491. (__mmask16) __U);
  492. }
  493. extern __inline __m512i
  494. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  495. _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
  496. {
  497. return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
  498. (__v16si)
  499. _mm512_setzero_si512 (),
  500. (__mmask16) __U);
  501. }
  502. extern __inline void
  503. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  504. _mm512_store_si512 (void *__P, __m512i __A)
  505. {
  506. *(__m512i *) __P = __A;
  507. }
  508. extern __inline void
  509. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  510. _mm512_store_epi32 (void *__P, __m512i __A)
  511. {
  512. *(__m512i *) __P = __A;
  513. }
  514. extern __inline void
  515. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  516. _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
  517. {
  518. __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
  519. (__mmask16) __U);
  520. }
  521. extern __inline __m512i
  522. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  523. _mm512_mullo_epi32 (__m512i __A, __m512i __B)
  524. {
  525. return (__m512i) ((__v16su) __A * (__v16su) __B);
  526. }
  527. extern __inline __m512i
  528. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  529. _mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
  530. {
  531. return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
  532. (__v16si) __B,
  533. (__v16si)
  534. _mm512_setzero_si512 (),
  535. __M);
  536. }
  537. extern __inline __m512i
  538. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  539. _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
  540. {
  541. return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
  542. (__v16si) __B,
  543. (__v16si) __W, __M);
  544. }
  545. extern __inline __m512i
  546. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  547. _mm512_mullox_epi64 (__m512i __A, __m512i __B)
  548. {
  549. return (__m512i) ((__v8du) __A * (__v8du) __B);
  550. }
  551. extern __inline __m512i
  552. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  553. _mm512_mask_mullox_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
  554. {
  555. return _mm512_mask_mov_epi64 (__W, __M, _mm512_mullox_epi64 (__A, __B));
  556. }
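/* Usage sketch (illustrative only; example_* is not part of the Intel
   API): _mm512_mullo_epi32 multiplies corresponding 32-bit elements and
   keeps the low 32 bits of each product; _mm512_mullox_epi64 does the
   same for 64-bit elements and, on an AVX-512F-only target, is typically
   expanded into a short instruction sequence rather than a single
   multiply.  */
static __inline__ __m512i
example_triple_epi32 (__m512i v)
{
  __m512i three = _mm512_set_epi32 (3, 3, 3, 3, 3, 3, 3, 3,
                                    3, 3, 3, 3, 3, 3, 3, 3);
  return _mm512_mullo_epi32 (v, three);  /* each element multiplied by 3 */
}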
  557. extern __inline __m512i
  558. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  559. _mm512_sllv_epi32 (__m512i __X, __m512i __Y)
  560. {
  561. return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
  562. (__v16si) __Y,
  563. (__v16si)
  564. _mm512_undefined_epi32 (),
  565. (__mmask16) -1);
  566. }
  567. extern __inline __m512i
  568. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  569. _mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
  570. {
  571. return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
  572. (__v16si) __Y,
  573. (__v16si) __W,
  574. (__mmask16) __U);
  575. }
  576. extern __inline __m512i
  577. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  578. _mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
  579. {
  580. return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
  581. (__v16si) __Y,
  582. (__v16si)
  583. _mm512_setzero_si512 (),
  584. (__mmask16) __U);
  585. }
  586. extern __inline __m512i
  587. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  588. _mm512_srav_epi32 (__m512i __X, __m512i __Y)
  589. {
  590. return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
  591. (__v16si) __Y,
  592. (__v16si)
  593. _mm512_undefined_epi32 (),
  594. (__mmask16) -1);
  595. }
  596. extern __inline __m512i
  597. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  598. _mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
  599. {
  600. return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
  601. (__v16si) __Y,
  602. (__v16si) __W,
  603. (__mmask16) __U);
  604. }
  605. extern __inline __m512i
  606. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  607. _mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
  608. {
  609. return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
  610. (__v16si) __Y,
  611. (__v16si)
  612. _mm512_setzero_si512 (),
  613. (__mmask16) __U);
  614. }
  615. extern __inline __m512i
  616. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  617. _mm512_srlv_epi32 (__m512i __X, __m512i __Y)
  618. {
  619. return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
  620. (__v16si) __Y,
  621. (__v16si)
  622. _mm512_undefined_epi32 (),
  623. (__mmask16) -1);
  624. }
  625. extern __inline __m512i
  626. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  627. _mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
  628. {
  629. return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
  630. (__v16si) __Y,
  631. (__v16si) __W,
  632. (__mmask16) __U);
  633. }
  634. extern __inline __m512i
  635. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  636. _mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
  637. {
  638. return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
  639. (__v16si) __Y,
  640. (__v16si)
  641. _mm512_setzero_si512 (),
  642. (__mmask16) __U);
  643. }
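/* Usage sketch (illustrative only; example_* is not part of the Intel
   API): the sllv/srlv/srav forms shift each element of __X by the count
   held in the corresponding element of __Y (logical left, logical right
   and arithmetic right, respectively); for the logical shifts a count of
   32 or more yields 0.  */
static __inline__ __m512i
example_per_element_shift (__m512i x)
{
  /* Shift element i left by i bits.  */
  __m512i counts = _mm512_set_epi32 (15, 14, 13, 12, 11, 10, 9, 8,
                                     7, 6, 5, 4, 3, 2, 1, 0);
  return _mm512_sllv_epi32 (x, counts);
}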
  644. extern __inline __m512i
  645. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  646. _mm512_add_epi64 (__m512i __A, __m512i __B)
  647. {
  648. return (__m512i) ((__v8du) __A + (__v8du) __B);
  649. }
  650. extern __inline __m512i
  651. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  652. _mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
  653. {
  654. return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
  655. (__v8di) __B,
  656. (__v8di) __W,
  657. (__mmask8) __U);
  658. }
  659. extern __inline __m512i
  660. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  661. _mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
  662. {
  663. return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
  664. (__v8di) __B,
  665. (__v8di)
  666. _mm512_setzero_si512 (),
  667. (__mmask8) __U);
  668. }
  669. extern __inline __m512i
  670. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  671. _mm512_sub_epi64 (__m512i __A, __m512i __B)
  672. {
  673. return (__m512i) ((__v8du) __A - (__v8du) __B);
  674. }
  675. extern __inline __m512i
  676. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  677. _mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
  678. {
  679. return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
  680. (__v8di) __B,
  681. (__v8di) __W,
  682. (__mmask8) __U);
  683. }
  684. extern __inline __m512i
  685. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  686. _mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
  687. {
  688. return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
  689. (__v8di) __B,
  690. (__v8di)
  691. _mm512_setzero_si512 (),
  692. (__mmask8) __U);
  693. }
  694. extern __inline __m512i
  695. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  696. _mm512_sllv_epi64 (__m512i __X, __m512i __Y)
  697. {
  698. return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
  699. (__v8di) __Y,
  700. (__v8di)
  701. _mm512_undefined_pd (),
  702. (__mmask8) -1);
  703. }
  704. extern __inline __m512i
  705. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  706. _mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
  707. {
  708. return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
  709. (__v8di) __Y,
  710. (__v8di) __W,
  711. (__mmask8) __U);
  712. }
  713. extern __inline __m512i
  714. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  715. _mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
  716. {
  717. return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
  718. (__v8di) __Y,
  719. (__v8di)
  720. _mm512_setzero_si512 (),
  721. (__mmask8) __U);
  722. }
  723. extern __inline __m512i
  724. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  725. _mm512_srav_epi64 (__m512i __X, __m512i __Y)
  726. {
  727. return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
  728. (__v8di) __Y,
  729. (__v8di)
  730. _mm512_undefined_epi32 (),
  731. (__mmask8) -1);
  732. }
  733. extern __inline __m512i
  734. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  735. _mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
  736. {
  737. return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
  738. (__v8di) __Y,
  739. (__v8di) __W,
  740. (__mmask8) __U);
  741. }
  742. extern __inline __m512i
  743. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  744. _mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
  745. {
  746. return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
  747. (__v8di) __Y,
  748. (__v8di)
  749. _mm512_setzero_si512 (),
  750. (__mmask8) __U);
  751. }
  752. extern __inline __m512i
  753. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  754. _mm512_srlv_epi64 (__m512i __X, __m512i __Y)
  755. {
  756. return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
  757. (__v8di) __Y,
  758. (__v8di)
  759. _mm512_undefined_epi32 (),
  760. (__mmask8) -1);
  761. }
  762. extern __inline __m512i
  763. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  764. _mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
  765. {
  766. return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
  767. (__v8di) __Y,
  768. (__v8di) __W,
  769. (__mmask8) __U);
  770. }
  771. extern __inline __m512i
  772. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  773. _mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
  774. {
  775. return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
  776. (__v8di) __Y,
  777. (__v8di)
  778. _mm512_setzero_si512 (),
  779. (__mmask8) __U);
  780. }
  781. extern __inline __m512i
  782. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  783. _mm512_add_epi32 (__m512i __A, __m512i __B)
  784. {
  785. return (__m512i) ((__v16su) __A + (__v16su) __B);
  786. }
  787. extern __inline __m512i
  788. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  789. _mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
  790. {
  791. return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
  792. (__v16si) __B,
  793. (__v16si) __W,
  794. (__mmask16) __U);
  795. }
  796. extern __inline __m512i
  797. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  798. _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
  799. {
  800. return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
  801. (__v16si) __B,
  802. (__v16si)
  803. _mm512_setzero_si512 (),
  804. (__mmask16) __U);
  805. }
  806. extern __inline __m512i
  807. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  808. _mm512_mul_epi32 (__m512i __X, __m512i __Y)
  809. {
  810. return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
  811. (__v16si) __Y,
  812. (__v8di)
  813. _mm512_undefined_epi32 (),
  814. (__mmask8) -1);
  815. }
  816. extern __inline __m512i
  817. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  818. _mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
  819. {
  820. return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
  821. (__v16si) __Y,
  822. (__v8di) __W, __M);
  823. }
  824. extern __inline __m512i
  825. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  826. _mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
  827. {
  828. return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
  829. (__v16si) __Y,
  830. (__v8di)
  831. _mm512_setzero_si512 (),
  832. __M);
  833. }
  834. extern __inline __m512i
  835. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  836. _mm512_sub_epi32 (__m512i __A, __m512i __B)
  837. {
  838. return (__m512i) ((__v16su) __A - (__v16su) __B);
  839. }
  840. extern __inline __m512i
  841. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  842. _mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
  843. {
  844. return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
  845. (__v16si) __B,
  846. (__v16si) __W,
  847. (__mmask16) __U);
  848. }
  849. extern __inline __m512i
  850. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  851. _mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
  852. {
  853. return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
  854. (__v16si) __B,
  855. (__v16si)
  856. _mm512_setzero_si512 (),
  857. (__mmask16) __U);
  858. }
  859. extern __inline __m512i
  860. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  861. _mm512_mul_epu32 (__m512i __X, __m512i __Y)
  862. {
  863. return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
  864. (__v16si) __Y,
  865. (__v8di)
  866. _mm512_undefined_epi32 (),
  867. (__mmask8) -1);
  868. }
  869. extern __inline __m512i
  870. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  871. _mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
  872. {
  873. return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
  874. (__v16si) __Y,
  875. (__v8di) __W, __M);
  876. }
  877. extern __inline __m512i
  878. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  879. _mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
  880. {
  881. return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
  882. (__v16si) __Y,
  883. (__v8di)
  884. _mm512_setzero_si512 (),
  885. __M);
  886. }
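/* Usage sketch (illustrative only; example_* is not part of the Intel
   API): _mm512_mul_epi32/_mm512_mul_epu32 multiply only the even-numbered
   32-bit elements (the low half of each 64-bit lane) and return eight
   full 64-bit products, signed or unsigned respectively.  */
static __inline__ __m512i
example_widening_square (__m512i v)
{
  return _mm512_mul_epi32 (v, v);  /* 64-bit lane i = (long long) v[2*i] * v[2*i] */
}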
  887. #ifdef __OPTIMIZE__
  888. extern __inline __m512i
  889. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  890. _mm512_slli_epi64 (__m512i __A, unsigned int __B)
  891. {
  892. return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
  893. (__v8di)
  894. _mm512_undefined_epi32 (),
  895. (__mmask8) -1);
  896. }
  897. extern __inline __m512i
  898. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  899. _mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
  900. unsigned int __B)
  901. {
  902. return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
  903. (__v8di) __W,
  904. (__mmask8) __U);
  905. }
  906. extern __inline __m512i
  907. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  908. _mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
  909. {
  910. return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
  911. (__v8di)
  912. _mm512_setzero_si512 (),
  913. (__mmask8) __U);
  914. }
  915. #else
  916. #define _mm512_slli_epi64(X, C) \
  917. ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
  918. (__v8di)(__m512i)_mm512_undefined_epi32 (),\
  919. (__mmask8)-1))
  920. #define _mm512_mask_slli_epi64(W, U, X, C) \
  921. ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
  922. (__v8di)(__m512i)(W),\
  923. (__mmask8)(U)))
  924. #define _mm512_maskz_slli_epi64(U, X, C) \
  925. ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
  926. (__v8di)(__m512i)_mm512_setzero_si512 (),\
  927. (__mmask8)(U)))
  928. #endif
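/* Note on the #ifdef __OPTIMIZE__ pattern used for the immediate-count
   shifts: the shift count is meant to be a compile-time constant.  With
   optimization enabled the always-inline wrappers above still present it
   to the builtin as a constant; without optimization the macro forms
   substitute it textually instead.  The intrinsic is used the same way in
   either case.  Usage sketch (illustrative only; example_* is not part of
   the Intel API): */
static __inline__ __m512i
example_shift_left_by_four (__m512i v)
{
  return _mm512_slli_epi64 (v, 4);  /* each 64-bit element shifted left by 4 */
}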
  929. extern __inline __m512i
  930. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  931. _mm512_sll_epi64 (__m512i __A, __m128i __B)
  932. {
  933. return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
  934. (__v2di) __B,
  935. (__v8di)
  936. _mm512_undefined_epi32 (),
  937. (__mmask8) -1);
  938. }
  939. extern __inline __m512i
  940. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  941. _mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
  942. {
  943. return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
  944. (__v2di) __B,
  945. (__v8di) __W,
  946. (__mmask8) __U);
  947. }
  948. extern __inline __m512i
  949. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  950. _mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
  951. {
  952. return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
  953. (__v2di) __B,
  954. (__v8di)
  955. _mm512_setzero_si512 (),
  956. (__mmask8) __U);
  957. }
  958. #ifdef __OPTIMIZE__
  959. extern __inline __m512i
  960. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  961. _mm512_srli_epi64 (__m512i __A, unsigned int __B)
  962. {
  963. return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
  964. (__v8di)
  965. _mm512_undefined_epi32 (),
  966. (__mmask8) -1);
  967. }
  968. extern __inline __m512i
  969. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  970. _mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
  971. __m512i __A, unsigned int __B)
  972. {
  973. return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
  974. (__v8di) __W,
  975. (__mmask8) __U);
  976. }
  977. extern __inline __m512i
  978. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  979. _mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
  980. {
  981. return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
  982. (__v8di)
  983. _mm512_setzero_si512 (),
  984. (__mmask8) __U);
  985. }
  986. #else
  987. #define _mm512_srli_epi64(X, C) \
  988. ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
  989. (__v8di)(__m512i)_mm512_undefined_epi32 (),\
  990. (__mmask8)-1))
  991. #define _mm512_mask_srli_epi64(W, U, X, C) \
  992. ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
  993. (__v8di)(__m512i)(W),\
  994. (__mmask8)(U)))
  995. #define _mm512_maskz_srli_epi64(U, X, C) \
  996. ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
  997. (__v8di)(__m512i)_mm512_setzero_si512 (),\
  998. (__mmask8)(U)))
  999. #endif
  1000. extern __inline __m512i
  1001. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1002. _mm512_srl_epi64 (__m512i __A, __m128i __B)
  1003. {
  1004. return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
  1005. (__v2di) __B,
  1006. (__v8di)
  1007. _mm512_undefined_epi32 (),
  1008. (__mmask8) -1);
  1009. }
  1010. extern __inline __m512i
  1011. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1012. _mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
  1013. {
  1014. return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
  1015. (__v2di) __B,
  1016. (__v8di) __W,
  1017. (__mmask8) __U);
  1018. }
  1019. extern __inline __m512i
  1020. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1021. _mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
  1022. {
  1023. return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
  1024. (__v2di) __B,
  1025. (__v8di)
  1026. _mm512_setzero_si512 (),
  1027. (__mmask8) __U);
  1028. }
  1029. #ifdef __OPTIMIZE__
  1030. extern __inline __m512i
  1031. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1032. _mm512_srai_epi64 (__m512i __A, unsigned int __B)
  1033. {
  1034. return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
  1035. (__v8di)
  1036. _mm512_undefined_epi32 (),
  1037. (__mmask8) -1);
  1038. }
  1039. extern __inline __m512i
  1040. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1041. _mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
  1042. unsigned int __B)
  1043. {
  1044. return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
  1045. (__v8di) __W,
  1046. (__mmask8) __U);
  1047. }
  1048. extern __inline __m512i
  1049. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1050. _mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
  1051. {
  1052. return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
  1053. (__v8di)
  1054. _mm512_setzero_si512 (),
  1055. (__mmask8) __U);
  1056. }
  1057. #else
  1058. #define _mm512_srai_epi64(X, C) \
  1059. ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
  1060. (__v8di)(__m512i)_mm512_undefined_epi32 (),\
  1061. (__mmask8)-1))
  1062. #define _mm512_mask_srai_epi64(W, U, X, C) \
  1063. ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
  1064. (__v8di)(__m512i)(W),\
  1065. (__mmask8)(U)))
  1066. #define _mm512_maskz_srai_epi64(U, X, C) \
  1067. ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
  1068. (__v8di)(__m512i)_mm512_setzero_si512 (),\
  1069. (__mmask8)(U)))
  1070. #endif
  1071. extern __inline __m512i
  1072. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1073. _mm512_sra_epi64 (__m512i __A, __m128i __B)
  1074. {
  1075. return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
  1076. (__v2di) __B,
  1077. (__v8di)
  1078. _mm512_undefined_epi32 (),
  1079. (__mmask8) -1);
  1080. }
  1081. extern __inline __m512i
  1082. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1083. _mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
  1084. {
  1085. return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
  1086. (__v2di) __B,
  1087. (__v8di) __W,
  1088. (__mmask8) __U);
  1089. }
  1090. extern __inline __m512i
  1091. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1092. _mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
  1093. {
  1094. return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
  1095. (__v2di) __B,
  1096. (__v8di)
  1097. _mm512_setzero_si512 (),
  1098. (__mmask8) __U);
  1099. }
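/* Usage sketch (illustrative only; example_* is not part of the Intel
   API): three count forms exist for 64-bit shifts: slli/srli/srai take a
   scalar constant, sll/srl/sra read the count from the low 64 bits of an
   __m128i, and the earlier sllv/srlv/srav variants take one count per
   element.  _mm512_sra_epi64/_mm512_srai_epi64 are arithmetic
   (sign-preserving) right shifts, first available for 64-bit elements
   with AVX-512F.  */
static __inline__ __m512i
example_arith_shift_right (__m512i v, __m128i count)
{
  return _mm512_sra_epi64 (v, count);  /* same count applied to all elements */
}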
  1100. #ifdef __OPTIMIZE__
  1101. extern __inline __m512i
  1102. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1103. _mm512_slli_epi32 (__m512i __A, unsigned int __B)
  1104. {
  1105. return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
  1106. (__v16si)
  1107. _mm512_undefined_epi32 (),
  1108. (__mmask16) -1);
  1109. }
  1110. extern __inline __m512i
  1111. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1112. _mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
  1113. unsigned int __B)
  1114. {
  1115. return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
  1116. (__v16si) __W,
  1117. (__mmask16) __U);
  1118. }
  1119. extern __inline __m512i
  1120. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1121. _mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
  1122. {
  1123. return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
  1124. (__v16si)
  1125. _mm512_setzero_si512 (),
  1126. (__mmask16) __U);
  1127. }
  1128. #else
  1129. #define _mm512_slli_epi32(X, C) \
  1130. ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
  1131. (__v16si)(__m512i)_mm512_undefined_epi32 (),\
  1132. (__mmask16)-1))
  1133. #define _mm512_mask_slli_epi32(W, U, X, C) \
  1134. ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
  1135. (__v16si)(__m512i)(W),\
  1136. (__mmask16)(U)))
  1137. #define _mm512_maskz_slli_epi32(U, X, C) \
  1138. ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
  1139. (__v16si)(__m512i)_mm512_setzero_si512 (),\
  1140. (__mmask16)(U)))
  1141. #endif
  1142. extern __inline __m512i
  1143. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1144. _mm512_sll_epi32 (__m512i __A, __m128i __B)
  1145. {
  1146. return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
  1147. (__v4si) __B,
  1148. (__v16si)
  1149. _mm512_undefined_epi32 (),
  1150. (__mmask16) -1);
  1151. }
  1152. extern __inline __m512i
  1153. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1154. _mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
  1155. {
  1156. return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
  1157. (__v4si) __B,
  1158. (__v16si) __W,
  1159. (__mmask16) __U);
  1160. }
  1161. extern __inline __m512i
  1162. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1163. _mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
  1164. {
  1165. return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
  1166. (__v4si) __B,
  1167. (__v16si)
  1168. _mm512_setzero_si512 (),
  1169. (__mmask16) __U);
  1170. }
  1171. #ifdef __OPTIMIZE__
  1172. extern __inline __m512i
  1173. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1174. _mm512_srli_epi32 (__m512i __A, unsigned int __B)
  1175. {
  1176. return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
  1177. (__v16si)
  1178. _mm512_undefined_epi32 (),
  1179. (__mmask16) -1);
  1180. }
  1181. extern __inline __m512i
  1182. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1183. _mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
  1184. __m512i __A, unsigned int __B)
  1185. {
  1186. return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
  1187. (__v16si) __W,
  1188. (__mmask16) __U);
  1189. }
  1190. extern __inline __m512i
  1191. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1192. _mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
  1193. {
  1194. return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
  1195. (__v16si)
  1196. _mm512_setzero_si512 (),
  1197. (__mmask16) __U);
  1198. }
  1199. #else
  1200. #define _mm512_srli_epi32(X, C) \
  1201. ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
  1202. (__v16si)(__m512i)_mm512_undefined_epi32 (),\
  1203. (__mmask16)-1))
  1204. #define _mm512_mask_srli_epi32(W, U, X, C) \
  1205. ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
  1206. (__v16si)(__m512i)(W),\
  1207. (__mmask16)(U)))
  1208. #define _mm512_maskz_srli_epi32(U, X, C) \
  1209. ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
  1210. (__v16si)(__m512i)_mm512_setzero_si512 (),\
  1211. (__mmask16)(U)))
  1212. #endif
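/* Editorial note, not part of the original header: _mm512_srli_epi32 is the
   logical (zero-filling) right shift, in contrast to the arithmetic
   (sign-filling) _mm512_srai_epi32 further below.  Counts of 32 or more
   zero the lane for the logical shifts, while the arithmetic shifts
   saturate to the sign fill.  For example:

       __m512i v = _mm512_set1_epi32 (-16);          0xfffffff0 per lane
       __m512i r = _mm512_srli_epi32 (v, 2);      => 0x3ffffffc per lane
*/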
  1213. extern __inline __m512i
  1214. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1215. _mm512_srl_epi32 (__m512i __A, __m128i __B)
  1216. {
  1217. return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
  1218. (__v4si) __B,
  1219. (__v16si)
  1220. _mm512_undefined_epi32 (),
  1221. (__mmask16) -1);
  1222. }
  1223. extern __inline __m512i
  1224. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1225. _mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
  1226. {
  1227. return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
  1228. (__v4si) __B,
  1229. (__v16si) __W,
  1230. (__mmask16) __U);
  1231. }
  1232. extern __inline __m512i
  1233. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1234. _mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
  1235. {
  1236. return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
  1237. (__v4si) __B,
  1238. (__v16si)
  1239. _mm512_setzero_si512 (),
  1240. (__mmask16) __U);
  1241. }
  1242. #ifdef __OPTIMIZE__
  1243. extern __inline __m512i
  1244. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1245. _mm512_srai_epi32 (__m512i __A, unsigned int __B)
  1246. {
  1247. return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
  1248. (__v16si)
  1249. _mm512_undefined_epi32 (),
  1250. (__mmask16) -1);
  1251. }
  1252. extern __inline __m512i
  1253. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1254. _mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
  1255. unsigned int __B)
  1256. {
  1257. return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
  1258. (__v16si) __W,
  1259. (__mmask16) __U);
  1260. }
  1261. extern __inline __m512i
  1262. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1263. _mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
  1264. {
  1265. return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
  1266. (__v16si)
  1267. _mm512_setzero_si512 (),
  1268. (__mmask16) __U);
  1269. }
  1270. #else
  1271. #define _mm512_srai_epi32(X, C) \
  1272. ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
  1273. (__v16si)(__m512i)_mm512_undefined_epi32 (),\
  1274. (__mmask16)-1))
  1275. #define _mm512_mask_srai_epi32(W, U, X, C) \
  1276. ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
  1277. (__v16si)(__m512i)(W),\
  1278. (__mmask16)(U)))
  1279. #define _mm512_maskz_srai_epi32(U, X, C) \
  1280. ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
  1281. (__v16si)(__m512i)_mm512_setzero_si512 (),\
  1282. (__mmask16)(U)))
  1283. #endif
  1284. extern __inline __m512i
  1285. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1286. _mm512_sra_epi32 (__m512i __A, __m128i __B)
  1287. {
  1288. return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
  1289. (__v4si) __B,
  1290. (__v16si)
  1291. _mm512_undefined_epi32 (),
  1292. (__mmask16) -1);
  1293. }
  1294. extern __inline __m512i
  1295. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1296. _mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
  1297. {
  1298. return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
  1299. (__v4si) __B,
  1300. (__v16si) __W,
  1301. (__mmask16) __U);
  1302. }
  1303. extern __inline __m512i
  1304. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1305. _mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
  1306. {
  1307. return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
  1308. (__v4si) __B,
  1309. (__v16si)
  1310. _mm512_setzero_si512 (),
  1311. (__mmask16) __U);
  1312. }
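/* Editorial sketch, not part of the original header: the arithmetic right
   shifts preserve the sign of each lane:

       __m512i r = _mm512_srai_epi32 (_mm512_set1_epi32 (-16), 2);  => -4 per lane
*/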
  1313. #ifdef __OPTIMIZE__
  1314. extern __inline __m128d
  1315. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1316. _mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
  1317. {
  1318. return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
  1319. (__v2df) __B,
  1320. __R);
  1321. }
  1322. extern __inline __m128d
  1323. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1324. _mm_mask_add_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
  1325. __m128d __B, const int __R)
  1326. {
  1327. return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
  1328. (__v2df) __B,
  1329. (__v2df) __W,
  1330. (__mmask8) __U, __R);
  1331. }
  1332. extern __inline __m128d
  1333. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1334. _mm_maskz_add_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
  1335. const int __R)
  1336. {
  1337. return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
  1338. (__v2df) __B,
  1339. (__v2df)
  1340. _mm_setzero_pd (),
  1341. (__mmask8) __U, __R);
  1342. }
  1343. extern __inline __m128
  1344. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1345. _mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
  1346. {
  1347. return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
  1348. (__v4sf) __B,
  1349. __R);
  1350. }
  1351. extern __inline __m128
  1352. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1353. _mm_mask_add_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
  1354. __m128 __B, const int __R)
  1355. {
  1356. return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
  1357. (__v4sf) __B,
  1358. (__v4sf) __W,
  1359. (__mmask8) __U, __R);
  1360. }
  1361. extern __inline __m128
  1362. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1363. _mm_maskz_add_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
  1364. const int __R)
  1365. {
  1366. return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
  1367. (__v4sf) __B,
  1368. (__v4sf)
  1369. _mm_setzero_ps (),
  1370. (__mmask8) __U, __R);
  1371. }
  1372. extern __inline __m128d
  1373. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1374. _mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
  1375. {
  1376. return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
  1377. (__v2df) __B,
  1378. __R);
  1379. }
  1380. extern __inline __m128d
  1381. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1382. _mm_mask_sub_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
  1383. __m128d __B, const int __R)
  1384. {
  1385. return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
  1386. (__v2df) __B,
  1387. (__v2df) __W,
  1388. (__mmask8) __U, __R);
  1389. }
  1390. extern __inline __m128d
  1391. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1392. _mm_maskz_sub_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
  1393. const int __R)
  1394. {
  1395. return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
  1396. (__v2df) __B,
  1397. (__v2df)
  1398. _mm_setzero_pd (),
  1399. (__mmask8) __U, __R);
  1400. }
  1401. extern __inline __m128
  1402. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1403. _mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
  1404. {
  1405. return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
  1406. (__v4sf) __B,
  1407. __R);
  1408. }
  1409. extern __inline __m128
  1410. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1411. _mm_mask_sub_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
  1412. __m128 __B, const int __R)
  1413. {
  1414. return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
  1415. (__v4sf) __B,
  1416. (__v4sf) __W,
  1417. (__mmask8) __U, __R);
  1418. }
  1419. extern __inline __m128
  1420. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1421. _mm_maskz_sub_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
  1422. const int __R)
  1423. {
  1424. return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
  1425. (__v4sf) __B,
  1426. (__v4sf)
  1427. _mm_setzero_ps (),
  1428. (__mmask8) __U, __R);
  1429. }
  1430. #else
  1431. #define _mm_add_round_sd(A, B, C) \
  1432. (__m128d)__builtin_ia32_addsd_round(A, B, C)
  1433. #define _mm_mask_add_round_sd(W, U, A, B, C) \
  1434. (__m128d)__builtin_ia32_addsd_mask_round(A, B, W, U, C)
  1435. #define _mm_maskz_add_round_sd(U, A, B, C) \
  1436. (__m128d)__builtin_ia32_addsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
  1437. #define _mm_add_round_ss(A, B, C) \
  1438. (__m128)__builtin_ia32_addss_round(A, B, C)
  1439. #define _mm_mask_add_round_ss(W, U, A, B, C) \
  1440. (__m128)__builtin_ia32_addss_mask_round(A, B, W, U, C)
  1441. #define _mm_maskz_add_round_ss(U, A, B, C) \
  1442. (__m128)__builtin_ia32_addss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
  1443. #define _mm_sub_round_sd(A, B, C) \
  1444. (__m128d)__builtin_ia32_subsd_round(A, B, C)
  1445. #define _mm_mask_sub_round_sd(W, U, A, B, C) \
  1446. (__m128d)__builtin_ia32_subsd_mask_round(A, B, W, U, C)
  1447. #define _mm_maskz_sub_round_sd(U, A, B, C) \
  1448. (__m128d)__builtin_ia32_subsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
  1449. #define _mm_sub_round_ss(A, B, C) \
  1450. (__m128)__builtin_ia32_subss_round(A, B, C)
  1451. #define _mm_mask_sub_round_ss(W, U, A, B, C) \
  1452. (__m128)__builtin_ia32_subss_mask_round(A, B, W, U, C)
  1453. #define _mm_maskz_sub_round_ss(U, A, B, C) \
  1454. (__m128)__builtin_ia32_subss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
  1455. #endif
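/* Editorial note, not part of the original header: the *_round_sd/_ss
   intrinsics operate on the low element only (the upper elements are copied
   from __A) and take an explicit rounding/exception control.  A sketch using
   the standard <immintrin.h> constants:

       __m128d a = _mm_set_sd (1.0), b = _mm_set_sd (3.0);
       __m128d s = _mm_add_round_sd (a, b,
                                     _MM_FROUND_TO_NEAREST_INT
                                     | _MM_FROUND_NO_EXC);   => low lane 4.0

   _MM_FROUND_CUR_DIRECTION selects the current MXCSR rounding mode instead.
*/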
  1456. #ifdef __OPTIMIZE__
  1457. extern __inline __m512i
  1458. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1459. _mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C,
  1460. const int __imm)
  1461. {
  1462. return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
  1463. (__v8di) __B,
  1464. (__v8di) __C, __imm,
  1465. (__mmask8) -1);
  1466. }
  1467. extern __inline __m512i
  1468. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1469. _mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
  1470. __m512i __C, const int __imm)
  1471. {
  1472. return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
  1473. (__v8di) __B,
  1474. (__v8di) __C, __imm,
  1475. (__mmask8) __U);
  1476. }
  1477. extern __inline __m512i
  1478. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1479. _mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
  1480. __m512i __C, const int __imm)
  1481. {
  1482. return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
  1483. (__v8di) __B,
  1484. (__v8di) __C,
  1485. __imm, (__mmask8) __U);
  1486. }
  1487. extern __inline __m512i
  1488. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1489. _mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C,
  1490. const int __imm)
  1491. {
  1492. return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
  1493. (__v16si) __B,
  1494. (__v16si) __C,
  1495. __imm, (__mmask16) -1);
  1496. }
  1497. extern __inline __m512i
  1498. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1499. _mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
  1500. __m512i __C, const int __imm)
  1501. {
  1502. return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
  1503. (__v16si) __B,
  1504. (__v16si) __C,
  1505. __imm, (__mmask16) __U);
  1506. }
  1507. extern __inline __m512i
  1508. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1509. _mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
  1510. __m512i __C, const int __imm)
  1511. {
  1512. return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
  1513. (__v16si) __B,
  1514. (__v16si) __C,
  1515. __imm, (__mmask16) __U);
  1516. }
  1517. #else
  1518. #define _mm512_ternarylogic_epi64(A, B, C, I) \
  1519. ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \
  1520. (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)-1))
  1521. #define _mm512_mask_ternarylogic_epi64(A, U, B, C, I) \
  1522. ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \
  1523. (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
  1524. #define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I) \
  1525. ((__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di)(__m512i)(A), \
  1526. (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
  1527. #define _mm512_ternarylogic_epi32(A, B, C, I) \
  1528. ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \
  1529. (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
  1530. (__mmask16)-1))
  1531. #define _mm512_mask_ternarylogic_epi32(A, U, B, C, I) \
  1532. ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \
  1533. (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
  1534. (__mmask16)(U)))
  1535. #define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I) \
  1536. ((__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si)(__m512i)(A), \
  1537. (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
  1538. (__mmask16)(U)))
  1539. #endif
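/* Editorial note, not part of the original header: the ternary-logic
   immediate acts as an 8-entry truth table indexed by
   (bit of __A) << 2 | (bit of __B) << 1 | (bit of __C).  For instance,
   0x96 yields the three-way XOR and 0xe8 the bitwise majority:

       __m512i x = _mm512_ternarylogic_epi64 (a, b, c, 0x96);   => a ^ b ^ c
*/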
  1540. extern __inline __m512d
  1541. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1542. _mm512_rcp14_pd (__m512d __A)
  1543. {
  1544. return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
  1545. (__v8df)
  1546. _mm512_undefined_pd (),
  1547. (__mmask8) -1);
  1548. }
  1549. extern __inline __m512d
  1550. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1551. _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
  1552. {
  1553. return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
  1554. (__v8df) __W,
  1555. (__mmask8) __U);
  1556. }
  1557. extern __inline __m512d
  1558. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1559. _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
  1560. {
  1561. return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
  1562. (__v8df)
  1563. _mm512_setzero_pd (),
  1564. (__mmask8) __U);
  1565. }
  1566. extern __inline __m512
  1567. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1568. _mm512_rcp14_ps (__m512 __A)
  1569. {
  1570. return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
  1571. (__v16sf)
  1572. _mm512_undefined_ps (),
  1573. (__mmask16) -1);
  1574. }
  1575. extern __inline __m512
  1576. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1577. _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
  1578. {
  1579. return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
  1580. (__v16sf) __W,
  1581. (__mmask16) __U);
  1582. }
  1583. extern __inline __m512
  1584. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1585. _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
  1586. {
  1587. return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
  1588. (__v16sf)
  1589. _mm512_setzero_ps (),
  1590. (__mmask16) __U);
  1591. }
  1592. extern __inline __m128d
  1593. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1594. _mm_rcp14_sd (__m128d __A, __m128d __B)
  1595. {
  1596. return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
  1597. (__v2df) __A);
  1598. }
  1599. extern __inline __m128d
  1600. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1601. _mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
  1602. {
  1603. return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
  1604. (__v2df) __A,
  1605. (__v2df) __W,
  1606. (__mmask8) __U);
  1607. }
  1608. extern __inline __m128d
  1609. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1610. _mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
  1611. {
  1612. return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
  1613. (__v2df) __A,
1614. (__v2df) _mm_setzero_pd (),
  1615. (__mmask8) __U);
  1616. }
  1617. extern __inline __m128
  1618. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1619. _mm_rcp14_ss (__m128 __A, __m128 __B)
  1620. {
  1621. return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
  1622. (__v4sf) __A);
  1623. }
  1624. extern __inline __m128
  1625. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1626. _mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  1627. {
  1628. return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
  1629. (__v4sf) __A,
  1630. (__v4sf) __W,
  1631. (__mmask8) __U);
  1632. }
  1633. extern __inline __m128
  1634. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1635. _mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
  1636. {
  1637. return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
  1638. (__v4sf) __A,
  1639. (__v4sf) _mm_setzero_ps (),
  1640. (__mmask8) __U);
  1641. }
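/* Editorial note, not part of the original header: the rcp14 family returns
   an approximate reciprocal with relative error no worse than 2^-14; the
   scalar forms act on the low element of __B and copy the upper element
   from __A (hence the swapped builtin arguments above).  For example:

       __m512 r = _mm512_rcp14_ps (_mm512_set1_ps (8.0f));   => ~0.125 per lane
*/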
  1642. extern __inline __m512d
  1643. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1644. _mm512_rsqrt14_pd (__m512d __A)
  1645. {
  1646. return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
  1647. (__v8df)
  1648. _mm512_undefined_pd (),
  1649. (__mmask8) -1);
  1650. }
  1651. extern __inline __m512d
  1652. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1653. _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
  1654. {
  1655. return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
  1656. (__v8df) __W,
  1657. (__mmask8) __U);
  1658. }
  1659. extern __inline __m512d
  1660. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1661. _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
  1662. {
  1663. return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
  1664. (__v8df)
  1665. _mm512_setzero_pd (),
  1666. (__mmask8) __U);
  1667. }
  1668. extern __inline __m512
  1669. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1670. _mm512_rsqrt14_ps (__m512 __A)
  1671. {
  1672. return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
  1673. (__v16sf)
  1674. _mm512_undefined_ps (),
  1675. (__mmask16) -1);
  1676. }
  1677. extern __inline __m512
  1678. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1679. _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
  1680. {
  1681. return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
  1682. (__v16sf) __W,
  1683. (__mmask16) __U);
  1684. }
  1685. extern __inline __m512
  1686. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1687. _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
  1688. {
  1689. return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
  1690. (__v16sf)
  1691. _mm512_setzero_ps (),
  1692. (__mmask16) __U);
  1693. }
  1694. extern __inline __m128d
  1695. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1696. _mm_rsqrt14_sd (__m128d __A, __m128d __B)
  1697. {
  1698. return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
  1699. (__v2df) __A);
  1700. }
  1701. extern __inline __m128d
  1702. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1703. _mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
  1704. {
  1705. return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
  1706. (__v2df) __A,
  1707. (__v2df) __W,
  1708. (__mmask8) __U);
  1709. }
  1710. extern __inline __m128d
  1711. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1712. _mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
  1713. {
  1714. return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
  1715. (__v2df) __A,
  1716. (__v2df) _mm_setzero_pd (),
  1717. (__mmask8) __U);
  1718. }
  1719. extern __inline __m128
  1720. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1721. _mm_rsqrt14_ss (__m128 __A, __m128 __B)
  1722. {
  1723. return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
  1724. (__v4sf) __A);
  1725. }
  1726. extern __inline __m128
  1727. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1728. _mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  1729. {
  1730. return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
  1731. (__v4sf) __A,
  1732. (__v4sf) __W,
  1733. (__mmask8) __U);
  1734. }
  1735. extern __inline __m128
  1736. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1737. _mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
  1738. {
  1739. return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
  1740. (__v4sf) __A,
  1741. (__v4sf) _mm_setzero_ps (),
  1742. (__mmask8) __U);
  1743. }
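/* Editorial sketch, not part of the original header: rsqrt14 gives an
   approximate 1/sqrt(x) with relative error no worse than 2^-14.  When more
   accuracy is needed, one Newton-Raphson step is a common refinement (this
   step is not provided by the header):

       __m512 y = _mm512_rsqrt14_ps (x);
       y = _mm512_mul_ps (y, _mm512_sub_ps (_mm512_set1_ps (1.5f),
             _mm512_mul_ps (_mm512_set1_ps (0.5f),
                            _mm512_mul_ps (x, _mm512_mul_ps (y, y)))));
*/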
  1744. #ifdef __OPTIMIZE__
  1745. extern __inline __m512d
  1746. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1747. _mm512_sqrt_round_pd (__m512d __A, const int __R)
  1748. {
  1749. return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
  1750. (__v8df)
  1751. _mm512_undefined_pd (),
  1752. (__mmask8) -1, __R);
  1753. }
  1754. extern __inline __m512d
  1755. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1756. _mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
  1757. const int __R)
  1758. {
  1759. return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
  1760. (__v8df) __W,
  1761. (__mmask8) __U, __R);
  1762. }
  1763. extern __inline __m512d
  1764. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1765. _mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
  1766. {
  1767. return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
  1768. (__v8df)
  1769. _mm512_setzero_pd (),
  1770. (__mmask8) __U, __R);
  1771. }
  1772. extern __inline __m512
  1773. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1774. _mm512_sqrt_round_ps (__m512 __A, const int __R)
  1775. {
  1776. return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
  1777. (__v16sf)
  1778. _mm512_undefined_ps (),
  1779. (__mmask16) -1, __R);
  1780. }
  1781. extern __inline __m512
  1782. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1783. _mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
  1784. {
  1785. return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
  1786. (__v16sf) __W,
  1787. (__mmask16) __U, __R);
  1788. }
  1789. extern __inline __m512
  1790. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1791. _mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
  1792. {
  1793. return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
  1794. (__v16sf)
  1795. _mm512_setzero_ps (),
  1796. (__mmask16) __U, __R);
  1797. }
  1798. extern __inline __m128d
  1799. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1800. _mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
  1801. {
  1802. return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
  1803. (__v2df) __A,
  1804. (__v2df)
  1805. _mm_setzero_pd (),
  1806. (__mmask8) -1, __R);
  1807. }
  1808. extern __inline __m128d
  1809. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1810. _mm_mask_sqrt_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
  1811. const int __R)
  1812. {
  1813. return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
  1814. (__v2df) __A,
  1815. (__v2df) __W,
  1816. (__mmask8) __U, __R);
  1817. }
  1818. extern __inline __m128d
  1819. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1820. _mm_maskz_sqrt_round_sd (__mmask8 __U, __m128d __A, __m128d __B, const int __R)
  1821. {
  1822. return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
  1823. (__v2df) __A,
  1824. (__v2df)
  1825. _mm_setzero_pd (),
  1826. (__mmask8) __U, __R);
  1827. }
  1828. extern __inline __m128
  1829. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1830. _mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
  1831. {
  1832. return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
  1833. (__v4sf) __A,
  1834. (__v4sf)
  1835. _mm_setzero_ps (),
  1836. (__mmask8) -1, __R);
  1837. }
  1838. extern __inline __m128
  1839. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1840. _mm_mask_sqrt_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
  1841. const int __R)
  1842. {
  1843. return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
  1844. (__v4sf) __A,
  1845. (__v4sf) __W,
  1846. (__mmask8) __U, __R);
  1847. }
  1848. extern __inline __m128
  1849. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1850. _mm_maskz_sqrt_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
  1851. {
  1852. return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
  1853. (__v4sf) __A,
  1854. (__v4sf)
  1855. _mm_setzero_ps (),
  1856. (__mmask8) __U, __R);
  1857. }
  1858. #else
  1859. #define _mm512_sqrt_round_pd(A, C) \
  1860. (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, C)
  1861. #define _mm512_mask_sqrt_round_pd(W, U, A, C) \
  1862. (__m512d)__builtin_ia32_sqrtpd512_mask(A, W, U, C)
  1863. #define _mm512_maskz_sqrt_round_pd(U, A, C) \
  1864. (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
  1865. #define _mm512_sqrt_round_ps(A, C) \
  1866. (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_undefined_ps(), -1, C)
  1867. #define _mm512_mask_sqrt_round_ps(W, U, A, C) \
  1868. (__m512)__builtin_ia32_sqrtps512_mask(A, W, U, C)
  1869. #define _mm512_maskz_sqrt_round_ps(U, A, C) \
  1870. (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
  1871. #define _mm_sqrt_round_sd(A, B, C) \
  1872. (__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, \
  1873. (__v2df) _mm_setzero_pd (), -1, C)
  1874. #define _mm_mask_sqrt_round_sd(W, U, A, B, C) \
  1875. (__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, W, U, C)
  1876. #define _mm_maskz_sqrt_round_sd(U, A, B, C) \
  1877. (__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, \
  1878. (__v2df) _mm_setzero_pd (), U, C)
  1879. #define _mm_sqrt_round_ss(A, B, C) \
  1880. (__m128)__builtin_ia32_sqrtss_mask_round (B, A, \
  1881. (__v4sf) _mm_setzero_ps (), -1, C)
  1882. #define _mm_mask_sqrt_round_ss(W, U, A, B, C) \
  1883. (__m128)__builtin_ia32_sqrtss_mask_round (B, A, W, U, C)
  1884. #define _mm_maskz_sqrt_round_ss(U, A, B, C) \
  1885. (__m128)__builtin_ia32_sqrtss_mask_round (B, A, \
  1886. (__v4sf) _mm_setzero_ps (), U, C)
  1887. #endif
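/* Editorial note, not part of the original header: the sqrt *_round forms
   require the rounding operand to be a compile-time constant, which is why
   macro fall-backs are provided when __OPTIMIZE__ is not defined.  The
   scalar forms take the square root of the low element of __B and copy the
   upper element from __A.  Sketch:

       __m512d r = _mm512_sqrt_round_pd (v, _MM_FROUND_CUR_DIRECTION);
*/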
  1888. extern __inline __m512i
  1889. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1890. _mm512_cvtepi8_epi32 (__m128i __A)
  1891. {
  1892. return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
  1893. (__v16si)
  1894. _mm512_undefined_epi32 (),
  1895. (__mmask16) -1);
  1896. }
  1897. extern __inline __m512i
  1898. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1899. _mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
  1900. {
  1901. return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
  1902. (__v16si) __W,
  1903. (__mmask16) __U);
  1904. }
  1905. extern __inline __m512i
  1906. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1907. _mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
  1908. {
  1909. return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
  1910. (__v16si)
  1911. _mm512_setzero_si512 (),
  1912. (__mmask16) __U);
  1913. }
  1914. extern __inline __m512i
  1915. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1916. _mm512_cvtepi8_epi64 (__m128i __A)
  1917. {
  1918. return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
  1919. (__v8di)
  1920. _mm512_undefined_epi32 (),
  1921. (__mmask8) -1);
  1922. }
  1923. extern __inline __m512i
  1924. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1925. _mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
  1926. {
  1927. return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
  1928. (__v8di) __W,
  1929. (__mmask8) __U);
  1930. }
  1931. extern __inline __m512i
  1932. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1933. _mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
  1934. {
  1935. return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
  1936. (__v8di)
  1937. _mm512_setzero_si512 (),
  1938. (__mmask8) __U);
  1939. }
  1940. extern __inline __m512i
  1941. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1942. _mm512_cvtepi16_epi32 (__m256i __A)
  1943. {
  1944. return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
  1945. (__v16si)
  1946. _mm512_undefined_epi32 (),
  1947. (__mmask16) -1);
  1948. }
  1949. extern __inline __m512i
  1950. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1951. _mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
  1952. {
  1953. return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
  1954. (__v16si) __W,
  1955. (__mmask16) __U);
  1956. }
  1957. extern __inline __m512i
  1958. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1959. _mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
  1960. {
  1961. return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
  1962. (__v16si)
  1963. _mm512_setzero_si512 (),
  1964. (__mmask16) __U);
  1965. }
  1966. extern __inline __m512i
  1967. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1968. _mm512_cvtepi16_epi64 (__m128i __A)
  1969. {
  1970. return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
  1971. (__v8di)
  1972. _mm512_undefined_epi32 (),
  1973. (__mmask8) -1);
  1974. }
  1975. extern __inline __m512i
  1976. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1977. _mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
  1978. {
  1979. return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
  1980. (__v8di) __W,
  1981. (__mmask8) __U);
  1982. }
  1983. extern __inline __m512i
  1984. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1985. _mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
  1986. {
  1987. return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
  1988. (__v8di)
  1989. _mm512_setzero_si512 (),
  1990. (__mmask8) __U);
  1991. }
  1992. extern __inline __m512i
  1993. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1994. _mm512_cvtepi32_epi64 (__m256i __X)
  1995. {
  1996. return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
  1997. (__v8di)
  1998. _mm512_undefined_epi32 (),
  1999. (__mmask8) -1);
  2000. }
  2001. extern __inline __m512i
  2002. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2003. _mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
  2004. {
  2005. return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
  2006. (__v8di) __W,
  2007. (__mmask8) __U);
  2008. }
  2009. extern __inline __m512i
  2010. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2011. _mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
  2012. {
  2013. return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
  2014. (__v8di)
  2015. _mm512_setzero_si512 (),
  2016. (__mmask8) __U);
  2017. }
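/* Editorial note, not part of the original header: these are the
   sign-extending widenings.  _mm512_cvtepi8_epi32 widens all sixteen bytes
   of its __m128i source, while _mm512_cvtepi8_epi64 uses only the low eight
   bytes, since the result has just eight 64-bit lanes.  Sketch:

       __m512i d = _mm512_cvtepi8_epi32 (_mm_set1_epi8 (-1));   => -1 per lane
*/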
  2018. extern __inline __m512i
  2019. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2020. _mm512_cvtepu8_epi32 (__m128i __A)
  2021. {
  2022. return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
  2023. (__v16si)
  2024. _mm512_undefined_epi32 (),
  2025. (__mmask16) -1);
  2026. }
  2027. extern __inline __m512i
  2028. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2029. _mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
  2030. {
  2031. return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
  2032. (__v16si) __W,
  2033. (__mmask16) __U);
  2034. }
  2035. extern __inline __m512i
  2036. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2037. _mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
  2038. {
  2039. return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
  2040. (__v16si)
  2041. _mm512_setzero_si512 (),
  2042. (__mmask16) __U);
  2043. }
  2044. extern __inline __m512i
  2045. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2046. _mm512_cvtepu8_epi64 (__m128i __A)
  2047. {
  2048. return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
  2049. (__v8di)
  2050. _mm512_undefined_epi32 (),
  2051. (__mmask8) -1);
  2052. }
  2053. extern __inline __m512i
  2054. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2055. _mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
  2056. {
  2057. return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
  2058. (__v8di) __W,
  2059. (__mmask8) __U);
  2060. }
  2061. extern __inline __m512i
  2062. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2063. _mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
  2064. {
  2065. return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
  2066. (__v8di)
  2067. _mm512_setzero_si512 (),
  2068. (__mmask8) __U);
  2069. }
  2070. extern __inline __m512i
  2071. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2072. _mm512_cvtepu16_epi32 (__m256i __A)
  2073. {
  2074. return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
  2075. (__v16si)
  2076. _mm512_undefined_epi32 (),
  2077. (__mmask16) -1);
  2078. }
  2079. extern __inline __m512i
  2080. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2081. _mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
  2082. {
  2083. return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
  2084. (__v16si) __W,
  2085. (__mmask16) __U);
  2086. }
  2087. extern __inline __m512i
  2088. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2089. _mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
  2090. {
  2091. return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
  2092. (__v16si)
  2093. _mm512_setzero_si512 (),
  2094. (__mmask16) __U);
  2095. }
  2096. extern __inline __m512i
  2097. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2098. _mm512_cvtepu16_epi64 (__m128i __A)
  2099. {
  2100. return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
  2101. (__v8di)
  2102. _mm512_undefined_epi32 (),
  2103. (__mmask8) -1);
  2104. }
  2105. extern __inline __m512i
  2106. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2107. _mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
  2108. {
  2109. return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
  2110. (__v8di) __W,
  2111. (__mmask8) __U);
  2112. }
  2113. extern __inline __m512i
  2114. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2115. _mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
  2116. {
  2117. return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
  2118. (__v8di)
  2119. _mm512_setzero_si512 (),
  2120. (__mmask8) __U);
  2121. }
  2122. extern __inline __m512i
  2123. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2124. _mm512_cvtepu32_epi64 (__m256i __X)
  2125. {
  2126. return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
  2127. (__v8di)
  2128. _mm512_undefined_epi32 (),
  2129. (__mmask8) -1);
  2130. }
  2131. extern __inline __m512i
  2132. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2133. _mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
  2134. {
  2135. return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
  2136. (__v8di) __W,
  2137. (__mmask8) __U);
  2138. }
  2139. extern __inline __m512i
  2140. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2141. _mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
  2142. {
  2143. return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
  2144. (__v8di)
  2145. _mm512_setzero_si512 (),
  2146. (__mmask8) __U);
  2147. }
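/* Editorial note, not part of the original header: the cvtepu forms are the
   zero-extending counterparts of the cvtepi forms above.  Contrast:

       __m512i s = _mm512_cvtepi8_epi32 (_mm_set1_epi8 (-1));   => -1 per lane
       __m512i u = _mm512_cvtepu8_epi32 (_mm_set1_epi8 (-1));   => 255 per lane
*/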
  2148. #ifdef __OPTIMIZE__
  2149. extern __inline __m512d
  2150. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2151. _mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
  2152. {
  2153. return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
  2154. (__v8df) __B,
  2155. (__v8df)
  2156. _mm512_undefined_pd (),
  2157. (__mmask8) -1, __R);
  2158. }
  2159. extern __inline __m512d
  2160. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2161. _mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
  2162. __m512d __B, const int __R)
  2163. {
  2164. return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
  2165. (__v8df) __B,
  2166. (__v8df) __W,
  2167. (__mmask8) __U, __R);
  2168. }
  2169. extern __inline __m512d
  2170. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2171. _mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
  2172. const int __R)
  2173. {
  2174. return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
  2175. (__v8df) __B,
  2176. (__v8df)
  2177. _mm512_setzero_pd (),
  2178. (__mmask8) __U, __R);
  2179. }
  2180. extern __inline __m512
  2181. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2182. _mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
  2183. {
  2184. return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
  2185. (__v16sf) __B,
  2186. (__v16sf)
  2187. _mm512_undefined_ps (),
  2188. (__mmask16) -1, __R);
  2189. }
  2190. extern __inline __m512
  2191. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2192. _mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
  2193. __m512 __B, const int __R)
  2194. {
  2195. return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
  2196. (__v16sf) __B,
  2197. (__v16sf) __W,
  2198. (__mmask16) __U, __R);
  2199. }
  2200. extern __inline __m512
  2201. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2202. _mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
  2203. {
  2204. return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
  2205. (__v16sf) __B,
  2206. (__v16sf)
  2207. _mm512_setzero_ps (),
  2208. (__mmask16) __U, __R);
  2209. }
  2210. extern __inline __m512d
  2211. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2212. _mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
  2213. {
  2214. return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
  2215. (__v8df) __B,
  2216. (__v8df)
  2217. _mm512_undefined_pd (),
  2218. (__mmask8) -1, __R);
  2219. }
  2220. extern __inline __m512d
  2221. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2222. _mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
  2223. __m512d __B, const int __R)
  2224. {
  2225. return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
  2226. (__v8df) __B,
  2227. (__v8df) __W,
  2228. (__mmask8) __U, __R);
  2229. }
  2230. extern __inline __m512d
  2231. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2232. _mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
  2233. const int __R)
  2234. {
  2235. return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
  2236. (__v8df) __B,
  2237. (__v8df)
  2238. _mm512_setzero_pd (),
  2239. (__mmask8) __U, __R);
  2240. }
  2241. extern __inline __m512
  2242. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2243. _mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
  2244. {
  2245. return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
  2246. (__v16sf) __B,
  2247. (__v16sf)
  2248. _mm512_undefined_ps (),
  2249. (__mmask16) -1, __R);
  2250. }
  2251. extern __inline __m512
  2252. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2253. _mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
  2254. __m512 __B, const int __R)
  2255. {
  2256. return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
  2257. (__v16sf) __B,
  2258. (__v16sf) __W,
  2259. (__mmask16) __U, __R);
  2260. }
  2261. extern __inline __m512
  2262. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2263. _mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
  2264. {
  2265. return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
  2266. (__v16sf) __B,
  2267. (__v16sf)
  2268. _mm512_setzero_ps (),
  2269. (__mmask16) __U, __R);
  2270. }
  2271. #else
  2272. #define _mm512_add_round_pd(A, B, C) \
  2273. (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
  2274. #define _mm512_mask_add_round_pd(W, U, A, B, C) \
  2275. (__m512d)__builtin_ia32_addpd512_mask(A, B, W, U, C)
  2276. #define _mm512_maskz_add_round_pd(U, A, B, C) \
  2277. (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
  2278. #define _mm512_add_round_ps(A, B, C) \
  2279. (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
  2280. #define _mm512_mask_add_round_ps(W, U, A, B, C) \
  2281. (__m512)__builtin_ia32_addps512_mask(A, B, W, U, C)
  2282. #define _mm512_maskz_add_round_ps(U, A, B, C) \
  2283. (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
  2284. #define _mm512_sub_round_pd(A, B, C) \
  2285. (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
  2286. #define _mm512_mask_sub_round_pd(W, U, A, B, C) \
  2287. (__m512d)__builtin_ia32_subpd512_mask(A, B, W, U, C)
  2288. #define _mm512_maskz_sub_round_pd(U, A, B, C) \
  2289. (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
  2290. #define _mm512_sub_round_ps(A, B, C) \
  2291. (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
  2292. #define _mm512_mask_sub_round_ps(W, U, A, B, C) \
  2293. (__m512)__builtin_ia32_subps512_mask(A, B, W, U, C)
  2294. #define _mm512_maskz_sub_round_ps(U, A, B, C) \
  2295. (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
  2296. #endif
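/* Editorial sketch, not part of the original header: the packed *_round
   forms embed a static rounding mode in the instruction; when an explicit
   mode is given it should be combined with _MM_FROUND_NO_EXC to suppress
   exception reporting, e.g.:

       __m512d s = _mm512_add_round_pd (a, b,
                                        _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
*/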
  2297. #ifdef __OPTIMIZE__
  2298. extern __inline __m512d
  2299. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2300. _mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
  2301. {
  2302. return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
  2303. (__v8df) __B,
  2304. (__v8df)
  2305. _mm512_undefined_pd (),
  2306. (__mmask8) -1, __R);
  2307. }
  2308. extern __inline __m512d
  2309. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2310. _mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
  2311. __m512d __B, const int __R)
  2312. {
  2313. return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
  2314. (__v8df) __B,
  2315. (__v8df) __W,
  2316. (__mmask8) __U, __R);
  2317. }
  2318. extern __inline __m512d
  2319. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2320. _mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
  2321. const int __R)
  2322. {
  2323. return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
  2324. (__v8df) __B,
  2325. (__v8df)
  2326. _mm512_setzero_pd (),
  2327. (__mmask8) __U, __R);
  2328. }
  2329. extern __inline __m512
  2330. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2331. _mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
  2332. {
  2333. return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
  2334. (__v16sf) __B,
  2335. (__v16sf)
  2336. _mm512_undefined_ps (),
  2337. (__mmask16) -1, __R);
  2338. }
  2339. extern __inline __m512
  2340. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2341. _mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
  2342. __m512 __B, const int __R)
  2343. {
  2344. return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
  2345. (__v16sf) __B,
  2346. (__v16sf) __W,
  2347. (__mmask16) __U, __R);
  2348. }
  2349. extern __inline __m512
  2350. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2351. _mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
  2352. {
  2353. return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
  2354. (__v16sf) __B,
  2355. (__v16sf)
  2356. _mm512_setzero_ps (),
  2357. (__mmask16) __U, __R);
  2358. }
  2359. extern __inline __m512d
  2360. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2361. _mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
  2362. {
  2363. return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
  2364. (__v8df) __V,
  2365. (__v8df)
  2366. _mm512_undefined_pd (),
  2367. (__mmask8) -1, __R);
  2368. }
  2369. extern __inline __m512d
  2370. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2371. _mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
  2372. __m512d __V, const int __R)
  2373. {
  2374. return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
  2375. (__v8df) __V,
  2376. (__v8df) __W,
  2377. (__mmask8) __U, __R);
  2378. }
  2379. extern __inline __m512d
  2380. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2381. _mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
  2382. const int __R)
  2383. {
  2384. return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
  2385. (__v8df) __V,
  2386. (__v8df)
  2387. _mm512_setzero_pd (),
  2388. (__mmask8) __U, __R);
  2389. }
  2390. extern __inline __m512
  2391. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2392. _mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
  2393. {
  2394. return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
  2395. (__v16sf) __B,
  2396. (__v16sf)
  2397. _mm512_undefined_ps (),
  2398. (__mmask16) -1, __R);
  2399. }
  2400. extern __inline __m512
  2401. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2402. _mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
  2403. __m512 __B, const int __R)
  2404. {
  2405. return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
  2406. (__v16sf) __B,
  2407. (__v16sf) __W,
  2408. (__mmask16) __U, __R);
  2409. }
  2410. extern __inline __m512
  2411. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2412. _mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
  2413. {
  2414. return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
  2415. (__v16sf) __B,
  2416. (__v16sf)
  2417. _mm512_setzero_ps (),
  2418. (__mmask16) __U, __R);
  2419. }
  2420. extern __inline __m128d
  2421. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2422. _mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
  2423. {
  2424. return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
  2425. (__v2df) __B,
  2426. __R);
  2427. }
  2428. extern __inline __m128d
  2429. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2430. _mm_mask_mul_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
  2431. __m128d __B, const int __R)
  2432. {
  2433. return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
  2434. (__v2df) __B,
  2435. (__v2df) __W,
  2436. (__mmask8) __U, __R);
  2437. }
  2438. extern __inline __m128d
  2439. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2440. _mm_maskz_mul_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
  2441. const int __R)
  2442. {
  2443. return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
  2444. (__v2df) __B,
  2445. (__v2df)
  2446. _mm_setzero_pd (),
  2447. (__mmask8) __U, __R);
  2448. }
  2449. extern __inline __m128
  2450. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2451. _mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
  2452. {
  2453. return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
  2454. (__v4sf) __B,
  2455. __R);
  2456. }
  2457. extern __inline __m128
  2458. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2459. _mm_mask_mul_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
  2460. __m128 __B, const int __R)
  2461. {
  2462. return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
  2463. (__v4sf) __B,
  2464. (__v4sf) __W,
  2465. (__mmask8) __U, __R);
  2466. }
  2467. extern __inline __m128
  2468. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2469. _mm_maskz_mul_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
  2470. const int __R)
  2471. {
  2472. return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
  2473. (__v4sf) __B,
  2474. (__v4sf)
  2475. _mm_setzero_ps (),
  2476. (__mmask8) __U, __R);
  2477. }
  2478. extern __inline __m128d
  2479. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2480. _mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
  2481. {
  2482. return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
  2483. (__v2df) __B,
  2484. __R);
  2485. }
  2486. extern __inline __m128d
  2487. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2488. _mm_mask_div_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
  2489. __m128d __B, const int __R)
  2490. {
  2491. return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
  2492. (__v2df) __B,
  2493. (__v2df) __W,
  2494. (__mmask8) __U, __R);
  2495. }
  2496. extern __inline __m128d
  2497. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2498. _mm_maskz_div_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
  2499. const int __R)
  2500. {
  2501. return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
  2502. (__v2df) __B,
  2503. (__v2df)
  2504. _mm_setzero_pd (),
  2505. (__mmask8) __U, __R);
  2506. }
  2507. extern __inline __m128
  2508. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2509. _mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
  2510. {
  2511. return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
  2512. (__v4sf) __B,
  2513. __R);
  2514. }
  2515. extern __inline __m128
  2516. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2517. _mm_mask_div_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
  2518. __m128 __B, const int __R)
  2519. {
  2520. return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
  2521. (__v4sf) __B,
  2522. (__v4sf) __W,
  2523. (__mmask8) __U, __R);
  2524. }
  2525. extern __inline __m128
  2526. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2527. _mm_maskz_div_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
  2528. const int __R)
  2529. {
  2530. return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
  2531. (__v4sf) __B,
  2532. (__v4sf)
  2533. _mm_setzero_ps (),
  2534. (__mmask8) __U, __R);
  2535. }
  2536. #else
  2537. #define _mm512_mul_round_pd(A, B, C) \
  2538. (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
  2539. #define _mm512_mask_mul_round_pd(W, U, A, B, C) \
  2540. (__m512d)__builtin_ia32_mulpd512_mask(A, B, W, U, C)
  2541. #define _mm512_maskz_mul_round_pd(U, A, B, C) \
  2542. (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
  2543. #define _mm512_mul_round_ps(A, B, C) \
  2544. (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
  2545. #define _mm512_mask_mul_round_ps(W, U, A, B, C) \
  2546. (__m512)__builtin_ia32_mulps512_mask(A, B, W, U, C)
  2547. #define _mm512_maskz_mul_round_ps(U, A, B, C) \
  2548. (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
  2549. #define _mm512_div_round_pd(A, B, C) \
  2550. (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
  2551. #define _mm512_mask_div_round_pd(W, U, A, B, C) \
  2552. (__m512d)__builtin_ia32_divpd512_mask(A, B, W, U, C)
  2553. #define _mm512_maskz_div_round_pd(U, A, B, C) \
  2554. (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
  2555. #define _mm512_div_round_ps(A, B, C) \
  2556. (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
  2557. #define _mm512_mask_div_round_ps(W, U, A, B, C) \
  2558. (__m512)__builtin_ia32_divps512_mask(A, B, W, U, C)
  2559. #define _mm512_maskz_div_round_ps(U, A, B, C) \
  2560. (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
  2561. #define _mm_mul_round_sd(A, B, C) \
  2562. (__m128d)__builtin_ia32_mulsd_round(A, B, C)
  2563. #define _mm_mask_mul_round_sd(W, U, A, B, C) \
  2564. (__m128d)__builtin_ia32_mulsd_mask_round(A, B, W, U, C)
  2565. #define _mm_maskz_mul_round_sd(U, A, B, C) \
  2566. (__m128d)__builtin_ia32_mulsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
  2567. #define _mm_mul_round_ss(A, B, C) \
  2568. (__m128)__builtin_ia32_mulss_round(A, B, C)
  2569. #define _mm_mask_mul_round_ss(W, U, A, B, C) \
  2570. (__m128)__builtin_ia32_mulss_mask_round(A, B, W, U, C)
  2571. #define _mm_maskz_mul_round_ss(U, A, B, C) \
  2572. (__m128)__builtin_ia32_mulss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
  2573. #define _mm_div_round_sd(A, B, C) \
  2574. (__m128d)__builtin_ia32_divsd_round(A, B, C)
  2575. #define _mm_mask_div_round_sd(W, U, A, B, C) \
  2576. (__m128d)__builtin_ia32_divsd_mask_round(A, B, W, U, C)
  2577. #define _mm_maskz_div_round_sd(U, A, B, C) \
  2578. (__m128d)__builtin_ia32_divsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
  2579. #define _mm_div_round_ss(A, B, C) \
  2580. (__m128)__builtin_ia32_divss_round(A, B, C)
  2581. #define _mm_mask_div_round_ss(W, U, A, B, C) \
  2582. (__m128)__builtin_ia32_divss_mask_round(A, B, W, U, C)
  2583. #define _mm_maskz_div_round_ss(U, A, B, C) \
  2584. (__m128)__builtin_ia32_divss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
  2585. #endif
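/* Usage sketch (illustrative addition, not part of the original header):
   the *_round_* forms take an explicit rounding operand instead of using
   MXCSR.  Assumes AVX-512F is enabled (e.g. -mavx512f) and that this header
   is reached through <immintrin.h>, so the _MM_FROUND_* constants are
   visible.  The function names below are hypothetical.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__avx512f_sketch_div_rz (__m512d __a, __m512d __b)
{
  /* Divide with round-toward-zero and exceptions suppressed.  */
  return _mm512_div_round_pd (__a, __b,
			      _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__avx512f_sketch_div_ss_merge (__m128 __w, __mmask8 __k, __m128 __a, __m128 __b)
{
  /* Element 0 is __a[0]/__b[0] when bit 0 of __k is set, otherwise __w[0];
     elements 1-3 are copied from __a.  */
  return _mm_mask_div_round_ss (__w, __k, __a, __b, _MM_FROUND_CUR_DIRECTION);
}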
  2586. #ifdef __OPTIMIZE__
  2587. extern __inline __m512d
  2588. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2589. _mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
  2590. {
  2591. return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
  2592. (__v8df) __B,
  2593. (__v8df)
  2594. _mm512_undefined_pd (),
  2595. (__mmask8) -1, __R);
  2596. }
  2597. extern __inline __m512d
  2598. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2599. _mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
  2600. __m512d __B, const int __R)
  2601. {
  2602. return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
  2603. (__v8df) __B,
  2604. (__v8df) __W,
  2605. (__mmask8) __U, __R);
  2606. }
  2607. extern __inline __m512d
  2608. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2609. _mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
  2610. const int __R)
  2611. {
  2612. return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
  2613. (__v8df) __B,
  2614. (__v8df)
  2615. _mm512_setzero_pd (),
  2616. (__mmask8) __U, __R);
  2617. }
  2618. extern __inline __m512
  2619. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2620. _mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
  2621. {
  2622. return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
  2623. (__v16sf) __B,
  2624. (__v16sf)
  2625. _mm512_undefined_ps (),
  2626. (__mmask16) -1, __R);
  2627. }
  2628. extern __inline __m512
  2629. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2630. _mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
  2631. __m512 __B, const int __R)
  2632. {
  2633. return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
  2634. (__v16sf) __B,
  2635. (__v16sf) __W,
  2636. (__mmask16) __U, __R);
  2637. }
  2638. extern __inline __m512
  2639. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2640. _mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
  2641. {
  2642. return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
  2643. (__v16sf) __B,
  2644. (__v16sf)
  2645. _mm512_setzero_ps (),
  2646. (__mmask16) __U, __R);
  2647. }
  2648. extern __inline __m512d
  2649. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2650. _mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
  2651. {
  2652. return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
  2653. (__v8df) __B,
  2654. (__v8df)
  2655. _mm512_undefined_pd (),
  2656. (__mmask8) -1, __R);
  2657. }
  2658. extern __inline __m512d
  2659. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2660. _mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
  2661. __m512d __B, const int __R)
  2662. {
  2663. return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
  2664. (__v8df) __B,
  2665. (__v8df) __W,
  2666. (__mmask8) __U, __R);
  2667. }
  2668. extern __inline __m512d
  2669. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2670. _mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
  2671. const int __R)
  2672. {
  2673. return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
  2674. (__v8df) __B,
  2675. (__v8df)
  2676. _mm512_setzero_pd (),
  2677. (__mmask8) __U, __R);
  2678. }
  2679. extern __inline __m512
  2680. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2681. _mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
  2682. {
  2683. return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
  2684. (__v16sf) __B,
  2685. (__v16sf)
  2686. _mm512_undefined_ps (),
  2687. (__mmask16) -1, __R);
  2688. }
  2689. extern __inline __m512
  2690. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2691. _mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
  2692. __m512 __B, const int __R)
  2693. {
  2694. return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
  2695. (__v16sf) __B,
  2696. (__v16sf) __W,
  2697. (__mmask16) __U, __R);
  2698. }
  2699. extern __inline __m512
  2700. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2701. _mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
  2702. {
  2703. return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
  2704. (__v16sf) __B,
  2705. (__v16sf)
  2706. _mm512_setzero_ps (),
  2707. (__mmask16) __U, __R);
  2708. }
  2709. #else
  2710. #define _mm512_max_round_pd(A, B, R) \
  2711. (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R)
  2712. #define _mm512_mask_max_round_pd(W, U, A, B, R) \
  2713. (__m512d)__builtin_ia32_maxpd512_mask(A, B, W, U, R)
  2714. #define _mm512_maskz_max_round_pd(U, A, B, R) \
  2715. (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R)
  2716. #define _mm512_max_round_ps(A, B, R) \
2717. (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R)
  2718. #define _mm512_mask_max_round_ps(W, U, A, B, R) \
  2719. (__m512)__builtin_ia32_maxps512_mask(A, B, W, U, R)
  2720. #define _mm512_maskz_max_round_ps(U, A, B, R) \
  2721. (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R)
  2722. #define _mm512_min_round_pd(A, B, R) \
  2723. (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R)
  2724. #define _mm512_mask_min_round_pd(W, U, A, B, R) \
  2725. (__m512d)__builtin_ia32_minpd512_mask(A, B, W, U, R)
  2726. #define _mm512_maskz_min_round_pd(U, A, B, R) \
  2727. (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R)
  2728. #define _mm512_min_round_ps(A, B, R) \
  2729. (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R)
  2730. #define _mm512_mask_min_round_ps(W, U, A, B, R) \
  2731. (__m512)__builtin_ia32_minps512_mask(A, B, W, U, R)
  2732. #define _mm512_maskz_min_round_ps(U, A, B, R) \
  2733. (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R)
  2734. #endif
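/* Usage sketch (illustrative addition): vmaxps/vminps have no embedded
   rounding control, only SAE, so pass _MM_FROUND_CUR_DIRECTION (or
   _MM_FROUND_NO_EXC to suppress exceptions).  Zero-masking clears lanes
   whose mask bit is 0.  The function name is hypothetical.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__avx512f_sketch_max_masked (__mmask16 __k, __m512 __a, __m512 __b)
{
  return _mm512_maskz_max_round_ps (__k, __a, __b, _MM_FROUND_CUR_DIRECTION);
}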
  2735. #ifdef __OPTIMIZE__
  2736. extern __inline __m512d
  2737. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2738. _mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
  2739. {
  2740. return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
  2741. (__v8df) __B,
  2742. (__v8df)
  2743. _mm512_undefined_pd (),
  2744. (__mmask8) -1, __R);
  2745. }
  2746. extern __inline __m512d
  2747. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2748. _mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
  2749. __m512d __B, const int __R)
  2750. {
  2751. return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
  2752. (__v8df) __B,
  2753. (__v8df) __W,
  2754. (__mmask8) __U, __R);
  2755. }
  2756. extern __inline __m512d
  2757. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2758. _mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
  2759. const int __R)
  2760. {
  2761. return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
  2762. (__v8df) __B,
  2763. (__v8df)
  2764. _mm512_setzero_pd (),
  2765. (__mmask8) __U, __R);
  2766. }
  2767. extern __inline __m512
  2768. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2769. _mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
  2770. {
  2771. return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
  2772. (__v16sf) __B,
  2773. (__v16sf)
  2774. _mm512_undefined_ps (),
  2775. (__mmask16) -1, __R);
  2776. }
  2777. extern __inline __m512
  2778. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2779. _mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
  2780. __m512 __B, const int __R)
  2781. {
  2782. return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
  2783. (__v16sf) __B,
  2784. (__v16sf) __W,
  2785. (__mmask16) __U, __R);
  2786. }
  2787. extern __inline __m512
  2788. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2789. _mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
  2790. const int __R)
  2791. {
  2792. return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
  2793. (__v16sf) __B,
  2794. (__v16sf)
  2795. _mm512_setzero_ps (),
  2796. (__mmask16) __U, __R);
  2797. }
  2798. extern __inline __m128d
  2799. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2800. _mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
  2801. {
  2802. return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
  2803. (__v2df) __B,
  2804. (__v2df)
  2805. _mm_setzero_pd (),
  2806. (__mmask8) -1, __R);
  2807. }
  2808. extern __inline __m128d
  2809. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2810. _mm_mask_scalef_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
  2811. const int __R)
  2812. {
  2813. return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
  2814. (__v2df) __B,
  2815. (__v2df) __W,
  2816. (__mmask8) __U, __R);
  2817. }
  2818. extern __inline __m128d
  2819. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2820. _mm_maskz_scalef_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
  2821. const int __R)
  2822. {
  2823. return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
  2824. (__v2df) __B,
  2825. (__v2df)
  2826. _mm_setzero_pd (),
  2827. (__mmask8) __U, __R);
  2828. }
  2829. extern __inline __m128
  2830. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2831. _mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
  2832. {
  2833. return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
  2834. (__v4sf) __B,
  2835. (__v4sf)
  2836. _mm_setzero_ps (),
  2837. (__mmask8) -1, __R);
  2838. }
  2839. extern __inline __m128
  2840. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2841. _mm_mask_scalef_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
  2842. const int __R)
  2843. {
  2844. return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
  2845. (__v4sf) __B,
  2846. (__v4sf) __W,
  2847. (__mmask8) __U, __R);
  2848. }
  2849. extern __inline __m128
  2850. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2851. _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
  2852. {
  2853. return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
  2854. (__v4sf) __B,
  2855. (__v4sf)
  2856. _mm_setzero_ps (),
  2857. (__mmask8) __U, __R);
  2858. }
  2859. #else
  2860. #define _mm512_scalef_round_pd(A, B, C) \
  2861. (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
  2862. #define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
  2863. (__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C)
  2864. #define _mm512_maskz_scalef_round_pd(U, A, B, C) \
  2865. (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
  2866. #define _mm512_scalef_round_ps(A, B, C) \
  2867. (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
  2868. #define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
  2869. (__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C)
  2870. #define _mm512_maskz_scalef_round_ps(U, A, B, C) \
  2871. (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
  2872. #define _mm_scalef_round_sd(A, B, C) \
  2873. (__m128d)__builtin_ia32_scalefsd_mask_round (A, B, \
  2874. (__v2df)_mm_setzero_pd (), -1, C)
  2875. #define _mm_scalef_round_ss(A, B, C) \
  2876. (__m128)__builtin_ia32_scalefss_mask_round (A, B, \
  2877. (__v4sf)_mm_setzero_ps (), -1, C)
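/* Added sketch: masked and zero-masked forms of the scalar scalef round
   macros, mirroring the inline definitions above so the macro path exposes
   the same names as the other *_mask_*_round groups.  */
#define _mm_mask_scalef_round_sd(W, U, A, B, C) \
(__m128d)__builtin_ia32_scalefsd_mask_round (A, B, W, U, C)
#define _mm_maskz_scalef_round_sd(U, A, B, C) \
(__m128d)__builtin_ia32_scalefsd_mask_round (A, B, (__v2df)_mm_setzero_pd (), U, C)
#define _mm_mask_scalef_round_ss(W, U, A, B, C) \
(__m128)__builtin_ia32_scalefss_mask_round (A, B, W, U, C)
#define _mm_maskz_scalef_round_ss(U, A, B, C) \
(__m128)__builtin_ia32_scalefss_mask_round (A, B, (__v4sf)_mm_setzero_ps (), U, C)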
  2878. #endif
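/* Usage sketch (illustrative addition): scalef computes __a * 2^floor(__b)
   element-wise, e.g. scalef(3.0, 4.0) == 48.0.  The function name is
   hypothetical.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__avx512f_sketch_scalef (__m512d __a, __m512d __b)
{
  return _mm512_scalef_round_pd (__a, __b,
				 _MM_FROUND_TO_NEAREST_INT
				 | _MM_FROUND_NO_EXC);
}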
  2879. #ifdef __OPTIMIZE__
  2880. extern __inline __m512d
  2881. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2882. _mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
  2883. {
  2884. return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
  2885. (__v8df) __B,
  2886. (__v8df) __C,
  2887. (__mmask8) -1, __R);
  2888. }
  2889. extern __inline __m512d
  2890. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2891. _mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
  2892. __m512d __C, const int __R)
  2893. {
  2894. return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
  2895. (__v8df) __B,
  2896. (__v8df) __C,
  2897. (__mmask8) __U, __R);
  2898. }
  2899. extern __inline __m512d
  2900. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2901. _mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
  2902. __mmask8 __U, const int __R)
  2903. {
  2904. return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
  2905. (__v8df) __B,
  2906. (__v8df) __C,
  2907. (__mmask8) __U, __R);
  2908. }
  2909. extern __inline __m512d
  2910. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2911. _mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
  2912. __m512d __C, const int __R)
  2913. {
  2914. return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
  2915. (__v8df) __B,
  2916. (__v8df) __C,
  2917. (__mmask8) __U, __R);
  2918. }
  2919. extern __inline __m512
  2920. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2921. _mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
  2922. {
  2923. return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
  2924. (__v16sf) __B,
  2925. (__v16sf) __C,
  2926. (__mmask16) -1, __R);
  2927. }
  2928. extern __inline __m512
  2929. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2930. _mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
  2931. __m512 __C, const int __R)
  2932. {
  2933. return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
  2934. (__v16sf) __B,
  2935. (__v16sf) __C,
  2936. (__mmask16) __U, __R);
  2937. }
  2938. extern __inline __m512
  2939. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2940. _mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
  2941. __mmask16 __U, const int __R)
  2942. {
  2943. return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
  2944. (__v16sf) __B,
  2945. (__v16sf) __C,
  2946. (__mmask16) __U, __R);
  2947. }
  2948. extern __inline __m512
  2949. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2950. _mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
  2951. __m512 __C, const int __R)
  2952. {
  2953. return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
  2954. (__v16sf) __B,
  2955. (__v16sf) __C,
  2956. (__mmask16) __U, __R);
  2957. }
  2958. extern __inline __m512d
  2959. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2960. _mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
  2961. {
  2962. return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
  2963. (__v8df) __B,
  2964. (__v8df) __C,
  2965. (__mmask8) -1, __R);
  2966. }
  2967. extern __inline __m512d
  2968. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2969. _mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
  2970. __m512d __C, const int __R)
  2971. {
  2972. return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
  2973. (__v8df) __B,
  2974. (__v8df) __C,
  2975. (__mmask8) __U, __R);
  2976. }
  2977. extern __inline __m512d
  2978. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2979. _mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
  2980. __mmask8 __U, const int __R)
  2981. {
  2982. return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
  2983. (__v8df) __B,
  2984. (__v8df) __C,
  2985. (__mmask8) __U, __R);
  2986. }
  2987. extern __inline __m512d
  2988. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2989. _mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
  2990. __m512d __C, const int __R)
  2991. {
  2992. return (__m512d) __builtin_ia32_vfmsubpd512_maskz ((__v8df) __A,
  2993. (__v8df) __B,
  2994. (__v8df) __C,
  2995. (__mmask8) __U, __R);
  2996. }
  2997. extern __inline __m512
  2998. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2999. _mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
  3000. {
  3001. return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
  3002. (__v16sf) __B,
  3003. (__v16sf) __C,
  3004. (__mmask16) -1, __R);
  3005. }
  3006. extern __inline __m512
  3007. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3008. _mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
  3009. __m512 __C, const int __R)
  3010. {
  3011. return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
  3012. (__v16sf) __B,
  3013. (__v16sf) __C,
  3014. (__mmask16) __U, __R);
  3015. }
  3016. extern __inline __m512
  3017. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3018. _mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
  3019. __mmask16 __U, const int __R)
  3020. {
  3021. return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
  3022. (__v16sf) __B,
  3023. (__v16sf) __C,
  3024. (__mmask16) __U, __R);
  3025. }
  3026. extern __inline __m512
  3027. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3028. _mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
  3029. __m512 __C, const int __R)
  3030. {
  3031. return (__m512) __builtin_ia32_vfmsubps512_maskz ((__v16sf) __A,
  3032. (__v16sf) __B,
  3033. (__v16sf) __C,
  3034. (__mmask16) __U, __R);
  3035. }
  3036. extern __inline __m512d
  3037. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3038. _mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
  3039. {
  3040. return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
  3041. (__v8df) __B,
  3042. (__v8df) __C,
  3043. (__mmask8) -1, __R);
  3044. }
  3045. extern __inline __m512d
  3046. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3047. _mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
  3048. __m512d __C, const int __R)
  3049. {
  3050. return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
  3051. (__v8df) __B,
  3052. (__v8df) __C,
  3053. (__mmask8) __U, __R);
  3054. }
  3055. extern __inline __m512d
  3056. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3057. _mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
  3058. __mmask8 __U, const int __R)
  3059. {
  3060. return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
  3061. (__v8df) __B,
  3062. (__v8df) __C,
  3063. (__mmask8) __U, __R);
  3064. }
  3065. extern __inline __m512d
  3066. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3067. _mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
  3068. __m512d __C, const int __R)
  3069. {
  3070. return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
  3071. (__v8df) __B,
  3072. (__v8df) __C,
  3073. (__mmask8) __U, __R);
  3074. }
  3075. extern __inline __m512
  3076. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3077. _mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
  3078. {
  3079. return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
  3080. (__v16sf) __B,
  3081. (__v16sf) __C,
  3082. (__mmask16) -1, __R);
  3083. }
  3084. extern __inline __m512
  3085. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3086. _mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
  3087. __m512 __C, const int __R)
  3088. {
  3089. return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
  3090. (__v16sf) __B,
  3091. (__v16sf) __C,
  3092. (__mmask16) __U, __R);
  3093. }
  3094. extern __inline __m512
  3095. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3096. _mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
  3097. __mmask16 __U, const int __R)
  3098. {
  3099. return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
  3100. (__v16sf) __B,
  3101. (__v16sf) __C,
  3102. (__mmask16) __U, __R);
  3103. }
  3104. extern __inline __m512
  3105. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3106. _mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
  3107. __m512 __C, const int __R)
  3108. {
  3109. return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
  3110. (__v16sf) __B,
  3111. (__v16sf) __C,
  3112. (__mmask16) __U, __R);
  3113. }
  3114. extern __inline __m512d
  3115. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3116. _mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
  3117. {
  3118. return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
  3119. (__v8df) __B,
  3120. -(__v8df) __C,
  3121. (__mmask8) -1, __R);
  3122. }
  3123. extern __inline __m512d
  3124. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3125. _mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
  3126. __m512d __C, const int __R)
  3127. {
  3128. return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
  3129. (__v8df) __B,
  3130. -(__v8df) __C,
  3131. (__mmask8) __U, __R);
  3132. }
  3133. extern __inline __m512d
  3134. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3135. _mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
  3136. __mmask8 __U, const int __R)
  3137. {
  3138. return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
  3139. (__v8df) __B,
  3140. (__v8df) __C,
  3141. (__mmask8) __U, __R);
  3142. }
  3143. extern __inline __m512d
  3144. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3145. _mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
  3146. __m512d __C, const int __R)
  3147. {
  3148. return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
  3149. (__v8df) __B,
  3150. -(__v8df) __C,
  3151. (__mmask8) __U, __R);
  3152. }
  3153. extern __inline __m512
  3154. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3155. _mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
  3156. {
  3157. return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
  3158. (__v16sf) __B,
  3159. -(__v16sf) __C,
  3160. (__mmask16) -1, __R);
  3161. }
  3162. extern __inline __m512
  3163. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3164. _mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
  3165. __m512 __C, const int __R)
  3166. {
  3167. return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
  3168. (__v16sf) __B,
  3169. -(__v16sf) __C,
  3170. (__mmask16) __U, __R);
  3171. }
  3172. extern __inline __m512
  3173. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3174. _mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
  3175. __mmask16 __U, const int __R)
  3176. {
  3177. return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
  3178. (__v16sf) __B,
  3179. (__v16sf) __C,
  3180. (__mmask16) __U, __R);
  3181. }
  3182. extern __inline __m512
  3183. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3184. _mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
  3185. __m512 __C, const int __R)
  3186. {
  3187. return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
  3188. (__v16sf) __B,
  3189. -(__v16sf) __C,
  3190. (__mmask16) __U, __R);
  3191. }
  3192. extern __inline __m512d
  3193. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3194. _mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
  3195. {
  3196. return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
  3197. (__v8df) __B,
  3198. (__v8df) __C,
  3199. (__mmask8) -1, __R);
  3200. }
  3201. extern __inline __m512d
  3202. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3203. _mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
  3204. __m512d __C, const int __R)
  3205. {
  3206. return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
  3207. (__v8df) __B,
  3208. (__v8df) __C,
  3209. (__mmask8) __U, __R);
  3210. }
  3211. extern __inline __m512d
  3212. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3213. _mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
  3214. __mmask8 __U, const int __R)
  3215. {
  3216. return (__m512d) __builtin_ia32_vfnmaddpd512_mask3 ((__v8df) __A,
  3217. (__v8df) __B,
  3218. (__v8df) __C,
  3219. (__mmask8) __U, __R);
  3220. }
  3221. extern __inline __m512d
  3222. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3223. _mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
  3224. __m512d __C, const int __R)
  3225. {
  3226. return (__m512d) __builtin_ia32_vfnmaddpd512_maskz ((__v8df) __A,
  3227. (__v8df) __B,
  3228. (__v8df) __C,
  3229. (__mmask8) __U, __R);
  3230. }
  3231. extern __inline __m512
  3232. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3233. _mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
  3234. {
  3235. return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
  3236. (__v16sf) __B,
  3237. (__v16sf) __C,
  3238. (__mmask16) -1, __R);
  3239. }
  3240. extern __inline __m512
  3241. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3242. _mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
  3243. __m512 __C, const int __R)
  3244. {
  3245. return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
  3246. (__v16sf) __B,
  3247. (__v16sf) __C,
  3248. (__mmask16) __U, __R);
  3249. }
  3250. extern __inline __m512
  3251. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3252. _mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
  3253. __mmask16 __U, const int __R)
  3254. {
  3255. return (__m512) __builtin_ia32_vfnmaddps512_mask3 ((__v16sf) __A,
  3256. (__v16sf) __B,
  3257. (__v16sf) __C,
  3258. (__mmask16) __U, __R);
  3259. }
  3260. extern __inline __m512
  3261. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3262. _mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
  3263. __m512 __C, const int __R)
  3264. {
  3265. return (__m512) __builtin_ia32_vfnmaddps512_maskz ((__v16sf) __A,
  3266. (__v16sf) __B,
  3267. (__v16sf) __C,
  3268. (__mmask16) __U, __R);
  3269. }
  3270. extern __inline __m512d
  3271. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3272. _mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
  3273. {
  3274. return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
  3275. (__v8df) __B,
  3276. (__v8df) __C,
  3277. (__mmask8) -1, __R);
  3278. }
  3279. extern __inline __m512d
  3280. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3281. _mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
  3282. __m512d __C, const int __R)
  3283. {
  3284. return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
  3285. (__v8df) __B,
  3286. (__v8df) __C,
  3287. (__mmask8) __U, __R);
  3288. }
  3289. extern __inline __m512d
  3290. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3291. _mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
  3292. __mmask8 __U, const int __R)
  3293. {
  3294. return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
  3295. (__v8df) __B,
  3296. (__v8df) __C,
  3297. (__mmask8) __U, __R);
  3298. }
  3299. extern __inline __m512d
  3300. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3301. _mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
  3302. __m512d __C, const int __R)
  3303. {
  3304. return (__m512d) __builtin_ia32_vfnmsubpd512_maskz ((__v8df) __A,
  3305. (__v8df) __B,
  3306. (__v8df) __C,
  3307. (__mmask8) __U, __R);
  3308. }
  3309. extern __inline __m512
  3310. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3311. _mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
  3312. {
  3313. return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
  3314. (__v16sf) __B,
  3315. (__v16sf) __C,
  3316. (__mmask16) -1, __R);
  3317. }
  3318. extern __inline __m512
  3319. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3320. _mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
  3321. __m512 __C, const int __R)
  3322. {
  3323. return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
  3324. (__v16sf) __B,
  3325. (__v16sf) __C,
  3326. (__mmask16) __U, __R);
  3327. }
  3328. extern __inline __m512
  3329. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3330. _mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
  3331. __mmask16 __U, const int __R)
  3332. {
  3333. return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
  3334. (__v16sf) __B,
  3335. (__v16sf) __C,
  3336. (__mmask16) __U, __R);
  3337. }
  3338. extern __inline __m512
  3339. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3340. _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
  3341. __m512 __C, const int __R)
  3342. {
  3343. return (__m512) __builtin_ia32_vfnmsubps512_maskz ((__v16sf) __A,
  3344. (__v16sf) __B,
  3345. (__v16sf) __C,
  3346. (__mmask16) __U, __R);
  3347. }
  3348. #else
  3349. #define _mm512_fmadd_round_pd(A, B, C, R) \
  3350. (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, -1, R)
  3351. #define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  3352. (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, U, R)
  3353. #define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  3354. (__m512d)__builtin_ia32_vfmaddpd512_mask3(A, B, C, U, R)
  3355. #define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  3356. (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, C, U, R)
  3357. #define _mm512_fmadd_round_ps(A, B, C, R) \
  3358. (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, -1, R)
  3359. #define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  3360. (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, U, R)
  3361. #define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  3362. (__m512)__builtin_ia32_vfmaddps512_mask3(A, B, C, U, R)
  3363. #define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  3364. (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, C, U, R)
  3365. #define _mm512_fmsub_round_pd(A, B, C, R) \
  3366. (__m512d)__builtin_ia32_vfmsubpd512_mask(A, B, C, -1, R)
  3367. #define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  3368. (__m512d)__builtin_ia32_vfmsubpd512_mask(A, B, C, U, R)
  3369. #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  3370. (__m512d)__builtin_ia32_vfmsubpd512_mask3(A, B, C, U, R)
  3371. #define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  3372. (__m512d)__builtin_ia32_vfmsubpd512_maskz(A, B, C, U, R)
  3373. #define _mm512_fmsub_round_ps(A, B, C, R) \
  3374. (__m512)__builtin_ia32_vfmsubps512_mask(A, B, C, -1, R)
  3375. #define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  3376. (__m512)__builtin_ia32_vfmsubps512_mask(A, B, C, U, R)
  3377. #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  3378. (__m512)__builtin_ia32_vfmsubps512_mask3(A, B, C, U, R)
  3379. #define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  3380. (__m512)__builtin_ia32_vfmsubps512_maskz(A, B, C, U, R)
  3381. #define _mm512_fmaddsub_round_pd(A, B, C, R) \
  3382. (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, -1, R)
  3383. #define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  3384. (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, U, R)
  3385. #define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  3386. (__m512d)__builtin_ia32_vfmaddsubpd512_mask3(A, B, C, U, R)
  3387. #define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  3388. (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, C, U, R)
  3389. #define _mm512_fmaddsub_round_ps(A, B, C, R) \
  3390. (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, -1, R)
  3391. #define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  3392. (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, U, R)
  3393. #define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  3394. (__m512)__builtin_ia32_vfmaddsubps512_mask3(A, B, C, U, R)
  3395. #define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  3396. (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, C, U, R)
  3397. #define _mm512_fmsubadd_round_pd(A, B, C, R) \
  3398. (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), -1, R)
  3399. #define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  3400. (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), U, R)
  3401. #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  3402. (__m512d)__builtin_ia32_vfmsubaddpd512_mask3(A, B, C, U, R)
  3403. #define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  3404. (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, -(C), U, R)
  3405. #define _mm512_fmsubadd_round_ps(A, B, C, R) \
  3406. (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), -1, R)
  3407. #define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  3408. (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), U, R)
  3409. #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  3410. (__m512)__builtin_ia32_vfmsubaddps512_mask3(A, B, C, U, R)
  3411. #define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  3412. (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, -(C), U, R)
  3413. #define _mm512_fnmadd_round_pd(A, B, C, R) \
  3414. (__m512d)__builtin_ia32_vfnmaddpd512_mask(A, B, C, -1, R)
  3415. #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  3416. (__m512d)__builtin_ia32_vfnmaddpd512_mask(A, B, C, U, R)
  3417. #define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  3418. (__m512d)__builtin_ia32_vfnmaddpd512_mask3(A, B, C, U, R)
  3419. #define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  3420. (__m512d)__builtin_ia32_vfnmaddpd512_maskz(A, B, C, U, R)
  3421. #define _mm512_fnmadd_round_ps(A, B, C, R) \
  3422. (__m512)__builtin_ia32_vfnmaddps512_mask(A, B, C, -1, R)
  3423. #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  3424. (__m512)__builtin_ia32_vfnmaddps512_mask(A, B, C, U, R)
  3425. #define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  3426. (__m512)__builtin_ia32_vfnmaddps512_mask3(A, B, C, U, R)
  3427. #define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  3428. (__m512)__builtin_ia32_vfnmaddps512_maskz(A, B, C, U, R)
  3429. #define _mm512_fnmsub_round_pd(A, B, C, R) \
  3430. (__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, -1, R)
  3431. #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  3432. (__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, U, R)
  3433. #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  3434. (__m512d)__builtin_ia32_vfnmsubpd512_mask3(A, B, C, U, R)
  3435. #define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  3436. (__m512d)__builtin_ia32_vfnmsubpd512_maskz(A, B, C, U, R)
  3437. #define _mm512_fnmsub_round_ps(A, B, C, R) \
  3438. (__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, -1, R)
  3439. #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  3440. (__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, U, R)
  3441. #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  3442. (__m512)__builtin_ia32_vfnmsubps512_mask3(A, B, C, U, R)
  3443. #define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  3444. (__m512)__builtin_ia32_vfnmsubps512_maskz(A, B, C, U, R)
  3445. #endif
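/* Usage sketch (illustrative addition): the fused forms compute a*b+c (or
   the subtract / negated variants) with a single rounding step, here
   round-to-nearest.  _mask blends unselected lanes from the first operand,
   _mask3 from the third, _maskz zeroes them.  The function name is
   hypothetical.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__avx512f_sketch_fmadd_rn (__mmask8 __k, __m512d __a, __m512d __b, __m512d __c)
{
  return _mm512_maskz_fmadd_round_pd (__k, __a, __b, __c,
				      _MM_FROUND_TO_NEAREST_INT
				      | _MM_FROUND_NO_EXC);
}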
  3446. extern __inline __m512i
  3447. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3448. _mm512_abs_epi64 (__m512i __A)
  3449. {
  3450. return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
  3451. (__v8di)
  3452. _mm512_undefined_epi32 (),
  3453. (__mmask8) -1);
  3454. }
  3455. extern __inline __m512i
  3456. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3457. _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
  3458. {
  3459. return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
  3460. (__v8di) __W,
  3461. (__mmask8) __U);
  3462. }
  3463. extern __inline __m512i
  3464. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3465. _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
  3466. {
  3467. return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
  3468. (__v8di)
  3469. _mm512_setzero_si512 (),
  3470. (__mmask8) __U);
  3471. }
  3472. extern __inline __m512i
  3473. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3474. _mm512_abs_epi32 (__m512i __A)
  3475. {
  3476. return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
  3477. (__v16si)
  3478. _mm512_undefined_epi32 (),
  3479. (__mmask16) -1);
  3480. }
  3481. extern __inline __m512i
  3482. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3483. _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
  3484. {
  3485. return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
  3486. (__v16si) __W,
  3487. (__mmask16) __U);
  3488. }
  3489. extern __inline __m512i
  3490. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3491. _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
  3492. {
  3493. return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
  3494. (__v16si)
  3495. _mm512_setzero_si512 (),
  3496. (__mmask16) __U);
  3497. }
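/* Usage sketch (illustrative addition): merge-masked absolute value; lanes
   with a zero mask bit keep the corresponding element of __src.  The
   function name is hypothetical.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__avx512f_sketch_abs32 (__m512i __src, __mmask16 __k, __m512i __a)
{
  return _mm512_mask_abs_epi32 (__src, __k, __a);
}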
  3498. extern __inline __m512
  3499. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3500. _mm512_broadcastss_ps (__m128 __A)
  3501. {
  3502. return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
  3503. (__v16sf)
  3504. _mm512_undefined_ps (),
  3505. (__mmask16) -1);
  3506. }
  3507. extern __inline __m512
  3508. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3509. _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
  3510. {
  3511. return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
  3512. (__v16sf) __O, __M);
  3513. }
  3514. extern __inline __m512
  3515. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3516. _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
  3517. {
  3518. return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
  3519. (__v16sf)
  3520. _mm512_setzero_ps (),
  3521. __M);
  3522. }
  3523. extern __inline __m512d
  3524. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3525. _mm512_broadcastsd_pd (__m128d __A)
  3526. {
  3527. return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
  3528. (__v8df)
  3529. _mm512_undefined_pd (),
  3530. (__mmask8) -1);
  3531. }
  3532. extern __inline __m512d
  3533. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3534. _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
  3535. {
  3536. return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
  3537. (__v8df) __O, __M);
  3538. }
  3539. extern __inline __m512d
  3540. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3541. _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
  3542. {
  3543. return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
  3544. (__v8df)
  3545. _mm512_setzero_pd (),
  3546. __M);
  3547. }
  3548. extern __inline __m512i
  3549. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3550. _mm512_broadcastd_epi32 (__m128i __A)
  3551. {
  3552. return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
  3553. (__v16si)
  3554. _mm512_undefined_epi32 (),
  3555. (__mmask16) -1);
  3556. }
  3557. extern __inline __m512i
  3558. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3559. _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
  3560. {
  3561. return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
  3562. (__v16si) __O, __M);
  3563. }
  3564. extern __inline __m512i
  3565. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3566. _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
  3567. {
  3568. return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
  3569. (__v16si)
  3570. _mm512_setzero_si512 (),
  3571. __M);
  3572. }
  3573. extern __inline __m512i
  3574. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3575. _mm512_set1_epi32 (int __A)
  3576. {
  3577. return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
  3578. (__v16si)
  3579. _mm512_undefined_epi32 (),
  3580. (__mmask16)(-1));
  3581. }
  3582. extern __inline __m512i
  3583. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3584. _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
  3585. {
  3586. return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
  3587. __M);
  3588. }
  3589. extern __inline __m512i
  3590. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3591. _mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
  3592. {
  3593. return (__m512i)
  3594. __builtin_ia32_pbroadcastd512_gpr_mask (__A,
  3595. (__v16si) _mm512_setzero_si512 (),
  3596. __M);
  3597. }
  3598. extern __inline __m512i
  3599. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3600. _mm512_broadcastq_epi64 (__m128i __A)
  3601. {
  3602. return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
  3603. (__v8di)
  3604. _mm512_undefined_epi32 (),
  3605. (__mmask8) -1);
  3606. }
  3607. extern __inline __m512i
  3608. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3609. _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
  3610. {
  3611. return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
  3612. (__v8di) __O, __M);
  3613. }
  3614. extern __inline __m512i
  3615. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3616. _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
  3617. {
  3618. return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
  3619. (__v8di)
  3620. _mm512_setzero_si512 (),
  3621. __M);
  3622. }
  3623. extern __inline __m512i
  3624. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3625. _mm512_set1_epi64 (long long __A)
  3626. {
  3627. return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
  3628. (__v8di)
  3629. _mm512_undefined_epi32 (),
  3630. (__mmask8)(-1));
  3631. }
  3632. extern __inline __m512i
  3633. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3634. _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
  3635. {
  3636. return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
  3637. __M);
  3638. }
  3639. extern __inline __m512i
  3640. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3641. _mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
  3642. {
  3643. return (__m512i)
  3644. __builtin_ia32_pbroadcastq512_gpr_mask (__A,
  3645. (__v8di) _mm512_setzero_si512 (),
  3646. __M);
  3647. }
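/* Usage sketch (illustrative addition): _mm512_set1_epi32 replicates a
   scalar into all 16 lanes; the masked form writes only the selected lanes
   of __dst and keeps the rest.  The function name is hypothetical.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__avx512f_sketch_splat (__m512i __dst, __mmask16 __k, int __x)
{
  return _mm512_mask_set1_epi32 (__dst, __k, __x);
}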
  3648. extern __inline __m512
  3649. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3650. _mm512_broadcast_f32x4 (__m128 __A)
  3651. {
  3652. return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
  3653. (__v16sf)
  3654. _mm512_undefined_ps (),
  3655. (__mmask16) -1);
  3656. }
  3657. extern __inline __m512
  3658. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3659. _mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
  3660. {
  3661. return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
  3662. (__v16sf) __O,
  3663. __M);
  3664. }
  3665. extern __inline __m512
  3666. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3667. _mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
  3668. {
  3669. return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
  3670. (__v16sf)
  3671. _mm512_setzero_ps (),
  3672. __M);
  3673. }
  3674. extern __inline __m512i
  3675. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3676. _mm512_broadcast_i32x4 (__m128i __A)
  3677. {
  3678. return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
  3679. (__v16si)
  3680. _mm512_undefined_epi32 (),
  3681. (__mmask16) -1);
  3682. }
  3683. extern __inline __m512i
  3684. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3685. _mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
  3686. {
  3687. return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
  3688. (__v16si) __O,
  3689. __M);
  3690. }
  3691. extern __inline __m512i
  3692. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3693. _mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
  3694. {
  3695. return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
  3696. (__v16si)
  3697. _mm512_setzero_si512 (),
  3698. __M);
  3699. }
  3700. extern __inline __m512d
  3701. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3702. _mm512_broadcast_f64x4 (__m256d __A)
  3703. {
  3704. return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
  3705. (__v8df)
  3706. _mm512_undefined_pd (),
  3707. (__mmask8) -1);
  3708. }
  3709. extern __inline __m512d
  3710. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3711. _mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
  3712. {
  3713. return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
  3714. (__v8df) __O,
  3715. __M);
  3716. }
  3717. extern __inline __m512d
  3718. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3719. _mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
  3720. {
  3721. return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
  3722. (__v8df)
  3723. _mm512_setzero_pd (),
  3724. __M);
  3725. }
  3726. extern __inline __m512i
  3727. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3728. _mm512_broadcast_i64x4 (__m256i __A)
  3729. {
  3730. return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
  3731. (__v8di)
  3732. _mm512_undefined_epi32 (),
  3733. (__mmask8) -1);
  3734. }
  3735. extern __inline __m512i
  3736. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3737. _mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
  3738. {
  3739. return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
  3740. (__v8di) __O,
  3741. __M);
  3742. }
  3743. extern __inline __m512i
  3744. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3745. _mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
  3746. {
  3747. return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
  3748. (__v8di)
  3749. _mm512_setzero_si512 (),
  3750. __M);
  3751. }
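/* Usage sketch (illustrative addition): the f32x4/i32x4/f64x4/i64x4 forms
   replicate a whole 128-bit or 256-bit block into every such block of the
   512-bit result, rather than a single element.  The function name is
   hypothetical.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__avx512f_sketch_bcast4 (__m128 __a)
{
  /* Result: { a0,a1,a2,a3, a0,a1,a2,a3, a0,a1,a2,a3, a0,a1,a2,a3 }.  */
  return _mm512_broadcast_f32x4 (__a);
}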
  3752. typedef enum
  3753. {
  3754. _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
  3755. _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
  3756. _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
  3757. _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  3758. _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
  3759. _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
  3760. _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
  3761. _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  3762. _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
  3763. _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
  3764. _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
  3765. _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  3766. _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
  3767. _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
  3768. _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
  3769. _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  3770. _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
  3771. _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
  3772. _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
  3773. _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  3774. _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
  3775. _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
  3776. _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
  3777. _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  3778. _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
  3779. _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
  3780. _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
  3781. _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  3782. _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
  3783. _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
  3784. _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
  3785. _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  3786. _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
  3787. _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
  3788. _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
  3789. _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  3790. _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
  3791. _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
  3792. _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
  3793. _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  3794. _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
  3795. _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
  3796. _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
  3797. _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  3798. _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
  3799. _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
  3800. _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
  3801. _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  3802. _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
  3803. _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
  3804. _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
  3805. _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  3806. _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
  3807. _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
  3808. _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
  3809. _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  3810. _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
  3811. _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
  3812. _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
  3813. _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  3814. _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
  3815. _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
  3816. _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
  3817. _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  3818. _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
  3819. _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
  3820. _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
  3821. _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  3822. _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
  3823. _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
  3824. _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
  3825. _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  3826. _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
  3827. _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
  3828. _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
  3829. _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  3830. _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
  3831. _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
  3832. _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
  3833. _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  3834. _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
  3835. _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
  3836. _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
  3837. _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  3838. _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
  3839. _MM_PERM_DDDD = 0xFF
  3840. } _MM_PERM_ENUM;
  3841. #ifdef __OPTIMIZE__
  3842. extern __inline __m512i
  3843. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3844. _mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
  3845. {
  3846. return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
  3847. __mask,
  3848. (__v16si)
  3849. _mm512_undefined_epi32 (),
  3850. (__mmask16) -1);
  3851. }
  3852. extern __inline __m512i
  3853. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3854. _mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
  3855. _MM_PERM_ENUM __mask)
  3856. {
  3857. return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
  3858. __mask,
  3859. (__v16si) __W,
  3860. (__mmask16) __U);
  3861. }
  3862. extern __inline __m512i
  3863. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3864. _mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
  3865. {
  3866. return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
  3867. __mask,
  3868. (__v16si)
  3869. _mm512_setzero_si512 (),
  3870. (__mmask16) __U);
  3871. }
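/* Usage sketch (illustrative addition): in an _MM_PERM_<d3><d2><d1><d0>
   name each letter selects the source element (A=0 ... D=3) written to that
   destination position within every 128-bit lane, i.e. the byte is
   (d3<<6)|(d2<<4)|(d1<<2)|d0.  _MM_PERM_DCBA (0xE4) is the identity and
   _MM_PERM_BADC (0x4E) swaps the two 64-bit halves of each lane.  The
   function name is hypothetical.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__avx512f_sketch_swap_halves (__m512i __a)
{
  return _mm512_shuffle_epi32 (__a, _MM_PERM_BADC);
}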
  3872. extern __inline __m512i
  3873. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3874. _mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
  3875. {
  3876. return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
  3877. (__v8di) __B, __imm,
  3878. (__v8di)
  3879. _mm512_undefined_epi32 (),
  3880. (__mmask8) -1);
  3881. }
  3882. extern __inline __m512i
  3883. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3884. _mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
  3885. __m512i __B, const int __imm)
  3886. {
  3887. return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
  3888. (__v8di) __B, __imm,
  3889. (__v8di) __W,
  3890. (__mmask8) __U);
  3891. }
  3892. extern __inline __m512i
  3893. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3894. _mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
  3895. const int __imm)
  3896. {
  3897. return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
  3898. (__v8di) __B, __imm,
  3899. (__v8di)
  3900. _mm512_setzero_si512 (),
  3901. (__mmask8) __U);
  3902. }
  3903. extern __inline __m512i
  3904. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3905. _mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
  3906. {
  3907. return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
  3908. (__v16si) __B,
  3909. __imm,
  3910. (__v16si)
  3911. _mm512_undefined_epi32 (),
  3912. (__mmask16) -1);
  3913. }
  3914. extern __inline __m512i
  3915. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3916. _mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
  3917. __m512i __B, const int __imm)
  3918. {
  3919. return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
  3920. (__v16si) __B,
  3921. __imm,
  3922. (__v16si) __W,
  3923. (__mmask16) __U);
  3924. }
  3925. extern __inline __m512i
  3926. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3927. _mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
  3928. const int __imm)
  3929. {
  3930. return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
  3931. (__v16si) __B,
  3932. __imm,
  3933. (__v16si)
  3934. _mm512_setzero_si512 (),
  3935. (__mmask16) __U);
  3936. }
  3937. extern __inline __m512d
  3938. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3939. _mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
  3940. {
  3941. return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
  3942. (__v8df) __B, __imm,
  3943. (__v8df)
  3944. _mm512_undefined_pd (),
  3945. (__mmask8) -1);
  3946. }
  3947. extern __inline __m512d
  3948. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3949. _mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
  3950. __m512d __B, const int __imm)
  3951. {
  3952. return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
  3953. (__v8df) __B, __imm,
  3954. (__v8df) __W,
  3955. (__mmask8) __U);
  3956. }
  3957. extern __inline __m512d
  3958. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3959. _mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
  3960. const int __imm)
  3961. {
  3962. return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
  3963. (__v8df) __B, __imm,
  3964. (__v8df)
  3965. _mm512_setzero_pd (),
  3966. (__mmask8) __U);
  3967. }
  3968. extern __inline __m512
  3969. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3970. _mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
  3971. {
  3972. return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
  3973. (__v16sf) __B, __imm,
  3974. (__v16sf)
  3975. _mm512_undefined_ps (),
  3976. (__mmask16) -1);
  3977. }
  3978. extern __inline __m512
  3979. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3980. _mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
  3981. __m512 __B, const int __imm)
  3982. {
  3983. return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
  3984. (__v16sf) __B, __imm,
  3985. (__v16sf) __W,
  3986. (__mmask16) __U);
  3987. }
  3988. extern __inline __m512
  3989. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3990. _mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
  3991. const int __imm)
  3992. {
  3993. return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
  3994. (__v16sf) __B, __imm,
  3995. (__v16sf)
  3996. _mm512_setzero_ps (),
  3997. (__mmask16) __U);
  3998. }
  3999. #else
  4000. #define _mm512_shuffle_epi32(X, C) \
  4001. ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
  4002. (__v16si)(__m512i)_mm512_undefined_epi32 (),\
  4003. (__mmask16)-1))
  4004. #define _mm512_mask_shuffle_epi32(W, U, X, C) \
  4005. ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
  4006. (__v16si)(__m512i)(W),\
  4007. (__mmask16)(U)))
  4008. #define _mm512_maskz_shuffle_epi32(U, X, C) \
  4009. ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
  4010. (__v16si)(__m512i)_mm512_setzero_si512 (),\
  4011. (__mmask16)(U)))
  4012. #define _mm512_shuffle_i64x2(X, Y, C) \
  4013. ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
  4014. (__v8di)(__m512i)(Y), (int)(C),\
  4015. (__v8di)(__m512i)_mm512_undefined_epi32 (),\
  4016. (__mmask8)-1))
  4017. #define _mm512_mask_shuffle_i64x2(W, U, X, Y, C) \
  4018. ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
  4019. (__v8di)(__m512i)(Y), (int)(C),\
  4020. (__v8di)(__m512i)(W),\
  4021. (__mmask8)(U)))
  4022. #define _mm512_maskz_shuffle_i64x2(U, X, Y, C) \
  4023. ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
  4024. (__v8di)(__m512i)(Y), (int)(C),\
  4025. (__v8di)(__m512i)_mm512_setzero_si512 (),\
  4026. (__mmask8)(U)))
  4027. #define _mm512_shuffle_i32x4(X, Y, C) \
  4028. ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
  4029. (__v16si)(__m512i)(Y), (int)(C),\
  4030. (__v16si)(__m512i)_mm512_undefined_epi32 (),\
  4031. (__mmask16)-1))
  4032. #define _mm512_mask_shuffle_i32x4(W, U, X, Y, C) \
  4033. ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
  4034. (__v16si)(__m512i)(Y), (int)(C),\
  4035. (__v16si)(__m512i)(W),\
  4036. (__mmask16)(U)))
  4037. #define _mm512_maskz_shuffle_i32x4(U, X, Y, C) \
  4038. ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
  4039. (__v16si)(__m512i)(Y), (int)(C),\
  4040. (__v16si)(__m512i)_mm512_setzero_si512 (),\
  4041. (__mmask16)(U)))
  4042. #define _mm512_shuffle_f64x2(X, Y, C) \
  4043. ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
  4044. (__v8df)(__m512d)(Y), (int)(C),\
  4045. (__v8df)(__m512d)_mm512_undefined_pd(),\
  4046. (__mmask8)-1))
  4047. #define _mm512_mask_shuffle_f64x2(W, U, X, Y, C) \
  4048. ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
  4049. (__v8df)(__m512d)(Y), (int)(C),\
  4050. (__v8df)(__m512d)(W),\
  4051. (__mmask8)(U)))
  4052. #define _mm512_maskz_shuffle_f64x2(U, X, Y, C) \
  4053. ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
  4054. (__v8df)(__m512d)(Y), (int)(C),\
  4055. (__v8df)(__m512d)_mm512_setzero_pd(),\
  4056. (__mmask8)(U)))
  4057. #define _mm512_shuffle_f32x4(X, Y, C) \
  4058. ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
  4059. (__v16sf)(__m512)(Y), (int)(C),\
  4060. (__v16sf)(__m512)_mm512_undefined_ps(),\
  4061. (__mmask16)-1))
  4062. #define _mm512_mask_shuffle_f32x4(W, U, X, Y, C) \
  4063. ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
  4064. (__v16sf)(__m512)(Y), (int)(C),\
  4065. (__v16sf)(__m512)(W),\
  4066. (__mmask16)(U)))
  4067. #define _mm512_maskz_shuffle_f32x4(U, X, Y, C) \
  4068. ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
  4069. (__v16sf)(__m512)(Y), (int)(C),\
  4070. (__v16sf)(__m512)_mm512_setzero_ps(),\
  4071. (__mmask16)(U)))
  4072. #endif
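/* The rolv/rorv intrinsics below rotate every element of __A left (rolv)
   or right (rorv) by the count held in the corresponding element of __B;
   counts are taken modulo the element width.  An illustrative sketch with
   placeholder values:

     __m512i x = _mm512_set1_epi32 (0xF);
     __m512i n = _mm512_set1_epi32 (4);
     __m512i r = _mm512_rolv_epi32 (x, n);   // each element is 0x000000F0
*/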
  4073. extern __inline __m512i
  4074. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4075. _mm512_rolv_epi32 (__m512i __A, __m512i __B)
  4076. {
  4077. return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
  4078. (__v16si) __B,
  4079. (__v16si)
  4080. _mm512_undefined_epi32 (),
  4081. (__mmask16) -1);
  4082. }
  4083. extern __inline __m512i
  4084. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4085. _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
  4086. {
  4087. return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
  4088. (__v16si) __B,
  4089. (__v16si) __W,
  4090. (__mmask16) __U);
  4091. }
  4092. extern __inline __m512i
  4093. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4094. _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
  4095. {
  4096. return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
  4097. (__v16si) __B,
  4098. (__v16si)
  4099. _mm512_setzero_si512 (),
  4100. (__mmask16) __U);
  4101. }
  4102. extern __inline __m512i
  4103. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4104. _mm512_rorv_epi32 (__m512i __A, __m512i __B)
  4105. {
  4106. return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
  4107. (__v16si) __B,
  4108. (__v16si)
  4109. _mm512_undefined_epi32 (),
  4110. (__mmask16) -1);
  4111. }
  4112. extern __inline __m512i
  4113. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4114. _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
  4115. {
  4116. return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
  4117. (__v16si) __B,
  4118. (__v16si) __W,
  4119. (__mmask16) __U);
  4120. }
  4121. extern __inline __m512i
  4122. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4123. _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
  4124. {
  4125. return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
  4126. (__v16si) __B,
  4127. (__v16si)
  4128. _mm512_setzero_si512 (),
  4129. (__mmask16) __U);
  4130. }
  4131. extern __inline __m512i
  4132. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4133. _mm512_rolv_epi64 (__m512i __A, __m512i __B)
  4134. {
  4135. return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
  4136. (__v8di) __B,
  4137. (__v8di)
  4138. _mm512_undefined_epi32 (),
  4139. (__mmask8) -1);
  4140. }
  4141. extern __inline __m512i
  4142. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4143. _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
  4144. {
  4145. return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
  4146. (__v8di) __B,
  4147. (__v8di) __W,
  4148. (__mmask8) __U);
  4149. }
  4150. extern __inline __m512i
  4151. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4152. _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
  4153. {
  4154. return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
  4155. (__v8di) __B,
  4156. (__v8di)
  4157. _mm512_setzero_si512 (),
  4158. (__mmask8) __U);
  4159. }
  4160. extern __inline __m512i
  4161. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4162. _mm512_rorv_epi64 (__m512i __A, __m512i __B)
  4163. {
  4164. return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
  4165. (__v8di) __B,
  4166. (__v8di)
  4167. _mm512_undefined_epi32 (),
  4168. (__mmask8) -1);
  4169. }
  4170. extern __inline __m512i
  4171. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4172. _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
  4173. {
  4174. return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
  4175. (__v8di) __B,
  4176. (__v8di) __W,
  4177. (__mmask8) __U);
  4178. }
  4179. extern __inline __m512i
  4180. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4181. _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
  4182. {
  4183. return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
  4184. (__v8di) __B,
  4185. (__v8di)
  4186. _mm512_setzero_si512 (),
  4187. (__mmask8) __U);
  4188. }
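/* The cvt_round and cvtt_round conversions below take an extra immediate
   __R built from the _MM_FROUND_* constants: the cvt_ forms use it to
   select the rounding direction (e.g. _MM_FROUND_TO_NEAREST_INT |
   _MM_FROUND_NO_EXC), while the truncating cvtt_ forms always round
   toward zero and use __R essentially to pass _MM_FROUND_NO_EXC.  When
   __OPTIMIZE__ is not defined they are provided as macros so that __R
   stays a compile-time constant.  An illustrative sketch with a
   placeholder vector v:

     __m256i i = _mm512_cvt_roundpd_epi32 (v,
                   _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
*/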
  4189. #ifdef __OPTIMIZE__
  4190. extern __inline __m256i
  4191. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4192. _mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
  4193. {
  4194. return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
  4195. (__v8si)
  4196. _mm256_undefined_si256 (),
  4197. (__mmask8) -1, __R);
  4198. }
  4199. extern __inline __m256i
  4200. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4201. _mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
  4202. const int __R)
  4203. {
  4204. return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
  4205. (__v8si) __W,
  4206. (__mmask8) __U, __R);
  4207. }
  4208. extern __inline __m256i
  4209. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4210. _mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
  4211. {
  4212. return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
  4213. (__v8si)
  4214. _mm256_setzero_si256 (),
  4215. (__mmask8) __U, __R);
  4216. }
  4217. extern __inline __m256i
  4218. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4219. _mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
  4220. {
  4221. return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
  4222. (__v8si)
  4223. _mm256_undefined_si256 (),
  4224. (__mmask8) -1, __R);
  4225. }
  4226. extern __inline __m256i
  4227. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4228. _mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
  4229. const int __R)
  4230. {
  4231. return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
  4232. (__v8si) __W,
  4233. (__mmask8) __U, __R);
  4234. }
  4235. extern __inline __m256i
  4236. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4237. _mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
  4238. {
  4239. return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
  4240. (__v8si)
  4241. _mm256_setzero_si256 (),
  4242. (__mmask8) __U, __R);
  4243. }
  4244. #else
#define _mm512_cvtt_roundpd_epi32(A, B) \
((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), (__v8si)_mm256_undefined_si256(), (__mmask8)-1, (int)(B)))
#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, B) \
((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), (__v8si)(__m256i)(W), (__mmask8)(U), (int)(B)))
#define _mm512_maskz_cvtt_roundpd_epi32(U, A, B) \
((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U), (int)(B)))
#define _mm512_cvtt_roundpd_epu32(A, B) \
((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), (__v8si)_mm256_undefined_si256(), (__mmask8)-1, (int)(B)))
#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, B) \
((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), (__v8si)(__m256i)(W), (__mmask8)(U), (int)(B)))
#define _mm512_maskz_cvtt_roundpd_epu32(U, A, B) \
((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U), (int)(B)))
  4257. #endif
  4258. #ifdef __OPTIMIZE__
  4259. extern __inline __m256i
  4260. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4261. _mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
  4262. {
  4263. return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
  4264. (__v8si)
  4265. _mm256_undefined_si256 (),
  4266. (__mmask8) -1, __R);
  4267. }
  4268. extern __inline __m256i
  4269. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4270. _mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
  4271. const int __R)
  4272. {
  4273. return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
  4274. (__v8si) __W,
  4275. (__mmask8) __U, __R);
  4276. }
  4277. extern __inline __m256i
  4278. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4279. _mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
  4280. {
  4281. return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
  4282. (__v8si)
  4283. _mm256_setzero_si256 (),
  4284. (__mmask8) __U, __R);
  4285. }
  4286. extern __inline __m256i
  4287. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4288. _mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
  4289. {
  4290. return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
  4291. (__v8si)
  4292. _mm256_undefined_si256 (),
  4293. (__mmask8) -1, __R);
  4294. }
  4295. extern __inline __m256i
  4296. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4297. _mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
  4298. const int __R)
  4299. {
  4300. return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
  4301. (__v8si) __W,
  4302. (__mmask8) __U, __R);
  4303. }
  4304. extern __inline __m256i
  4305. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4306. _mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
  4307. {
  4308. return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
  4309. (__v8si)
  4310. _mm256_setzero_si256 (),
  4311. (__mmask8) __U, __R);
  4312. }
  4313. #else
#define _mm512_cvt_roundpd_epi32(A, B) \
((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), (__v8si)_mm256_undefined_si256(), (__mmask8)-1, (int)(B)))
#define _mm512_mask_cvt_roundpd_epi32(W, U, A, B) \
((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), (__v8si)(__m256i)(W), (__mmask8)(U), (int)(B)))
#define _mm512_maskz_cvt_roundpd_epi32(U, A, B) \
((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U), (int)(B)))
#define _mm512_cvt_roundpd_epu32(A, B) \
((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), (__v8si)_mm256_undefined_si256(), (__mmask8)-1, (int)(B)))
#define _mm512_mask_cvt_roundpd_epu32(W, U, A, B) \
((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), (__v8si)(__m256i)(W), (__mmask8)(U), (int)(B)))
#define _mm512_maskz_cvt_roundpd_epu32(U, A, B) \
((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U), (int)(B)))
  4326. #endif
  4327. #ifdef __OPTIMIZE__
  4328. extern __inline __m512i
  4329. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4330. _mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
  4331. {
  4332. return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
  4333. (__v16si)
  4334. _mm512_undefined_epi32 (),
  4335. (__mmask16) -1, __R);
  4336. }
  4337. extern __inline __m512i
  4338. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4339. _mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
  4340. const int __R)
  4341. {
  4342. return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
  4343. (__v16si) __W,
  4344. (__mmask16) __U, __R);
  4345. }
  4346. extern __inline __m512i
  4347. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4348. _mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
  4349. {
  4350. return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
  4351. (__v16si)
  4352. _mm512_setzero_si512 (),
  4353. (__mmask16) __U, __R);
  4354. }
  4355. extern __inline __m512i
  4356. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4357. _mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
  4358. {
  4359. return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
  4360. (__v16si)
  4361. _mm512_undefined_epi32 (),
  4362. (__mmask16) -1, __R);
  4363. }
  4364. extern __inline __m512i
  4365. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4366. _mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
  4367. const int __R)
  4368. {
  4369. return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
  4370. (__v16si) __W,
  4371. (__mmask16) __U, __R);
  4372. }
  4373. extern __inline __m512i
  4374. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4375. _mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
  4376. {
  4377. return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
  4378. (__v16si)
  4379. _mm512_setzero_si512 (),
  4380. (__mmask16) __U, __R);
  4381. }
  4382. #else
#define _mm512_cvtt_roundps_epi32(A, B) \
((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), (__v16si)_mm512_undefined_epi32 (), (__mmask16)-1, (int)(B)))
#define _mm512_mask_cvtt_roundps_epi32(W, U, A, B) \
((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), (__v16si)(__m512i)(W), (__mmask16)(U), (int)(B)))
#define _mm512_maskz_cvtt_roundps_epi32(U, A, B) \
((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512 (), (__mmask16)(U), (int)(B)))
#define _mm512_cvtt_roundps_epu32(A, B) \
((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), (__v16si)_mm512_undefined_epi32 (), (__mmask16)-1, (int)(B)))
#define _mm512_mask_cvtt_roundps_epu32(W, U, A, B) \
((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), (__v16si)(__m512i)(W), (__mmask16)(U), (int)(B)))
#define _mm512_maskz_cvtt_roundps_epu32(U, A, B) \
((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512 (), (__mmask16)(U), (int)(B)))
  4395. #endif
  4396. #ifdef __OPTIMIZE__
  4397. extern __inline __m512i
  4398. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4399. _mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
  4400. {
  4401. return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
  4402. (__v16si)
  4403. _mm512_undefined_epi32 (),
  4404. (__mmask16) -1, __R);
  4405. }
  4406. extern __inline __m512i
  4407. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4408. _mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
  4409. const int __R)
  4410. {
  4411. return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
  4412. (__v16si) __W,
  4413. (__mmask16) __U, __R);
  4414. }
  4415. extern __inline __m512i
  4416. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4417. _mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
  4418. {
  4419. return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
  4420. (__v16si)
  4421. _mm512_setzero_si512 (),
  4422. (__mmask16) __U, __R);
  4423. }
  4424. extern __inline __m512i
  4425. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4426. _mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
  4427. {
  4428. return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
  4429. (__v16si)
  4430. _mm512_undefined_epi32 (),
  4431. (__mmask16) -1, __R);
  4432. }
  4433. extern __inline __m512i
  4434. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4435. _mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
  4436. const int __R)
  4437. {
  4438. return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
  4439. (__v16si) __W,
  4440. (__mmask16) __U, __R);
  4441. }
  4442. extern __inline __m512i
  4443. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4444. _mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
  4445. {
  4446. return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
  4447. (__v16si)
  4448. _mm512_setzero_si512 (),
  4449. (__mmask16) __U, __R);
  4450. }
  4451. #else
#define _mm512_cvt_roundps_epi32(A, B) \
((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), (__v16si)_mm512_undefined_epi32 (), (__mmask16)-1, (int)(B)))
#define _mm512_mask_cvt_roundps_epi32(W, U, A, B) \
((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), (__v16si)(__m512i)(W), (__mmask16)(U), (int)(B)))
#define _mm512_maskz_cvt_roundps_epi32(U, A, B) \
((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512 (), (__mmask16)(U), (int)(B)))
#define _mm512_cvt_roundps_epu32(A, B) \
((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), (__v16si)_mm512_undefined_epi32 (), (__mmask16)-1, (int)(B)))
#define _mm512_mask_cvt_roundps_epu32(W, U, A, B) \
((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), (__v16si)(__m512i)(W), (__mmask16)(U), (int)(B)))
#define _mm512_maskz_cvt_roundps_epu32(U, A, B) \
((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512 (), (__mmask16)(U), (int)(B)))
  4464. #endif
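/* The scalar conversions that follow write the converted integer into the
   low element of the result and copy the remaining elements from __A; the
   64-bit variants are only available under __x86_64__, and the round
   forms additionally take a rounding-mode immediate.  An illustrative
   sketch with placeholder values:

     __m128d d = _mm_cvtu32_sd (_mm_setzero_pd (), 42U);
     // low element 42.0, upper element copied from the first argument
*/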
  4465. extern __inline __m128d
  4466. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4467. _mm_cvtu32_sd (__m128d __A, unsigned __B)
  4468. {
  4469. return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
  4470. }
  4471. #ifdef __x86_64__
  4472. #ifdef __OPTIMIZE__
  4473. extern __inline __m128d
  4474. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4475. _mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
  4476. {
  4477. return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
  4478. }
  4479. extern __inline __m128d
  4480. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4481. _mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
  4482. {
  4483. return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
  4484. }
  4485. extern __inline __m128d
  4486. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4487. _mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
  4488. {
  4489. return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
  4490. }
  4491. #else
#define _mm_cvt_roundu64_sd(A, B, C) \
((__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), (unsigned long long)(B), (int)(C)))
#define _mm_cvt_roundi64_sd(A, B, C) \
((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), (int)(C)))
#define _mm_cvt_roundsi64_sd(A, B, C) \
((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), (int)(C)))
  4498. #endif
  4499. #endif
  4500. #ifdef __OPTIMIZE__
  4501. extern __inline __m128
  4502. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4503. _mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
  4504. {
  4505. return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
  4506. }
  4507. extern __inline __m128
  4508. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4509. _mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
  4510. {
  4511. return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
  4512. }
  4513. extern __inline __m128
  4514. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4515. _mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
  4516. {
  4517. return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
  4518. }
  4519. #else
#define _mm_cvt_roundu32_ss(A, B, C) \
((__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned)(B), (int)(C)))
#define _mm_cvt_roundi32_ss(A, B, C) \
((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(C)))
#define _mm_cvt_roundsi32_ss(A, B, C) \
((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(C)))
  4526. #endif
  4527. #ifdef __x86_64__
  4528. #ifdef __OPTIMIZE__
  4529. extern __inline __m128
  4530. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4531. _mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
  4532. {
  4533. return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
  4534. }
  4535. extern __inline __m128
  4536. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4537. _mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
  4538. {
  4539. return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
  4540. }
  4541. extern __inline __m128
  4542. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4543. _mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
  4544. {
  4545. return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
  4546. }
  4547. #else
#define _mm_cvt_roundu64_ss(A, B, C) \
((__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), (unsigned long long)(B), (int)(C)))
#define _mm_cvt_roundi64_ss(A, B, C) \
((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), (int)(C)))
#define _mm_cvt_roundsi64_ss(A, B, C) \
((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), (int)(C)))
  4554. #endif
  4555. #endif
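/* The narrowing conversions that follow come in three flavours:
   _mm512_cvtepi32_epi8 simply truncates each element,
   _mm512_cvtsepi32_epi8 saturates as signed and
   _mm512_cvtusepi32_epi8 saturates as unsigned; the storeu variants
   write the narrowed elements straight to unaligned memory under the
   mask.  An illustrative sketch with a placeholder input:

     __m512i v = _mm512_set1_epi32 (300);
     __m128i t = _mm512_cvtepi32_epi8 (v);    // each byte 0x2C (300 & 0xFF)
     __m128i s = _mm512_cvtsepi32_epi8 (v);   // each byte 127 (signed saturation)
     __m128i u = _mm512_cvtusepi32_epi8 (v);  // each byte 255 (unsigned saturation)
*/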
  4556. extern __inline __m128i
  4557. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4558. _mm512_cvtepi32_epi8 (__m512i __A)
  4559. {
  4560. return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
  4561. (__v16qi)
  4562. _mm_undefined_si128 (),
  4563. (__mmask16) -1);
  4564. }
  4565. extern __inline void
  4566. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4567. _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
  4568. {
  4569. __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
  4570. }
  4571. extern __inline __m128i
  4572. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4573. _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
  4574. {
  4575. return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
  4576. (__v16qi) __O, __M);
  4577. }
  4578. extern __inline __m128i
  4579. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4580. _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
  4581. {
  4582. return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
  4583. (__v16qi)
  4584. _mm_setzero_si128 (),
  4585. __M);
  4586. }
  4587. extern __inline __m128i
  4588. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4589. _mm512_cvtsepi32_epi8 (__m512i __A)
  4590. {
  4591. return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
  4592. (__v16qi)
  4593. _mm_undefined_si128 (),
  4594. (__mmask16) -1);
  4595. }
  4596. extern __inline void
  4597. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4598. _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
  4599. {
  4600. __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
  4601. }
  4602. extern __inline __m128i
  4603. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4604. _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
  4605. {
  4606. return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
  4607. (__v16qi) __O, __M);
  4608. }
  4609. extern __inline __m128i
  4610. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4611. _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
  4612. {
  4613. return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
  4614. (__v16qi)
  4615. _mm_setzero_si128 (),
  4616. __M);
  4617. }
  4618. extern __inline __m128i
  4619. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4620. _mm512_cvtusepi32_epi8 (__m512i __A)
  4621. {
  4622. return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
  4623. (__v16qi)
  4624. _mm_undefined_si128 (),
  4625. (__mmask16) -1);
  4626. }
  4627. extern __inline void
  4628. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4629. _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
  4630. {
  4631. __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
  4632. }
  4633. extern __inline __m128i
  4634. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4635. _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
  4636. {
  4637. return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
  4638. (__v16qi) __O,
  4639. __M);
  4640. }
  4641. extern __inline __m128i
  4642. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4643. _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
  4644. {
  4645. return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
  4646. (__v16qi)
  4647. _mm_setzero_si128 (),
  4648. __M);
  4649. }
  4650. extern __inline __m256i
  4651. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4652. _mm512_cvtepi32_epi16 (__m512i __A)
  4653. {
  4654. return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
  4655. (__v16hi)
  4656. _mm256_undefined_si256 (),
  4657. (__mmask16) -1);
  4658. }
  4659. extern __inline void
  4660. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4661. _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
  4662. {
  4663. __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
  4664. }
  4665. extern __inline __m256i
  4666. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4667. _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
  4668. {
  4669. return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
  4670. (__v16hi) __O, __M);
  4671. }
  4672. extern __inline __m256i
  4673. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4674. _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
  4675. {
  4676. return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
  4677. (__v16hi)
  4678. _mm256_setzero_si256 (),
  4679. __M);
  4680. }
  4681. extern __inline __m256i
  4682. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4683. _mm512_cvtsepi32_epi16 (__m512i __A)
  4684. {
  4685. return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
  4686. (__v16hi)
  4687. _mm256_undefined_si256 (),
  4688. (__mmask16) -1);
  4689. }
  4690. extern __inline void
  4691. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4692. _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
  4693. {
  4694. __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
  4695. }
  4696. extern __inline __m256i
  4697. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4698. _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
  4699. {
  4700. return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
  4701. (__v16hi) __O, __M);
  4702. }
  4703. extern __inline __m256i
  4704. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4705. _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
  4706. {
  4707. return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
  4708. (__v16hi)
  4709. _mm256_setzero_si256 (),
  4710. __M);
  4711. }
  4712. extern __inline __m256i
  4713. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4714. _mm512_cvtusepi32_epi16 (__m512i __A)
  4715. {
  4716. return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
  4717. (__v16hi)
  4718. _mm256_undefined_si256 (),
  4719. (__mmask16) -1);
  4720. }
  4721. extern __inline void
  4722. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4723. _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
  4724. {
  4725. __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
  4726. }
  4727. extern __inline __m256i
  4728. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4729. _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
  4730. {
  4731. return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
  4732. (__v16hi) __O,
  4733. __M);
  4734. }
  4735. extern __inline __m256i
  4736. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4737. _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
  4738. {
  4739. return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
  4740. (__v16hi)
  4741. _mm256_setzero_si256 (),
  4742. __M);
  4743. }
  4744. extern __inline __m256i
  4745. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4746. _mm512_cvtepi64_epi32 (__m512i __A)
  4747. {
  4748. return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
  4749. (__v8si)
  4750. _mm256_undefined_si256 (),
  4751. (__mmask8) -1);
  4752. }
  4753. extern __inline void
  4754. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4755. _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
  4756. {
  4757. __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
  4758. }
  4759. extern __inline __m256i
  4760. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4761. _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
  4762. {
  4763. return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
  4764. (__v8si) __O, __M);
  4765. }
  4766. extern __inline __m256i
  4767. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4768. _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
  4769. {
  4770. return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
  4771. (__v8si)
  4772. _mm256_setzero_si256 (),
  4773. __M);
  4774. }
  4775. extern __inline __m256i
  4776. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4777. _mm512_cvtsepi64_epi32 (__m512i __A)
  4778. {
  4779. return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
  4780. (__v8si)
  4781. _mm256_undefined_si256 (),
  4782. (__mmask8) -1);
  4783. }
  4784. extern __inline void
  4785. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4786. _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
  4787. {
  4788. __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
  4789. }
  4790. extern __inline __m256i
  4791. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4792. _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
  4793. {
  4794. return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
  4795. (__v8si) __O, __M);
  4796. }
  4797. extern __inline __m256i
  4798. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4799. _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
  4800. {
  4801. return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
  4802. (__v8si)
  4803. _mm256_setzero_si256 (),
  4804. __M);
  4805. }
  4806. extern __inline __m256i
  4807. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4808. _mm512_cvtusepi64_epi32 (__m512i __A)
  4809. {
  4810. return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
  4811. (__v8si)
  4812. _mm256_undefined_si256 (),
  4813. (__mmask8) -1);
  4814. }
  4815. extern __inline void
  4816. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4817. _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
  4818. {
  4819. __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
  4820. }
  4821. extern __inline __m256i
  4822. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4823. _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
  4824. {
  4825. return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
  4826. (__v8si) __O, __M);
  4827. }
  4828. extern __inline __m256i
  4829. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4830. _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
  4831. {
  4832. return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
  4833. (__v8si)
  4834. _mm256_setzero_si256 (),
  4835. __M);
  4836. }
  4837. extern __inline __m128i
  4838. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4839. _mm512_cvtepi64_epi16 (__m512i __A)
  4840. {
  4841. return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
  4842. (__v8hi)
  4843. _mm_undefined_si128 (),
  4844. (__mmask8) -1);
  4845. }
  4846. extern __inline void
  4847. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4848. _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
  4849. {
  4850. __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
  4851. }
  4852. extern __inline __m128i
  4853. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4854. _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
  4855. {
  4856. return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
  4857. (__v8hi) __O, __M);
  4858. }
  4859. extern __inline __m128i
  4860. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4861. _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
  4862. {
  4863. return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
  4864. (__v8hi)
  4865. _mm_setzero_si128 (),
  4866. __M);
  4867. }
  4868. extern __inline __m128i
  4869. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4870. _mm512_cvtsepi64_epi16 (__m512i __A)
  4871. {
  4872. return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
  4873. (__v8hi)
  4874. _mm_undefined_si128 (),
  4875. (__mmask8) -1);
  4876. }
  4877. extern __inline void
  4878. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4879. _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
  4880. {
  4881. __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
  4882. }
  4883. extern __inline __m128i
  4884. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4885. _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
  4886. {
  4887. return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
  4888. (__v8hi) __O, __M);
  4889. }
  4890. extern __inline __m128i
  4891. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4892. _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
  4893. {
  4894. return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
  4895. (__v8hi)
  4896. _mm_setzero_si128 (),
  4897. __M);
  4898. }
  4899. extern __inline __m128i
  4900. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4901. _mm512_cvtusepi64_epi16 (__m512i __A)
  4902. {
  4903. return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
  4904. (__v8hi)
  4905. _mm_undefined_si128 (),
  4906. (__mmask8) -1);
  4907. }
  4908. extern __inline void
  4909. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4910. _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
  4911. {
  4912. __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
  4913. }
  4914. extern __inline __m128i
  4915. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4916. _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
  4917. {
  4918. return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
  4919. (__v8hi) __O, __M);
  4920. }
  4921. extern __inline __m128i
  4922. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4923. _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
  4924. {
  4925. return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
  4926. (__v8hi)
  4927. _mm_setzero_si128 (),
  4928. __M);
  4929. }
  4930. extern __inline __m128i
  4931. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4932. _mm512_cvtepi64_epi8 (__m512i __A)
  4933. {
  4934. return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
  4935. (__v16qi)
  4936. _mm_undefined_si128 (),
  4937. (__mmask8) -1);
  4938. }
  4939. extern __inline void
  4940. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4941. _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
  4942. {
  4943. __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
  4944. }
  4945. extern __inline __m128i
  4946. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4947. _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
  4948. {
  4949. return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
  4950. (__v16qi) __O, __M);
  4951. }
  4952. extern __inline __m128i
  4953. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4954. _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
  4955. {
  4956. return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
  4957. (__v16qi)
  4958. _mm_setzero_si128 (),
  4959. __M);
  4960. }
  4961. extern __inline __m128i
  4962. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4963. _mm512_cvtsepi64_epi8 (__m512i __A)
  4964. {
  4965. return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
  4966. (__v16qi)
  4967. _mm_undefined_si128 (),
  4968. (__mmask8) -1);
  4969. }
  4970. extern __inline void
  4971. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4972. _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
  4973. {
  4974. __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
  4975. }
  4976. extern __inline __m128i
  4977. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4978. _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
  4979. {
  4980. return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
  4981. (__v16qi) __O, __M);
  4982. }
  4983. extern __inline __m128i
  4984. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4985. _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
  4986. {
  4987. return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
  4988. (__v16qi)
  4989. _mm_setzero_si128 (),
  4990. __M);
  4991. }
  4992. extern __inline __m128i
  4993. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4994. _mm512_cvtusepi64_epi8 (__m512i __A)
  4995. {
  4996. return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
  4997. (__v16qi)
  4998. _mm_undefined_si128 (),
  4999. (__mmask8) -1);
  5000. }
  5001. extern __inline void
  5002. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5003. _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
  5004. {
  5005. __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
  5006. }
  5007. extern __inline __m128i
  5008. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5009. _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
  5010. {
  5011. return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
  5012. (__v16qi) __O,
  5013. __M);
  5014. }
  5015. extern __inline __m128i
  5016. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5017. _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
  5018. {
  5019. return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
  5020. (__v16qi)
  5021. _mm_setzero_si128 (),
  5022. __M);
  5023. }
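/* _mm512_cvtepi32_pd and _mm512_cvtepu32_pd widen eight signed or
   unsigned 32-bit integers to doubles; every 32-bit value is exactly
   representable, so no rounding is involved.  An illustrative sketch:

     __m512d d = _mm512_cvtepu32_pd (_mm256_set1_epi32 (-1));
     // each element is 4294967295.0
*/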
  5024. extern __inline __m512d
  5025. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5026. _mm512_cvtepi32_pd (__m256i __A)
  5027. {
  5028. return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
  5029. (__v8df)
  5030. _mm512_undefined_pd (),
  5031. (__mmask8) -1);
  5032. }
  5033. extern __inline __m512d
  5034. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5035. _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
  5036. {
  5037. return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
  5038. (__v8df) __W,
  5039. (__mmask8) __U);
  5040. }
  5041. extern __inline __m512d
  5042. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5043. _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
  5044. {
  5045. return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
  5046. (__v8df)
  5047. _mm512_setzero_pd (),
  5048. (__mmask8) __U);
  5049. }
  5050. extern __inline __m512d
  5051. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5052. _mm512_cvtepu32_pd (__m256i __A)
  5053. {
  5054. return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
  5055. (__v8df)
  5056. _mm512_undefined_pd (),
  5057. (__mmask8) -1);
  5058. }
  5059. extern __inline __m512d
  5060. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5061. _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
  5062. {
  5063. return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
  5064. (__v8df) __W,
  5065. (__mmask8) __U);
  5066. }
  5067. extern __inline __m512d
  5068. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5069. _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
  5070. {
  5071. return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
  5072. (__v8df)
  5073. _mm512_setzero_pd (),
  5074. (__mmask8) __U);
  5075. }
  5076. #ifdef __OPTIMIZE__
  5077. extern __inline __m512
  5078. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5079. _mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
  5080. {
  5081. return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
  5082. (__v16sf)
  5083. _mm512_undefined_ps (),
  5084. (__mmask16) -1, __R);
  5085. }
  5086. extern __inline __m512
  5087. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5088. _mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
  5089. const int __R)
  5090. {
  5091. return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
  5092. (__v16sf) __W,
  5093. (__mmask16) __U, __R);
  5094. }
  5095. extern __inline __m512
  5096. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5097. _mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
  5098. {
  5099. return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
  5100. (__v16sf)
  5101. _mm512_setzero_ps (),
  5102. (__mmask16) __U, __R);
  5103. }
  5104. extern __inline __m512
  5105. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5106. _mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
  5107. {
  5108. return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
  5109. (__v16sf)
  5110. _mm512_undefined_ps (),
  5111. (__mmask16) -1, __R);
  5112. }
  5113. extern __inline __m512
  5114. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5115. _mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
  5116. const int __R)
  5117. {
  5118. return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
  5119. (__v16sf) __W,
  5120. (__mmask16) __U, __R);
  5121. }
  5122. extern __inline __m512
  5123. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5124. _mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
  5125. {
  5126. return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
  5127. (__v16sf)
  5128. _mm512_setzero_ps (),
  5129. (__mmask16) __U, __R);
  5130. }
  5131. #else
#define _mm512_cvt_roundepi32_ps(A, B) \
((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, (int)(B)))
#define _mm512_mask_cvt_roundepi32_ps(W, U, A, B) \
((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), (__v16sf)(__m512)(W), (__mmask16)(U), (int)(B)))
#define _mm512_maskz_cvt_roundepi32_ps(U, A, B) \
((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (int)(B)))
#define _mm512_cvt_roundepu32_ps(A, B) \
((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, (int)(B)))
#define _mm512_mask_cvt_roundepu32_ps(W, U, A, B) \
((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), (__v16sf)(__m512)(W), (__mmask16)(U), (int)(B)))
#define _mm512_maskz_cvt_roundepu32_ps(U, A, B) \
((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (int)(B)))
  5144. #endif
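/* The extract intrinsics below pull out one __imm-selected chunk of the
   512-bit source: extractf64x4/extracti64x4 take one 256-bit half
   (__imm 0 or 1) and extractf32x4/extracti32x4 take one 128-bit group
   (__imm 0 to 3).  An illustrative sketch with placeholder vectors:

     __m256d hi = _mm512_extractf64x4_pd (v, 1);   // upper four doubles
     __m128  q2 = _mm512_extractf32x4_ps (w, 2);   // elements 8..11
*/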
  5145. #ifdef __OPTIMIZE__
  5146. extern __inline __m256d
  5147. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5148. _mm512_extractf64x4_pd (__m512d __A, const int __imm)
  5149. {
  5150. return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
  5151. __imm,
  5152. (__v4df)
  5153. _mm256_undefined_pd (),
  5154. (__mmask8) -1);
  5155. }
  5156. extern __inline __m256d
  5157. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5158. _mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
  5159. const int __imm)
  5160. {
  5161. return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
  5162. __imm,
  5163. (__v4df) __W,
  5164. (__mmask8) __U);
  5165. }
  5166. extern __inline __m256d
  5167. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5168. _mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
  5169. {
  5170. return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
  5171. __imm,
  5172. (__v4df)
  5173. _mm256_setzero_pd (),
  5174. (__mmask8) __U);
  5175. }
  5176. extern __inline __m128
  5177. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5178. _mm512_extractf32x4_ps (__m512 __A, const int __imm)
  5179. {
  5180. return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
  5181. __imm,
  5182. (__v4sf)
  5183. _mm_undefined_ps (),
  5184. (__mmask8) -1);
  5185. }
  5186. extern __inline __m128
  5187. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5188. _mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
  5189. const int __imm)
  5190. {
  5191. return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
  5192. __imm,
  5193. (__v4sf) __W,
  5194. (__mmask8) __U);
  5195. }
  5196. extern __inline __m128
  5197. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5198. _mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
  5199. {
  5200. return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
  5201. __imm,
  5202. (__v4sf)
  5203. _mm_setzero_ps (),
  5204. (__mmask8) __U);
  5205. }
  5206. extern __inline __m256i
  5207. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5208. _mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
  5209. {
  5210. return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
  5211. __imm,
  5212. (__v4di)
  5213. _mm256_undefined_si256 (),
  5214. (__mmask8) -1);
  5215. }
  5216. extern __inline __m256i
  5217. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5218. _mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
  5219. const int __imm)
  5220. {
  5221. return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
  5222. __imm,
  5223. (__v4di) __W,
  5224. (__mmask8) __U);
  5225. }
  5226. extern __inline __m256i
  5227. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5228. _mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
  5229. {
  5230. return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
  5231. __imm,
  5232. (__v4di)
  5233. _mm256_setzero_si256 (),
  5234. (__mmask8) __U);
  5235. }
  5236. extern __inline __m128i
  5237. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5238. _mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
  5239. {
  5240. return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
  5241. __imm,
  5242. (__v4si)
  5243. _mm_undefined_si128 (),
  5244. (__mmask8) -1);
  5245. }
  5246. extern __inline __m128i
  5247. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5248. _mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
  5249. const int __imm)
  5250. {
  5251. return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
  5252. __imm,
  5253. (__v4si) __W,
  5254. (__mmask8) __U);
  5255. }
  5256. extern __inline __m128i
  5257. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5258. _mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
  5259. {
  5260. return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
  5261. __imm,
  5262. (__v4si)
  5263. _mm_setzero_si128 (),
  5264. (__mmask8) __U);
  5265. }
  5266. #else
  5267. #define _mm512_extractf64x4_pd(X, C) \
  5268. ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
  5269. (int) (C),\
  5270. (__v4df)(__m256d)_mm256_undefined_pd(),\
  5271. (__mmask8)-1))
  5272. #define _mm512_mask_extractf64x4_pd(W, U, X, C) \
  5273. ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
  5274. (int) (C),\
  5275. (__v4df)(__m256d)(W),\
  5276. (__mmask8)(U)))
  5277. #define _mm512_maskz_extractf64x4_pd(U, X, C) \
  5278. ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
  5279. (int) (C),\
  5280. (__v4df)(__m256d)_mm256_setzero_pd(),\
  5281. (__mmask8)(U)))
  5282. #define _mm512_extractf32x4_ps(X, C) \
  5283. ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
  5284. (int) (C),\
  5285. (__v4sf)(__m128)_mm_undefined_ps(),\
  5286. (__mmask8)-1))
  5287. #define _mm512_mask_extractf32x4_ps(W, U, X, C) \
  5288. ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
  5289. (int) (C),\
  5290. (__v4sf)(__m128)(W),\
  5291. (__mmask8)(U)))
  5292. #define _mm512_maskz_extractf32x4_ps(U, X, C) \
  5293. ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
  5294. (int) (C),\
  5295. (__v4sf)(__m128)_mm_setzero_ps(),\
  5296. (__mmask8)(U)))
  5297. #define _mm512_extracti64x4_epi64(X, C) \
  5298. ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
  5299. (int) (C),\
  5300. (__v4di)(__m256i)_mm256_undefined_si256 (),\
  5301. (__mmask8)-1))
  5302. #define _mm512_mask_extracti64x4_epi64(W, U, X, C) \
  5303. ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
  5304. (int) (C),\
  5305. (__v4di)(__m256i)(W),\
  5306. (__mmask8)(U)))
  5307. #define _mm512_maskz_extracti64x4_epi64(U, X, C) \
  5308. ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
  5309. (int) (C),\
  5310. (__v4di)(__m256i)_mm256_setzero_si256 (),\
  5311. (__mmask8)(U)))
  5312. #define _mm512_extracti32x4_epi32(X, C) \
  5313. ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
  5314. (int) (C),\
  5315. (__v4si)(__m128i)_mm_undefined_si128 (),\
  5316. (__mmask8)-1))
  5317. #define _mm512_mask_extracti32x4_epi32(W, U, X, C) \
  5318. ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
  5319. (int) (C),\
  5320. (__v4si)(__m128i)(W),\
  5321. (__mmask8)(U)))
  5322. #define _mm512_maskz_extracti32x4_epi32(U, X, C) \
  5323. ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
  5324. (int) (C),\
  5325. (__v4si)(__m128i)_mm_setzero_si128 (),\
  5326. (__mmask8)(U)))
  5327. #endif
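/* Usage sketch, for illustration only (not part of the upstream header):
   the extract intrinsics above copy one 128-bit (or 256-bit) lane out of a
   512-bit register; the lane selector must be a compile-time constant.  The
   helper name below is hypothetical.  */
#if 0
static __inline __m128
example_extract_lane2_ps (__m512 __v, __m128 __fallback, __mmask8 __k)
{
  /* Plain extract of 128-bit lane 2 (elements 8..11).  */
  __m128 __lane = _mm512_extractf32x4_ps (__v, 2);
  /* Merge-masking: elements whose bit in __k is clear keep __fallback.  */
  __m128 __merged = _mm512_mask_extractf32x4_ps (__fallback, __k, __v, 2);
  /* Zero-masking: elements whose bit in __k is clear become 0.0f.  */
  __m128 __zeroed = _mm512_maskz_extractf32x4_ps (__k, __v, 2);
  return _mm_add_ps (__lane, _mm_add_ps (__merged, __zeroed));
}
#endif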
  5328. #ifdef __OPTIMIZE__
  5329. extern __inline __m512i
  5330. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5331. _mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
  5332. {
  5333. return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
  5334. (__v4si) __B,
  5335. __imm,
  5336. (__v16si) __A, -1);
  5337. }
  5338. extern __inline __m512
  5339. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5340. _mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
  5341. {
  5342. return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
  5343. (__v4sf) __B,
  5344. __imm,
  5345. (__v16sf) __A, -1);
  5346. }
  5347. extern __inline __m512i
  5348. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5349. _mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
  5350. {
  5351. return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
  5352. (__v4di) __B,
  5353. __imm,
  5354. (__v8di)
  5355. _mm512_undefined_epi32 (),
  5356. (__mmask8) -1);
  5357. }
  5358. extern __inline __m512i
  5359. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5360. _mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
  5361. __m256i __B, const int __imm)
  5362. {
  5363. return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
  5364. (__v4di) __B,
  5365. __imm,
  5366. (__v8di) __W,
  5367. (__mmask8) __U);
  5368. }
  5369. extern __inline __m512i
  5370. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5371. _mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
  5372. const int __imm)
  5373. {
  5374. return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
  5375. (__v4di) __B,
  5376. __imm,
  5377. (__v8di)
  5378. _mm512_setzero_si512 (),
  5379. (__mmask8) __U);
  5380. }
  5381. extern __inline __m512d
  5382. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5383. _mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
  5384. {
  5385. return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
  5386. (__v4df) __B,
  5387. __imm,
  5388. (__v8df)
  5389. _mm512_undefined_pd (),
  5390. (__mmask8) -1);
  5391. }
  5392. extern __inline __m512d
  5393. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5394. _mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
  5395. __m256d __B, const int __imm)
  5396. {
  5397. return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
  5398. (__v4df) __B,
  5399. __imm,
  5400. (__v8df) __W,
  5401. (__mmask8) __U);
  5402. }
  5403. extern __inline __m512d
  5404. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5405. _mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
  5406. const int __imm)
  5407. {
  5408. return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
  5409. (__v4df) __B,
  5410. __imm,
  5411. (__v8df)
  5412. _mm512_setzero_pd (),
  5413. (__mmask8) __U);
  5414. }
  5415. #else
  5416. #define _mm512_insertf32x4(X, Y, C) \
  5417. ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
  5418. (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (X), (__mmask16)(-1)))
  5419. #define _mm512_inserti32x4(X, Y, C) \
  5420. ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
  5421. (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (X), (__mmask16)(-1)))
  5422. #define _mm512_insertf64x4(X, Y, C) \
  5423. ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
  5424. (__v4df)(__m256d) (Y), (int) (C), \
  5425. (__v8df)(__m512d)_mm512_undefined_pd(), \
  5426. (__mmask8)-1))
  5427. #define _mm512_mask_insertf64x4(W, U, X, Y, C) \
  5428. ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
  5429. (__v4df)(__m256d) (Y), (int) (C), \
  5430. (__v8df)(__m512d)(W), \
  5431. (__mmask8)(U)))
  5432. #define _mm512_maskz_insertf64x4(U, X, Y, C) \
  5433. ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
  5434. (__v4df)(__m256d) (Y), (int) (C), \
  5435. (__v8df)(__m512d)_mm512_setzero_pd(), \
  5436. (__mmask8)(U)))
  5437. #define _mm512_inserti64x4(X, Y, C) \
  5438. ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
  5439. (__v4di)(__m256i) (Y), (int) (C), \
  5440. (__v8di)(__m512i)_mm512_undefined_epi32 (), \
  5441. (__mmask8)-1))
  5442. #define _mm512_mask_inserti64x4(W, U, X, Y, C) \
  5443. ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
  5444. (__v4di)(__m256i) (Y), (int) (C),\
  5445. (__v8di)(__m512i)(W),\
  5446. (__mmask8)(U)))
  5447. #define _mm512_maskz_inserti64x4(U, X, Y, C) \
  5448. ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
  5449. (__v4di)(__m256i) (Y), (int) (C), \
  5450. (__v8di)(__m512i)_mm512_setzero_si512 (), \
  5451. (__mmask8)(U)))
  5452. #endif
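/* Usage sketch, for illustration only (not part of the upstream header):
   the insert intrinsics above overwrite one 128-bit or 256-bit slice of a
   512-bit register; the slice selector must be a compile-time constant
   (0 or 1 for the 256-bit forms).  The helper name below is hypothetical.  */
#if 0
static __inline __m512d
example_replace_high_half_pd (__m512d __v, __m256d __half, __mmask8 __k)
{
  /* Unconditionally place __half into the upper four doubles of __v.  */
  __m512d __full = _mm512_insertf64x4 (__v, __half, 1);
  /* Merge-masked variant: result elements whose bit in __k is clear keep
     the corresponding element of __full (the first argument).  */
  return _mm512_mask_insertf64x4 (__full, __k, __v, __half, 1);
}
#endif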
  5453. extern __inline __m512d
  5454. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5455. _mm512_loadu_pd (void const *__P)
  5456. {
  5457. return *(__m512d_u *)__P;
  5458. }
  5459. extern __inline __m512d
  5460. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5461. _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
  5462. {
  5463. return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
  5464. (__v8df) __W,
  5465. (__mmask8) __U);
  5466. }
  5467. extern __inline __m512d
  5468. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5469. _mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
  5470. {
  5471. return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
  5472. (__v8df)
  5473. _mm512_setzero_pd (),
  5474. (__mmask8) __U);
  5475. }
  5476. extern __inline void
  5477. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5478. _mm512_storeu_pd (void *__P, __m512d __A)
  5479. {
  5480. *(__m512d_u *)__P = __A;
  5481. }
  5482. extern __inline void
  5483. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5484. _mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
  5485. {
  5486. __builtin_ia32_storeupd512_mask ((double *) __P, (__v8df) __A,
  5487. (__mmask8) __U);
  5488. }
  5489. extern __inline __m512
  5490. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5491. _mm512_loadu_ps (void const *__P)
  5492. {
  5493. return *(__m512_u *)__P;
  5494. }
  5495. extern __inline __m512
  5496. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5497. _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
  5498. {
  5499. return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
  5500. (__v16sf) __W,
  5501. (__mmask16) __U);
  5502. }
  5503. extern __inline __m512
  5504. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5505. _mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
  5506. {
  5507. return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
  5508. (__v16sf)
  5509. _mm512_setzero_ps (),
  5510. (__mmask16) __U);
  5511. }
  5512. extern __inline void
  5513. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5514. _mm512_storeu_ps (void *__P, __m512 __A)
  5515. {
  5516. *(__m512_u *)__P = __A;
  5517. }
  5518. extern __inline void
  5519. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5520. _mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
  5521. {
  5522. __builtin_ia32_storeups512_mask ((float *) __P, (__v16sf) __A,
  5523. (__mmask16) __U);
  5524. }
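/* Usage sketch, for illustration only (not part of the upstream header):
   the unaligned load/store intrinsics above accept any byte alignment, and
   the masked forms access memory only for elements whose mask bit is set.
   The helper name below is hypothetical.  */
#if 0
static __inline void
example_copy_selected_floats (float *__dst, const float *__src, __mmask16 __k)
{
  /* Load 16 floats, zeroing the elements masked off by __k ...  */
  __m512 __v = _mm512_maskz_loadu_ps (__k, __src);
  /* ... then write back only the selected elements of __dst.  */
  _mm512_mask_storeu_ps (__dst, __k, __v);
}
#endif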
  5525. extern __inline __m128
  5526. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5527. _mm_mask_load_ss (__m128 __W, __mmask8 __U, const float *__P)
  5528. {
  5529. return (__m128) __builtin_ia32_loadss_mask (__P, (__v4sf) __W, __U);
  5530. }
  5531. extern __inline __m128
  5532. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5533. _mm_maskz_load_ss (__mmask8 __U, const float *__P)
  5534. {
  5535. return (__m128) __builtin_ia32_loadss_mask (__P, (__v4sf) _mm_setzero_ps (),
  5536. __U);
  5537. }
  5538. extern __inline __m128d
  5539. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5540. _mm_mask_load_sd (__m128d __W, __mmask8 __U, const double *__P)
  5541. {
  5542. return (__m128d) __builtin_ia32_loadsd_mask (__P, (__v2df) __W, __U);
  5543. }
  5544. extern __inline __m128d
  5545. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5546. _mm_maskz_load_sd (__mmask8 __U, const double *__P)
  5547. {
  5548. return (__m128d) __builtin_ia32_loadsd_mask (__P, (__v2df) _mm_setzero_pd (),
  5549. __U);
  5550. }
  5551. extern __inline __m128
  5552. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5553. _mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  5554. {
  5555. return (__m128) __builtin_ia32_movess_mask ((__v4sf) __A, (__v4sf) __B,
  5556. (__v4sf) __W, __U);
  5557. }
  5558. extern __inline __m128
  5559. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5560. _mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
  5561. {
  5562. return (__m128) __builtin_ia32_movess_mask ((__v4sf) __A, (__v4sf) __B,
  5563. (__v4sf) _mm_setzero_ps (), __U);
  5564. }
  5565. extern __inline __m128d
  5566. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5567. _mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
  5568. {
  5569. return (__m128d) __builtin_ia32_movesd_mask ((__v2df) __A, (__v2df) __B,
  5570. (__v2df) __W, __U);
  5571. }
  5572. extern __inline __m128d
  5573. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5574. _mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
  5575. {
  5576. return (__m128d) __builtin_ia32_movesd_mask ((__v2df) __A, (__v2df) __B,
  5577. (__v2df) _mm_setzero_pd (),
  5578. __U);
  5579. }
  5580. extern __inline void
  5581. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5582. _mm_mask_store_ss (float *__P, __mmask8 __U, __m128 __A)
  5583. {
  5584. __builtin_ia32_storess_mask (__P, (__v4sf) __A, (__mmask8) __U);
  5585. }
  5586. extern __inline void
  5587. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5588. _mm_mask_store_sd (double *__P, __mmask8 __U, __m128d __A)
  5589. {
  5590. __builtin_ia32_storesd_mask (__P, (__v2df) __A, (__mmask8) __U);
  5591. }
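/* Usage sketch, for illustration only (not part of the upstream header):
   the scalar mask/maskz load, move and store forms above operate on
   element 0 only, steered by bit 0 of the mask.  The helper name below is
   hypothetical.  */
#if 0
static __inline __m128d
example_conditional_scalar_load (__m128d __fallback, __mmask8 __k,
				 const double *__p)
{
  /* Element 0 is *__p when bit 0 of __k is set, otherwise it is taken from
     __fallback; the upper element of the result is zeroed.  */
  return _mm_mask_load_sd (__fallback, __k, __p);
}
#endif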
  5592. extern __inline __m512i
  5593. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5594. _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
  5595. {
  5596. return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
  5597. (__v8di) __W,
  5598. (__mmask8) __U);
  5599. }
  5600. extern __inline __m512i
  5601. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5602. _mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
  5603. {
  5604. return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
  5605. (__v8di)
  5606. _mm512_setzero_si512 (),
  5607. (__mmask8) __U);
  5608. }
  5609. extern __inline void
  5610. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5611. _mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
  5612. {
  5613. __builtin_ia32_storedqudi512_mask ((long long *) __P, (__v8di) __A,
  5614. (__mmask8) __U);
  5615. }
  5616. extern __inline __m512i
  5617. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5618. _mm512_loadu_si512 (void const *__P)
  5619. {
  5620. return *(__m512i_u *)__P;
  5621. }
  5622. extern __inline __m512i
  5623. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5624. _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
  5625. {
  5626. return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
  5627. (__v16si) __W,
  5628. (__mmask16) __U);
  5629. }
  5630. extern __inline __m512i
  5631. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5632. _mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
  5633. {
  5634. return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
  5635. (__v16si)
  5636. _mm512_setzero_si512 (),
  5637. (__mmask16) __U);
  5638. }
  5639. extern __inline void
  5640. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5641. _mm512_storeu_si512 (void *__P, __m512i __A)
  5642. {
  5643. *(__m512i_u *)__P = __A;
  5644. }
  5645. extern __inline void
  5646. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5647. _mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
  5648. {
  5649. __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
  5650. (__mmask16) __U);
  5651. }
  5652. extern __inline __m512d
  5653. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5654. _mm512_permutevar_pd (__m512d __A, __m512i __C)
  5655. {
  5656. return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
  5657. (__v8di) __C,
  5658. (__v8df)
  5659. _mm512_undefined_pd (),
  5660. (__mmask8) -1);
  5661. }
  5662. extern __inline __m512d
  5663. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5664. _mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
  5665. {
  5666. return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
  5667. (__v8di) __C,
  5668. (__v8df) __W,
  5669. (__mmask8) __U);
  5670. }
  5671. extern __inline __m512d
  5672. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5673. _mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
  5674. {
  5675. return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
  5676. (__v8di) __C,
  5677. (__v8df)
  5678. _mm512_setzero_pd (),
  5679. (__mmask8) __U);
  5680. }
  5681. extern __inline __m512
  5682. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5683. _mm512_permutevar_ps (__m512 __A, __m512i __C)
  5684. {
  5685. return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
  5686. (__v16si) __C,
  5687. (__v16sf)
  5688. _mm512_undefined_ps (),
  5689. (__mmask16) -1);
  5690. }
  5691. extern __inline __m512
  5692. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5693. _mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
  5694. {
  5695. return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
  5696. (__v16si) __C,
  5697. (__v16sf) __W,
  5698. (__mmask16) __U);
  5699. }
  5700. extern __inline __m512
  5701. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5702. _mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
  5703. {
  5704. return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
  5705. (__v16si) __C,
  5706. (__v16sf)
  5707. _mm512_setzero_ps (),
  5708. (__mmask16) __U);
  5709. }
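/* Usage sketch, for illustration only (not part of the upstream header):
   permutevar rearranges elements within each 128-bit lane; for the ps form
   bits [1:0] of every 32-bit control element select one of the four floats
   of its own lane.  The helper name below is hypothetical.  */
#if 0
static __inline __m512
example_reverse_within_lanes (__m512 __v)
{
  /* Per-lane controls 3,2,1,0 reverse the four floats of every lane.  */
  __m512i __ctrl = _mm512_set_epi32 (0, 1, 2, 3, 0, 1, 2, 3,
				     0, 1, 2, 3, 0, 1, 2, 3);
  return _mm512_permutevar_ps (__v, __ctrl);
}
#endif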
  5710. extern __inline __m512i
  5711. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5712. _mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
  5713. {
  5714. return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
  5715. /* idx */ ,
  5716. (__v8di) __A,
  5717. (__v8di) __B,
  5718. (__mmask8) -1);
  5719. }
  5720. extern __inline __m512i
  5721. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5722. _mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
  5723. __m512i __B)
  5724. {
  5725. return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
  5726. /* idx */ ,
  5727. (__v8di) __A,
  5728. (__v8di) __B,
  5729. (__mmask8) __U);
  5730. }
  5731. extern __inline __m512i
  5732. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5733. _mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
  5734. __mmask8 __U, __m512i __B)
  5735. {
  5736. return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
  5737. (__v8di) __I
  5738. /* idx */ ,
  5739. (__v8di) __B,
  5740. (__mmask8) __U);
  5741. }
  5742. extern __inline __m512i
  5743. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5744. _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
  5745. __m512i __I, __m512i __B)
  5746. {
  5747. return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
  5748. /* idx */ ,
  5749. (__v8di) __A,
  5750. (__v8di) __B,
  5751. (__mmask8) __U);
  5752. }
  5753. extern __inline __m512i
  5754. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5755. _mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
  5756. {
  5757. return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
  5758. /* idx */ ,
  5759. (__v16si) __A,
  5760. (__v16si) __B,
  5761. (__mmask16) -1);
  5762. }
  5763. extern __inline __m512i
  5764. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5765. _mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
  5766. __m512i __I, __m512i __B)
  5767. {
  5768. return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
  5769. /* idx */ ,
  5770. (__v16si) __A,
  5771. (__v16si) __B,
  5772. (__mmask16) __U);
  5773. }
  5774. extern __inline __m512i
  5775. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5776. _mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
  5777. __mmask16 __U, __m512i __B)
  5778. {
  5779. return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
  5780. (__v16si) __I
  5781. /* idx */ ,
  5782. (__v16si) __B,
  5783. (__mmask16) __U);
  5784. }
  5785. extern __inline __m512i
  5786. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5787. _mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
  5788. __m512i __I, __m512i __B)
  5789. {
  5790. return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
  5791. /* idx */ ,
  5792. (__v16si) __A,
  5793. (__v16si) __B,
  5794. (__mmask16) __U);
  5795. }
  5796. extern __inline __m512d
  5797. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5798. _mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
  5799. {
  5800. return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
  5801. /* idx */ ,
  5802. (__v8df) __A,
  5803. (__v8df) __B,
  5804. (__mmask8) -1);
  5805. }
  5806. extern __inline __m512d
  5807. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5808. _mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
  5809. __m512d __B)
  5810. {
  5811. return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
  5812. /* idx */ ,
  5813. (__v8df) __A,
  5814. (__v8df) __B,
  5815. (__mmask8) __U);
  5816. }
  5817. extern __inline __m512d
  5818. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5819. _mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
  5820. __m512d __B)
  5821. {
  5822. return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
  5823. (__v8di) __I
  5824. /* idx */ ,
  5825. (__v8df) __B,
  5826. (__mmask8) __U);
  5827. }
  5828. extern __inline __m512d
  5829. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5830. _mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
  5831. __m512d __B)
  5832. {
  5833. return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
  5834. /* idx */ ,
  5835. (__v8df) __A,
  5836. (__v8df) __B,
  5837. (__mmask8) __U);
  5838. }
  5839. extern __inline __m512
  5840. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5841. _mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
  5842. {
  5843. return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
  5844. /* idx */ ,
  5845. (__v16sf) __A,
  5846. (__v16sf) __B,
  5847. (__mmask16) -1);
  5848. }
  5849. extern __inline __m512
  5850. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5851. _mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
  5852. {
  5853. return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
  5854. /* idx */ ,
  5855. (__v16sf) __A,
  5856. (__v16sf) __B,
  5857. (__mmask16) __U);
  5858. }
  5859. extern __inline __m512
  5860. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5861. _mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
  5862. __m512 __B)
  5863. {
  5864. return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
  5865. (__v16si) __I
  5866. /* idx */ ,
  5867. (__v16sf) __B,
  5868. (__mmask16) __U);
  5869. }
  5870. extern __inline __m512
  5871. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5872. _mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
  5873. __m512 __B)
  5874. {
  5875. return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
  5876. /* idx */ ,
  5877. (__v16sf) __A,
  5878. (__v16sf) __B,
  5879. (__mmask16) __U);
  5880. }
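/* Usage sketch, for illustration only (not part of the upstream header):
   permutex2var is a full two-source shuffle; for the 32-bit form each index
   element uses bits [3:0] to pick a dword and bit 4 to choose between the
   first source and the second.  The helper name and index pattern below are
   hypothetical.  */
#if 0
static __inline __m512i
example_interleave_even_dwords (__m512i __a, __m512i __b)
{
  /* Indices 0,16,2,18,... yield a0,b0,a2,b2,... in the result.  */
  __m512i __idx = _mm512_set_epi32 (30, 14, 28, 12, 26, 10, 24, 8,
				    22, 6, 20, 4, 18, 2, 16, 0);
  return _mm512_permutex2var_epi32 (__a, __idx, __b);
}
#endif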
  5881. #ifdef __OPTIMIZE__
  5882. extern __inline __m512d
  5883. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5884. _mm512_permute_pd (__m512d __X, const int __C)
  5885. {
  5886. return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
  5887. (__v8df)
  5888. _mm512_undefined_pd (),
  5889. (__mmask8) -1);
  5890. }
  5891. extern __inline __m512d
  5892. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5893. _mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C)
  5894. {
  5895. return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
  5896. (__v8df) __W,
  5897. (__mmask8) __U);
  5898. }
  5899. extern __inline __m512d
  5900. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5901. _mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
  5902. {
  5903. return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
  5904. (__v8df)
  5905. _mm512_setzero_pd (),
  5906. (__mmask8) __U);
  5907. }
  5908. extern __inline __m512
  5909. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5910. _mm512_permute_ps (__m512 __X, const int __C)
  5911. {
  5912. return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
  5913. (__v16sf)
  5914. _mm512_undefined_ps (),
  5915. (__mmask16) -1);
  5916. }
  5917. extern __inline __m512
  5918. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5919. _mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C)
  5920. {
  5921. return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
  5922. (__v16sf) __W,
  5923. (__mmask16) __U);
  5924. }
  5925. extern __inline __m512
  5926. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5927. _mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
  5928. {
  5929. return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
  5930. (__v16sf)
  5931. _mm512_setzero_ps (),
  5932. (__mmask16) __U);
  5933. }
  5934. #else
  5935. #define _mm512_permute_pd(X, C) \
  5936. ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
  5937. (__v8df)(__m512d)_mm512_undefined_pd(),\
  5938. (__mmask8)(-1)))
  5939. #define _mm512_mask_permute_pd(W, U, X, C) \
  5940. ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
  5941. (__v8df)(__m512d)(W), \
  5942. (__mmask8)(U)))
  5943. #define _mm512_maskz_permute_pd(U, X, C) \
  5944. ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
  5945. (__v8df)(__m512d)_mm512_setzero_pd(), \
  5946. (__mmask8)(U)))
  5947. #define _mm512_permute_ps(X, C) \
  5948. ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
  5949. (__v16sf)(__m512)_mm512_undefined_ps(),\
  5950. (__mmask16)(-1)))
  5951. #define _mm512_mask_permute_ps(W, U, X, C) \
  5952. ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
  5953. (__v16sf)(__m512)(W), \
  5954. (__mmask16)(U)))
  5955. #define _mm512_maskz_permute_ps(U, X, C) \
  5956. ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
  5957. (__v16sf)(__m512)_mm512_setzero_ps(), \
  5958. (__mmask16)(U)))
  5959. #endif
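/* Usage sketch, for illustration only (not part of the upstream header):
   _mm512_permute_pd takes an 8-bit immediate; bit i selects, within its own
   128-bit lane, which of the two doubles lands in result element i.  The
   helper name below is hypothetical.  */
#if 0
static __inline __m512d
example_swap_double_pairs (__m512d __v)
{
  /* 0x55 = 01010101b: every even/odd pair of doubles is swapped.  */
  return _mm512_permute_pd (__v, 0x55);
}
#endif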
  5960. #ifdef __OPTIMIZE__
  5961. extern __inline __m512i
  5962. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5963. _mm512_permutex_epi64 (__m512i __X, const int __I)
  5964. {
  5965. return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
  5966. (__v8di)
  5967. _mm512_undefined_epi32 (),
  5968. (__mmask8) (-1));
  5969. }
  5970. extern __inline __m512i
  5971. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5972. _mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
  5973. __m512i __X, const int __I)
  5974. {
  5975. return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
  5976. (__v8di) __W,
  5977. (__mmask8) __M);
  5978. }
  5979. extern __inline __m512i
  5980. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5981. _mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
  5982. {
  5983. return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
  5984. (__v8di)
  5985. _mm512_setzero_si512 (),
  5986. (__mmask8) __M);
  5987. }
  5988. extern __inline __m512d
  5989. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5990. _mm512_permutex_pd (__m512d __X, const int __M)
  5991. {
  5992. return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
  5993. (__v8df)
  5994. _mm512_undefined_pd (),
  5995. (__mmask8) -1);
  5996. }
  5997. extern __inline __m512d
  5998. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5999. _mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
  6000. {
  6001. return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
  6002. (__v8df) __W,
  6003. (__mmask8) __U);
  6004. }
  6005. extern __inline __m512d
  6006. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6007. _mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
  6008. {
  6009. return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
  6010. (__v8df)
  6011. _mm512_setzero_pd (),
  6012. (__mmask8) __U);
  6013. }
  6014. #else
  6015. #define _mm512_permutex_pd(X, M) \
  6016. ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
  6017. (__v8df)(__m512d)_mm512_undefined_pd(),\
  6018. (__mmask8)-1))
  6019. #define _mm512_mask_permutex_pd(W, U, X, M) \
  6020. ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
  6021. (__v8df)(__m512d)(W), (__mmask8)(U)))
  6022. #define _mm512_maskz_permutex_pd(U, X, M) \
  6023. ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
  6024. (__v8df)(__m512d)_mm512_setzero_pd(),\
  6025. (__mmask8)(U)))
  6026. #define _mm512_permutex_epi64(X, I) \
  6027. ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
  6028. (int)(I), \
  6029. (__v8di)(__m512i) \
  6030. (_mm512_undefined_epi32 ()),\
  6031. (__mmask8)(-1)))
  6032. #define _mm512_maskz_permutex_epi64(M, X, I) \
  6033. ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
  6034. (int)(I), \
  6035. (__v8di)(__m512i) \
  6036. (_mm512_setzero_si512 ()),\
  6037. (__mmask8)(M)))
  6038. #define _mm512_mask_permutex_epi64(W, M, X, I) \
  6039. ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
  6040. (int)(I), \
  6041. (__v8di)(__m512i)(W), \
  6042. (__mmask8)(M)))
  6043. #endif
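/* Usage sketch, for illustration only (not part of the upstream header):
   permutex shuffles the four 64-bit elements inside each 256-bit half using
   the same four 2-bit selectors packed into the immediate.  The helper name
   below is hypothetical.  */
#if 0
static __inline __m512d
example_reverse_quads_pd (__m512d __v)
{
  /* 0x1B = 00 01 10 11b: selectors 3,2,1,0 reverse each group of four.  */
  return _mm512_permutex_pd (__v, 0x1B);
}
#endif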
  6044. extern __inline __m512i
  6045. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6046. _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
  6047. {
  6048. return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
  6049. (__v8di) __X,
  6050. (__v8di)
  6051. _mm512_setzero_si512 (),
  6052. __M);
  6053. }
  6054. extern __inline __m512i
  6055. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6056. _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
  6057. {
  6058. return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
  6059. (__v8di) __X,
  6060. (__v8di)
  6061. _mm512_undefined_epi32 (),
  6062. (__mmask8) -1);
  6063. }
  6064. extern __inline __m512i
  6065. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6066. _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
  6067. __m512i __Y)
  6068. {
  6069. return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
  6070. (__v8di) __X,
  6071. (__v8di) __W,
  6072. __M);
  6073. }
  6074. extern __inline __m512i
  6075. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6076. _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
  6077. {
  6078. return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
  6079. (__v16si) __X,
  6080. (__v16si)
  6081. _mm512_setzero_si512 (),
  6082. __M);
  6083. }
  6084. extern __inline __m512i
  6085. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6086. _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
  6087. {
  6088. return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
  6089. (__v16si) __X,
  6090. (__v16si)
  6091. _mm512_undefined_epi32 (),
  6092. (__mmask16) -1);
  6093. }
  6094. extern __inline __m512i
  6095. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6096. _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
  6097. __m512i __Y)
  6098. {
  6099. return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
  6100. (__v16si) __X,
  6101. (__v16si) __W,
  6102. __M);
  6103. }
  6104. extern __inline __m512d
  6105. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6106. _mm512_permutexvar_pd (__m512i __X, __m512d __Y)
  6107. {
  6108. return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
  6109. (__v8di) __X,
  6110. (__v8df)
  6111. _mm512_undefined_pd (),
  6112. (__mmask8) -1);
  6113. }
  6114. extern __inline __m512d
  6115. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6116. _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
  6117. {
  6118. return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
  6119. (__v8di) __X,
  6120. (__v8df) __W,
  6121. (__mmask8) __U);
  6122. }
  6123. extern __inline __m512d
  6124. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6125. _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
  6126. {
  6127. return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
  6128. (__v8di) __X,
  6129. (__v8df)
  6130. _mm512_setzero_pd (),
  6131. (__mmask8) __U);
  6132. }
  6133. extern __inline __m512
  6134. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6135. _mm512_permutexvar_ps (__m512i __X, __m512 __Y)
  6136. {
  6137. return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
  6138. (__v16si) __X,
  6139. (__v16sf)
  6140. _mm512_undefined_ps (),
  6141. (__mmask16) -1);
  6142. }
  6143. extern __inline __m512
  6144. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6145. _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
  6146. {
  6147. return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
  6148. (__v16si) __X,
  6149. (__v16sf) __W,
  6150. (__mmask16) __U);
  6151. }
  6152. extern __inline __m512
  6153. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6154. _mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
  6155. {
  6156. return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
  6157. (__v16si) __X,
  6158. (__v16sf)
  6159. _mm512_setzero_ps (),
  6160. (__mmask16) __U);
  6161. }
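/* Usage sketch, for illustration only (not part of the upstream header):
   unlike the in-lane permutes, permutexvar selects across the whole 512-bit
   register; for the 32-bit form the low four bits of each index element can
   address any of the sixteen source dwords.  Note that the index vector is
   the first argument.  The helper name below is hypothetical.  */
#if 0
static __inline __m512i
example_broadcast_dword7 (__m512i __src)
{
  /* Every index is 7, so every result element is source element 7.  */
  return _mm512_permutexvar_epi32 (_mm512_set1_epi32 (7), __src);
}
#endif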
  6162. #ifdef __OPTIMIZE__
  6163. extern __inline __m512
  6164. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6165. _mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
  6166. {
  6167. return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
  6168. (__v16sf) __V, __imm,
  6169. (__v16sf)
  6170. _mm512_undefined_ps (),
  6171. (__mmask16) -1);
  6172. }
  6173. extern __inline __m512
  6174. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6175. _mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
  6176. __m512 __V, const int __imm)
  6177. {
  6178. return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
  6179. (__v16sf) __V, __imm,
  6180. (__v16sf) __W,
  6181. (__mmask16) __U);
  6182. }
  6183. extern __inline __m512
  6184. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6185. _mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
  6186. {
  6187. return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
  6188. (__v16sf) __V, __imm,
  6189. (__v16sf)
  6190. _mm512_setzero_ps (),
  6191. (__mmask16) __U);
  6192. }
  6193. extern __inline __m512d
  6194. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6195. _mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
  6196. {
  6197. return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
  6198. (__v8df) __V, __imm,
  6199. (__v8df)
  6200. _mm512_undefined_pd (),
  6201. (__mmask8) -1);
  6202. }
  6203. extern __inline __m512d
  6204. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6205. _mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
  6206. __m512d __V, const int __imm)
  6207. {
  6208. return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
  6209. (__v8df) __V, __imm,
  6210. (__v8df) __W,
  6211. (__mmask8) __U);
  6212. }
  6213. extern __inline __m512d
  6214. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6215. _mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
  6216. const int __imm)
  6217. {
  6218. return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
  6219. (__v8df) __V, __imm,
  6220. (__v8df)
  6221. _mm512_setzero_pd (),
  6222. (__mmask8) __U);
  6223. }
  6224. extern __inline __m512d
  6225. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6226. _mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
  6227. const int __imm, const int __R)
  6228. {
  6229. return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
  6230. (__v8df) __B,
  6231. (__v8di) __C,
  6232. __imm,
  6233. (__mmask8) -1, __R);
  6234. }
  6235. extern __inline __m512d
  6236. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6237. _mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
  6238. __m512i __C, const int __imm, const int __R)
  6239. {
  6240. return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
  6241. (__v8df) __B,
  6242. (__v8di) __C,
  6243. __imm,
  6244. (__mmask8) __U, __R);
  6245. }
  6246. extern __inline __m512d
  6247. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6248. _mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
  6249. __m512i __C, const int __imm, const int __R)
  6250. {
  6251. return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
  6252. (__v8df) __B,
  6253. (__v8di) __C,
  6254. __imm,
  6255. (__mmask8) __U, __R);
  6256. }
  6257. extern __inline __m512
  6258. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6259. _mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
  6260. const int __imm, const int __R)
  6261. {
  6262. return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
  6263. (__v16sf) __B,
  6264. (__v16si) __C,
  6265. __imm,
  6266. (__mmask16) -1, __R);
  6267. }
  6268. extern __inline __m512
  6269. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6270. _mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
  6271. __m512i __C, const int __imm, const int __R)
  6272. {
  6273. return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
  6274. (__v16sf) __B,
  6275. (__v16si) __C,
  6276. __imm,
  6277. (__mmask16) __U, __R);
  6278. }
  6279. extern __inline __m512
  6280. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6281. _mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
  6282. __m512i __C, const int __imm, const int __R)
  6283. {
  6284. return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
  6285. (__v16sf) __B,
  6286. (__v16si) __C,
  6287. __imm,
  6288. (__mmask16) __U, __R);
  6289. }
  6290. extern __inline __m128d
  6291. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6292. _mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
  6293. const int __imm, const int __R)
  6294. {
  6295. return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
  6296. (__v2df) __B,
  6297. (__v2di) __C, __imm,
  6298. (__mmask8) -1, __R);
  6299. }
  6300. extern __inline __m128d
  6301. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6302. _mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
  6303. __m128i __C, const int __imm, const int __R)
  6304. {
  6305. return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
  6306. (__v2df) __B,
  6307. (__v2di) __C, __imm,
  6308. (__mmask8) __U, __R);
  6309. }
  6310. extern __inline __m128d
  6311. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6312. _mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
  6313. __m128i __C, const int __imm, const int __R)
  6314. {
  6315. return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
  6316. (__v2df) __B,
  6317. (__v2di) __C,
  6318. __imm,
  6319. (__mmask8) __U, __R);
  6320. }
  6321. extern __inline __m128
  6322. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6323. _mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
  6324. const int __imm, const int __R)
  6325. {
  6326. return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
  6327. (__v4sf) __B,
  6328. (__v4si) __C, __imm,
  6329. (__mmask8) -1, __R);
  6330. }
  6331. extern __inline __m128
  6332. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6333. _mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
  6334. __m128i __C, const int __imm, const int __R)
  6335. {
  6336. return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
  6337. (__v4sf) __B,
  6338. (__v4si) __C, __imm,
  6339. (__mmask8) __U, __R);
  6340. }
  6341. extern __inline __m128
  6342. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6343. _mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
  6344. __m128i __C, const int __imm, const int __R)
  6345. {
  6346. return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
  6347. (__v4sf) __B,
  6348. (__v4si) __C, __imm,
  6349. (__mmask8) __U, __R);
  6350. }
  6351. #else
  6352. #define _mm512_shuffle_pd(X, Y, C) \
  6353. ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
  6354. (__v8df)(__m512d)(Y), (int)(C),\
  6355. (__v8df)(__m512d)_mm512_undefined_pd(),\
  6356. (__mmask8)-1))
  6357. #define _mm512_mask_shuffle_pd(W, U, X, Y, C) \
  6358. ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
  6359. (__v8df)(__m512d)(Y), (int)(C),\
  6360. (__v8df)(__m512d)(W),\
  6361. (__mmask8)(U)))
  6362. #define _mm512_maskz_shuffle_pd(U, X, Y, C) \
  6363. ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
  6364. (__v8df)(__m512d)(Y), (int)(C),\
  6365. (__v8df)(__m512d)_mm512_setzero_pd(),\
  6366. (__mmask8)(U)))
  6367. #define _mm512_shuffle_ps(X, Y, C) \
  6368. ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
  6369. (__v16sf)(__m512)(Y), (int)(C),\
  6370. (__v16sf)(__m512)_mm512_undefined_ps(),\
  6371. (__mmask16)-1))
  6372. #define _mm512_mask_shuffle_ps(W, U, X, Y, C) \
  6373. ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
  6374. (__v16sf)(__m512)(Y), (int)(C),\
  6375. (__v16sf)(__m512)(W),\
  6376. (__mmask16)(U)))
  6377. #define _mm512_maskz_shuffle_ps(U, X, Y, C) \
  6378. ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
  6379. (__v16sf)(__m512)(Y), (int)(C),\
  6380. (__v16sf)(__m512)_mm512_setzero_ps(),\
  6381. (__mmask16)(U)))
  6382. #define _mm512_fixupimm_round_pd(X, Y, Z, C, R) \
  6383. ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
  6384. (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
  6385. (__mmask8)(-1), (R)))
  6386. #define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R) \
  6387. ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
  6388. (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
  6389. (__mmask8)(U), (R)))
  6390. #define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, R) \
  6391. ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
  6392. (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
  6393. (__mmask8)(U), (R)))
  6394. #define _mm512_fixupimm_round_ps(X, Y, Z, C, R) \
  6395. ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
  6396. (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
  6397. (__mmask16)(-1), (R)))
  6398. #define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R) \
  6399. ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
  6400. (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
  6401. (__mmask16)(U), (R)))
  6402. #define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R) \
  6403. ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
  6404. (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
  6405. (__mmask16)(U), (R)))
  6406. #define _mm_fixupimm_round_sd(X, Y, Z, C, R) \
  6407. ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
  6408. (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
  6409. (__mmask8)(-1), (R)))
  6410. #define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R) \
  6411. ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
  6412. (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
  6413. (__mmask8)(U), (R)))
  6414. #define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R) \
  6415. ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
  6416. (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
  6417. (__mmask8)(U), (R)))
  6418. #define _mm_fixupimm_round_ss(X, Y, Z, C, R) \
  6419. ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
  6420. (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
  6421. (__mmask8)(-1), (R)))
  6422. #define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R) \
  6423. ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
  6424. (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
  6425. (__mmask8)(U), (R)))
  6426. #define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R) \
  6427. ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
  6428. (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
  6429. (__mmask8)(U), (R)))
  6430. #endif
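/* Usage sketch, for illustration only (not part of the upstream header):
   _mm512_shuffle_ps applies the classic SHUFPS selector in every 128-bit
   lane: the two low 2-bit fields of the immediate pick from the first
   source, the two high fields from the second (the fixupimm forms above are
   not illustrated here).  The helper name below is hypothetical.  */
#if 0
static __inline __m512
example_interleave_low_pairs (__m512 __a, __m512 __b)
{
  /* 0x44 = 01 00 01 00b: each result lane is { a0, a1, b0, b1 }.  */
  return _mm512_shuffle_ps (__a, __b, 0x44);
}
#endif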
  6431. extern __inline __m512
  6432. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6433. _mm512_movehdup_ps (__m512 __A)
  6434. {
  6435. return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
  6436. (__v16sf)
  6437. _mm512_undefined_ps (),
  6438. (__mmask16) -1);
  6439. }
  6440. extern __inline __m512
  6441. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6442. _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
  6443. {
  6444. return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
  6445. (__v16sf) __W,
  6446. (__mmask16) __U);
  6447. }
  6448. extern __inline __m512
  6449. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6450. _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
  6451. {
  6452. return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
  6453. (__v16sf)
  6454. _mm512_setzero_ps (),
  6455. (__mmask16) __U);
  6456. }
  6457. extern __inline __m512
  6458. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6459. _mm512_moveldup_ps (__m512 __A)
  6460. {
  6461. return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
  6462. (__v16sf)
  6463. _mm512_undefined_ps (),
  6464. (__mmask16) -1);
  6465. }
  6466. extern __inline __m512
  6467. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6468. _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
  6469. {
  6470. return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
  6471. (__v16sf) __W,
  6472. (__mmask16) __U);
  6473. }
  6474. extern __inline __m512
  6475. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6476. _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
  6477. {
  6478. return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
  6479. (__v16sf)
  6480. _mm512_setzero_ps (),
  6481. (__mmask16) __U);
  6482. }
  6483. extern __inline __m512i
  6484. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6485. _mm512_or_si512 (__m512i __A, __m512i __B)
  6486. {
  6487. return (__m512i) ((__v16su) __A | (__v16su) __B);
  6488. }
  6489. extern __inline __m512i
  6490. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6491. _mm512_or_epi32 (__m512i __A, __m512i __B)
  6492. {
  6493. return (__m512i) ((__v16su) __A | (__v16su) __B);
  6494. }
  6495. extern __inline __m512i
  6496. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6497. _mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
  6498. {
  6499. return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
  6500. (__v16si) __B,
  6501. (__v16si) __W,
  6502. (__mmask16) __U);
  6503. }
  6504. extern __inline __m512i
  6505. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6506. _mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
  6507. {
  6508. return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
  6509. (__v16si) __B,
  6510. (__v16si)
  6511. _mm512_setzero_si512 (),
  6512. (__mmask16) __U);
  6513. }
  6514. extern __inline __m512i
  6515. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6516. _mm512_or_epi64 (__m512i __A, __m512i __B)
  6517. {
  6518. return (__m512i) ((__v8du) __A | (__v8du) __B);
  6519. }
  6520. extern __inline __m512i
  6521. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6522. _mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
  6523. {
  6524. return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
  6525. (__v8di) __B,
  6526. (__v8di) __W,
  6527. (__mmask8) __U);
  6528. }
  6529. extern __inline __m512i
  6530. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6531. _mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
  6532. {
  6533. return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
  6534. (__v8di) __B,
  6535. (__v8di)
  6536. _mm512_setzero_si512 (),
  6537. (__mmask8) __U);
  6538. }
  6539. extern __inline __m512i
  6540. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6541. _mm512_xor_si512 (__m512i __A, __m512i __B)
  6542. {
  6543. return (__m512i) ((__v16su) __A ^ (__v16su) __B);
  6544. }
  6545. extern __inline __m512i
  6546. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6547. _mm512_xor_epi32 (__m512i __A, __m512i __B)
  6548. {
  6549. return (__m512i) ((__v16su) __A ^ (__v16su) __B);
  6550. }
  6551. extern __inline __m512i
  6552. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6553. _mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
  6554. {
  6555. return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
  6556. (__v16si) __B,
  6557. (__v16si) __W,
  6558. (__mmask16) __U);
  6559. }
  6560. extern __inline __m512i
  6561. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6562. _mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
  6563. {
  6564. return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
  6565. (__v16si) __B,
  6566. (__v16si)
  6567. _mm512_setzero_si512 (),
  6568. (__mmask16) __U);
  6569. }
  6570. extern __inline __m512i
  6571. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6572. _mm512_xor_epi64 (__m512i __A, __m512i __B)
  6573. {
  6574. return (__m512i) ((__v8du) __A ^ (__v8du) __B);
  6575. }
  6576. extern __inline __m512i
  6577. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6578. _mm512_mask_xor_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
  6579. {
  6580. return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
  6581. (__v8di) __B,
  6582. (__v8di) __W,
  6583. (__mmask8) __U);
  6584. }
  6585. extern __inline __m512i
  6586. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6587. _mm512_maskz_xor_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
  6588. {
  6589. return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
  6590. (__v8di) __B,
  6591. (__v8di)
  6592. _mm512_setzero_si512 (),
  6593. (__mmask8) __U);
  6594. }
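/* Usage sketch, for illustration only (not part of the upstream header):
   the unmasked or/xor forms above are plain vector operators, while the
   masked forms merge into a destination or zero the untouched elements.
   The helper name below is hypothetical.  */
#if 0
static __inline __m512i
example_masked_toggle (__m512i __v, __m512i __bits, __mmask16 __k)
{
  /* XOR __bits into the dwords selected by __k; the others keep __v.  */
  return _mm512_mask_xor_epi32 (__v, __k, __v, __bits);
}
#endif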
  6595. #ifdef __OPTIMIZE__
  6596. extern __inline __m512i
  6597. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6598. _mm512_rol_epi32 (__m512i __A, const int __B)
  6599. {
  6600. return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
  6601. (__v16si)
  6602. _mm512_undefined_epi32 (),
  6603. (__mmask16) -1);
  6604. }
  6605. extern __inline __m512i
  6606. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6607. _mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
  6608. {
  6609. return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
  6610. (__v16si) __W,
  6611. (__mmask16) __U);
  6612. }
  6613. extern __inline __m512i
  6614. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6615. _mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
  6616. {
  6617. return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
  6618. (__v16si)
  6619. _mm512_setzero_si512 (),
  6620. (__mmask16) __U);
  6621. }
  6622. extern __inline __m512i
  6623. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6624. _mm512_ror_epi32 (__m512i __A, int __B)
  6625. {
  6626. return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
  6627. (__v16si)
  6628. _mm512_undefined_epi32 (),
  6629. (__mmask16) -1);
  6630. }
  6631. extern __inline __m512i
  6632. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6633. _mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B)
  6634. {
  6635. return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
  6636. (__v16si) __W,
  6637. (__mmask16) __U);
  6638. }
  6639. extern __inline __m512i
  6640. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6641. _mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B)
  6642. {
  6643. return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
  6644. (__v16si)
  6645. _mm512_setzero_si512 (),
  6646. (__mmask16) __U);
  6647. }
  6648. extern __inline __m512i
  6649. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6650. _mm512_rol_epi64 (__m512i __A, const int __B)
  6651. {
  6652. return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
  6653. (__v8di)
  6654. _mm512_undefined_epi32 (),
  6655. (__mmask8) -1);
  6656. }
  6657. extern __inline __m512i
  6658. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6659. _mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
  6660. {
  6661. return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
  6662. (__v8di) __W,
  6663. (__mmask8) __U);
  6664. }
  6665. extern __inline __m512i
  6666. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6667. _mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
  6668. {
  6669. return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
  6670. (__v8di)
  6671. _mm512_setzero_si512 (),
  6672. (__mmask8) __U);
  6673. }
  6674. extern __inline __m512i
  6675. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6676. _mm512_ror_epi64 (__m512i __A, int __B)
  6677. {
  6678. return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
  6679. (__v8di)
  6680. _mm512_undefined_epi32 (),
  6681. (__mmask8) -1);
  6682. }
  6683. extern __inline __m512i
  6684. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6685. _mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B)
  6686. {
  6687. return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
  6688. (__v8di) __W,
  6689. (__mmask8) __U);
  6690. }
  6691. extern __inline __m512i
  6692. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6693. _mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
  6694. {
  6695. return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
  6696. (__v8di)
  6697. _mm512_setzero_si512 (),
  6698. (__mmask8) __U);
  6699. }
  6700. #else
  6701. #define _mm512_rol_epi32(A, B) \
  6702. ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
  6703. (int)(B), \
  6704. (__v16si)_mm512_undefined_epi32 (), \
  6705. (__mmask16)(-1)))
  6706. #define _mm512_mask_rol_epi32(W, U, A, B) \
  6707. ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
  6708. (int)(B), \
  6709. (__v16si)(__m512i)(W), \
  6710. (__mmask16)(U)))
  6711. #define _mm512_maskz_rol_epi32(U, A, B) \
  6712. ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
  6713. (int)(B), \
  6714. (__v16si)_mm512_setzero_si512 (), \
  6715. (__mmask16)(U)))
  6716. #define _mm512_ror_epi32(A, B) \
  6717. ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
  6718. (int)(B), \
  6719. (__v16si)_mm512_undefined_epi32 (), \
  6720. (__mmask16)(-1)))
  6721. #define _mm512_mask_ror_epi32(W, U, A, B) \
  6722. ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
  6723. (int)(B), \
  6724. (__v16si)(__m512i)(W), \
  6725. (__mmask16)(U)))
  6726. #define _mm512_maskz_ror_epi32(U, A, B) \
  6727. ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
  6728. (int)(B), \
  6729. (__v16si)_mm512_setzero_si512 (), \
  6730. (__mmask16)(U)))
  6731. #define _mm512_rol_epi64(A, B) \
  6732. ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
  6733. (int)(B), \
  6734. (__v8di)_mm512_undefined_epi32 (), \
  6735. (__mmask8)(-1)))
  6736. #define _mm512_mask_rol_epi64(W, U, A, B) \
  6737. ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
  6738. (int)(B), \
  6739. (__v8di)(__m512i)(W), \
  6740. (__mmask8)(U)))
  6741. #define _mm512_maskz_rol_epi64(U, A, B) \
  6742. ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
  6743. (int)(B), \
  6744. (__v8di)_mm512_setzero_si512 (), \
  6745. (__mmask8)(U)))
  6746. #define _mm512_ror_epi64(A, B) \
  6747. ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
  6748. (int)(B), \
  6749. (__v8di)_mm512_undefined_epi32 (), \
  6750. (__mmask8)(-1)))
  6751. #define _mm512_mask_ror_epi64(W, U, A, B) \
  6752. ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
  6753. (int)(B), \
  6754. (__v8di)(__m512i)(W), \
  6755. (__mmask8)(U)))
  6756. #define _mm512_maskz_ror_epi64(U, A, B) \
  6757. ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
  6758. (int)(B), \
  6759. (__v8di)_mm512_setzero_si512 (), \
  6760. (__mmask8)(U)))
  6761. #endif
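/* Illustrative usage sketch (the helper name and rotate counts below are
   arbitrary, not part of this header): the rotate count must be a
   compile-time constant, which is why the macro forms above are used when
   __OPTIMIZE__ is not defined.  Compile with -mavx512f.

     #include <immintrin.h>

     __m512i rotate_lanes (__m512i v)
     {
       __m512i l = _mm512_rol_epi32 (v, 8);   // rotate each 32-bit lane left by 8
       return _mm512_ror_epi64 (l, 16);       // rotate each 64-bit lane right by 16
     }
*/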
  6762. extern __inline __m512i
  6763. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6764. _mm512_and_si512 (__m512i __A, __m512i __B)
  6765. {
  6766. return (__m512i) ((__v16su) __A & (__v16su) __B);
  6767. }
  6768. extern __inline __m512i
  6769. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6770. _mm512_and_epi32 (__m512i __A, __m512i __B)
  6771. {
  6772. return (__m512i) ((__v16su) __A & (__v16su) __B);
  6773. }
  6774. extern __inline __m512i
  6775. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6776. _mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
  6777. {
  6778. return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
  6779. (__v16si) __B,
  6780. (__v16si) __W,
  6781. (__mmask16) __U);
  6782. }
  6783. extern __inline __m512i
  6784. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6785. _mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
  6786. {
  6787. return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
  6788. (__v16si) __B,
  6789. (__v16si)
  6790. _mm512_setzero_si512 (),
  6791. (__mmask16) __U);
  6792. }
  6793. extern __inline __m512i
  6794. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6795. _mm512_and_epi64 (__m512i __A, __m512i __B)
  6796. {
  6797. return (__m512i) ((__v8du) __A & (__v8du) __B);
  6798. }
  6799. extern __inline __m512i
  6800. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6801. _mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
  6802. {
  6803. return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
  6804. (__v8di) __B,
  6805. (__v8di) __W, __U);
  6806. }
  6807. extern __inline __m512i
  6808. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6809. _mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
  6810. {
  6811. return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
  6812. (__v8di) __B,
  6813. (__v8di)
6814. _mm512_setzero_si512 (),
  6815. __U);
  6816. }
  6817. extern __inline __m512i
  6818. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6819. _mm512_andnot_si512 (__m512i __A, __m512i __B)
  6820. {
  6821. return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
  6822. (__v16si) __B,
  6823. (__v16si)
  6824. _mm512_undefined_epi32 (),
  6825. (__mmask16) -1);
  6826. }
  6827. extern __inline __m512i
  6828. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6829. _mm512_andnot_epi32 (__m512i __A, __m512i __B)
  6830. {
  6831. return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
  6832. (__v16si) __B,
  6833. (__v16si)
  6834. _mm512_undefined_epi32 (),
  6835. (__mmask16) -1);
  6836. }
  6837. extern __inline __m512i
  6838. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6839. _mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
  6840. {
  6841. return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
  6842. (__v16si) __B,
  6843. (__v16si) __W,
  6844. (__mmask16) __U);
  6845. }
  6846. extern __inline __m512i
  6847. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6848. _mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
  6849. {
  6850. return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
  6851. (__v16si) __B,
  6852. (__v16si)
  6853. _mm512_setzero_si512 (),
  6854. (__mmask16) __U);
  6855. }
  6856. extern __inline __m512i
  6857. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6858. _mm512_andnot_epi64 (__m512i __A, __m512i __B)
  6859. {
  6860. return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
  6861. (__v8di) __B,
  6862. (__v8di)
  6863. _mm512_undefined_epi32 (),
  6864. (__mmask8) -1);
  6865. }
  6866. extern __inline __m512i
  6867. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6868. _mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
  6869. {
  6870. return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
  6871. (__v8di) __B,
  6872. (__v8di) __W, __U);
  6873. }
  6874. extern __inline __m512i
  6875. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6876. _mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
  6877. {
  6878. return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
  6879. (__v8di) __B,
  6880. (__v8di)
6881. _mm512_setzero_si512 (),
  6882. __U);
  6883. }
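/* Illustrative usage sketch (helper name and mask constant are arbitrary):
   _mm512_andnot_epi32 computes (~A) & B, and the _mask/_maskz variants
   merge unselected lanes from __W or zero them.  Clearing the low byte of
   selected 32-bit lanes, for example:

     #include <immintrin.h>

     __m512i clear_low_byte (__m512i v, __mmask16 which)
     {
       __m512i low = _mm512_set1_epi32 (0xff);
       // selected lanes become v & ~0xff; the rest keep v unchanged
       return _mm512_mask_andnot_epi32 (v, which, low, v);
     }
*/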
  6884. extern __inline __mmask16
  6885. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6886. _mm512_test_epi32_mask (__m512i __A, __m512i __B)
  6887. {
  6888. return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
  6889. (__v16si) __B,
  6890. (__mmask16) -1);
  6891. }
  6892. extern __inline __mmask16
  6893. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6894. _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
  6895. {
  6896. return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
  6897. (__v16si) __B, __U);
  6898. }
  6899. extern __inline __mmask8
  6900. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6901. _mm512_test_epi64_mask (__m512i __A, __m512i __B)
  6902. {
  6903. return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
  6904. (__v8di) __B,
  6905. (__mmask8) -1);
  6906. }
  6907. extern __inline __mmask8
  6908. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6909. _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
  6910. {
  6911. return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
  6912. }
  6913. extern __inline __mmask16
  6914. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6915. _mm512_testn_epi32_mask (__m512i __A, __m512i __B)
  6916. {
  6917. return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
  6918. (__v16si) __B,
  6919. (__mmask16) -1);
  6920. }
  6921. extern __inline __mmask16
  6922. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6923. _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
  6924. {
  6925. return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
  6926. (__v16si) __B, __U);
  6927. }
  6928. extern __inline __mmask8
  6929. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6930. _mm512_testn_epi64_mask (__m512i __A, __m512i __B)
  6931. {
  6932. return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
  6933. (__v8di) __B,
  6934. (__mmask8) -1);
  6935. }
  6936. extern __inline __mmask8
  6937. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6938. _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
  6939. {
  6940. return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
  6941. (__v8di) __B, __U);
  6942. }
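/* Illustrative usage sketch (helper names are arbitrary): test sets a mask
   bit where (__A & __B) is non-zero in that lane, testn where it is zero,
   so testing a vector against itself classifies its lanes:

     #include <immintrin.h>

     __mmask16 nonzero_lanes (__m512i v)
     {
       return _mm512_test_epi32_mask (v, v);    // bit i set when lane i != 0
     }

     __mmask16 zero_lanes (__m512i v)
     {
       return _mm512_testn_epi32_mask (v, v);   // bit i set when lane i == 0
     }
*/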
  6943. extern __inline __m512
  6944. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6945. _mm512_abs_ps (__m512 __A)
  6946. {
  6947. return (__m512) _mm512_and_epi32 ((__m512i) __A,
  6948. _mm512_set1_epi32 (0x7fffffff));
  6949. }
  6950. extern __inline __m512
  6951. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6952. _mm512_mask_abs_ps (__m512 __W, __mmask16 __U, __m512 __A)
  6953. {
  6954. return (__m512) _mm512_mask_and_epi32 ((__m512i) __W, __U, (__m512i) __A,
  6955. _mm512_set1_epi32 (0x7fffffff));
  6956. }
  6957. extern __inline __m512d
  6958. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6959. _mm512_abs_pd (__m512d __A)
  6960. {
  6961. return (__m512d) _mm512_and_epi64 ((__m512i) __A,
  6962. _mm512_set1_epi64 (0x7fffffffffffffffLL));
  6963. }
  6964. extern __inline __m512d
  6965. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6966. _mm512_mask_abs_pd (__m512d __W, __mmask8 __U, __m512d __A)
  6967. {
  6968. return (__m512d)
  6969. _mm512_mask_and_epi64 ((__m512i) __W, __U, (__m512i) __A,
  6970. _mm512_set1_epi64 (0x7fffffffffffffffLL));
  6971. }
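/* The absolute-value helpers above simply clear the IEEE sign bit with a
   bitwise AND, so no floating-point exceptions are raised and NaN payloads
   are preserved.  Illustrative usage sketch (helper name is arbitrary):

     #include <immintrin.h>

     __m512 magnitudes (__m512 x)
     {
       return _mm512_abs_ps (x);   // |x| for all 16 floats; -0.0 becomes +0.0
     }
*/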
  6972. extern __inline __m512i
  6973. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6974. _mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
  6975. {
  6976. return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
  6977. (__v16si) __B,
  6978. (__v16si)
  6979. _mm512_undefined_epi32 (),
  6980. (__mmask16) -1);
  6981. }
  6982. extern __inline __m512i
  6983. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6984. _mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
  6985. __m512i __B)
  6986. {
  6987. return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
  6988. (__v16si) __B,
  6989. (__v16si) __W,
  6990. (__mmask16) __U);
  6991. }
  6992. extern __inline __m512i
  6993. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6994. _mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
  6995. {
  6996. return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
  6997. (__v16si) __B,
  6998. (__v16si)
  6999. _mm512_setzero_si512 (),
  7000. (__mmask16) __U);
  7001. }
  7002. extern __inline __m512i
  7003. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7004. _mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
  7005. {
  7006. return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
  7007. (__v8di) __B,
  7008. (__v8di)
  7009. _mm512_undefined_epi32 (),
  7010. (__mmask8) -1);
  7011. }
  7012. extern __inline __m512i
  7013. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7014. _mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
  7015. {
  7016. return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
  7017. (__v8di) __B,
  7018. (__v8di) __W,
  7019. (__mmask8) __U);
  7020. }
  7021. extern __inline __m512i
  7022. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7023. _mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
  7024. {
  7025. return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
  7026. (__v8di) __B,
  7027. (__v8di)
  7028. _mm512_setzero_si512 (),
  7029. (__mmask8) __U);
  7030. }
  7031. extern __inline __m512i
  7032. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7033. _mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
  7034. {
  7035. return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
  7036. (__v16si) __B,
  7037. (__v16si)
  7038. _mm512_undefined_epi32 (),
  7039. (__mmask16) -1);
  7040. }
  7041. extern __inline __m512i
  7042. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7043. _mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
  7044. __m512i __B)
  7045. {
  7046. return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
  7047. (__v16si) __B,
  7048. (__v16si) __W,
  7049. (__mmask16) __U);
  7050. }
  7051. extern __inline __m512i
  7052. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7053. _mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
  7054. {
  7055. return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
  7056. (__v16si) __B,
  7057. (__v16si)
  7058. _mm512_setzero_si512 (),
  7059. (__mmask16) __U);
  7060. }
  7061. extern __inline __m512i
  7062. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7063. _mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
  7064. {
  7065. return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
  7066. (__v8di) __B,
  7067. (__v8di)
  7068. _mm512_undefined_epi32 (),
  7069. (__mmask8) -1);
  7070. }
  7071. extern __inline __m512i
  7072. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7073. _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
  7074. {
  7075. return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
  7076. (__v8di) __B,
  7077. (__v8di) __W,
  7078. (__mmask8) __U);
  7079. }
  7080. extern __inline __m512i
  7081. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7082. _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
  7083. {
  7084. return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
  7085. (__v8di) __B,
  7086. (__v8di)
  7087. _mm512_setzero_si512 (),
  7088. (__mmask8) __U);
  7089. }
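/* Illustrative usage sketch (helper name is arbitrary): as with the
   SSE/AVX forms, the 512-bit unpack intrinsics interleave elements within
   each 128-bit lane rather than across the whole register:

     #include <immintrin.h>

     __m512i interleave_low_dwords (__m512i a, __m512i b)
     {
       // per 128-bit lane, the result is { a0, b0, a1, b1 }
       return _mm512_unpacklo_epi32 (a, b);
     }
*/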
  7090. #ifdef __x86_64__
  7091. #ifdef __OPTIMIZE__
  7092. extern __inline unsigned long long
  7093. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7094. _mm_cvt_roundss_u64 (__m128 __A, const int __R)
  7095. {
  7096. return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R);
  7097. }
  7098. extern __inline long long
  7099. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7100. _mm_cvt_roundss_si64 (__m128 __A, const int __R)
  7101. {
  7102. return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
  7103. }
  7104. extern __inline long long
  7105. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7106. _mm_cvt_roundss_i64 (__m128 __A, const int __R)
  7107. {
  7108. return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
  7109. }
  7110. extern __inline unsigned long long
  7111. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7112. _mm_cvtt_roundss_u64 (__m128 __A, const int __R)
  7113. {
  7114. return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R);
  7115. }
  7116. extern __inline long long
  7117. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7118. _mm_cvtt_roundss_i64 (__m128 __A, const int __R)
  7119. {
  7120. return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
  7121. }
  7122. extern __inline long long
  7123. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7124. _mm_cvtt_roundss_si64 (__m128 __A, const int __R)
  7125. {
  7126. return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
  7127. }
  7128. #else
  7129. #define _mm_cvt_roundss_u64(A, B) \
  7130. ((unsigned long long)__builtin_ia32_vcvtss2usi64(A, B))
  7131. #define _mm_cvt_roundss_si64(A, B) \
  7132. ((long long)__builtin_ia32_vcvtss2si64(A, B))
  7133. #define _mm_cvt_roundss_i64(A, B) \
  7134. ((long long)__builtin_ia32_vcvtss2si64(A, B))
  7135. #define _mm_cvtt_roundss_u64(A, B) \
  7136. ((unsigned long long)__builtin_ia32_vcvttss2usi64(A, B))
  7137. #define _mm_cvtt_roundss_i64(A, B) \
  7138. ((long long)__builtin_ia32_vcvttss2si64(A, B))
  7139. #define _mm_cvtt_roundss_si64(A, B) \
  7140. ((long long)__builtin_ia32_vcvttss2si64(A, B))
  7141. #endif
  7142. #endif
  7143. #ifdef __OPTIMIZE__
  7144. extern __inline unsigned
  7145. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7146. _mm_cvt_roundss_u32 (__m128 __A, const int __R)
  7147. {
  7148. return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R);
  7149. }
  7150. extern __inline int
  7151. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7152. _mm_cvt_roundss_si32 (__m128 __A, const int __R)
  7153. {
  7154. return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
  7155. }
  7156. extern __inline int
  7157. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7158. _mm_cvt_roundss_i32 (__m128 __A, const int __R)
  7159. {
  7160. return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
  7161. }
  7162. extern __inline unsigned
  7163. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7164. _mm_cvtt_roundss_u32 (__m128 __A, const int __R)
  7165. {
  7166. return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R);
  7167. }
  7168. extern __inline int
  7169. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7170. _mm_cvtt_roundss_i32 (__m128 __A, const int __R)
  7171. {
  7172. return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
  7173. }
  7174. extern __inline int
  7175. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7176. _mm_cvtt_roundss_si32 (__m128 __A, const int __R)
  7177. {
  7178. return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
  7179. }
  7180. #else
  7181. #define _mm_cvt_roundss_u32(A, B) \
  7182. ((unsigned)__builtin_ia32_vcvtss2usi32(A, B))
  7183. #define _mm_cvt_roundss_si32(A, B) \
  7184. ((int)__builtin_ia32_vcvtss2si32(A, B))
  7185. #define _mm_cvt_roundss_i32(A, B) \
  7186. ((int)__builtin_ia32_vcvtss2si32(A, B))
  7187. #define _mm_cvtt_roundss_u32(A, B) \
  7188. ((unsigned)__builtin_ia32_vcvttss2usi32(A, B))
  7189. #define _mm_cvtt_roundss_si32(A, B) \
  7190. ((int)__builtin_ia32_vcvttss2si32(A, B))
  7191. #define _mm_cvtt_roundss_i32(A, B) \
  7192. ((int)__builtin_ia32_vcvttss2si32(A, B))
  7193. #endif
  7194. #ifdef __x86_64__
  7195. #ifdef __OPTIMIZE__
  7196. extern __inline unsigned long long
  7197. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7198. _mm_cvt_roundsd_u64 (__m128d __A, const int __R)
  7199. {
  7200. return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R);
  7201. }
  7202. extern __inline long long
  7203. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7204. _mm_cvt_roundsd_si64 (__m128d __A, const int __R)
  7205. {
  7206. return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
  7207. }
  7208. extern __inline long long
  7209. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7210. _mm_cvt_roundsd_i64 (__m128d __A, const int __R)
  7211. {
  7212. return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
  7213. }
  7214. extern __inline unsigned long long
  7215. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7216. _mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
  7217. {
  7218. return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R);
  7219. }
  7220. extern __inline long long
  7221. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7222. _mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
  7223. {
  7224. return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
  7225. }
  7226. extern __inline long long
  7227. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7228. _mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
  7229. {
  7230. return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
  7231. }
  7232. #else
  7233. #define _mm_cvt_roundsd_u64(A, B) \
  7234. ((unsigned long long)__builtin_ia32_vcvtsd2usi64(A, B))
  7235. #define _mm_cvt_roundsd_si64(A, B) \
  7236. ((long long)__builtin_ia32_vcvtsd2si64(A, B))
  7237. #define _mm_cvt_roundsd_i64(A, B) \
  7238. ((long long)__builtin_ia32_vcvtsd2si64(A, B))
  7239. #define _mm_cvtt_roundsd_u64(A, B) \
  7240. ((unsigned long long)__builtin_ia32_vcvttsd2usi64(A, B))
  7241. #define _mm_cvtt_roundsd_si64(A, B) \
  7242. ((long long)__builtin_ia32_vcvttsd2si64(A, B))
  7243. #define _mm_cvtt_roundsd_i64(A, B) \
  7244. ((long long)__builtin_ia32_vcvttsd2si64(A, B))
  7245. #endif
  7246. #endif
  7247. #ifdef __OPTIMIZE__
  7248. extern __inline unsigned
  7249. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7250. _mm_cvt_roundsd_u32 (__m128d __A, const int __R)
  7251. {
  7252. return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R);
  7253. }
  7254. extern __inline int
  7255. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7256. _mm_cvt_roundsd_si32 (__m128d __A, const int __R)
  7257. {
  7258. return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
  7259. }
  7260. extern __inline int
  7261. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7262. _mm_cvt_roundsd_i32 (__m128d __A, const int __R)
  7263. {
  7264. return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
  7265. }
  7266. extern __inline unsigned
  7267. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7268. _mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
  7269. {
  7270. return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R);
  7271. }
  7272. extern __inline int
  7273. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7274. _mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
  7275. {
  7276. return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
  7277. }
  7278. extern __inline int
  7279. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7280. _mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
  7281. {
  7282. return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
  7283. }
  7284. #else
  7285. #define _mm_cvt_roundsd_u32(A, B) \
  7286. ((unsigned)__builtin_ia32_vcvtsd2usi32(A, B))
  7287. #define _mm_cvt_roundsd_si32(A, B) \
  7288. ((int)__builtin_ia32_vcvtsd2si32(A, B))
  7289. #define _mm_cvt_roundsd_i32(A, B) \
  7290. ((int)__builtin_ia32_vcvtsd2si32(A, B))
  7291. #define _mm_cvtt_roundsd_u32(A, B) \
  7292. ((unsigned)__builtin_ia32_vcvttsd2usi32(A, B))
  7293. #define _mm_cvtt_roundsd_si32(A, B) \
  7294. ((int)__builtin_ia32_vcvttsd2si32(A, B))
  7295. #define _mm_cvtt_roundsd_i32(A, B) \
  7296. ((int)__builtin_ia32_vcvttsd2si32(A, B))
  7297. #endif
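/* Illustrative usage sketch (helper names are arbitrary): the
   _mm_cvt_round* scalar conversions take an explicit rounding mode instead
   of the MXCSR default, while the _mm_cvtt_round* forms always truncate
   toward zero and only accept _MM_FROUND_NO_EXC or _MM_FROUND_CUR_DIRECTION:

     #include <immintrin.h>

     unsigned round_to_u32 (__m128d x)
     {
       return _mm_cvt_roundsd_u32 (x, _MM_FROUND_TO_NEAREST_INT
                                      | _MM_FROUND_NO_EXC);
     }

     int trunc_to_i32 (__m128 x)
     {
       return _mm_cvtt_roundss_i32 (x, _MM_FROUND_NO_EXC);
     }
*/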
  7298. extern __inline __m512d
  7299. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7300. _mm512_movedup_pd (__m512d __A)
  7301. {
  7302. return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
  7303. (__v8df)
  7304. _mm512_undefined_pd (),
  7305. (__mmask8) -1);
  7306. }
  7307. extern __inline __m512d
  7308. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7309. _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
  7310. {
  7311. return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
  7312. (__v8df) __W,
  7313. (__mmask8) __U);
  7314. }
  7315. extern __inline __m512d
  7316. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7317. _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
  7318. {
  7319. return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
  7320. (__v8df)
  7321. _mm512_setzero_pd (),
  7322. (__mmask8) __U);
  7323. }
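/* Illustrative usage sketch (helper name is arbitrary): movedup duplicates
   the even-indexed double of each 128-bit pair, so the result is
   { a0, a0, a2, a2, a4, a4, a6, a6 }:

     #include <immintrin.h>

     __m512d dup_even (__m512d a)
     {
       return _mm512_movedup_pd (a);
     }
*/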
  7324. extern __inline __m512d
  7325. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7326. _mm512_unpacklo_pd (__m512d __A, __m512d __B)
  7327. {
  7328. return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
  7329. (__v8df) __B,
  7330. (__v8df)
  7331. _mm512_undefined_pd (),
  7332. (__mmask8) -1);
  7333. }
  7334. extern __inline __m512d
  7335. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7336. _mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
  7337. {
  7338. return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
  7339. (__v8df) __B,
  7340. (__v8df) __W,
  7341. (__mmask8) __U);
  7342. }
  7343. extern __inline __m512d
  7344. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7345. _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
  7346. {
  7347. return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
  7348. (__v8df) __B,
  7349. (__v8df)
  7350. _mm512_setzero_pd (),
  7351. (__mmask8) __U);
  7352. }
  7353. extern __inline __m512d
  7354. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7355. _mm512_unpackhi_pd (__m512d __A, __m512d __B)
  7356. {
  7357. return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
  7358. (__v8df) __B,
  7359. (__v8df)
  7360. _mm512_undefined_pd (),
  7361. (__mmask8) -1);
  7362. }
  7363. extern __inline __m512d
  7364. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7365. _mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
  7366. {
  7367. return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
  7368. (__v8df) __B,
  7369. (__v8df) __W,
  7370. (__mmask8) __U);
  7371. }
  7372. extern __inline __m512d
  7373. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7374. _mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
  7375. {
  7376. return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
  7377. (__v8df) __B,
  7378. (__v8df)
  7379. _mm512_setzero_pd (),
  7380. (__mmask8) __U);
  7381. }
  7382. extern __inline __m512
  7383. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7384. _mm512_unpackhi_ps (__m512 __A, __m512 __B)
  7385. {
  7386. return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
  7387. (__v16sf) __B,
  7388. (__v16sf)
  7389. _mm512_undefined_ps (),
  7390. (__mmask16) -1);
  7391. }
  7392. extern __inline __m512
  7393. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7394. _mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
  7395. {
  7396. return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
  7397. (__v16sf) __B,
  7398. (__v16sf) __W,
  7399. (__mmask16) __U);
  7400. }
  7401. extern __inline __m512
  7402. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7403. _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
  7404. {
  7405. return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
  7406. (__v16sf) __B,
  7407. (__v16sf)
  7408. _mm512_setzero_ps (),
  7409. (__mmask16) __U);
  7410. }
  7411. #ifdef __OPTIMIZE__
  7412. extern __inline __m512d
  7413. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7414. _mm512_cvt_roundps_pd (__m256 __A, const int __R)
  7415. {
  7416. return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
  7417. (__v8df)
  7418. _mm512_undefined_pd (),
  7419. (__mmask8) -1, __R);
  7420. }
  7421. extern __inline __m512d
  7422. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7423. _mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
  7424. const int __R)
  7425. {
  7426. return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
  7427. (__v8df) __W,
  7428. (__mmask8) __U, __R);
  7429. }
  7430. extern __inline __m512d
  7431. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7432. _mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
  7433. {
  7434. return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
  7435. (__v8df)
  7436. _mm512_setzero_pd (),
  7437. (__mmask8) __U, __R);
  7438. }
  7439. extern __inline __m512
  7440. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7441. _mm512_cvt_roundph_ps (__m256i __A, const int __R)
  7442. {
  7443. return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
  7444. (__v16sf)
  7445. _mm512_undefined_ps (),
  7446. (__mmask16) -1, __R);
  7447. }
  7448. extern __inline __m512
  7449. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7450. _mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
  7451. const int __R)
  7452. {
  7453. return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
  7454. (__v16sf) __W,
  7455. (__mmask16) __U, __R);
  7456. }
  7457. extern __inline __m512
  7458. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7459. _mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
  7460. {
  7461. return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
  7462. (__v16sf)
  7463. _mm512_setzero_ps (),
  7464. (__mmask16) __U, __R);
  7465. }
  7466. extern __inline __m256i
  7467. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7468. _mm512_cvt_roundps_ph (__m512 __A, const int __I)
  7469. {
  7470. return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
  7471. __I,
  7472. (__v16hi)
  7473. _mm256_undefined_si256 (),
  7474. -1);
  7475. }
  7476. extern __inline __m256i
  7477. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7478. _mm512_cvtps_ph (__m512 __A, const int __I)
  7479. {
  7480. return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
  7481. __I,
  7482. (__v16hi)
  7483. _mm256_undefined_si256 (),
  7484. -1);
  7485. }
  7486. extern __inline __m256i
  7487. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7488. _mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
  7489. const int __I)
  7490. {
  7491. return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
  7492. __I,
  7493. (__v16hi) __U,
  7494. (__mmask16) __W);
  7495. }
  7496. extern __inline __m256i
  7497. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7498. _mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I)
  7499. {
  7500. return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
  7501. __I,
  7502. (__v16hi) __U,
  7503. (__mmask16) __W);
  7504. }
  7505. extern __inline __m256i
  7506. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7507. _mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
  7508. {
  7509. return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
  7510. __I,
  7511. (__v16hi)
  7512. _mm256_setzero_si256 (),
  7513. (__mmask16) __W);
  7514. }
  7515. extern __inline __m256i
  7516. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7517. _mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
  7518. {
  7519. return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
  7520. __I,
  7521. (__v16hi)
  7522. _mm256_setzero_si256 (),
  7523. (__mmask16) __W);
  7524. }
  7525. #else
  7526. #define _mm512_cvt_roundps_pd(A, B) \
  7527. (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, B)
  7528. #define _mm512_mask_cvt_roundps_pd(W, U, A, B) \
  7529. (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)(W), U, B)
  7530. #define _mm512_maskz_cvt_roundps_pd(U, A, B) \
  7531. (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_setzero_pd(), U, B)
  7532. #define _mm512_cvt_roundph_ps(A, B) \
  7533. (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_undefined_ps(), -1, B)
  7534. #define _mm512_mask_cvt_roundph_ps(W, U, A, B) \
  7535. (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)(W), U, B)
  7536. #define _mm512_maskz_cvt_roundph_ps(U, A, B) \
  7537. (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_setzero_ps(), U, B)
  7538. #define _mm512_cvt_roundps_ph(A, I) \
  7539. ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
  7540. (__v16hi)_mm256_undefined_si256 (), -1))
  7541. #define _mm512_cvtps_ph(A, I) \
  7542. ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
  7543. (__v16hi)_mm256_undefined_si256 (), -1))
  7544. #define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
  7545. ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
  7546. (__v16hi)(__m256i)(U), (__mmask16) (W)))
  7547. #define _mm512_mask_cvtps_ph(U, W, A, I) \
  7548. ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
  7549. (__v16hi)(__m256i)(U), (__mmask16) (W)))
  7550. #define _mm512_maskz_cvt_roundps_ph(W, A, I) \
  7551. ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
  7552. (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
  7553. #define _mm512_maskz_cvtps_ph(W, A, I) \
  7554. ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
  7555. (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
  7556. #endif
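/* Illustrative usage sketch (helper name is arbitrary): converting sixteen
   floats to half precision and back.  The imm8 of _mm512_cvtps_ph selects
   the rounding mode used for the narrowing step:

     #include <immintrin.h>

     __m512 roundtrip_f16 (__m512 x)
     {
       __m256i h = _mm512_cvtps_ph (x, _MM_FROUND_TO_NEAREST_INT
                                       | _MM_FROUND_NO_EXC);
       return _mm512_cvt_roundph_ps (h, _MM_FROUND_CUR_DIRECTION);
     }
*/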
  7557. #ifdef __OPTIMIZE__
  7558. extern __inline __m256
  7559. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7560. _mm512_cvt_roundpd_ps (__m512d __A, const int __R)
  7561. {
  7562. return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
  7563. (__v8sf)
  7564. _mm256_undefined_ps (),
  7565. (__mmask8) -1, __R);
  7566. }
  7567. extern __inline __m256
  7568. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7569. _mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
  7570. const int __R)
  7571. {
  7572. return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
  7573. (__v8sf) __W,
  7574. (__mmask8) __U, __R);
  7575. }
  7576. extern __inline __m256
  7577. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7578. _mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
  7579. {
  7580. return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
  7581. (__v8sf)
  7582. _mm256_setzero_ps (),
  7583. (__mmask8) __U, __R);
  7584. }
  7585. extern __inline __m128
  7586. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7587. _mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
  7588. {
  7589. return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
  7590. (__v2df) __B,
  7591. __R);
  7592. }
  7593. extern __inline __m128d
  7594. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7595. _mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
  7596. {
  7597. return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
  7598. (__v4sf) __B,
  7599. __R);
  7600. }
  7601. #else
  7602. #define _mm512_cvt_roundpd_ps(A, B) \
  7603. (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_undefined_ps(), -1, B)
  7604. #define _mm512_mask_cvt_roundpd_ps(W, U, A, B) \
  7605. (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)(W), U, B)
  7606. #define _mm512_maskz_cvt_roundpd_ps(U, A, B) \
  7607. (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_setzero_ps(), U, B)
  7608. #define _mm_cvt_roundsd_ss(A, B, C) \
  7609. (__m128)__builtin_ia32_cvtsd2ss_round(A, B, C)
  7610. #define _mm_cvt_roundss_sd(A, B, C) \
  7611. (__m128d)__builtin_ia32_cvtss2sd_round(A, B, C)
  7612. #endif
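/* Illustrative usage sketch (helper name is arbitrary): narrowing eight
   doubles to floats with an explicit rounding mode rather than the MXCSR
   default:

     #include <immintrin.h>

     __m256 narrow_to_ps (__m512d x)
     {
       return _mm512_cvt_roundpd_ps (x, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
     }
*/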
  7613. extern __inline void
  7614. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7615. _mm512_stream_si512 (__m512i * __P, __m512i __A)
  7616. {
  7617. __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
  7618. }
  7619. extern __inline void
  7620. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7621. _mm512_stream_ps (float *__P, __m512 __A)
  7622. {
  7623. __builtin_ia32_movntps512 (__P, (__v16sf) __A);
  7624. }
  7625. extern __inline void
  7626. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7627. _mm512_stream_pd (double *__P, __m512d __A)
  7628. {
  7629. __builtin_ia32_movntpd512 (__P, (__v8df) __A);
  7630. }
  7631. extern __inline __m512i
  7632. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7633. _mm512_stream_load_si512 (void *__P)
  7634. {
  7635. return __builtin_ia32_movntdqa512 ((__v8di *)__P);
  7636. }
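/* Illustrative usage sketch (helper name is arbitrary; both pointers are
   assumed 64-byte aligned): the streaming intrinsics carry a non-temporal
   hint and require aligned addresses; a store fence is customary before
   the written data is consumed by another thread or device:

     #include <immintrin.h>

     void copy_nt (__m512i *dst, __m512i *src, long n)
     {
       for (long i = 0; i < n; i++)
         _mm512_stream_si512 (dst + i, _mm512_stream_load_si512 (src + i));
       _mm_sfence ();
     }
*/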
  7637. /* Constants for mantissa extraction */
  7638. typedef enum
  7639. {
  7640. _MM_MANT_NORM_1_2, /* interval [1, 2) */
  7641. _MM_MANT_NORM_p5_2, /* interval [0.5, 2) */
  7642. _MM_MANT_NORM_p5_1, /* interval [0.5, 1) */
  7643. _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */
  7644. } _MM_MANTISSA_NORM_ENUM;
  7645. typedef enum
  7646. {
  7647. _MM_MANT_SIGN_src, /* sign = sign(SRC) */
  7648. _MM_MANT_SIGN_zero, /* sign = 0 */
  7649. _MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */
  7650. } _MM_MANTISSA_SIGN_ENUM;
  7651. #ifdef __OPTIMIZE__
  7652. extern __inline __m128
  7653. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7654. _mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
  7655. {
  7656. return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
  7657. (__v4sf) __B,
  7658. __R);
  7659. }
  7660. extern __inline __m128
  7661. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7662. _mm_mask_getexp_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
  7663. __m128 __B, const int __R)
  7664. {
  7665. return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
  7666. (__v4sf) __B,
  7667. (__v4sf) __W,
  7668. (__mmask8) __U, __R);
  7669. }
  7670. extern __inline __m128
  7671. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7672. _mm_maskz_getexp_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
  7673. const int __R)
  7674. {
  7675. return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
  7676. (__v4sf) __B,
  7677. (__v4sf)
  7678. _mm_setzero_ps (),
  7679. (__mmask8) __U, __R);
  7680. }
  7681. extern __inline __m128d
  7682. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7683. _mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
  7684. {
  7685. return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
  7686. (__v2df) __B,
  7687. __R);
  7688. }
  7689. extern __inline __m128d
  7690. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7691. _mm_mask_getexp_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
  7692. __m128d __B, const int __R)
  7693. {
  7694. return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
  7695. (__v2df) __B,
  7696. (__v2df) __W,
  7697. (__mmask8) __U, __R);
  7698. }
  7699. extern __inline __m128d
  7700. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7701. _mm_maskz_getexp_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
  7702. const int __R)
  7703. {
  7704. return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
  7705. (__v2df) __B,
  7706. (__v2df)
  7707. _mm_setzero_pd (),
  7708. (__mmask8) __U, __R);
  7709. }
  7710. extern __inline __m512
  7711. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7712. _mm512_getexp_round_ps (__m512 __A, const int __R)
  7713. {
  7714. return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
  7715. (__v16sf)
  7716. _mm512_undefined_ps (),
  7717. (__mmask16) -1, __R);
  7718. }
  7719. extern __inline __m512
  7720. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7721. _mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
  7722. const int __R)
  7723. {
  7724. return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
  7725. (__v16sf) __W,
  7726. (__mmask16) __U, __R);
  7727. }
  7728. extern __inline __m512
  7729. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7730. _mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R)
  7731. {
  7732. return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
  7733. (__v16sf)
  7734. _mm512_setzero_ps (),
  7735. (__mmask16) __U, __R);
  7736. }
  7737. extern __inline __m512d
  7738. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7739. _mm512_getexp_round_pd (__m512d __A, const int __R)
  7740. {
  7741. return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
  7742. (__v8df)
  7743. _mm512_undefined_pd (),
  7744. (__mmask8) -1, __R);
  7745. }
  7746. extern __inline __m512d
  7747. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7748. _mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
  7749. const int __R)
  7750. {
  7751. return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
  7752. (__v8df) __W,
  7753. (__mmask8) __U, __R);
  7754. }
  7755. extern __inline __m512d
  7756. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7757. _mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R)
  7758. {
  7759. return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
  7760. (__v8df)
  7761. _mm512_setzero_pd (),
  7762. (__mmask8) __U, __R);
  7763. }
  7764. extern __inline __m512d
  7765. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7766. _mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
  7767. _MM_MANTISSA_SIGN_ENUM __C, const int __R)
  7768. {
  7769. return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
  7770. (__C << 2) | __B,
  7771. _mm512_undefined_pd (),
  7772. (__mmask8) -1, __R);
  7773. }
  7774. extern __inline __m512d
  7775. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7776. _mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
  7777. _MM_MANTISSA_NORM_ENUM __B,
  7778. _MM_MANTISSA_SIGN_ENUM __C, const int __R)
  7779. {
  7780. return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
  7781. (__C << 2) | __B,
  7782. (__v8df) __W, __U,
  7783. __R);
  7784. }
  7785. extern __inline __m512d
  7786. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7787. _mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A,
  7788. _MM_MANTISSA_NORM_ENUM __B,
  7789. _MM_MANTISSA_SIGN_ENUM __C, const int __R)
  7790. {
  7791. return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
  7792. (__C << 2) | __B,
  7793. (__v8df)
  7794. _mm512_setzero_pd (),
  7795. __U, __R);
  7796. }
  7797. extern __inline __m512
  7798. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7799. _mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
  7800. _MM_MANTISSA_SIGN_ENUM __C, const int __R)
  7801. {
  7802. return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
  7803. (__C << 2) | __B,
  7804. _mm512_undefined_ps (),
  7805. (__mmask16) -1, __R);
  7806. }
  7807. extern __inline __m512
  7808. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7809. _mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
  7810. _MM_MANTISSA_NORM_ENUM __B,
  7811. _MM_MANTISSA_SIGN_ENUM __C, const int __R)
  7812. {
  7813. return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
  7814. (__C << 2) | __B,
  7815. (__v16sf) __W, __U,
  7816. __R);
  7817. }
  7818. extern __inline __m512
  7819. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7820. _mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
  7821. _MM_MANTISSA_NORM_ENUM __B,
  7822. _MM_MANTISSA_SIGN_ENUM __C, const int __R)
  7823. {
  7824. return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
  7825. (__C << 2) | __B,
  7826. (__v16sf)
  7827. _mm512_setzero_ps (),
  7828. __U, __R);
  7829. }
  7830. extern __inline __m128d
  7831. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7832. _mm_getmant_round_sd (__m128d __A, __m128d __B,
  7833. _MM_MANTISSA_NORM_ENUM __C,
  7834. _MM_MANTISSA_SIGN_ENUM __D, const int __R)
  7835. {
  7836. return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
  7837. (__v2df) __B,
  7838. (__D << 2) | __C,
  7839. __R);
  7840. }
  7841. extern __inline __m128d
  7842. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7843. _mm_mask_getmant_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
  7844. __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
  7845. _MM_MANTISSA_SIGN_ENUM __D, const int __R)
  7846. {
  7847. return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
  7848. (__v2df) __B,
  7849. (__D << 2) | __C,
  7850. (__v2df) __W,
  7851. __U, __R);
  7852. }
  7853. extern __inline __m128d
  7854. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7855. _mm_maskz_getmant_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
  7856. _MM_MANTISSA_NORM_ENUM __C,
  7857. _MM_MANTISSA_SIGN_ENUM __D, const int __R)
  7858. {
  7859. return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
  7860. (__v2df) __B,
  7861. (__D << 2) | __C,
  7862. (__v2df)
  7863. _mm_setzero_pd(),
  7864. __U, __R);
  7865. }
  7866. extern __inline __m128
  7867. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7868. _mm_getmant_round_ss (__m128 __A, __m128 __B,
  7869. _MM_MANTISSA_NORM_ENUM __C,
  7870. _MM_MANTISSA_SIGN_ENUM __D, const int __R)
  7871. {
  7872. return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
  7873. (__v4sf) __B,
  7874. (__D << 2) | __C,
  7875. __R);
  7876. }
  7877. extern __inline __m128
  7878. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7879. _mm_mask_getmant_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
  7880. __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
  7881. _MM_MANTISSA_SIGN_ENUM __D, const int __R)
  7882. {
  7883. return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
  7884. (__v4sf) __B,
  7885. (__D << 2) | __C,
  7886. (__v4sf) __W,
  7887. __U, __R);
  7888. }
  7889. extern __inline __m128
  7890. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7891. _mm_maskz_getmant_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
  7892. _MM_MANTISSA_NORM_ENUM __C,
  7893. _MM_MANTISSA_SIGN_ENUM __D, const int __R)
  7894. {
  7895. return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
  7896. (__v4sf) __B,
  7897. (__D << 2) | __C,
  7898. (__v4sf)
  7899. _mm_setzero_ps(),
  7900. __U, __R);
  7901. }
  7902. #else
  7903. #define _mm512_getmant_round_pd(X, B, C, R) \
  7904. ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
  7905. (int)(((C)<<2) | (B)), \
  7906. (__v8df)(__m512d)_mm512_undefined_pd(), \
  7907. (__mmask8)-1,\
  7908. (R)))
  7909. #define _mm512_mask_getmant_round_pd(W, U, X, B, C, R) \
  7910. ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
  7911. (int)(((C)<<2) | (B)), \
  7912. (__v8df)(__m512d)(W), \
  7913. (__mmask8)(U),\
  7914. (R)))
  7915. #define _mm512_maskz_getmant_round_pd(U, X, B, C, R) \
  7916. ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
  7917. (int)(((C)<<2) | (B)), \
  7918. (__v8df)(__m512d)_mm512_setzero_pd(), \
  7919. (__mmask8)(U),\
  7920. (R)))
  7921. #define _mm512_getmant_round_ps(X, B, C, R) \
  7922. ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
  7923. (int)(((C)<<2) | (B)), \
  7924. (__v16sf)(__m512)_mm512_undefined_ps(), \
  7925. (__mmask16)-1,\
  7926. (R)))
  7927. #define _mm512_mask_getmant_round_ps(W, U, X, B, C, R) \
  7928. ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
  7929. (int)(((C)<<2) | (B)), \
  7930. (__v16sf)(__m512)(W), \
  7931. (__mmask16)(U),\
  7932. (R)))
  7933. #define _mm512_maskz_getmant_round_ps(U, X, B, C, R) \
  7934. ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
  7935. (int)(((C)<<2) | (B)), \
  7936. (__v16sf)(__m512)_mm512_setzero_ps(), \
  7937. (__mmask16)(U),\
  7938. (R)))
  7939. #define _mm_getmant_round_sd(X, Y, C, D, R) \
  7940. ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \
  7941. (__v2df)(__m128d)(Y), \
  7942. (int)(((D)<<2) | (C)), \
  7943. (R)))
  7944. #define _mm_mask_getmant_round_sd(W, U, X, Y, C, D, R) \
  7945. ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \
  7946. (__v2df)(__m128d)(Y), \
  7947. (int)(((D)<<2) | (C)), \
  7948. (__v2df)(__m128d)(W), \
  7949. (__mmask8)(U),\
  7950. (R)))
  7951. #define _mm_maskz_getmant_round_sd(U, X, Y, C, D, R) \
  7952. ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \
  7953. (__v2df)(__m128d)(Y), \
  7954. (int)(((D)<<2) | (C)), \
  7955. (__v2df)(__m128d)_mm_setzero_pd(), \
  7956. (__mmask8)(U),\
  7957. (R)))
  7958. #define _mm_getmant_round_ss(X, Y, C, D, R) \
  7959. ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \
  7960. (__v4sf)(__m128)(Y), \
  7961. (int)(((D)<<2) | (C)), \
  7962. (R)))
  7963. #define _mm_mask_getmant_round_ss(W, U, X, Y, C, D, R) \
  7964. ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \
  7965. (__v4sf)(__m128)(Y), \
  7966. (int)(((D)<<2) | (C)), \
  7967. (__v4sf)(__m128)(W), \
  7968. (__mmask8)(U),\
  7969. (R)))
  7970. #define _mm_maskz_getmant_round_ss(U, X, Y, C, D, R) \
  7971. ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \
  7972. (__v4sf)(__m128)(Y), \
  7973. (int)(((D)<<2) | (C)), \
  7974. (__v4sf)(__m128)_mm_setzero_ps(), \
  7975. (__mmask8)(U),\
  7976. (R)))
  7977. #define _mm_getexp_round_ss(A, B, R) \
  7978. ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), R))
  7979. #define _mm_mask_getexp_round_ss(W, U, A, B, C) \
  7980. (__m128)__builtin_ia32_getexpss_mask_round(A, B, W, U, C)
  7981. #define _mm_maskz_getexp_round_ss(U, A, B, C) \
  7982. (__m128)__builtin_ia32_getexpss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
  7983. #define _mm_getexp_round_sd(A, B, R) \
  7984. ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), R))
  7985. #define _mm_mask_getexp_round_sd(W, U, A, B, C) \
  7986. (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, W, U, C)
  7987. #define _mm_maskz_getexp_round_sd(U, A, B, C) \
  7988. (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
  7989. #define _mm512_getexp_round_ps(A, R) \
  7990. ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
  7991. (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, R))
  7992. #define _mm512_mask_getexp_round_ps(W, U, A, R) \
  7993. ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
  7994. (__v16sf)(__m512)(W), (__mmask16)(U), R))
  7995. #define _mm512_maskz_getexp_round_ps(U, A, R) \
  7996. ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
  7997. (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), R))
  7998. #define _mm512_getexp_round_pd(A, R) \
  7999. ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
  8000. (__v8df)_mm512_undefined_pd(), (__mmask8)-1, R))
  8001. #define _mm512_mask_getexp_round_pd(W, U, A, R) \
  8002. ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
  8003. (__v8df)(__m512d)(W), (__mmask8)(U), R))
  8004. #define _mm512_maskz_getexp_round_pd(U, A, R) \
  8005. ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
  8006. (__v8df)_mm512_setzero_pd(), (__mmask8)(U), R))
  8007. #endif
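/* Illustrative usage sketch (helper name is arbitrary): getmant/getexp
   split each element into mantissa and exponent, with the normalization
   interval and sign handling chosen by the two enums above, so that for
   normal inputs x == getmant(x) * 2^getexp(x):

     #include <immintrin.h>

     void split_pd (__m512d x, __m512d *mant, __m512d *exp)
     {
       *mant = _mm512_getmant_round_pd (x, _MM_MANT_NORM_1_2,
                                        _MM_MANT_SIGN_src,
                                        _MM_FROUND_CUR_DIRECTION);
       *exp = _mm512_getexp_round_pd (x, _MM_FROUND_CUR_DIRECTION);
     }
*/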
  8008. #ifdef __OPTIMIZE__
  8009. extern __inline __m512
  8010. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8011. _mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
  8012. {
  8013. return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
  8014. (__v16sf)
  8015. _mm512_undefined_ps (),
  8016. -1, __R);
  8017. }
  8018. extern __inline __m512
  8019. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8020. _mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
  8021. const int __imm, const int __R)
  8022. {
  8023. return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
  8024. (__v16sf) __A,
  8025. (__mmask16) __B, __R);
  8026. }
  8027. extern __inline __m512
  8028. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8029. _mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
  8030. const int __imm, const int __R)
  8031. {
  8032. return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
  8033. __imm,
  8034. (__v16sf)
  8035. _mm512_setzero_ps (),
  8036. (__mmask16) __A, __R);
  8037. }
  8038. extern __inline __m512d
  8039. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8040. _mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
  8041. {
  8042. return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
  8043. (__v8df)
  8044. _mm512_undefined_pd (),
  8045. -1, __R);
  8046. }
  8047. extern __inline __m512d
  8048. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8049. _mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
  8050. __m512d __C, const int __imm, const int __R)
  8051. {
  8052. return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
  8053. (__v8df) __A,
  8054. (__mmask8) __B, __R);
  8055. }
  8056. extern __inline __m512d
  8057. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8058. _mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
  8059. const int __imm, const int __R)
  8060. {
  8061. return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
  8062. __imm,
  8063. (__v8df)
  8064. _mm512_setzero_pd (),
  8065. (__mmask8) __A, __R);
  8066. }
  8067. extern __inline __m128
  8068. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8069. _mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, const int __R)
  8070. {
  8071. return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
  8072. (__v4sf) __B, __imm, __R);
  8073. }
  8074. extern __inline __m128d
  8075. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8076. _mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
  8077. const int __R)
  8078. {
  8079. return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
  8080. (__v2df) __B, __imm, __R);
  8081. }
  8082. #else
  8083. #define _mm512_roundscale_round_ps(A, B, R) \
  8084. ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
  8085. (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), R))
  8086. #define _mm512_mask_roundscale_round_ps(A, B, C, D, R) \
  8087. ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \
  8088. (int)(D), \
  8089. (__v16sf)(__m512)(A), \
  8090. (__mmask16)(B), R))
  8091. #define _mm512_maskz_roundscale_round_ps(A, B, C, R) \
  8092. ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \
  8093. (int)(C), \
  8094. (__v16sf)_mm512_setzero_ps(),\
  8095. (__mmask16)(A), R))
  8096. #define _mm512_roundscale_round_pd(A, B, R) \
  8097. ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
  8098. (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), R))
  8099. #define _mm512_mask_roundscale_round_pd(A, B, C, D, R) \
  8100. ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \
  8101. (int)(D), \
  8102. (__v8df)(__m512d)(A), \
  8103. (__mmask8)(B), R))
  8104. #define _mm512_maskz_roundscale_round_pd(A, B, C, R) \
  8105. ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \
  8106. (int)(C), \
  8107. (__v8df)_mm512_setzero_pd(),\
  8108. (__mmask8)(A), R))
  8109. #define _mm_roundscale_round_ss(A, B, C, R) \
  8110. ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A), \
  8111. (__v4sf)(__m128)(B), (int)(C), R))
  8112. #define _mm_roundscale_round_sd(A, B, C, R) \
  8113. ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A), \
  8114. (__v2df)(__m128d)(B), (int)(C), R))
  8115. #endif
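/* Illustrative usage sketch (helper name is arbitrary): the imm8 of the
   roundscale intrinsics keeps (imm >> 4) fraction bits, i.e. it rounds to
   a multiple of 2^-(imm >> 4), with the rounding control in the low bits;
   the floor/ceil helpers below are just roundscales with a fixed imm8:

     #include <immintrin.h>

     __m512 quantize_sixteenths (__m512 x)
     {
       // round each float to the nearest multiple of 1/16
       return _mm512_roundscale_round_ps (x, (4 << 4) | _MM_FROUND_TO_NEAREST_INT,
                                          _MM_FROUND_CUR_DIRECTION);
     }
*/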
  8116. extern __inline __m512
  8117. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8118. _mm512_floor_ps (__m512 __A)
  8119. {
  8120. return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
  8121. _MM_FROUND_FLOOR,
  8122. (__v16sf) __A, -1,
  8123. _MM_FROUND_CUR_DIRECTION);
  8124. }
  8125. extern __inline __m512d
  8126. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8127. _mm512_floor_pd (__m512d __A)
  8128. {
  8129. return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
  8130. _MM_FROUND_FLOOR,
  8131. (__v8df) __A, -1,
  8132. _MM_FROUND_CUR_DIRECTION);
  8133. }
  8134. extern __inline __m512
  8135. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8136. _mm512_ceil_ps (__m512 __A)
  8137. {
  8138. return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
  8139. _MM_FROUND_CEIL,
  8140. (__v16sf) __A, -1,
  8141. _MM_FROUND_CUR_DIRECTION);
  8142. }
  8143. extern __inline __m512d
  8144. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8145. _mm512_ceil_pd (__m512d __A)
  8146. {
  8147. return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
  8148. _MM_FROUND_CEIL,
  8149. (__v8df) __A, -1,
  8150. _MM_FROUND_CUR_DIRECTION);
  8151. }
  8152. extern __inline __m512
  8153. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8154. _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
  8155. {
  8156. return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
  8157. _MM_FROUND_FLOOR,
  8158. (__v16sf) __W, __U,
  8159. _MM_FROUND_CUR_DIRECTION);
  8160. }
  8161. extern __inline __m512d
  8162. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8163. _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
  8164. {
  8165. return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
  8166. _MM_FROUND_FLOOR,
  8167. (__v8df) __W, __U,
  8168. _MM_FROUND_CUR_DIRECTION);
  8169. }
  8170. extern __inline __m512
  8171. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8172. _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
  8173. {
  8174. return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
  8175. _MM_FROUND_CEIL,
  8176. (__v16sf) __W, __U,
  8177. _MM_FROUND_CUR_DIRECTION);
  8178. }
  8179. extern __inline __m512d
  8180. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8181. _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
  8182. {
  8183. return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
  8184. _MM_FROUND_CEIL,
  8185. (__v8df) __W, __U,
  8186. _MM_FROUND_CUR_DIRECTION);
  8187. }
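/* Usage sketch (not part of the original header): the floor/ceil wrappers
   above fix the rounding-control immediate and otherwise follow the current
   MXCSR direction.  The masked forms merge with their first operand, e.g.
   floor only the lanes selected by the mask and keep w elsewhere:

     __m512d floor_low_half (__m512d w, __m512d v)
     {
       return _mm512_mask_floor_pd (w, (__mmask8) 0x0F, v);
     }
*/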
  8188. #ifdef __OPTIMIZE__
  8189. extern __inline __m512i
  8190. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8191. _mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
  8192. {
  8193. return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
  8194. (__v16si) __B, __imm,
  8195. (__v16si)
  8196. _mm512_undefined_epi32 (),
  8197. (__mmask16) -1);
  8198. }
  8199. extern __inline __m512i
  8200. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8201. _mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
  8202. __m512i __B, const int __imm)
  8203. {
  8204. return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
  8205. (__v16si) __B, __imm,
  8206. (__v16si) __W,
  8207. (__mmask16) __U);
  8208. }
  8209. extern __inline __m512i
  8210. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8211. _mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
  8212. const int __imm)
  8213. {
  8214. return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
  8215. (__v16si) __B, __imm,
  8216. (__v16si)
  8217. _mm512_setzero_si512 (),
  8218. (__mmask16) __U);
  8219. }
  8220. extern __inline __m512i
  8221. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8222. _mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
  8223. {
  8224. return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
  8225. (__v8di) __B, __imm,
  8226. (__v8di)
  8227. _mm512_undefined_epi32 (),
  8228. (__mmask8) -1);
  8229. }
  8230. extern __inline __m512i
  8231. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8232. _mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
  8233. __m512i __B, const int __imm)
  8234. {
  8235. return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
  8236. (__v8di) __B, __imm,
  8237. (__v8di) __W,
  8238. (__mmask8) __U);
  8239. }
  8240. extern __inline __m512i
  8241. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8242. _mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
  8243. const int __imm)
  8244. {
  8245. return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
  8246. (__v8di) __B, __imm,
  8247. (__v8di)
  8248. _mm512_setzero_si512 (),
  8249. (__mmask8) __U);
  8250. }
  8251. #else
  8252. #define _mm512_alignr_epi32(X, Y, C) \
  8253. ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
  8254. (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_undefined_epi32 (),\
  8255. (__mmask16)-1))
  8256. #define _mm512_mask_alignr_epi32(W, U, X, Y, C) \
  8257. ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
  8258. (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)(W), \
  8259. (__mmask16)(U)))
  8260. #define _mm512_maskz_alignr_epi32(U, X, Y, C) \
  8261. ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
  8262. (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_setzero_si512 (),\
  8263. (__mmask16)(U)))
  8264. #define _mm512_alignr_epi64(X, Y, C) \
  8265. ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
  8266. (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_undefined_epi32 (), \
  8267. (__mmask8)-1))
  8268. #define _mm512_mask_alignr_epi64(W, U, X, Y, C) \
  8269. ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
  8270. (__v8di)(__m512i)(Y), (int)(C), (__v8di)(__m512i)(W), (__mmask8)(U)))
  8271. #define _mm512_maskz_alignr_epi64(U, X, Y, C) \
  8272. ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
  8273. (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_setzero_si512 (),\
  8274. (__mmask8)(U)))
  8275. #endif
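/* Usage sketch (not part of the original header): _mm512_alignr_epi32
   concatenates __A (more significant) with __B (less significant) and shifts
   the pair right by __imm 32-bit elements, keeping the low 512 bits.  With
   both operands equal this is a 512-bit rotate, e.g. element i of the result
   below is v element (i + 1) mod 16:

     __m512i rotate_right_1_dword (__m512i v)
     {
       return _mm512_alignr_epi32 (v, v, 1);
     }
*/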
  8276. extern __inline __mmask16
  8277. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8278. _mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
  8279. {
  8280. return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
  8281. (__v16si) __B,
  8282. (__mmask16) -1);
  8283. }
  8284. extern __inline __mmask16
  8285. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8286. _mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
  8287. {
  8288. return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
  8289. (__v16si) __B, __U);
  8290. }
  8291. extern __inline __mmask8
  8292. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8293. _mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
  8294. {
  8295. return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
  8296. (__v8di) __B, __U);
  8297. }
  8298. extern __inline __mmask8
  8299. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8300. _mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
  8301. {
  8302. return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
  8303. (__v8di) __B,
  8304. (__mmask8) -1);
  8305. }
  8306. extern __inline __mmask16
  8307. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8308. _mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
  8309. {
  8310. return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
  8311. (__v16si) __B,
  8312. (__mmask16) -1);
  8313. }
  8314. extern __inline __mmask16
  8315. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8316. _mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
  8317. {
  8318. return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
  8319. (__v16si) __B, __U);
  8320. }
  8321. extern __inline __mmask8
  8322. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8323. _mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
  8324. {
  8325. return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
  8326. (__v8di) __B, __U);
  8327. }
  8328. extern __inline __mmask8
  8329. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8330. _mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
  8331. {
  8332. return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
  8333. (__v8di) __B,
  8334. (__mmask8) -1);
  8335. }
  8336. extern __inline __mmask16
  8337. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8338. _mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y)
  8339. {
  8340. return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
  8341. (__v16si) __Y, 5,
  8342. (__mmask16) -1);
  8343. }
  8344. extern __inline __mmask16
  8345. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8346. _mm512_mask_cmpge_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
  8347. {
  8348. return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
  8349. (__v16si) __Y, 5,
  8350. (__mmask16) __M);
  8351. }
  8352. extern __inline __mmask16
  8353. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8354. _mm512_mask_cmpge_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
  8355. {
  8356. return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
  8357. (__v16si) __Y, 5,
  8358. (__mmask16) __M);
  8359. }
  8360. extern __inline __mmask16
  8361. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8362. _mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y)
  8363. {
  8364. return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
  8365. (__v16si) __Y, 5,
  8366. (__mmask16) -1);
  8367. }
  8368. extern __inline __mmask8
  8369. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8370. _mm512_mask_cmpge_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
  8371. {
  8372. return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
  8373. (__v8di) __Y, 5,
  8374. (__mmask8) __M);
  8375. }
  8376. extern __inline __mmask8
  8377. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8378. _mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y)
  8379. {
  8380. return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
  8381. (__v8di) __Y, 5,
  8382. (__mmask8) -1);
  8383. }
  8384. extern __inline __mmask8
  8385. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8386. _mm512_mask_cmpge_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
  8387. {
  8388. return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
  8389. (__v8di) __Y, 5,
  8390. (__mmask8) __M);
  8391. }
  8392. extern __inline __mmask8
  8393. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8394. _mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y)
  8395. {
  8396. return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
  8397. (__v8di) __Y, 5,
  8398. (__mmask8) -1);
  8399. }
  8400. extern __inline __mmask16
  8401. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8402. _mm512_mask_cmple_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
  8403. {
  8404. return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
  8405. (__v16si) __Y, 2,
  8406. (__mmask16) __M);
  8407. }
  8408. extern __inline __mmask16
  8409. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8410. _mm512_cmple_epi32_mask (__m512i __X, __m512i __Y)
  8411. {
  8412. return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
  8413. (__v16si) __Y, 2,
  8414. (__mmask16) -1);
  8415. }
  8416. extern __inline __mmask16
  8417. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8418. _mm512_mask_cmple_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
  8419. {
  8420. return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
  8421. (__v16si) __Y, 2,
  8422. (__mmask16) __M);
  8423. }
  8424. extern __inline __mmask16
  8425. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8426. _mm512_cmple_epu32_mask (__m512i __X, __m512i __Y)
  8427. {
  8428. return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
  8429. (__v16si) __Y, 2,
  8430. (__mmask16) -1);
  8431. }
  8432. extern __inline __mmask8
  8433. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8434. _mm512_mask_cmple_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
  8435. {
  8436. return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
  8437. (__v8di) __Y, 2,
  8438. (__mmask8) __M);
  8439. }
  8440. extern __inline __mmask8
  8441. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8442. _mm512_cmple_epi64_mask (__m512i __X, __m512i __Y)
  8443. {
  8444. return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
  8445. (__v8di) __Y, 2,
  8446. (__mmask8) -1);
  8447. }
  8448. extern __inline __mmask8
  8449. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8450. _mm512_mask_cmple_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
  8451. {
  8452. return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
  8453. (__v8di) __Y, 2,
  8454. (__mmask8) __M);
  8455. }
  8456. extern __inline __mmask8
  8457. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8458. _mm512_cmple_epu64_mask (__m512i __X, __m512i __Y)
  8459. {
  8460. return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
  8461. (__v8di) __Y, 2,
  8462. (__mmask8) -1);
  8463. }
  8464. extern __inline __mmask16
  8465. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8466. _mm512_mask_cmplt_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
  8467. {
  8468. return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
  8469. (__v16si) __Y, 1,
  8470. (__mmask16) __M);
  8471. }
  8472. extern __inline __mmask16
  8473. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8474. _mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y)
  8475. {
  8476. return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
  8477. (__v16si) __Y, 1,
  8478. (__mmask16) -1);
  8479. }
  8480. extern __inline __mmask16
  8481. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8482. _mm512_mask_cmplt_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
  8483. {
  8484. return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
  8485. (__v16si) __Y, 1,
  8486. (__mmask16) __M);
  8487. }
  8488. extern __inline __mmask16
  8489. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8490. _mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y)
  8491. {
  8492. return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
  8493. (__v16si) __Y, 1,
  8494. (__mmask16) -1);
  8495. }
  8496. extern __inline __mmask8
  8497. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8498. _mm512_mask_cmplt_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
  8499. {
  8500. return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
  8501. (__v8di) __Y, 1,
  8502. (__mmask8) __M);
  8503. }
  8504. extern __inline __mmask8
  8505. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8506. _mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y)
  8507. {
  8508. return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
  8509. (__v8di) __Y, 1,
  8510. (__mmask8) -1);
  8511. }
  8512. extern __inline __mmask8
  8513. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8514. _mm512_mask_cmplt_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
  8515. {
  8516. return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
  8517. (__v8di) __Y, 1,
  8518. (__mmask8) __M);
  8519. }
  8520. extern __inline __mmask8
  8521. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8522. _mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y)
  8523. {
  8524. return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
  8525. (__v8di) __Y, 1,
  8526. (__mmask8) -1);
  8527. }
  8528. extern __inline __mmask16
  8529. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8530. _mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y)
  8531. {
  8532. return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
  8533. (__v16si) __Y, 4,
  8534. (__mmask16) -1);
  8535. }
  8536. extern __inline __mmask16
  8537. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8538. _mm512_mask_cmpneq_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
  8539. {
  8540. return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
  8541. (__v16si) __Y, 4,
  8542. (__mmask16) __M);
  8543. }
  8544. extern __inline __mmask16
  8545. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8546. _mm512_mask_cmpneq_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
  8547. {
  8548. return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
  8549. (__v16si) __Y, 4,
  8550. (__mmask16) __M);
  8551. }
  8552. extern __inline __mmask16
  8553. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8554. _mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
  8555. {
  8556. return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
  8557. (__v16si) __Y, 4,
  8558. (__mmask16) -1);
  8559. }
  8560. extern __inline __mmask8
  8561. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8562. _mm512_mask_cmpneq_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
  8563. {
  8564. return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
  8565. (__v8di) __Y, 4,
  8566. (__mmask8) __M);
  8567. }
  8568. extern __inline __mmask8
  8569. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8570. _mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y)
  8571. {
  8572. return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
  8573. (__v8di) __Y, 4,
  8574. (__mmask8) -1);
  8575. }
  8576. extern __inline __mmask8
  8577. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8578. _mm512_mask_cmpneq_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
  8579. {
  8580. return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
  8581. (__v8di) __Y, 4,
  8582. (__mmask8) __M);
  8583. }
  8584. extern __inline __mmask8
  8585. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8586. _mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
  8587. {
  8588. return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
  8589. (__v8di) __Y, 4,
  8590. (__mmask8) -1);
  8591. }
  8592. #define _MM_CMPINT_EQ 0x0
  8593. #define _MM_CMPINT_LT 0x1
  8594. #define _MM_CMPINT_LE 0x2
  8595. #define _MM_CMPINT_UNUSED 0x3
  8596. #define _MM_CMPINT_NE 0x4
  8597. #define _MM_CMPINT_NLT 0x5
  8598. #define _MM_CMPINT_GE 0x5
  8599. #define _MM_CMPINT_NLE 0x6
  8600. #define _MM_CMPINT_GT 0x6
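/* The _MM_CMPINT_* predicates above use the VPCMP immediate encoding and
   match the constants hard-coded in the cmplt/cmple/cmpneq/cmpge wrappers
   earlier in this file (1 = LT, 2 = LE, 4 = NE, 5 = NLT/GE).  Usage sketch
   (not part of the original header) with the generic compare forms defined
   below:

     __mmask16 le_mask (__m512i x, __m512i y)
     {
       return _mm512_cmp_epi32_mask (x, y, _MM_CMPINT_LE);
     }
*/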
  8601. #ifdef __OPTIMIZE__
  8602. extern __inline __mmask16
  8603. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8604. _kshiftli_mask16 (__mmask16 __A, unsigned int __B)
  8605. {
  8606. return (__mmask16) __builtin_ia32_kshiftlihi ((__mmask16) __A,
  8607. (__mmask8) __B);
  8608. }
  8609. extern __inline __mmask16
  8610. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8611. _kshiftri_mask16 (__mmask16 __A, unsigned int __B)
  8612. {
  8613. return (__mmask16) __builtin_ia32_kshiftrihi ((__mmask16) __A,
  8614. (__mmask8) __B);
  8615. }
  8616. extern __inline __mmask8
  8617. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8618. _mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
  8619. {
  8620. return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
  8621. (__v8di) __Y, __P,
  8622. (__mmask8) -1);
  8623. }
  8624. extern __inline __mmask16
  8625. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8626. _mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
  8627. {
  8628. return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
  8629. (__v16si) __Y, __P,
  8630. (__mmask16) -1);
  8631. }
  8632. extern __inline __mmask8
  8633. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8634. _mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
  8635. {
  8636. return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
  8637. (__v8di) __Y, __P,
  8638. (__mmask8) -1);
  8639. }
  8640. extern __inline __mmask16
  8641. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8642. _mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
  8643. {
  8644. return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
  8645. (__v16si) __Y, __P,
  8646. (__mmask16) -1);
  8647. }
  8648. extern __inline __mmask8
  8649. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8650. _mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
  8651. const int __R)
  8652. {
  8653. return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
  8654. (__v8df) __Y, __P,
  8655. (__mmask8) -1, __R);
  8656. }
  8657. extern __inline __mmask16
  8658. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8659. _mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R)
  8660. {
  8661. return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
  8662. (__v16sf) __Y, __P,
  8663. (__mmask16) -1, __R);
  8664. }
  8665. extern __inline __mmask8
  8666. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8667. _mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
  8668. const int __P)
  8669. {
  8670. return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
  8671. (__v8di) __Y, __P,
  8672. (__mmask8) __U);
  8673. }
  8674. extern __inline __mmask16
  8675. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8676. _mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
  8677. const int __P)
  8678. {
  8679. return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
  8680. (__v16si) __Y, __P,
  8681. (__mmask16) __U);
  8682. }
  8683. extern __inline __mmask8
  8684. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8685. _mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
  8686. const int __P)
  8687. {
  8688. return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
  8689. (__v8di) __Y, __P,
  8690. (__mmask8) __U);
  8691. }
  8692. extern __inline __mmask16
  8693. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8694. _mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
  8695. const int __P)
  8696. {
  8697. return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
  8698. (__v16si) __Y, __P,
  8699. (__mmask16) __U);
  8700. }
  8701. extern __inline __mmask8
  8702. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8703. _mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
  8704. const int __P, const int __R)
  8705. {
  8706. return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
  8707. (__v8df) __Y, __P,
  8708. (__mmask8) __U, __R);
  8709. }
  8710. extern __inline __mmask16
  8711. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8712. _mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
  8713. const int __P, const int __R)
  8714. {
  8715. return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
  8716. (__v16sf) __Y, __P,
  8717. (__mmask16) __U, __R);
  8718. }
  8719. extern __inline __mmask8
  8720. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8721. _mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
  8722. {
  8723. return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
  8724. (__v2df) __Y, __P,
  8725. (__mmask8) -1, __R);
  8726. }
  8727. extern __inline __mmask8
  8728. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8729. _mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
  8730. const int __P, const int __R)
  8731. {
  8732. return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
  8733. (__v2df) __Y, __P,
  8734. (__mmask8) __M, __R);
  8735. }
  8736. extern __inline __mmask8
  8737. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8738. _mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
  8739. {
  8740. return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
  8741. (__v4sf) __Y, __P,
  8742. (__mmask8) -1, __R);
  8743. }
  8744. extern __inline __mmask8
  8745. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8746. _mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
  8747. const int __P, const int __R)
  8748. {
  8749. return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
  8750. (__v4sf) __Y, __P,
  8751. (__mmask8) __M, __R);
  8752. }
  8753. #else
  8754. #define _kshiftli_mask16(X, Y) \
  8755. ((__mmask16) __builtin_ia32_kshiftlihi ((__mmask16)(X), (__mmask8)(Y)))
  8756. #define _kshiftri_mask16(X, Y) \
  8757. ((__mmask16) __builtin_ia32_kshiftrihi ((__mmask16)(X), (__mmask8)(Y)))
  8758. #define _mm512_cmp_epi64_mask(X, Y, P) \
  8759. ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
  8760. (__v8di)(__m512i)(Y), (int)(P),\
  8761. (__mmask8)-1))
  8762. #define _mm512_cmp_epi32_mask(X, Y, P) \
  8763. ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
  8764. (__v16si)(__m512i)(Y), (int)(P), \
  8765. (__mmask16)-1))
  8766. #define _mm512_cmp_epu64_mask(X, Y, P) \
  8767. ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
  8768. (__v8di)(__m512i)(Y), (int)(P),\
  8769. (__mmask8)-1))
  8770. #define _mm512_cmp_epu32_mask(X, Y, P) \
  8771. ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
  8772. (__v16si)(__m512i)(Y), (int)(P), \
  8773. (__mmask16)-1))
  8774. #define _mm512_cmp_round_pd_mask(X, Y, P, R) \
  8775. ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
  8776. (__v8df)(__m512d)(Y), (int)(P),\
  8777. (__mmask8)-1, R))
  8778. #define _mm512_cmp_round_ps_mask(X, Y, P, R) \
  8779. ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
  8780. (__v16sf)(__m512)(Y), (int)(P),\
  8781. (__mmask16)-1, R))
  8782. #define _mm512_mask_cmp_epi64_mask(M, X, Y, P) \
  8783. ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
  8784. (__v8di)(__m512i)(Y), (int)(P),\
  8785. (__mmask8)M))
  8786. #define _mm512_mask_cmp_epi32_mask(M, X, Y, P) \
  8787. ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
  8788. (__v16si)(__m512i)(Y), (int)(P), \
  8789. (__mmask16)M))
  8790. #define _mm512_mask_cmp_epu64_mask(M, X, Y, P) \
  8791. ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
  8792. (__v8di)(__m512i)(Y), (int)(P),\
  8793. (__mmask8)M))
  8794. #define _mm512_mask_cmp_epu32_mask(M, X, Y, P) \
  8795. ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
  8796. (__v16si)(__m512i)(Y), (int)(P), \
  8797. (__mmask16)M))
  8798. #define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R) \
  8799. ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
  8800. (__v8df)(__m512d)(Y), (int)(P),\
  8801. (__mmask8)M, R))
  8802. #define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R) \
  8803. ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
  8804. (__v16sf)(__m512)(Y), (int)(P),\
  8805. (__mmask16)M, R))
  8806. #define _mm_cmp_round_sd_mask(X, Y, P, R) \
  8807. ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
  8808. (__v2df)(__m128d)(Y), (int)(P),\
  8809. (__mmask8)-1, R))
  8810. #define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
  8811. ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
  8812. (__v2df)(__m128d)(Y), (int)(P),\
  8813. (M), R))
  8814. #define _mm_cmp_round_ss_mask(X, Y, P, R) \
  8815. ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
  8816. (__v4sf)(__m128)(Y), (int)(P), \
  8817. (__mmask8)-1, R))
  8818. #define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
  8819. ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
  8820. (__v4sf)(__m128)(Y), (int)(P), \
  8821. (M), R))
  8822. #endif
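/* Usage sketch (not part of the original header): the *_round_* compare
   forms take an extra SAE argument; _MM_FROUND_NO_EXC suppresses exceptions,
   while _MM_FROUND_CUR_DIRECTION behaves like the non-round forms.  The
   floating-point predicate is one of the _CMP_* constants that <immintrin.h>
   also provides, e.g.:

     __mmask8 lt_mask (__m512d a, __m512d b)
     {
       return _mm512_cmp_round_pd_mask (a, b, _CMP_LT_OS, _MM_FROUND_NO_EXC);
     }
*/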
  8823. #ifdef __OPTIMIZE__
  8824. extern __inline __m512
  8825. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8826. _mm512_i32gather_ps (__m512i __index, void const *__addr, int __scale)
  8827. {
  8828. __m512 __v1_old = _mm512_undefined_ps ();
  8829. __mmask16 __mask = 0xFFFF;
  8830. return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
  8831. __addr,
  8832. (__v16si) __index,
  8833. __mask, __scale);
  8834. }
  8835. extern __inline __m512
  8836. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8837. _mm512_mask_i32gather_ps (__m512 __v1_old, __mmask16 __mask,
  8838. __m512i __index, void const *__addr, int __scale)
  8839. {
  8840. return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
  8841. __addr,
  8842. (__v16si) __index,
  8843. __mask, __scale);
  8844. }
  8845. extern __inline __m512d
  8846. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8847. _mm512_i32gather_pd (__m256i __index, void const *__addr, int __scale)
  8848. {
  8849. __m512d __v1_old = _mm512_undefined_pd ();
  8850. __mmask8 __mask = 0xFF;
  8851. return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
  8852. __addr,
  8853. (__v8si) __index, __mask,
  8854. __scale);
  8855. }
  8856. extern __inline __m512d
  8857. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8858. _mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
  8859. __m256i __index, void const *__addr, int __scale)
  8860. {
  8861. return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
  8862. __addr,
  8863. (__v8si) __index,
  8864. __mask, __scale);
  8865. }
  8866. extern __inline __m256
  8867. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8868. _mm512_i64gather_ps (__m512i __index, void const *__addr, int __scale)
  8869. {
  8870. __m256 __v1_old = _mm256_undefined_ps ();
  8871. __mmask8 __mask = 0xFF;
  8872. return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
  8873. __addr,
  8874. (__v8di) __index, __mask,
  8875. __scale);
  8876. }
  8877. extern __inline __m256
  8878. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8879. _mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
  8880. __m512i __index, void const *__addr, int __scale)
  8881. {
  8882. return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
  8883. __addr,
  8884. (__v8di) __index,
  8885. __mask, __scale);
  8886. }
  8887. extern __inline __m512d
  8888. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8889. _mm512_i64gather_pd (__m512i __index, void const *__addr, int __scale)
  8890. {
  8891. __m512d __v1_old = _mm512_undefined_pd ();
  8892. __mmask8 __mask = 0xFF;
  8893. return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
  8894. __addr,
  8895. (__v8di) __index, __mask,
  8896. __scale);
  8897. }
  8898. extern __inline __m512d
  8899. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8900. _mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
  8901. __m512i __index, void const *__addr, int __scale)
  8902. {
  8903. return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
  8904. __addr,
  8905. (__v8di) __index,
  8906. __mask, __scale);
  8907. }
  8908. extern __inline __m512i
  8909. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8910. _mm512_i32gather_epi32 (__m512i __index, void const *__addr, int __scale)
  8911. {
  8912. __m512i __v1_old = _mm512_undefined_epi32 ();
  8913. __mmask16 __mask = 0xFFFF;
  8914. return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
  8915. __addr,
  8916. (__v16si) __index,
  8917. __mask, __scale);
  8918. }
  8919. extern __inline __m512i
  8920. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8921. _mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
  8922. __m512i __index, void const *__addr, int __scale)
  8923. {
  8924. return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
  8925. __addr,
  8926. (__v16si) __index,
  8927. __mask, __scale);
  8928. }
  8929. extern __inline __m512i
  8930. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8931. _mm512_i32gather_epi64 (__m256i __index, void const *__addr, int __scale)
  8932. {
  8933. __m512i __v1_old = _mm512_undefined_epi32 ();
  8934. __mmask8 __mask = 0xFF;
  8935. return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
  8936. __addr,
  8937. (__v8si) __index, __mask,
  8938. __scale);
  8939. }
  8940. extern __inline __m512i
  8941. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8942. _mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
  8943. __m256i __index, void const *__addr,
  8944. int __scale)
  8945. {
  8946. return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
  8947. __addr,
  8948. (__v8si) __index,
  8949. __mask, __scale);
  8950. }
  8951. extern __inline __m256i
  8952. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8953. _mm512_i64gather_epi32 (__m512i __index, void const *__addr, int __scale)
  8954. {
  8955. __m256i __v1_old = _mm256_undefined_si256 ();
  8956. __mmask8 __mask = 0xFF;
  8957. return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
  8958. __addr,
  8959. (__v8di) __index,
  8960. __mask, __scale);
  8961. }
  8962. extern __inline __m256i
  8963. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8964. _mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
  8965. __m512i __index, void const *__addr, int __scale)
  8966. {
  8967. return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
  8968. __addr,
  8969. (__v8di) __index,
  8970. __mask, __scale);
  8971. }
  8972. extern __inline __m512i
  8973. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8974. _mm512_i64gather_epi64 (__m512i __index, void const *__addr, int __scale)
  8975. {
  8976. __m512i __v1_old = _mm512_undefined_epi32 ();
  8977. __mmask8 __mask = 0xFF;
  8978. return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
  8979. __addr,
  8980. (__v8di) __index, __mask,
  8981. __scale);
  8982. }
  8983. extern __inline __m512i
  8984. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8985. _mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
  8986. __m512i __index, void const *__addr,
  8987. int __scale)
  8988. {
  8989. return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
  8990. __addr,
  8991. (__v8di) __index,
  8992. __mask, __scale);
  8993. }
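/* Usage sketch (not part of the original header): the gathers above load
   element i from (char *) __addr + __index[i] * __scale, where __scale must
   be 1, 2, 4 or 8.  A typical full-width 32-bit-index float gather:

     __m512 gather_floats (const float *base, __m512i idx)
     {
       return _mm512_i32gather_ps (idx, base, 4);   /* scale = sizeof(float) */
     }
*/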
  8994. extern __inline void
  8995. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8996. _mm512_i32scatter_ps (void *__addr, __m512i __index, __m512 __v1, int __scale)
  8997. {
  8998. __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF,
  8999. (__v16si) __index, (__v16sf) __v1, __scale);
  9000. }
  9001. extern __inline void
  9002. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9003. _mm512_mask_i32scatter_ps (void *__addr, __mmask16 __mask,
  9004. __m512i __index, __m512 __v1, int __scale)
  9005. {
  9006. __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index,
  9007. (__v16sf) __v1, __scale);
  9008. }
  9009. extern __inline void
  9010. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9011. _mm512_i32scatter_pd (void *__addr, __m256i __index, __m512d __v1,
  9012. int __scale)
  9013. {
  9014. __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
  9015. (__v8si) __index, (__v8df) __v1, __scale);
  9016. }
  9017. extern __inline void
  9018. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9019. _mm512_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
  9020. __m256i __index, __m512d __v1, int __scale)
  9021. {
  9022. __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
  9023. (__v8df) __v1, __scale);
  9024. }
  9025. extern __inline void
  9026. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9027. _mm512_i64scatter_ps (void *__addr, __m512i __index, __m256 __v1, int __scale)
  9028. {
  9029. __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF,
  9030. (__v8di) __index, (__v8sf) __v1, __scale);
  9031. }
  9032. extern __inline void
  9033. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9034. _mm512_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
  9035. __m512i __index, __m256 __v1, int __scale)
  9036. {
  9037. __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index,
  9038. (__v8sf) __v1, __scale);
  9039. }
  9040. extern __inline void
  9041. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9042. _mm512_i64scatter_pd (void *__addr, __m512i __index, __m512d __v1,
  9043. int __scale)
  9044. {
  9045. __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
  9046. (__v8di) __index, (__v8df) __v1, __scale);
  9047. }
  9048. extern __inline void
  9049. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9050. _mm512_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
  9051. __m512i __index, __m512d __v1, int __scale)
  9052. {
  9053. __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
  9054. (__v8df) __v1, __scale);
  9055. }
  9056. extern __inline void
  9057. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9058. _mm512_i32scatter_epi32 (void *__addr, __m512i __index,
  9059. __m512i __v1, int __scale)
  9060. {
  9061. __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF,
  9062. (__v16si) __index, (__v16si) __v1, __scale);
  9063. }
  9064. extern __inline void
  9065. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9066. _mm512_mask_i32scatter_epi32 (void *__addr, __mmask16 __mask,
  9067. __m512i __index, __m512i __v1, int __scale)
  9068. {
  9069. __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index,
  9070. (__v16si) __v1, __scale);
  9071. }
  9072. extern __inline void
  9073. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9074. _mm512_i32scatter_epi64 (void *__addr, __m256i __index,
  9075. __m512i __v1, int __scale)
  9076. {
  9077. __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF,
  9078. (__v8si) __index, (__v8di) __v1, __scale);
  9079. }
  9080. extern __inline void
  9081. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9082. _mm512_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
  9083. __m256i __index, __m512i __v1, int __scale)
  9084. {
  9085. __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index,
  9086. (__v8di) __v1, __scale);
  9087. }
  9088. extern __inline void
  9089. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9090. _mm512_i64scatter_epi32 (void *__addr, __m512i __index,
  9091. __m256i __v1, int __scale)
  9092. {
  9093. __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF,
  9094. (__v8di) __index, (__v8si) __v1, __scale);
  9095. }
  9096. extern __inline void
  9097. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9098. _mm512_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
  9099. __m512i __index, __m256i __v1, int __scale)
  9100. {
  9101. __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index,
  9102. (__v8si) __v1, __scale);
  9103. }
  9104. extern __inline void
  9105. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9106. _mm512_i64scatter_epi64 (void *__addr, __m512i __index,
  9107. __m512i __v1, int __scale)
  9108. {
  9109. __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF,
  9110. (__v8di) __index, (__v8di) __v1, __scale);
  9111. }
  9112. extern __inline void
  9113. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9114. _mm512_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
  9115. __m512i __index, __m512i __v1, int __scale)
  9116. {
  9117. __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index,
  9118. (__v8di) __v1, __scale);
  9119. }
  9120. #else
  9121. #define _mm512_i32gather_ps(INDEX, ADDR, SCALE) \
  9122. (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_undefined_ps(),\
  9123. (void const *)ADDR, \
  9124. (__v16si)(__m512i)INDEX, \
  9125. (__mmask16)0xFFFF, (int)SCALE)
  9126. #define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  9127. (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512)V1OLD, \
  9128. (void const *)ADDR, \
  9129. (__v16si)(__m512i)INDEX, \
  9130. (__mmask16)MASK, (int)SCALE)
  9131. #define _mm512_i32gather_pd(INDEX, ADDR, SCALE) \
  9132. (__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_undefined_pd(), \
  9133. (void const *)ADDR, \
  9134. (__v8si)(__m256i)INDEX, \
  9135. (__mmask8)0xFF, (int)SCALE)
  9136. #define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  9137. (__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d)V1OLD, \
  9138. (void const *)ADDR, \
  9139. (__v8si)(__m256i)INDEX, \
  9140. (__mmask8)MASK, (int)SCALE)
  9141. #define _mm512_i64gather_ps(INDEX, ADDR, SCALE) \
  9142. (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_undefined_ps(), \
  9143. (void const *)ADDR, \
  9144. (__v8di)(__m512i)INDEX, \
  9145. (__mmask8)0xFF, (int)SCALE)
  9146. #define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  9147. (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256)V1OLD, \
  9148. (void const *)ADDR, \
  9149. (__v8di)(__m512i)INDEX, \
  9150. (__mmask8)MASK, (int)SCALE)
  9151. #define _mm512_i64gather_pd(INDEX, ADDR, SCALE) \
  9152. (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_undefined_pd(), \
  9153. (void const *)ADDR, \
  9154. (__v8di)(__m512i)INDEX, \
  9155. (__mmask8)0xFF, (int)SCALE)
  9156. #define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  9157. (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d)V1OLD, \
  9158. (void const *)ADDR, \
  9159. (__v8di)(__m512i)INDEX, \
  9160. (__mmask8)MASK, (int)SCALE)
  9161. #define _mm512_i32gather_epi32(INDEX, ADDR, SCALE) \
  9162. (__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_undefined_epi32 (), \
  9163. (void const *)ADDR, \
  9164. (__v16si)(__m512i)INDEX, \
  9165. (__mmask16)0xFFFF, (int)SCALE)
  9166. #define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  9167. (__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i)V1OLD, \
  9168. (void const *)ADDR, \
  9169. (__v16si)(__m512i)INDEX, \
  9170. (__mmask16)MASK, (int)SCALE)
  9171. #define _mm512_i32gather_epi64(INDEX, ADDR, SCALE) \
  9172. (__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_undefined_epi32 (), \
  9173. (void const *)ADDR, \
  9174. (__v8si)(__m256i)INDEX, \
  9175. (__mmask8)0xFF, (int)SCALE)
  9176. #define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  9177. (__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i)V1OLD, \
  9178. (void const *)ADDR, \
  9179. (__v8si)(__m256i)INDEX, \
  9180. (__mmask8)MASK, (int)SCALE)
  9181. #define _mm512_i64gather_epi32(INDEX, ADDR, SCALE) \
  9182. (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_undefined_si256(), \
  9183. (void const *)ADDR, \
  9184. (__v8di)(__m512i)INDEX, \
  9185. (__mmask8)0xFF, (int)SCALE)
  9186. #define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  9187. (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i)V1OLD, \
  9188. (void const *)ADDR, \
  9189. (__v8di)(__m512i)INDEX, \
  9190. (__mmask8)MASK, (int)SCALE)
  9191. #define _mm512_i64gather_epi64(INDEX, ADDR, SCALE) \
  9192. (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_undefined_epi32 (), \
  9193. (void const *)ADDR, \
  9194. (__v8di)(__m512i)INDEX, \
  9195. (__mmask8)0xFF, (int)SCALE)
  9196. #define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  9197. (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i)V1OLD, \
  9198. (void const *)ADDR, \
  9199. (__v8di)(__m512i)INDEX, \
  9200. (__mmask8)MASK, (int)SCALE)
  9201. #define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
  9202. __builtin_ia32_scattersiv16sf ((void *)ADDR, (__mmask16)0xFFFF, \
  9203. (__v16si)(__m512i)INDEX, \
  9204. (__v16sf)(__m512)V1, (int)SCALE)
  9205. #define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  9206. __builtin_ia32_scattersiv16sf ((void *)ADDR, (__mmask16)MASK, \
  9207. (__v16si)(__m512i)INDEX, \
  9208. (__v16sf)(__m512)V1, (int)SCALE)
  9209. #define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
  9210. __builtin_ia32_scattersiv8df ((void *)ADDR, (__mmask8)0xFF, \
  9211. (__v8si)(__m256i)INDEX, \
  9212. (__v8df)(__m512d)V1, (int)SCALE)
  9213. #define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  9214. __builtin_ia32_scattersiv8df ((void *)ADDR, (__mmask8)MASK, \
  9215. (__v8si)(__m256i)INDEX, \
  9216. (__v8df)(__m512d)V1, (int)SCALE)
  9217. #define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
  9218. __builtin_ia32_scatterdiv16sf ((void *)ADDR, (__mmask8)0xFF, \
  9219. (__v8di)(__m512i)INDEX, \
  9220. (__v8sf)(__m256)V1, (int)SCALE)
  9221. #define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
9222. __builtin_ia32_scatterdiv16sf ((void *)ADDR, (__mmask8)MASK, \
  9223. (__v8di)(__m512i)INDEX, \
  9224. (__v8sf)(__m256)V1, (int)SCALE)
  9225. #define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
  9226. __builtin_ia32_scatterdiv8df ((void *)ADDR, (__mmask8)0xFF, \
  9227. (__v8di)(__m512i)INDEX, \
  9228. (__v8df)(__m512d)V1, (int)SCALE)
  9229. #define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  9230. __builtin_ia32_scatterdiv8df ((void *)ADDR, (__mmask8)MASK, \
  9231. (__v8di)(__m512i)INDEX, \
  9232. (__v8df)(__m512d)V1, (int)SCALE)
  9233. #define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
  9234. __builtin_ia32_scattersiv16si ((void *)ADDR, (__mmask16)0xFFFF, \
  9235. (__v16si)(__m512i)INDEX, \
  9236. (__v16si)(__m512i)V1, (int)SCALE)
  9237. #define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  9238. __builtin_ia32_scattersiv16si ((void *)ADDR, (__mmask16)MASK, \
  9239. (__v16si)(__m512i)INDEX, \
  9240. (__v16si)(__m512i)V1, (int)SCALE)
  9241. #define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
  9242. __builtin_ia32_scattersiv8di ((void *)ADDR, (__mmask8)0xFF, \
  9243. (__v8si)(__m256i)INDEX, \
  9244. (__v8di)(__m512i)V1, (int)SCALE)
  9245. #define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  9246. __builtin_ia32_scattersiv8di ((void *)ADDR, (__mmask8)MASK, \
  9247. (__v8si)(__m256i)INDEX, \
  9248. (__v8di)(__m512i)V1, (int)SCALE)
  9249. #define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
  9250. __builtin_ia32_scatterdiv16si ((void *)ADDR, (__mmask8)0xFF, \
  9251. (__v8di)(__m512i)INDEX, \
  9252. (__v8si)(__m256i)V1, (int)SCALE)
  9253. #define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  9254. __builtin_ia32_scatterdiv16si ((void *)ADDR, (__mmask8)MASK, \
  9255. (__v8di)(__m512i)INDEX, \
  9256. (__v8si)(__m256i)V1, (int)SCALE)
  9257. #define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
  9258. __builtin_ia32_scatterdiv8di ((void *)ADDR, (__mmask8)0xFF, \
  9259. (__v8di)(__m512i)INDEX, \
  9260. (__v8di)(__m512i)V1, (int)SCALE)
  9261. #define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  9262. __builtin_ia32_scatterdiv8di ((void *)ADDR, (__mmask8)MASK, \
  9263. (__v8di)(__m512i)INDEX, \
  9264. (__v8di)(__m512i)V1, (int)SCALE)
  9265. #endif
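/* Usage sketch (not part of the original header): the scatters mirror the
   gathers, storing element i to (char *) __addr + __index[i] * __scale; the
   masked forms write only the selected elements.

     void scatter_doubles (double *base, __mmask8 m, __m512i idx, __m512d v)
     {
       _mm512_mask_i64scatter_pd (base, m, idx, v, 8);
     }
*/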
  9266. extern __inline __m512d
  9267. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9268. _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
  9269. {
  9270. return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
  9271. (__v8df) __W,
  9272. (__mmask8) __U);
  9273. }
  9274. extern __inline __m512d
  9275. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9276. _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
  9277. {
  9278. return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
  9279. (__v8df)
  9280. _mm512_setzero_pd (),
  9281. (__mmask8) __U);
  9282. }
  9283. extern __inline void
  9284. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9285. _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
  9286. {
  9287. __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
  9288. (__mmask8) __U);
  9289. }
  9290. extern __inline __m512
  9291. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9292. _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
  9293. {
  9294. return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
  9295. (__v16sf) __W,
  9296. (__mmask16) __U);
  9297. }
  9298. extern __inline __m512
  9299. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9300. _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
  9301. {
  9302. return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
  9303. (__v16sf)
  9304. _mm512_setzero_ps (),
  9305. (__mmask16) __U);
  9306. }
  9307. extern __inline void
  9308. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9309. _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
  9310. {
  9311. __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
  9312. (__mmask16) __U);
  9313. }
  9314. extern __inline __m512i
  9315. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9316. _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
  9317. {
  9318. return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
  9319. (__v8di) __W,
  9320. (__mmask8) __U);
  9321. }
  9322. extern __inline __m512i
  9323. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9324. _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
  9325. {
  9326. return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
  9327. (__v8di)
  9328. _mm512_setzero_si512 (),
  9329. (__mmask8) __U);
  9330. }
  9331. extern __inline void
  9332. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9333. _mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
  9334. {
  9335. __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
  9336. (__mmask8) __U);
  9337. }
  9338. extern __inline __m512i
  9339. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9340. _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
  9341. {
  9342. return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
  9343. (__v16si) __W,
  9344. (__mmask16) __U);
  9345. }
  9346. extern __inline __m512i
  9347. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9348. _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
  9349. {
  9350. return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
  9351. (__v16si)
  9352. _mm512_setzero_si512 (),
  9353. (__mmask16) __U);
  9354. }
  9355. extern __inline void
  9356. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9357. _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
  9358. {
  9359. __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
  9360. (__mmask16) __U);
  9361. }
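/* Usage sketch (not part of the original header): compress left-packs the
   elements selected by the mask into the low positions; compressstoreu then
   writes only those packed elements (popcount of the mask) to unaligned
   memory, a common way to filter a vector into a dense array.

     int store_selected (float *dst, __mmask16 m, __m512 v)
     {
       _mm512_mask_compressstoreu_ps (dst, m, v);
       return __builtin_popcount (m);   /* number of floats written */
     }
*/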
  9362. extern __inline __m512d
  9363. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9364. _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
  9365. {
  9366. return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
  9367. (__v8df) __W,
  9368. (__mmask8) __U);
  9369. }
  9370. extern __inline __m512d
  9371. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9372. _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
  9373. {
  9374. return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A,
  9375. (__v8df)
  9376. _mm512_setzero_pd (),
  9377. (__mmask8) __U);
  9378. }
  9379. extern __inline __m512d
  9380. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9381. _mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P)
  9382. {
  9383. return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P,
  9384. (__v8df) __W,
  9385. (__mmask8) __U);
  9386. }
  9387. extern __inline __m512d
  9388. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9389. _mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
  9390. {
  9391. return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P,
  9392. (__v8df)
  9393. _mm512_setzero_pd (),
  9394. (__mmask8) __U);
  9395. }
  9396. extern __inline __m512
  9397. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9398. _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
  9399. {
  9400. return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
  9401. (__v16sf) __W,
  9402. (__mmask16) __U);
  9403. }
  9404. extern __inline __m512
  9405. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9406. _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
  9407. {
  9408. return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A,
  9409. (__v16sf)
  9410. _mm512_setzero_ps (),
  9411. (__mmask16) __U);
  9412. }
  9413. extern __inline __m512
  9414. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9415. _mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P)
  9416. {
  9417. return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P,
  9418. (__v16sf) __W,
  9419. (__mmask16) __U);
  9420. }
  9421. extern __inline __m512
  9422. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9423. _mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P)
  9424. {
  9425. return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P,
  9426. (__v16sf)
  9427. _mm512_setzero_ps (),
  9428. (__mmask16) __U);
  9429. }
  9430. extern __inline __m512i
  9431. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9432. _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
  9433. {
  9434. return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
  9435. (__v8di) __W,
  9436. (__mmask8) __U);
  9437. }
  9438. extern __inline __m512i
  9439. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9440. _mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
  9441. {
  9442. return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A,
  9443. (__v8di)
  9444. _mm512_setzero_si512 (),
  9445. (__mmask8) __U);
  9446. }
  9447. extern __inline __m512i
  9448. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9449. _mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
  9450. {
  9451. return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P,
  9452. (__v8di) __W,
  9453. (__mmask8) __U);
  9454. }
  9455. extern __inline __m512i
  9456. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9457. _mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
  9458. {
  9459. return (__m512i)
  9460. __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P,
  9461. (__v8di)
  9462. _mm512_setzero_si512 (),
  9463. (__mmask8) __U);
  9464. }
  9465. extern __inline __m512i
  9466. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9467. _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
  9468. {
  9469. return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
  9470. (__v16si) __W,
  9471. (__mmask16) __U);
  9472. }
  9473. extern __inline __m512i
  9474. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9475. _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
  9476. {
  9477. return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A,
  9478. (__v16si)
  9479. _mm512_setzero_si512 (),
  9480. (__mmask16) __U);
  9481. }
  9482. extern __inline __m512i
  9483. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9484. _mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
  9485. {
  9486. return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P,
  9487. (__v16si) __W,
  9488. (__mmask16) __U);
  9489. }
  9490. extern __inline __m512i
  9491. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9492. _mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
  9493. {
  9494. return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P,
  9495. (__v16si)
  9496. _mm512_setzero_si512
  9497. (), (__mmask16) __U);
  9498. }
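/* Illustrative note, not part of the original header: the expand and
   expandloadu intrinsics above take the lowest contiguous elements of the
   source register (or of memory at __P) and scatter them into the
   destination lanes whose mask bit is set; merge-masking keeps __W in the
   inactive lanes, zero-masking clears them.  A minimal sketch with
   placeholder names:

     __m512 dst = _mm512_maskz_expand_ps ((__mmask16) 0x00FF, src);
     // lanes 0..7 receive src[0..7]; lanes 8..15 are zeroed.  */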
  9499. /* Mask arithmetic operations */
  9500. #define _kand_mask16 _mm512_kand
  9501. #define _kandn_mask16 _mm512_kandn
  9502. #define _knot_mask16 _mm512_knot
  9503. #define _kor_mask16 _mm512_kor
  9504. #define _kxnor_mask16 _mm512_kxnor
  9505. #define _kxor_mask16 _mm512_kxor
  9506. extern __inline unsigned char
  9507. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9508. _kortest_mask16_u8 (__mmask16 __A, __mmask16 __B, unsigned char *__CF)
  9509. {
  9510. *__CF = (unsigned char) __builtin_ia32_kortestchi (__A, __B);
  9511. return (unsigned char) __builtin_ia32_kortestzhi (__A, __B);
  9512. }
  9513. extern __inline unsigned char
  9514. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9515. _kortestz_mask16_u8 (__mmask16 __A, __mmask16 __B)
  9516. {
  9517. return (unsigned char) __builtin_ia32_kortestzhi ((__mmask16) __A,
  9518. (__mmask16) __B);
  9519. }
  9520. extern __inline unsigned char
  9521. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9522. _kortestc_mask16_u8 (__mmask16 __A, __mmask16 __B)
  9523. {
  9524. return (unsigned char) __builtin_ia32_kortestchi ((__mmask16) __A,
  9525. (__mmask16) __B);
  9526. }
  9527. extern __inline unsigned int
  9528. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9529. _cvtmask16_u32 (__mmask16 __A)
  9530. {
  9531. return (unsigned int) __builtin_ia32_kmovw ((__mmask16 ) __A);
  9532. }
  9533. extern __inline __mmask16
  9534. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9535. _cvtu32_mask16 (unsigned int __A)
  9536. {
  9537. return (__mmask16) __builtin_ia32_kmovw ((__mmask16 ) __A);
  9538. }
  9539. extern __inline __mmask16
  9540. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9541. _load_mask16 (__mmask16 *__A)
  9542. {
  9543. return (__mmask16) __builtin_ia32_kmovw (*(__mmask16 *) __A);
  9544. }
  9545. extern __inline void
  9546. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9547. _store_mask16 (__mmask16 *__A, __mmask16 __B)
  9548. {
  9549. *(__mmask16 *) __A = __builtin_ia32_kmovw (__B);
  9550. }
  9551. extern __inline __mmask16
  9552. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9553. _mm512_kand (__mmask16 __A, __mmask16 __B)
  9554. {
  9555. return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
  9556. }
  9557. extern __inline __mmask16
  9558. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9559. _mm512_kandn (__mmask16 __A, __mmask16 __B)
  9560. {
  9561. return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A,
  9562. (__mmask16) __B);
  9563. }
  9564. extern __inline __mmask16
  9565. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9566. _mm512_kor (__mmask16 __A, __mmask16 __B)
  9567. {
  9568. return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
  9569. }
  9570. extern __inline int
  9571. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9572. _mm512_kortestz (__mmask16 __A, __mmask16 __B)
  9573. {
  9574. return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
  9575. (__mmask16) __B);
  9576. }
  9577. extern __inline int
  9578. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9579. _mm512_kortestc (__mmask16 __A, __mmask16 __B)
  9580. {
  9581. return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
  9582. (__mmask16) __B);
  9583. }
  9584. extern __inline __mmask16
  9585. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9586. _mm512_kxnor (__mmask16 __A, __mmask16 __B)
  9587. {
  9588. return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
  9589. }
  9590. extern __inline __mmask16
  9591. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9592. _mm512_kxor (__mmask16 __A, __mmask16 __B)
  9593. {
  9594. return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
  9595. }
  9596. extern __inline __mmask16
  9597. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9598. _mm512_knot (__mmask16 __A)
  9599. {
  9600. return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
  9601. }
  9602. extern __inline __mmask16
  9603. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9604. _mm512_kunpackb (__mmask16 __A, __mmask16 __B)
  9605. {
  9606. return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
  9607. }
  9608. extern __inline __mmask16
  9609. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9610. _kunpackb_mask16 (__mmask8 __A, __mmask8 __B)
  9611. {
  9612. return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
  9613. }
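/* Illustrative note, not part of the original header: the mask intrinsics
   above operate on 16-bit opmask values.  _mm512_kandn computes
   (~__A) & __B, _kortestz_mask16_u8 returns 1 when __A | __B is all zeros,
   and _kortestc_mask16_u8 returns 1 when __A | __B is all ones.  A minimal
   sketch with placeholder constants:

     __mmask16 m = _kand_mask16 (0xF0F0, _knot_mask16 (0x00FF));  // 0xF000  */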
  9614. #ifdef __OPTIMIZE__
  9615. extern __inline __m512i
  9616. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9617. _mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
  9618. const int __imm)
  9619. {
  9620. return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
  9621. (__v4si) __D,
  9622. __imm,
  9623. (__v16si)
  9624. _mm512_setzero_si512 (),
  9625. __B);
  9626. }
  9627. extern __inline __m512
  9628. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9629. _mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D,
  9630. const int __imm)
  9631. {
  9632. return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
  9633. (__v4sf) __D,
  9634. __imm,
  9635. (__v16sf)
  9636. _mm512_setzero_ps (), __B);
  9637. }
  9638. extern __inline __m512i
  9639. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9640. _mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C,
  9641. __m128i __D, const int __imm)
  9642. {
  9643. return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
  9644. (__v4si) __D,
  9645. __imm,
  9646. (__v16si) __A,
  9647. __B);
  9648. }
  9649. extern __inline __m512
  9650. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9651. _mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
  9652. __m128 __D, const int __imm)
  9653. {
  9654. return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
  9655. (__v4sf) __D,
  9656. __imm,
  9657. (__v16sf) __A, __B);
  9658. }
  9659. #else
  9660. #define _mm512_maskz_insertf32x4(A, X, Y, C) \
  9661. ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
  9662. (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(), \
  9663. (__mmask16)(A)))
  9664. #define _mm512_maskz_inserti32x4(A, X, Y, C) \
  9665. ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
  9666. (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (), \
  9667. (__mmask16)(A)))
  9668. #define _mm512_mask_insertf32x4(A, B, X, Y, C) \
  9669. ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
  9670. (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A), \
  9671. (__mmask16)(B)))
  9672. #define _mm512_mask_inserti32x4(A, B, X, Y, C) \
  9673. ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
  9674. (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A), \
  9675. (__mmask16)(B)))
  9676. #endif
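/* Illustrative note, not part of the original header: inserti32x4 and
   insertf32x4 replace one 128-bit lane of the 512-bit operand, selected by
   the immediate (0..3), with the 128-bit source; the masked forms then
   blend the result element-wise with __A (merge) or zero (maskz).  A
   minimal sketch with placeholder names:

     __m512 r = _mm512_mask_insertf32x4 (old, (__mmask16) -1, old, part, 2);
     // lane 2 (elements 8..11) now holds the four floats of `part`.  */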
  9677. extern __inline __m512i
  9678. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9679. _mm512_max_epi64 (__m512i __A, __m512i __B)
  9680. {
  9681. return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
  9682. (__v8di) __B,
  9683. (__v8di)
  9684. _mm512_undefined_epi32 (),
  9685. (__mmask8) -1);
  9686. }
  9687. extern __inline __m512i
  9688. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9689. _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
  9690. {
  9691. return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
  9692. (__v8di) __B,
  9693. (__v8di)
  9694. _mm512_setzero_si512 (),
  9695. __M);
  9696. }
  9697. extern __inline __m512i
  9698. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9699. _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
  9700. {
  9701. return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
  9702. (__v8di) __B,
  9703. (__v8di) __W, __M);
  9704. }
  9705. extern __inline __m512i
  9706. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9707. _mm512_min_epi64 (__m512i __A, __m512i __B)
  9708. {
  9709. return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
  9710. (__v8di) __B,
  9711. (__v8di)
  9712. _mm512_undefined_epi32 (),
  9713. (__mmask8) -1);
  9714. }
  9715. extern __inline __m512i
  9716. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9717. _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
  9718. {
  9719. return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
  9720. (__v8di) __B,
  9721. (__v8di) __W, __M);
  9722. }
  9723. extern __inline __m512i
  9724. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9725. _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
  9726. {
  9727. return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
  9728. (__v8di) __B,
  9729. (__v8di)
  9730. _mm512_setzero_si512 (),
  9731. __M);
  9732. }
  9733. extern __inline __m512i
  9734. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9735. _mm512_max_epu64 (__m512i __A, __m512i __B)
  9736. {
  9737. return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
  9738. (__v8di) __B,
  9739. (__v8di)
  9740. _mm512_undefined_epi32 (),
  9741. (__mmask8) -1);
  9742. }
  9743. extern __inline __m512i
  9744. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9745. _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
  9746. {
  9747. return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
  9748. (__v8di) __B,
  9749. (__v8di)
  9750. _mm512_setzero_si512 (),
  9751. __M);
  9752. }
  9753. extern __inline __m512i
  9754. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9755. _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
  9756. {
  9757. return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
  9758. (__v8di) __B,
  9759. (__v8di) __W, __M);
  9760. }
  9761. extern __inline __m512i
  9762. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9763. _mm512_min_epu64 (__m512i __A, __m512i __B)
  9764. {
  9765. return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
  9766. (__v8di) __B,
  9767. (__v8di)
  9768. _mm512_undefined_epi32 (),
  9769. (__mmask8) -1);
  9770. }
  9771. extern __inline __m512i
  9772. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9773. _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
  9774. {
  9775. return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
  9776. (__v8di) __B,
  9777. (__v8di) __W, __M);
  9778. }
  9779. extern __inline __m512i
  9780. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9781. _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
  9782. {
  9783. return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
  9784. (__v8di) __B,
  9785. (__v8di)
  9786. _mm512_setzero_si512 (),
  9787. __M);
  9788. }
  9789. extern __inline __m512i
  9790. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9791. _mm512_max_epi32 (__m512i __A, __m512i __B)
  9792. {
  9793. return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
  9794. (__v16si) __B,
  9795. (__v16si)
  9796. _mm512_undefined_epi32 (),
  9797. (__mmask16) -1);
  9798. }
  9799. extern __inline __m512i
  9800. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9801. _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
  9802. {
  9803. return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
  9804. (__v16si) __B,
  9805. (__v16si)
  9806. _mm512_setzero_si512 (),
  9807. __M);
  9808. }
  9809. extern __inline __m512i
  9810. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9811. _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
  9812. {
  9813. return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
  9814. (__v16si) __B,
  9815. (__v16si) __W, __M);
  9816. }
  9817. extern __inline __m512i
  9818. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9819. _mm512_min_epi32 (__m512i __A, __m512i __B)
  9820. {
  9821. return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
  9822. (__v16si) __B,
  9823. (__v16si)
  9824. _mm512_undefined_epi32 (),
  9825. (__mmask16) -1);
  9826. }
  9827. extern __inline __m512i
  9828. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9829. _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
  9830. {
  9831. return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
  9832. (__v16si) __B,
  9833. (__v16si)
  9834. _mm512_setzero_si512 (),
  9835. __M);
  9836. }
  9837. extern __inline __m512i
  9838. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9839. _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
  9840. {
  9841. return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
  9842. (__v16si) __B,
  9843. (__v16si) __W, __M);
  9844. }
  9845. extern __inline __m512i
  9846. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9847. _mm512_max_epu32 (__m512i __A, __m512i __B)
  9848. {
  9849. return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
  9850. (__v16si) __B,
  9851. (__v16si)
  9852. _mm512_undefined_epi32 (),
  9853. (__mmask16) -1);
  9854. }
  9855. extern __inline __m512i
  9856. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9857. _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
  9858. {
  9859. return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
  9860. (__v16si) __B,
  9861. (__v16si)
  9862. _mm512_setzero_si512 (),
  9863. __M);
  9864. }
  9865. extern __inline __m512i
  9866. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9867. _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
  9868. {
  9869. return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
  9870. (__v16si) __B,
  9871. (__v16si) __W, __M);
  9872. }
  9873. extern __inline __m512i
  9874. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9875. _mm512_min_epu32 (__m512i __A, __m512i __B)
  9876. {
  9877. return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
  9878. (__v16si) __B,
  9879. (__v16si)
  9880. _mm512_undefined_epi32 (),
  9881. (__mmask16) -1);
  9882. }
  9883. extern __inline __m512i
  9884. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9885. _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
  9886. {
  9887. return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
  9888. (__v16si) __B,
  9889. (__v16si)
  9890. _mm512_setzero_si512 (),
  9891. __M);
  9892. }
  9893. extern __inline __m512i
  9894. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9895. _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
  9896. {
  9897. return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
  9898. (__v16si) __B,
  9899. (__v16si) __W, __M);
  9900. }
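/* Illustrative note, not part of the original header: the min/max
   intrinsics above are elementwise over 8 signed or unsigned 64-bit lanes
   (epi64/epu64) or 16 signed or unsigned 32-bit lanes (epi32/epu32); the
   _mask forms merge inactive lanes from __W and the _maskz forms zero
   them.  A minimal sketch with placeholder names:

     __m512i hi = _mm512_mask_max_epi32 (fallback, k, a, b);
     // lanes with their k bit set get max(a, b); the rest keep `fallback`.  */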
  9901. extern __inline __m512
  9902. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9903. _mm512_unpacklo_ps (__m512 __A, __m512 __B)
  9904. {
  9905. return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
  9906. (__v16sf) __B,
  9907. (__v16sf)
  9908. _mm512_undefined_ps (),
  9909. (__mmask16) -1);
  9910. }
  9911. extern __inline __m512
  9912. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9913. _mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
  9914. {
  9915. return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
  9916. (__v16sf) __B,
  9917. (__v16sf) __W,
  9918. (__mmask16) __U);
  9919. }
  9920. extern __inline __m512
  9921. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9922. _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
  9923. {
  9924. return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
  9925. (__v16sf) __B,
  9926. (__v16sf)
  9927. _mm512_setzero_ps (),
  9928. (__mmask16) __U);
  9929. }
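/* Illustrative note, not part of the original header: _mm512_unpacklo_ps
   interleaves the low halves of __A and __B within each 128-bit lane, so
   per lane the result is { a0, b0, a1, b1 }; the masked variants merge
   with __W or zero inactive lanes as usual.  */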
  9930. #ifdef __OPTIMIZE__
  9931. extern __inline __m128d
  9932. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9933. _mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
  9934. {
  9935. return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
  9936. (__v2df) __B,
  9937. __R);
  9938. }
  9939. extern __inline __m128d
  9940. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9941. _mm_mask_max_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
  9942. __m128d __B, const int __R)
  9943. {
  9944. return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
  9945. (__v2df) __B,
  9946. (__v2df) __W,
  9947. (__mmask8) __U, __R);
  9948. }
  9949. extern __inline __m128d
  9950. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9951. _mm_maskz_max_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
  9952. const int __R)
  9953. {
  9954. return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
  9955. (__v2df) __B,
  9956. (__v2df)
  9957. _mm_setzero_pd (),
  9958. (__mmask8) __U, __R);
  9959. }
  9960. extern __inline __m128
  9961. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9962. _mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
  9963. {
  9964. return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
  9965. (__v4sf) __B,
  9966. __R);
  9967. }
  9968. extern __inline __m128
  9969. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9970. _mm_mask_max_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
  9971. __m128 __B, const int __R)
  9972. {
  9973. return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
  9974. (__v4sf) __B,
  9975. (__v4sf) __W,
  9976. (__mmask8) __U, __R);
  9977. }
  9978. extern __inline __m128
  9979. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9980. _mm_maskz_max_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
  9981. const int __R)
  9982. {
  9983. return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
  9984. (__v4sf) __B,
  9985. (__v4sf)
  9986. _mm_setzero_ps (),
  9987. (__mmask8) __U, __R);
  9988. }
  9989. extern __inline __m128d
  9990. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9991. _mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
  9992. {
  9993. return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
  9994. (__v2df) __B,
  9995. __R);
  9996. }
  9997. extern __inline __m128d
  9998. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9999. _mm_mask_min_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
  10000. __m128d __B, const int __R)
  10001. {
  10002. return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
  10003. (__v2df) __B,
  10004. (__v2df) __W,
  10005. (__mmask8) __U, __R);
  10006. }
  10007. extern __inline __m128d
  10008. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10009. _mm_maskz_min_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
  10010. const int __R)
  10011. {
  10012. return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
  10013. (__v2df) __B,
  10014. (__v2df)
  10015. _mm_setzero_pd (),
  10016. (__mmask8) __U, __R);
  10017. }
  10018. extern __inline __m128
  10019. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10020. _mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
  10021. {
  10022. return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
  10023. (__v4sf) __B,
  10024. __R);
  10025. }
  10026. extern __inline __m128
  10027. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10028. _mm_mask_min_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
  10029. __m128 __B, const int __R)
  10030. {
  10031. return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
  10032. (__v4sf) __B,
  10033. (__v4sf) __W,
  10034. (__mmask8) __U, __R);
  10035. }
  10036. extern __inline __m128
  10037. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10038. _mm_maskz_min_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
  10039. const int __R)
  10040. {
  10041. return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
  10042. (__v4sf) __B,
  10043. (__v4sf)
  10044. _mm_setzero_ps (),
  10045. (__mmask8) __U, __R);
  10046. }
  10047. #else
  10048. #define _mm_max_round_sd(A, B, C) \
  10049. (__m128d)__builtin_ia32_maxsd_round(A, B, C)
  10050. #define _mm_mask_max_round_sd(W, U, A, B, C) \
  10051. (__m128d)__builtin_ia32_maxsd_mask_round(A, B, W, U, C)
  10052. #define _mm_maskz_max_round_sd(U, A, B, C) \
  10053. (__m128d)__builtin_ia32_maxsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
  10054. #define _mm_max_round_ss(A, B, C) \
  10055. (__m128)__builtin_ia32_maxss_round(A, B, C)
  10056. #define _mm_mask_max_round_ss(W, U, A, B, C) \
  10057. (__m128)__builtin_ia32_maxss_mask_round(A, B, W, U, C)
  10058. #define _mm_maskz_max_round_ss(U, A, B, C) \
  10059. (__m128)__builtin_ia32_maxss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
  10060. #define _mm_min_round_sd(A, B, C) \
  10061. (__m128d)__builtin_ia32_minsd_round(A, B, C)
  10062. #define _mm_mask_min_round_sd(W, U, A, B, C) \
  10063. (__m128d)__builtin_ia32_minsd_mask_round(A, B, W, U, C)
  10064. #define _mm_maskz_min_round_sd(U, A, B, C) \
  10065. (__m128d)__builtin_ia32_minsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
  10066. #define _mm_min_round_ss(A, B, C) \
  10067. (__m128)__builtin_ia32_minss_round(A, B, C)
  10068. #define _mm_mask_min_round_ss(W, U, A, B, C) \
  10069. (__m128)__builtin_ia32_minss_mask_round(A, B, W, U, C)
  10070. #define _mm_maskz_min_round_ss(U, A, B, C) \
  10071. (__m128)__builtin_ia32_minss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
  10072. #endif
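/* Illustrative note, not part of the original header: the *_round_sd/ss
   min/max forms above operate on element 0 only, with the upper elements
   copied from __A, and take an explicit exception-control argument; since
   min/max never rounds, the useful value is _MM_FROUND_NO_EXC (suppress
   exceptions) or _MM_FROUND_CUR_DIRECTION.  A minimal sketch:

     __m128d m = _mm_max_round_sd (x, y, _MM_FROUND_NO_EXC);  */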
  10073. extern __inline __m512d
  10074. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10075. _mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
  10076. {
  10077. return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
  10078. (__v8df) __W,
  10079. (__mmask8) __U);
  10080. }
  10081. extern __inline __m512
  10082. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10083. _mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
  10084. {
  10085. return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
  10086. (__v16sf) __W,
  10087. (__mmask16) __U);
  10088. }
  10089. extern __inline __m512i
  10090. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10091. _mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
  10092. {
  10093. return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
  10094. (__v8di) __W,
  10095. (__mmask8) __U);
  10096. }
  10097. extern __inline __m512i
  10098. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10099. _mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
  10100. {
  10101. return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
  10102. (__v16si) __W,
  10103. (__mmask16) __U);
  10104. }
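/* Illustrative note, not part of the original header: the blend intrinsics
   above select per element between the two vector operands: a set mask bit
   picks the element from the second vector operand (__W), a clear bit
   keeps the element from __A.  A minimal sketch:

     __m512d r = _mm512_mask_blend_pd ((__mmask8) 0x0F, a, b);
     // low 4 doubles come from b, high 4 from a.  */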
  10105. #ifdef __OPTIMIZE__
  10106. extern __inline __m128d
  10107. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10108. _mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
  10109. {
  10110. return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
  10111. (__v2df) __A,
  10112. (__v2df) __B,
  10113. __R);
  10114. }
  10115. extern __inline __m128
  10116. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10117. _mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
  10118. {
  10119. return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
  10120. (__v4sf) __A,
  10121. (__v4sf) __B,
  10122. __R);
  10123. }
  10124. extern __inline __m128d
  10125. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10126. _mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
  10127. {
  10128. return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
  10129. (__v2df) __A,
  10130. -(__v2df) __B,
  10131. __R);
  10132. }
  10133. extern __inline __m128
  10134. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10135. _mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
  10136. {
  10137. return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
  10138. (__v4sf) __A,
  10139. -(__v4sf) __B,
  10140. __R);
  10141. }
  10142. extern __inline __m128d
  10143. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10144. _mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
  10145. {
  10146. return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
  10147. -(__v2df) __A,
  10148. (__v2df) __B,
  10149. __R);
  10150. }
  10151. extern __inline __m128
  10152. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10153. _mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
  10154. {
  10155. return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
  10156. -(__v4sf) __A,
  10157. (__v4sf) __B,
  10158. __R);
  10159. }
  10160. extern __inline __m128d
  10161. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10162. _mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
  10163. {
  10164. return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
  10165. -(__v2df) __A,
  10166. -(__v2df) __B,
  10167. __R);
  10168. }
  10169. extern __inline __m128
  10170. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10171. _mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
  10172. {
  10173. return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
  10174. -(__v4sf) __A,
  10175. -(__v4sf) __B,
  10176. __R);
  10177. }
  10178. #else
  10179. #define _mm_fmadd_round_sd(A, B, C, R) \
  10180. (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, C, R)
  10181. #define _mm_fmadd_round_ss(A, B, C, R) \
  10182. (__m128)__builtin_ia32_vfmaddss3_round(A, B, C, R)
  10183. #define _mm_fmsub_round_sd(A, B, C, R) \
  10184. (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, -(C), R)
  10185. #define _mm_fmsub_round_ss(A, B, C, R) \
  10186. (__m128)__builtin_ia32_vfmaddss3_round(A, B, -(C), R)
  10187. #define _mm_fnmadd_round_sd(A, B, C, R) \
  10188. (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), C, R)
  10189. #define _mm_fnmadd_round_ss(A, B, C, R) \
  10190. (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), C, R)
  10191. #define _mm_fnmsub_round_sd(A, B, C, R) \
  10192. (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), -(C), R)
  10193. #define _mm_fnmsub_round_ss(A, B, C, R) \
  10194. (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), -(C), R)
  10195. #endif
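/* Illustrative note, not part of the original header: the scalar FMA
   family above computes, for element 0 only, fmadd = __W*__A + __B,
   fmsub = __W*__A - __B, fnmadd = -(__W*__A) + __B and
   fnmsub = -(__W*__A) - __B, under an explicit rounding mode; the upper
   element(s) are taken from __W.  A minimal sketch:

     __m128d r = _mm_fmadd_round_sd (w, a, b,
                     _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);  */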
  10196. extern __inline __m128d
  10197. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10198. _mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
  10199. {
  10200. return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
  10201. (__v2df) __A,
  10202. (__v2df) __B,
  10203. (__mmask8) __U,
  10204. _MM_FROUND_CUR_DIRECTION);
  10205. }
  10206. extern __inline __m128
  10207. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10208. _mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  10209. {
  10210. return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
  10211. (__v4sf) __A,
  10212. (__v4sf) __B,
  10213. (__mmask8) __U,
  10214. _MM_FROUND_CUR_DIRECTION);
  10215. }
  10216. extern __inline __m128d
  10217. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10218. _mm_mask3_fmadd_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
  10219. {
  10220. return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
  10221. (__v2df) __A,
  10222. (__v2df) __B,
  10223. (__mmask8) __U,
  10224. _MM_FROUND_CUR_DIRECTION);
  10225. }
  10226. extern __inline __m128
  10227. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10228. _mm_mask3_fmadd_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
  10229. {
  10230. return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
  10231. (__v4sf) __A,
  10232. (__v4sf) __B,
  10233. (__mmask8) __U,
  10234. _MM_FROUND_CUR_DIRECTION);
  10235. }
  10236. extern __inline __m128d
  10237. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10238. _mm_maskz_fmadd_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
  10239. {
  10240. return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
  10241. (__v2df) __A,
  10242. (__v2df) __B,
  10243. (__mmask8) __U,
  10244. _MM_FROUND_CUR_DIRECTION);
  10245. }
  10246. extern __inline __m128
  10247. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10248. _mm_maskz_fmadd_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
  10249. {
  10250. return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
  10251. (__v4sf) __A,
  10252. (__v4sf) __B,
  10253. (__mmask8) __U,
  10254. _MM_FROUND_CUR_DIRECTION);
  10255. }
  10256. extern __inline __m128d
  10257. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10258. _mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
  10259. {
  10260. return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
  10261. (__v2df) __A,
  10262. -(__v2df) __B,
  10263. (__mmask8) __U,
  10264. _MM_FROUND_CUR_DIRECTION);
  10265. }
  10266. extern __inline __m128
  10267. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10268. _mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  10269. {
  10270. return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
  10271. (__v4sf) __A,
  10272. -(__v4sf) __B,
  10273. (__mmask8) __U,
  10274. _MM_FROUND_CUR_DIRECTION);
  10275. }
  10276. extern __inline __m128d
  10277. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10278. _mm_mask3_fmsub_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
  10279. {
  10280. return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
  10281. (__v2df) __A,
  10282. (__v2df) __B,
  10283. (__mmask8) __U,
  10284. _MM_FROUND_CUR_DIRECTION);
  10285. }
  10286. extern __inline __m128
  10287. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10288. _mm_mask3_fmsub_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
  10289. {
  10290. return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
  10291. (__v4sf) __A,
  10292. (__v4sf) __B,
  10293. (__mmask8) __U,
  10294. _MM_FROUND_CUR_DIRECTION);
  10295. }
  10296. extern __inline __m128d
  10297. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10298. _mm_maskz_fmsub_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
  10299. {
  10300. return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
  10301. (__v2df) __A,
  10302. -(__v2df) __B,
  10303. (__mmask8) __U,
  10304. _MM_FROUND_CUR_DIRECTION);
  10305. }
  10306. extern __inline __m128
  10307. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10308. _mm_maskz_fmsub_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
  10309. {
  10310. return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
  10311. (__v4sf) __A,
  10312. -(__v4sf) __B,
  10313. (__mmask8) __U,
  10314. _MM_FROUND_CUR_DIRECTION);
  10315. }
  10316. extern __inline __m128d
  10317. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10318. _mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
  10319. {
  10320. return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
  10321. -(__v2df) __A,
  10322. (__v2df) __B,
  10323. (__mmask8) __U,
  10324. _MM_FROUND_CUR_DIRECTION);
  10325. }
  10326. extern __inline __m128
  10327. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10328. _mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  10329. {
  10330. return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
  10331. -(__v4sf) __A,
  10332. (__v4sf) __B,
  10333. (__mmask8) __U,
  10334. _MM_FROUND_CUR_DIRECTION);
  10335. }
  10336. extern __inline __m128d
  10337. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10338. _mm_mask3_fnmadd_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
  10339. {
  10340. return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
  10341. -(__v2df) __A,
  10342. (__v2df) __B,
  10343. (__mmask8) __U,
  10344. _MM_FROUND_CUR_DIRECTION);
  10345. }
  10346. extern __inline __m128
  10347. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10348. _mm_mask3_fnmadd_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
  10349. {
  10350. return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
  10351. -(__v4sf) __A,
  10352. (__v4sf) __B,
  10353. (__mmask8) __U,
  10354. _MM_FROUND_CUR_DIRECTION);
  10355. }
  10356. extern __inline __m128d
  10357. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10358. _mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
  10359. {
  10360. return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
  10361. -(__v2df) __A,
  10362. (__v2df) __B,
  10363. (__mmask8) __U,
  10364. _MM_FROUND_CUR_DIRECTION);
  10365. }
  10366. extern __inline __m128
  10367. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10368. _mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
  10369. {
  10370. return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
  10371. -(__v4sf) __A,
  10372. (__v4sf) __B,
  10373. (__mmask8) __U,
  10374. _MM_FROUND_CUR_DIRECTION);
  10375. }
  10376. extern __inline __m128d
  10377. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10378. _mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
  10379. {
  10380. return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
  10381. -(__v2df) __A,
  10382. -(__v2df) __B,
  10383. (__mmask8) __U,
  10384. _MM_FROUND_CUR_DIRECTION);
  10385. }
  10386. extern __inline __m128
  10387. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10388. _mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  10389. {
  10390. return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
  10391. -(__v4sf) __A,
  10392. -(__v4sf) __B,
  10393. (__mmask8) __U,
  10394. _MM_FROUND_CUR_DIRECTION);
  10395. }
  10396. extern __inline __m128d
  10397. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10398. _mm_mask3_fnmsub_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
  10399. {
  10400. return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
  10401. -(__v2df) __A,
  10402. (__v2df) __B,
  10403. (__mmask8) __U,
  10404. _MM_FROUND_CUR_DIRECTION);
  10405. }
  10406. extern __inline __m128
  10407. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10408. _mm_mask3_fnmsub_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
  10409. {
  10410. return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
  10411. -(__v4sf) __A,
  10412. (__v4sf) __B,
  10413. (__mmask8) __U,
  10414. _MM_FROUND_CUR_DIRECTION);
  10415. }
  10416. extern __inline __m128d
  10417. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10418. _mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
  10419. {
  10420. return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
  10421. -(__v2df) __A,
  10422. -(__v2df) __B,
  10423. (__mmask8) __U,
  10424. _MM_FROUND_CUR_DIRECTION);
  10425. }
  10426. extern __inline __m128
  10427. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10428. _mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
  10429. {
  10430. return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
  10431. -(__v4sf) __A,
  10432. -(__v4sf) __B,
  10433. (__mmask8) __U,
  10434. _MM_FROUND_CUR_DIRECTION);
  10435. }
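/* Illustrative note, not part of the original header: the masked scalar
   FMA forms above use only bit 0 of __U.  _mm_mask_fmadd_sd keeps __W[0]
   unchanged when that bit is clear, the _maskz forms write zero instead,
   and the _mask3 forms keep the third (addend) operand.  A minimal sketch:

     __m128d r = _mm_mask_fmadd_sd (w, 0x1, a, b);
     // r[0] = w[0]*a[0] + b[0]; r[1] = w[1].  */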
  10436. #ifdef __OPTIMIZE__
  10437. extern __inline __m128d
  10438. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10439. _mm_mask_fmadd_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
  10440. const int __R)
  10441. {
  10442. return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
  10443. (__v2df) __A,
  10444. (__v2df) __B,
  10445. (__mmask8) __U, __R);
  10446. }
  10447. extern __inline __m128
  10448. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10449. _mm_mask_fmadd_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
  10450. const int __R)
  10451. {
  10452. return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
  10453. (__v4sf) __A,
  10454. (__v4sf) __B,
  10455. (__mmask8) __U, __R);
  10456. }
  10457. extern __inline __m128d
  10458. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10459. _mm_mask3_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
  10460. const int __R)
  10461. {
  10462. return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
  10463. (__v2df) __A,
  10464. (__v2df) __B,
  10465. (__mmask8) __U, __R);
  10466. }
  10467. extern __inline __m128
  10468. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10469. _mm_mask3_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
  10470. const int __R)
  10471. {
  10472. return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
  10473. (__v4sf) __A,
  10474. (__v4sf) __B,
  10475. (__mmask8) __U, __R);
  10476. }
  10477. extern __inline __m128d
  10478. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10479. _mm_maskz_fmadd_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
  10480. const int __R)
  10481. {
  10482. return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
  10483. (__v2df) __A,
  10484. (__v2df) __B,
  10485. (__mmask8) __U, __R);
  10486. }
  10487. extern __inline __m128
  10488. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10489. _mm_maskz_fmadd_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
  10490. const int __R)
  10491. {
  10492. return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
  10493. (__v4sf) __A,
  10494. (__v4sf) __B,
  10495. (__mmask8) __U, __R);
  10496. }
  10497. extern __inline __m128d
  10498. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10499. _mm_mask_fmsub_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
  10500. const int __R)
  10501. {
  10502. return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
  10503. (__v2df) __A,
  10504. -(__v2df) __B,
  10505. (__mmask8) __U, __R);
  10506. }
  10507. extern __inline __m128
  10508. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10509. _mm_mask_fmsub_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
  10510. const int __R)
  10511. {
  10512. return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
  10513. (__v4sf) __A,
  10514. -(__v4sf) __B,
  10515. (__mmask8) __U, __R);
  10516. }
  10517. extern __inline __m128d
  10518. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10519. _mm_mask3_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
  10520. const int __R)
  10521. {
  10522. return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
  10523. (__v2df) __A,
  10524. (__v2df) __B,
  10525. (__mmask8) __U, __R);
  10526. }
  10527. extern __inline __m128
  10528. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10529. _mm_mask3_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
  10530. const int __R)
  10531. {
  10532. return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
  10533. (__v4sf) __A,
  10534. (__v4sf) __B,
  10535. (__mmask8) __U, __R);
  10536. }
  10537. extern __inline __m128d
  10538. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10539. _mm_maskz_fmsub_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
  10540. const int __R)
  10541. {
  10542. return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
  10543. (__v2df) __A,
  10544. -(__v2df) __B,
  10545. (__mmask8) __U, __R);
  10546. }
  10547. extern __inline __m128
  10548. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10549. _mm_maskz_fmsub_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
  10550. const int __R)
  10551. {
  10552. return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
  10553. (__v4sf) __A,
  10554. -(__v4sf) __B,
  10555. (__mmask8) __U, __R);
  10556. }
  10557. extern __inline __m128d
  10558. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10559. _mm_mask_fnmadd_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
  10560. const int __R)
  10561. {
  10562. return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
  10563. -(__v2df) __A,
  10564. (__v2df) __B,
  10565. (__mmask8) __U, __R);
  10566. }
  10567. extern __inline __m128
  10568. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10569. _mm_mask_fnmadd_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
  10570. const int __R)
  10571. {
  10572. return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
  10573. -(__v4sf) __A,
  10574. (__v4sf) __B,
  10575. (__mmask8) __U, __R);
  10576. }
  10577. extern __inline __m128d
  10578. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10579. _mm_mask3_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
  10580. const int __R)
  10581. {
  10582. return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
  10583. -(__v2df) __A,
  10584. (__v2df) __B,
  10585. (__mmask8) __U, __R);
  10586. }
  10587. extern __inline __m128
  10588. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10589. _mm_mask3_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
  10590. const int __R)
  10591. {
  10592. return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
  10593. -(__v4sf) __A,
  10594. (__v4sf) __B,
  10595. (__mmask8) __U, __R);
  10596. }
  10597. extern __inline __m128d
  10598. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10599. _mm_maskz_fnmadd_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
  10600. const int __R)
  10601. {
  10602. return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
  10603. -(__v2df) __A,
  10604. (__v2df) __B,
  10605. (__mmask8) __U, __R);
  10606. }
  10607. extern __inline __m128
  10608. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10609. _mm_maskz_fnmadd_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
  10610. const int __R)
  10611. {
  10612. return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
  10613. -(__v4sf) __A,
  10614. (__v4sf) __B,
  10615. (__mmask8) __U, __R);
  10616. }
  10617. extern __inline __m128d
  10618. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10619. _mm_mask_fnmsub_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
  10620. const int __R)
  10621. {
  10622. return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
  10623. -(__v2df) __A,
  10624. -(__v2df) __B,
  10625. (__mmask8) __U, __R);
  10626. }
  10627. extern __inline __m128
  10628. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10629. _mm_mask_fnmsub_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
  10630. const int __R)
  10631. {
  10632. return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
  10633. -(__v4sf) __A,
  10634. -(__v4sf) __B,
  10635. (__mmask8) __U, __R);
  10636. }
  10637. extern __inline __m128d
  10638. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10639. _mm_mask3_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
  10640. const int __R)
  10641. {
  10642. return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
  10643. -(__v2df) __A,
  10644. (__v2df) __B,
  10645. (__mmask8) __U, __R);
  10646. }
  10647. extern __inline __m128
  10648. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10649. _mm_mask3_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
  10650. const int __R)
  10651. {
  10652. return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
  10653. -(__v4sf) __A,
  10654. (__v4sf) __B,
  10655. (__mmask8) __U, __R);
  10656. }
  10657. extern __inline __m128d
  10658. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10659. _mm_maskz_fnmsub_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
  10660. const int __R)
  10661. {
  10662. return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
  10663. -(__v2df) __A,
  10664. -(__v2df) __B,
  10665. (__mmask8) __U, __R);
  10666. }
  10667. extern __inline __m128
  10668. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10669. _mm_maskz_fnmsub_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
  10670. const int __R)
  10671. {
  10672. return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
  10673. -(__v4sf) __A,
  10674. -(__v4sf) __B,
  10675. (__mmask8) __U, __R);
  10676. }
  10677. #else
  10678. #define _mm_mask_fmadd_round_sd(A, U, B, C, R) \
  10679. (__m128d) __builtin_ia32_vfmaddsd3_mask (A, B, C, U, R)
  10680. #define _mm_mask_fmadd_round_ss(A, U, B, C, R) \
  10681. (__m128) __builtin_ia32_vfmaddss3_mask (A, B, C, U, R)
  10682. #define _mm_mask3_fmadd_round_sd(A, B, C, U, R) \
  10683. (__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, B, C, U, R)
  10684. #define _mm_mask3_fmadd_round_ss(A, B, C, U, R) \
  10685. (__m128) __builtin_ia32_vfmaddss3_mask3 (A, B, C, U, R)
  10686. #define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
  10687. (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, B, C, U, R)
  10688. #define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
  10689. (__m128) __builtin_ia32_vfmaddss3_maskz (A, B, C, U, R)
  10690. #define _mm_mask_fmsub_round_sd(A, U, B, C, R) \
  10691. (__m128d) __builtin_ia32_vfmaddsd3_mask (A, B, -(C), U, R)
  10692. #define _mm_mask_fmsub_round_ss(A, U, B, C, R) \
  10693. (__m128) __builtin_ia32_vfmaddss3_mask (A, B, -(C), U, R)
  10694. #define _mm_mask3_fmsub_round_sd(A, B, C, U, R) \
  10695. (__m128d) __builtin_ia32_vfmsubsd3_mask3 (A, B, C, U, R)
  10696. #define _mm_mask3_fmsub_round_ss(A, B, C, U, R) \
  10697. (__m128) __builtin_ia32_vfmsubss3_mask3 (A, B, C, U, R)
  10698. #define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
  10699. (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, B, -(C), U, R)
  10700. #define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
  10701. (__m128) __builtin_ia32_vfmaddss3_maskz (A, B, -(C), U, R)
  10702. #define _mm_mask_fnmadd_round_sd(A, U, B, C, R) \
  10703. (__m128d) __builtin_ia32_vfmaddsd3_mask (A, -(B), C, U, R)
  10704. #define _mm_mask_fnmadd_round_ss(A, U, B, C, R) \
  10705. (__m128) __builtin_ia32_vfmaddss3_mask (A, -(B), C, U, R)
  10706. #define _mm_mask3_fnmadd_round_sd(A, B, C, U, R) \
  10707. (__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, -(B), C, U, R)
  10708. #define _mm_mask3_fnmadd_round_ss(A, B, C, U, R) \
  10709. (__m128) __builtin_ia32_vfmaddss3_mask3 (A, -(B), C, U, R)
  10710. #define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
  10711. (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, -(B), C, U, R)
  10712. #define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
  10713. (__m128) __builtin_ia32_vfmaddss3_maskz (A, -(B), C, U, R)
  10714. #define _mm_mask_fnmsub_round_sd(A, U, B, C, R) \
  10715. (__m128d) __builtin_ia32_vfmaddsd3_mask (A, -(B), -(C), U, R)
  10716. #define _mm_mask_fnmsub_round_ss(A, U, B, C, R) \
  10717. (__m128) __builtin_ia32_vfmaddss3_mask (A, -(B), -(C), U, R)
  10718. #define _mm_mask3_fnmsub_round_sd(A, B, C, U, R) \
  10719. (__m128d) __builtin_ia32_vfmsubsd3_mask3 (A, -(B), C, U, R)
  10720. #define _mm_mask3_fnmsub_round_ss(A, B, C, U, R) \
  10721. (__m128) __builtin_ia32_vfmsubss3_mask3 (A, -(B), C, U, R)
  10722. #define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
  10723. (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, -(B), -(C), U, R)
  10724. #define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
  10725. (__m128) __builtin_ia32_vfmaddss3_maskz (A, -(B), -(C), U, R)
  10726. #endif
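/* Illustrative note, not part of the original header: throughout this
   header, intrinsics whose builtin requires a compile-time constant
   (rounding mode, comparison predicate, lane index) are defined as inline
   functions only under __OPTIMIZE__, where inlining lets the `const int`
   argument fold to a literal; without optimization the same names are
   provided as macros so the constant reaches the builtin directly.  */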
  10727. #ifdef __OPTIMIZE__
  10728. extern __inline int
  10729. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10730. _mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
  10731. {
  10732. return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
  10733. }
  10734. extern __inline int
  10735. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10736. _mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
  10737. {
  10738. return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
  10739. }
  10740. #else
  10741. #define _mm_comi_round_ss(A, B, C, D)\
  10742. __builtin_ia32_vcomiss(A, B, C, D)
  10743. #define _mm_comi_round_sd(A, B, C, D)\
  10744. __builtin_ia32_vcomisd(A, B, C, D)
  10745. #endif
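/* Illustrative note, not part of the original header: _mm_comi_round_ss/sd
   compare element 0 of the two operands under the predicate __P (one of
   the _CMP_* constants) and return 0 or 1; __R selects exception behaviour
   (_MM_FROUND_CUR_DIRECTION or _MM_FROUND_NO_EXC).  A minimal sketch:

     int gt = _mm_comi_round_sd (x, y, _CMP_GT_OQ, _MM_FROUND_NO_EXC);  */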
  10746. extern __inline __m512d
  10747. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10748. _mm512_sqrt_pd (__m512d __A)
  10749. {
  10750. return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
  10751. (__v8df)
  10752. _mm512_undefined_pd (),
  10753. (__mmask8) -1,
  10754. _MM_FROUND_CUR_DIRECTION);
  10755. }
  10756. extern __inline __m512d
  10757. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10758. _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
  10759. {
  10760. return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
  10761. (__v8df) __W,
  10762. (__mmask8) __U,
  10763. _MM_FROUND_CUR_DIRECTION);
  10764. }
  10765. extern __inline __m512d
  10766. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10767. _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
  10768. {
  10769. return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
  10770. (__v8df)
  10771. _mm512_setzero_pd (),
  10772. (__mmask8) __U,
  10773. _MM_FROUND_CUR_DIRECTION);
  10774. }
  10775. extern __inline __m512
  10776. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10777. _mm512_sqrt_ps (__m512 __A)
  10778. {
  10779. return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
  10780. (__v16sf)
  10781. _mm512_undefined_ps (),
  10782. (__mmask16) -1,
  10783. _MM_FROUND_CUR_DIRECTION);
  10784. }
  10785. extern __inline __m512
  10786. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10787. _mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A)
  10788. {
  10789. return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
  10790. (__v16sf) __W,
  10791. (__mmask16) __U,
  10792. _MM_FROUND_CUR_DIRECTION);
  10793. }
  10794. extern __inline __m512
  10795. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10796. _mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A)
  10797. {
  10798. return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
  10799. (__v16sf)
  10800. _mm512_setzero_ps (),
  10801. (__mmask16) __U,
  10802. _MM_FROUND_CUR_DIRECTION);
  10803. }
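/* Illustrative note, not part of the original header: the sqrt intrinsics
   above take an elementwise square root at the current rounding direction;
   the masked forms merge inactive lanes from __W or zero them.  A minimal
   sketch:

     __m512d r = _mm512_maskz_sqrt_pd (k, x);  // inactive lanes become 0.0  */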
  10804. extern __inline __m512d
  10805. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10806. _mm512_add_pd (__m512d __A, __m512d __B)
  10807. {
  10808. return (__m512d) ((__v8df)__A + (__v8df)__B);
  10809. }
  10810. extern __inline __m512d
  10811. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10812. _mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
  10813. {
  10814. return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
  10815. (__v8df) __B,
  10816. (__v8df) __W,
  10817. (__mmask8) __U,
  10818. _MM_FROUND_CUR_DIRECTION);
  10819. }
  10820. extern __inline __m512d
  10821. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10822. _mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B)
  10823. {
  10824. return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
  10825. (__v8df) __B,
  10826. (__v8df)
  10827. _mm512_setzero_pd (),
  10828. (__mmask8) __U,
  10829. _MM_FROUND_CUR_DIRECTION);
  10830. }
  10831. extern __inline __m512
  10832. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10833. _mm512_add_ps (__m512 __A, __m512 __B)
  10834. {
  10835. return (__m512) ((__v16sf)__A + (__v16sf)__B);
  10836. }
  10837. extern __inline __m512
  10838. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10839. _mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
  10840. {
  10841. return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
  10842. (__v16sf) __B,
  10843. (__v16sf) __W,
  10844. (__mmask16) __U,
  10845. _MM_FROUND_CUR_DIRECTION);
  10846. }
  10847. extern __inline __m512
  10848. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10849. _mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
  10850. {
  10851. return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
  10852. (__v16sf) __B,
  10853. (__v16sf)
  10854. _mm512_setzero_ps (),
  10855. (__mmask16) __U,
  10856. _MM_FROUND_CUR_DIRECTION);
  10857. }
  10858. extern __inline __m128d
  10859. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10860. _mm_mask_add_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
  10861. {
  10862. return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
  10863. (__v2df) __B,
  10864. (__v2df) __W,
  10865. (__mmask8) __U,
  10866. _MM_FROUND_CUR_DIRECTION);
  10867. }
  10868. extern __inline __m128d
  10869. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10870. _mm_maskz_add_sd (__mmask8 __U, __m128d __A, __m128d __B)
  10871. {
  10872. return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
  10873. (__v2df) __B,
  10874. (__v2df)
  10875. _mm_setzero_pd (),
  10876. (__mmask8) __U,
  10877. _MM_FROUND_CUR_DIRECTION);
  10878. }
  10879. extern __inline __m128
  10880. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10881. _mm_mask_add_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  10882. {
  10883. return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
  10884. (__v4sf) __B,
  10885. (__v4sf) __W,
  10886. (__mmask8) __U,
  10887. _MM_FROUND_CUR_DIRECTION);
  10888. }
  10889. extern __inline __m128
  10890. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10891. _mm_maskz_add_ss (__mmask8 __U, __m128 __A, __m128 __B)
  10892. {
  10893. return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
  10894. (__v4sf) __B,
  10895. (__v4sf)
  10896. _mm_setzero_ps (),
  10897. (__mmask8) __U,
  10898. _MM_FROUND_CUR_DIRECTION);
  10899. }
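/* Illustrative note, not part of the original header: _mm512_add_pd/ps
   lower to a plain vector addition, while the masked forms go through the
   builtin so inactive lanes can merge from __W or be zeroed; the scalar
   _mm_mask_add_sd/ss forms apply bit 0 of the mask to element 0 and copy
   the upper element(s) from __A.  A minimal sketch:

     __m512d s = _mm512_mask_add_pd (acc, k, acc, x);  // masked accumulate  */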
  10900. extern __inline __m512d
  10901. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10902. _mm512_sub_pd (__m512d __A, __m512d __B)
  10903. {
  10904. return (__m512d) ((__v8df)__A - (__v8df)__B);
  10905. }
  10906. extern __inline __m512d
  10907. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10908. _mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
  10909. {
  10910. return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
  10911. (__v8df) __B,
  10912. (__v8df) __W,
  10913. (__mmask8) __U,
  10914. _MM_FROUND_CUR_DIRECTION);
  10915. }
  10916. extern __inline __m512d
  10917. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10918. _mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B)
  10919. {
  10920. return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
  10921. (__v8df) __B,
  10922. (__v8df)
  10923. _mm512_setzero_pd (),
  10924. (__mmask8) __U,
  10925. _MM_FROUND_CUR_DIRECTION);
  10926. }
  10927. extern __inline __m512
  10928. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10929. _mm512_sub_ps (__m512 __A, __m512 __B)
  10930. {
  10931. return (__m512) ((__v16sf)__A - (__v16sf)__B);
  10932. }
  10933. extern __inline __m512
  10934. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10935. _mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
  10936. {
  10937. return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
  10938. (__v16sf) __B,
  10939. (__v16sf) __W,
  10940. (__mmask16) __U,
  10941. _MM_FROUND_CUR_DIRECTION);
  10942. }
  10943. extern __inline __m512
  10944. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10945. _mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
  10946. {
  10947. return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
  10948. (__v16sf) __B,
  10949. (__v16sf)
  10950. _mm512_setzero_ps (),
  10951. (__mmask16) __U,
  10952. _MM_FROUND_CUR_DIRECTION);
  10953. }
  10954. extern __inline __m128d
  10955. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10956. _mm_mask_sub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
  10957. {
  10958. return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
  10959. (__v2df) __B,
  10960. (__v2df) __W,
  10961. (__mmask8) __U,
  10962. _MM_FROUND_CUR_DIRECTION);
  10963. }
  10964. extern __inline __m128d
  10965. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10966. _mm_maskz_sub_sd (__mmask8 __U, __m128d __A, __m128d __B)
  10967. {
  10968. return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
  10969. (__v2df) __B,
  10970. (__v2df)
  10971. _mm_setzero_pd (),
  10972. (__mmask8) __U,
  10973. _MM_FROUND_CUR_DIRECTION);
  10974. }
  10975. extern __inline __m128
  10976. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10977. _mm_mask_sub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  10978. {
  10979. return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
  10980. (__v4sf) __B,
  10981. (__v4sf) __W,
  10982. (__mmask8) __U,
  10983. _MM_FROUND_CUR_DIRECTION);
  10984. }
  10985. extern __inline __m128
  10986. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10987. _mm_maskz_sub_ss (__mmask8 __U, __m128 __A, __m128 __B)
  10988. {
  10989. return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
  10990. (__v4sf) __B,
  10991. (__v4sf)
  10992. _mm_setzero_ps (),
  10993. (__mmask8) __U,
  10994. _MM_FROUND_CUR_DIRECTION);
  10995. }
  10996. extern __inline __m512d
  10997. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10998. _mm512_mul_pd (__m512d __A, __m512d __B)
  10999. {
  11000. return (__m512d) ((__v8df)__A * (__v8df)__B);
  11001. }
  11002. extern __inline __m512d
  11003. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11004. _mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
  11005. {
  11006. return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
  11007. (__v8df) __B,
  11008. (__v8df) __W,
  11009. (__mmask8) __U,
  11010. _MM_FROUND_CUR_DIRECTION);
  11011. }
  11012. extern __inline __m512d
  11013. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11014. _mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B)
  11015. {
  11016. return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
  11017. (__v8df) __B,
  11018. (__v8df)
  11019. _mm512_setzero_pd (),
  11020. (__mmask8) __U,
  11021. _MM_FROUND_CUR_DIRECTION);
  11022. }
  11023. extern __inline __m512
  11024. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11025. _mm512_mul_ps (__m512 __A, __m512 __B)
  11026. {
  11027. return (__m512) ((__v16sf)__A * (__v16sf)__B);
  11028. }
  11029. extern __inline __m512
  11030. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11031. _mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
  11032. {
  11033. return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
  11034. (__v16sf) __B,
  11035. (__v16sf) __W,
  11036. (__mmask16) __U,
  11037. _MM_FROUND_CUR_DIRECTION);
  11038. }
  11039. extern __inline __m512
  11040. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11041. _mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
  11042. {
  11043. return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
  11044. (__v16sf) __B,
  11045. (__v16sf)
  11046. _mm512_setzero_ps (),
  11047. (__mmask16) __U,
  11048. _MM_FROUND_CUR_DIRECTION);
  11049. }
  11050. extern __inline __m128d
  11051. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11052. _mm_mask_mul_sd (__m128d __W, __mmask8 __U, __m128d __A,
  11053. __m128d __B)
  11054. {
  11055. return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
  11056. (__v2df) __B,
  11057. (__v2df) __W,
  11058. (__mmask8) __U,
  11059. _MM_FROUND_CUR_DIRECTION);
  11060. }
  11061. extern __inline __m128d
  11062. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11063. _mm_maskz_mul_sd (__mmask8 __U, __m128d __A, __m128d __B)
  11064. {
  11065. return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
  11066. (__v2df) __B,
  11067. (__v2df)
  11068. _mm_setzero_pd (),
  11069. (__mmask8) __U,
  11070. _MM_FROUND_CUR_DIRECTION);
  11071. }
  11072. extern __inline __m128
  11073. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11074. _mm_mask_mul_ss (__m128 __W, __mmask8 __U, __m128 __A,
  11075. __m128 __B)
  11076. {
  11077. return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
  11078. (__v4sf) __B,
  11079. (__v4sf) __W,
  11080. (__mmask8) __U,
  11081. _MM_FROUND_CUR_DIRECTION);
  11082. }
  11083. extern __inline __m128
  11084. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11085. _mm_maskz_mul_ss (__mmask8 __U, __m128 __A, __m128 __B)
  11086. {
  11087. return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
  11088. (__v4sf) __B,
  11089. (__v4sf)
  11090. _mm_setzero_ps (),
  11091. (__mmask8) __U,
  11092. _MM_FROUND_CUR_DIRECTION);
  11093. }
  11094. extern __inline __m512d
  11095. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11096. _mm512_div_pd (__m512d __M, __m512d __V)
  11097. {
  11098. return (__m512d) ((__v8df)__M / (__v8df)__V);
  11099. }
  11100. extern __inline __m512d
  11101. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11102. _mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V)
  11103. {
  11104. return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
  11105. (__v8df) __V,
  11106. (__v8df) __W,
  11107. (__mmask8) __U,
  11108. _MM_FROUND_CUR_DIRECTION);
  11109. }
  11110. extern __inline __m512d
  11111. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11112. _mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V)
  11113. {
  11114. return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
  11115. (__v8df) __V,
  11116. (__v8df)
  11117. _mm512_setzero_pd (),
  11118. (__mmask8) __U,
  11119. _MM_FROUND_CUR_DIRECTION);
  11120. }
  11121. extern __inline __m512
  11122. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11123. _mm512_div_ps (__m512 __A, __m512 __B)
  11124. {
  11125. return (__m512) ((__v16sf)__A / (__v16sf)__B);
  11126. }
  11127. extern __inline __m512
  11128. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11129. _mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
  11130. {
  11131. return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
  11132. (__v16sf) __B,
  11133. (__v16sf) __W,
  11134. (__mmask16) __U,
  11135. _MM_FROUND_CUR_DIRECTION);
  11136. }
  11137. extern __inline __m512
  11138. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11139. _mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
  11140. {
  11141. return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
  11142. (__v16sf) __B,
  11143. (__v16sf)
  11144. _mm512_setzero_ps (),
  11145. (__mmask16) __U,
  11146. _MM_FROUND_CUR_DIRECTION);
  11147. }
  11148. extern __inline __m128d
  11149. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11150. _mm_mask_div_sd (__m128d __W, __mmask8 __U, __m128d __A,
  11151. __m128d __B)
  11152. {
  11153. return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
  11154. (__v2df) __B,
  11155. (__v2df) __W,
  11156. (__mmask8) __U,
  11157. _MM_FROUND_CUR_DIRECTION);
  11158. }
  11159. extern __inline __m128d
  11160. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11161. _mm_maskz_div_sd (__mmask8 __U, __m128d __A, __m128d __B)
  11162. {
  11163. return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
  11164. (__v2df) __B,
  11165. (__v2df)
  11166. _mm_setzero_pd (),
  11167. (__mmask8) __U,
  11168. _MM_FROUND_CUR_DIRECTION);
  11169. }
  11170. extern __inline __m128
  11171. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11172. _mm_mask_div_ss (__m128 __W, __mmask8 __U, __m128 __A,
  11173. __m128 __B)
  11174. {
  11175. return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
  11176. (__v4sf) __B,
  11177. (__v4sf) __W,
  11178. (__mmask8) __U,
  11179. _MM_FROUND_CUR_DIRECTION);
  11180. }
  11181. extern __inline __m128
  11182. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11183. _mm_maskz_div_ss (__mmask8 __U, __m128 __A, __m128 __B)
  11184. {
  11185. return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
  11186. (__v4sf) __B,
  11187. (__v4sf)
  11188. _mm_setzero_ps (),
  11189. (__mmask8) __U,
  11190. _MM_FROUND_CUR_DIRECTION);
  11191. }
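/* Illustrative sketch (not part of the original header; assumes
   _mm512_cmp_pd_mask and the _CMP_* predicates from <immintrin.h> are
   available, as elsewhere in AVX-512F): zero-masked division can be combined
   with a comparison mask to suppress lanes with a zero divisor instead of
   producing infinities.  Variable names are hypothetical:

     __m512d num = _mm512_set1_pd (1.0);
     __m512d den = _mm512_setr_pd (2.0, 0.0, 4.0, 0.0, 8.0, 0.0, 16.0, 0.0);

     __mmask8 nonzero = _mm512_cmp_pd_mask (den, _mm512_setzero_pd (),
                                            _CMP_NEQ_OQ);
     __m512d q = _mm512_maskz_div_pd (nonzero, num, den);
     // lanes where den == 0.0 come out as +0.0 rather than +inf

   Masked-off lanes are never divided, so they raise no floating-point
   exception flags.  */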
  11192. extern __inline __m512d
  11193. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11194. _mm512_max_pd (__m512d __A, __m512d __B)
  11195. {
  11196. return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
  11197. (__v8df) __B,
  11198. (__v8df)
  11199. _mm512_undefined_pd (),
  11200. (__mmask8) -1,
  11201. _MM_FROUND_CUR_DIRECTION);
  11202. }
  11203. extern __inline __m512d
  11204. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11205. _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
  11206. {
  11207. return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
  11208. (__v8df) __B,
  11209. (__v8df) __W,
  11210. (__mmask8) __U,
  11211. _MM_FROUND_CUR_DIRECTION);
  11212. }
  11213. extern __inline __m512d
  11214. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11215. _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
  11216. {
  11217. return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
  11218. (__v8df) __B,
  11219. (__v8df)
  11220. _mm512_setzero_pd (),
  11221. (__mmask8) __U,
  11222. _MM_FROUND_CUR_DIRECTION);
  11223. }
  11224. extern __inline __m512
  11225. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11226. _mm512_max_ps (__m512 __A, __m512 __B)
  11227. {
  11228. return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
  11229. (__v16sf) __B,
  11230. (__v16sf)
  11231. _mm512_undefined_ps (),
  11232. (__mmask16) -1,
  11233. _MM_FROUND_CUR_DIRECTION);
  11234. }
  11235. extern __inline __m512
  11236. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11237. _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
  11238. {
  11239. return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
  11240. (__v16sf) __B,
  11241. (__v16sf) __W,
  11242. (__mmask16) __U,
  11243. _MM_FROUND_CUR_DIRECTION);
  11244. }
  11245. extern __inline __m512
  11246. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11247. _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
  11248. {
  11249. return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
  11250. (__v16sf) __B,
  11251. (__v16sf)
  11252. _mm512_setzero_ps (),
  11253. (__mmask16) __U,
  11254. _MM_FROUND_CUR_DIRECTION);
  11255. }
  11256. extern __inline __m128d
  11257. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11258. _mm_mask_max_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
  11259. {
  11260. return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
  11261. (__v2df) __B,
  11262. (__v2df) __W,
  11263. (__mmask8) __U,
  11264. _MM_FROUND_CUR_DIRECTION);
  11265. }
  11266. extern __inline __m128d
  11267. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11268. _mm_maskz_max_sd (__mmask8 __U, __m128d __A, __m128d __B)
  11269. {
  11270. return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
  11271. (__v2df) __B,
  11272. (__v2df)
  11273. _mm_setzero_pd (),
  11274. (__mmask8) __U,
  11275. _MM_FROUND_CUR_DIRECTION);
  11276. }
  11277. extern __inline __m128
  11278. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11279. _mm_mask_max_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  11280. {
  11281. return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
  11282. (__v4sf) __B,
  11283. (__v4sf) __W,
  11284. (__mmask8) __U,
  11285. _MM_FROUND_CUR_DIRECTION);
  11286. }
  11287. extern __inline __m128
  11288. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11289. _mm_maskz_max_ss (__mmask8 __U, __m128 __A, __m128 __B)
  11290. {
  11291. return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
  11292. (__v4sf) __B,
  11293. (__v4sf)
  11294. _mm_setzero_ps (),
  11295. (__mmask8) __U,
  11296. _MM_FROUND_CUR_DIRECTION);
  11297. }
  11298. extern __inline __m512d
  11299. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11300. _mm512_min_pd (__m512d __A, __m512d __B)
  11301. {
  11302. return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
  11303. (__v8df) __B,
  11304. (__v8df)
  11305. _mm512_undefined_pd (),
  11306. (__mmask8) -1,
  11307. _MM_FROUND_CUR_DIRECTION);
  11308. }
  11309. extern __inline __m512d
  11310. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11311. _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
  11312. {
  11313. return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
  11314. (__v8df) __B,
  11315. (__v8df) __W,
  11316. (__mmask8) __U,
  11317. _MM_FROUND_CUR_DIRECTION);
  11318. }
  11319. extern __inline __m512d
  11320. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11321. _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
  11322. {
  11323. return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
  11324. (__v8df) __B,
  11325. (__v8df)
  11326. _mm512_setzero_pd (),
  11327. (__mmask8) __U,
  11328. _MM_FROUND_CUR_DIRECTION);
  11329. }
  11330. extern __inline __m512
  11331. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11332. _mm512_min_ps (__m512 __A, __m512 __B)
  11333. {
  11334. return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
  11335. (__v16sf) __B,
  11336. (__v16sf)
  11337. _mm512_undefined_ps (),
  11338. (__mmask16) -1,
  11339. _MM_FROUND_CUR_DIRECTION);
  11340. }
  11341. extern __inline __m512
  11342. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11343. _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
  11344. {
  11345. return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
  11346. (__v16sf) __B,
  11347. (__v16sf) __W,
  11348. (__mmask16) __U,
  11349. _MM_FROUND_CUR_DIRECTION);
  11350. }
  11351. extern __inline __m512
  11352. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11353. _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
  11354. {
  11355. return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
  11356. (__v16sf) __B,
  11357. (__v16sf)
  11358. _mm512_setzero_ps (),
  11359. (__mmask16) __U,
  11360. _MM_FROUND_CUR_DIRECTION);
  11361. }
  11362. extern __inline __m128d
  11363. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11364. _mm_mask_min_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
  11365. {
  11366. return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
  11367. (__v2df) __B,
  11368. (__v2df) __W,
  11369. (__mmask8) __U,
  11370. _MM_FROUND_CUR_DIRECTION);
  11371. }
  11372. extern __inline __m128d
  11373. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11374. _mm_maskz_min_sd (__mmask8 __U, __m128d __A, __m128d __B)
  11375. {
  11376. return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
  11377. (__v2df) __B,
  11378. (__v2df)
  11379. _mm_setzero_pd (),
  11380. (__mmask8) __U,
  11381. _MM_FROUND_CUR_DIRECTION);
  11382. }
  11383. extern __inline __m128
  11384. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11385. _mm_mask_min_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  11386. {
  11387. return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
  11388. (__v4sf) __B,
  11389. (__v4sf) __W,
  11390. (__mmask8) __U,
  11391. _MM_FROUND_CUR_DIRECTION);
  11392. }
  11393. extern __inline __m128
  11394. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11395. _mm_maskz_min_ss (__mmask8 __U, __m128 __A, __m128 __B)
  11396. {
  11397. return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
  11398. (__v4sf) __B,
  11399. (__v4sf)
  11400. _mm_setzero_ps (),
  11401. (__mmask8) __U,
  11402. _MM_FROUND_CUR_DIRECTION);
  11403. }
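/* Illustrative note (not in the original header): the unmasked
   _mm512_max_*/_mm512_min_* forms above simply call the masked builtin with an
   all-ones mask and an undefined pass-through value.  A typical use is
   clamping; the helper name below is hypothetical:

     static __inline __m512
     clamp_ps (__m512 x, __m512 lo, __m512 hi)
     {
       return _mm512_min_ps (_mm512_max_ps (x, lo), hi);
     }

   As with VMAXPS/VMINPS, when an element compares unordered (NaN) the second
   source operand is returned, so argument order matters for NaN
   propagation.  */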
  11404. extern __inline __m512d
  11405. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11406. _mm512_scalef_pd (__m512d __A, __m512d __B)
  11407. {
  11408. return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
  11409. (__v8df) __B,
  11410. (__v8df)
  11411. _mm512_undefined_pd (),
  11412. (__mmask8) -1,
  11413. _MM_FROUND_CUR_DIRECTION);
  11414. }
  11415. extern __inline __m512d
  11416. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11417. _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
  11418. {
  11419. return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
  11420. (__v8df) __B,
  11421. (__v8df) __W,
  11422. (__mmask8) __U,
  11423. _MM_FROUND_CUR_DIRECTION);
  11424. }
  11425. extern __inline __m512d
  11426. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11427. _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
  11428. {
  11429. return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
  11430. (__v8df) __B,
  11431. (__v8df)
  11432. _mm512_setzero_pd (),
  11433. (__mmask8) __U,
  11434. _MM_FROUND_CUR_DIRECTION);
  11435. }
  11436. extern __inline __m512
  11437. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11438. _mm512_scalef_ps (__m512 __A, __m512 __B)
  11439. {
  11440. return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
  11441. (__v16sf) __B,
  11442. (__v16sf)
  11443. _mm512_undefined_ps (),
  11444. (__mmask16) -1,
  11445. _MM_FROUND_CUR_DIRECTION);
  11446. }
  11447. extern __inline __m512
  11448. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11449. _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
  11450. {
  11451. return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
  11452. (__v16sf) __B,
  11453. (__v16sf) __W,
  11454. (__mmask16) __U,
  11455. _MM_FROUND_CUR_DIRECTION);
  11456. }
  11457. extern __inline __m512
  11458. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11459. _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
  11460. {
  11461. return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
  11462. (__v16sf) __B,
  11463. (__v16sf)
  11464. _mm512_setzero_ps (),
  11465. (__mmask16) __U,
  11466. _MM_FROUND_CUR_DIRECTION);
  11467. }
  11468. extern __inline __m128d
  11469. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11470. _mm_scalef_sd (__m128d __A, __m128d __B)
  11471. {
  11472. return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
  11473. (__v2df) __B,
  11474. (__v2df)
  11475. _mm_setzero_pd (),
  11476. (__mmask8) -1,
  11477. _MM_FROUND_CUR_DIRECTION);
  11478. }
  11479. extern __inline __m128
  11480. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11481. _mm_scalef_ss (__m128 __A, __m128 __B)
  11482. {
  11483. return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
  11484. (__v4sf) __B,
  11485. (__v4sf)
  11486. _mm_setzero_ps (),
  11487. (__mmask8) -1,
  11488. _MM_FROUND_CUR_DIRECTION);
  11489. }
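/* Illustrative sketch (not part of the original header): VSCALEF computes
   __A * 2^floor(__B) per element, i.e. a vectorised ldexp whose exponent is
   itself a floating-point vector.  A hypothetical example scaling every lane
   by 2^3:

     __m512d x      = _mm512_set1_pd (1.5);
     __m512d exps   = _mm512_set1_pd (3.0);
     __m512d scaled = _mm512_scalef_pd (x, exps);   // each lane is 1.5 * 8 = 12.0

   The scalar _mm_scalef_sd/_mm_scalef_ss forms scale element 0 only and copy
   the remaining elements from __A.  */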
  11490. extern __inline __m512d
  11491. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11492. _mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
  11493. {
  11494. return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
  11495. (__v8df) __B,
  11496. (__v8df) __C,
  11497. (__mmask8) -1,
  11498. _MM_FROUND_CUR_DIRECTION);
  11499. }
  11500. extern __inline __m512d
  11501. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11502. _mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
  11503. {
  11504. return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
  11505. (__v8df) __B,
  11506. (__v8df) __C,
  11507. (__mmask8) __U,
  11508. _MM_FROUND_CUR_DIRECTION);
  11509. }
  11510. extern __inline __m512d
  11511. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11512. _mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
  11513. {
  11514. return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
  11515. (__v8df) __B,
  11516. (__v8df) __C,
  11517. (__mmask8) __U,
  11518. _MM_FROUND_CUR_DIRECTION);
  11519. }
  11520. extern __inline __m512d
  11521. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11522. _mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
  11523. {
  11524. return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
  11525. (__v8df) __B,
  11526. (__v8df) __C,
  11527. (__mmask8) __U,
  11528. _MM_FROUND_CUR_DIRECTION);
  11529. }
  11530. extern __inline __m512
  11531. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11532. _mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
  11533. {
  11534. return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
  11535. (__v16sf) __B,
  11536. (__v16sf) __C,
  11537. (__mmask16) -1,
  11538. _MM_FROUND_CUR_DIRECTION);
  11539. }
  11540. extern __inline __m512
  11541. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11542. _mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
  11543. {
  11544. return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
  11545. (__v16sf) __B,
  11546. (__v16sf) __C,
  11547. (__mmask16) __U,
  11548. _MM_FROUND_CUR_DIRECTION);
  11549. }
  11550. extern __inline __m512
  11551. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11552. _mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
  11553. {
  11554. return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
  11555. (__v16sf) __B,
  11556. (__v16sf) __C,
  11557. (__mmask16) __U,
  11558. _MM_FROUND_CUR_DIRECTION);
  11559. }
  11560. extern __inline __m512
  11561. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11562. _mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
  11563. {
  11564. return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
  11565. (__v16sf) __B,
  11566. (__v16sf) __C,
  11567. (__mmask16) __U,
  11568. _MM_FROUND_CUR_DIRECTION);
  11569. }
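/* Illustrative usage (not in the original header): a fused multiply-add keeps
   the full intermediate precision with a single rounding, which is why it is
   the building block for dot products and polynomial evaluation.  A
   hypothetical Horner step for p(x) = c2*x^2 + c1*x + c0:

     __m512 x  = _mm512_set1_ps (0.5f);
     __m512 c2 = _mm512_set1_ps (3.0f);
     __m512 c1 = _mm512_set1_ps (-2.0f);
     __m512 c0 = _mm512_set1_ps (1.0f);

     __m512 p = _mm512_fmadd_ps (_mm512_fmadd_ps (c2, x, c1), x, c0);

   The _mask/_mask3/_maskz variants differ only in which operand supplies the
   masked-off lanes: __A for _mask, __C for _mask3, and zero for _maskz.  */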
  11570. extern __inline __m512d
  11571. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11572. _mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
  11573. {
  11574. return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
  11575. (__v8df) __B,
  11576. (__v8df) __C,
  11577. (__mmask8) -1,
  11578. _MM_FROUND_CUR_DIRECTION);
  11579. }
  11580. extern __inline __m512d
  11581. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11582. _mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
  11583. {
  11584. return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
  11585. (__v8df) __B,
  11586. (__v8df) __C,
  11587. (__mmask8) __U,
  11588. _MM_FROUND_CUR_DIRECTION);
  11589. }
  11590. extern __inline __m512d
  11591. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11592. _mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
  11593. {
  11594. return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
  11595. (__v8df) __B,
  11596. (__v8df) __C,
  11597. (__mmask8) __U,
  11598. _MM_FROUND_CUR_DIRECTION);
  11599. }
  11600. extern __inline __m512d
  11601. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11602. _mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
  11603. {
  11604. return (__m512d) __builtin_ia32_vfmsubpd512_maskz ((__v8df) __A,
  11605. (__v8df) __B,
  11606. (__v8df) __C,
  11607. (__mmask8) __U,
  11608. _MM_FROUND_CUR_DIRECTION);
  11609. }
  11610. extern __inline __m512
  11611. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11612. _mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
  11613. {
  11614. return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
  11615. (__v16sf) __B,
  11616. (__v16sf) __C,
  11617. (__mmask16) -1,
  11618. _MM_FROUND_CUR_DIRECTION);
  11619. }
  11620. extern __inline __m512
  11621. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11622. _mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
  11623. {
  11624. return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
  11625. (__v16sf) __B,
  11626. (__v16sf) __C,
  11627. (__mmask16) __U,
  11628. _MM_FROUND_CUR_DIRECTION);
  11629. }
  11630. extern __inline __m512
  11631. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11632. _mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
  11633. {
  11634. return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
  11635. (__v16sf) __B,
  11636. (__v16sf) __C,
  11637. (__mmask16) __U,
  11638. _MM_FROUND_CUR_DIRECTION);
  11639. }
  11640. extern __inline __m512
  11641. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11642. _mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
  11643. {
  11644. return (__m512) __builtin_ia32_vfmsubps512_maskz ((__v16sf) __A,
  11645. (__v16sf) __B,
  11646. (__v16sf) __C,
  11647. (__mmask16) __U,
  11648. _MM_FROUND_CUR_DIRECTION);
  11649. }
  11650. extern __inline __m512d
  11651. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11652. _mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
  11653. {
  11654. return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
  11655. (__v8df) __B,
  11656. (__v8df) __C,
  11657. (__mmask8) -1,
  11658. _MM_FROUND_CUR_DIRECTION);
  11659. }
  11660. extern __inline __m512d
  11661. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11662. _mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
  11663. {
  11664. return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
  11665. (__v8df) __B,
  11666. (__v8df) __C,
  11667. (__mmask8) __U,
  11668. _MM_FROUND_CUR_DIRECTION);
  11669. }
  11670. extern __inline __m512d
  11671. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11672. _mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
  11673. {
  11674. return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
  11675. (__v8df) __B,
  11676. (__v8df) __C,
  11677. (__mmask8) __U,
  11678. _MM_FROUND_CUR_DIRECTION);
  11679. }
  11680. extern __inline __m512d
  11681. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11682. _mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
  11683. {
  11684. return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
  11685. (__v8df) __B,
  11686. (__v8df) __C,
  11687. (__mmask8) __U,
  11688. _MM_FROUND_CUR_DIRECTION);
  11689. }
  11690. extern __inline __m512
  11691. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11692. _mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
  11693. {
  11694. return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
  11695. (__v16sf) __B,
  11696. (__v16sf) __C,
  11697. (__mmask16) -1,
  11698. _MM_FROUND_CUR_DIRECTION);
  11699. }
  11700. extern __inline __m512
  11701. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11702. _mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
  11703. {
  11704. return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
  11705. (__v16sf) __B,
  11706. (__v16sf) __C,
  11707. (__mmask16) __U,
  11708. _MM_FROUND_CUR_DIRECTION);
  11709. }
  11710. extern __inline __m512
  11711. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11712. _mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
  11713. {
  11714. return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
  11715. (__v16sf) __B,
  11716. (__v16sf) __C,
  11717. (__mmask16) __U,
  11718. _MM_FROUND_CUR_DIRECTION);
  11719. }
  11720. extern __inline __m512
  11721. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11722. _mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
  11723. {
  11724. return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
  11725. (__v16sf) __B,
  11726. (__v16sf) __C,
  11727. (__mmask16) __U,
  11728. _MM_FROUND_CUR_DIRECTION);
  11729. }
  11730. extern __inline __m512d
  11731. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11732. _mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
  11733. {
  11734. return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
  11735. (__v8df) __B,
  11736. -(__v8df) __C,
  11737. (__mmask8) -1,
  11738. _MM_FROUND_CUR_DIRECTION);
  11739. }
  11740. extern __inline __m512d
  11741. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11742. _mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
  11743. {
  11744. return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
  11745. (__v8df) __B,
  11746. -(__v8df) __C,
  11747. (__mmask8) __U,
  11748. _MM_FROUND_CUR_DIRECTION);
  11749. }
  11750. extern __inline __m512d
  11751. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11752. _mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
  11753. {
  11754. return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
  11755. (__v8df) __B,
  11756. (__v8df) __C,
  11757. (__mmask8) __U,
  11758. _MM_FROUND_CUR_DIRECTION);
  11759. }
  11760. extern __inline __m512d
  11761. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11762. _mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
  11763. {
  11764. return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
  11765. (__v8df) __B,
  11766. -(__v8df) __C,
  11767. (__mmask8) __U,
  11768. _MM_FROUND_CUR_DIRECTION);
  11769. }
  11770. extern __inline __m512
  11771. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11772. _mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
  11773. {
  11774. return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
  11775. (__v16sf) __B,
  11776. -(__v16sf) __C,
  11777. (__mmask16) -1,
  11778. _MM_FROUND_CUR_DIRECTION);
  11779. }
  11780. extern __inline __m512
  11781. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11782. _mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
  11783. {
  11784. return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
  11785. (__v16sf) __B,
  11786. -(__v16sf) __C,
  11787. (__mmask16) __U,
  11788. _MM_FROUND_CUR_DIRECTION);
  11789. }
  11790. extern __inline __m512
  11791. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11792. _mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
  11793. {
  11794. return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
  11795. (__v16sf) __B,
  11796. (__v16sf) __C,
  11797. (__mmask16) __U,
  11798. _MM_FROUND_CUR_DIRECTION);
  11799. }
  11800. extern __inline __m512
  11801. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11802. _mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
  11803. {
  11804. return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
  11805. (__v16sf) __B,
  11806. -(__v16sf) __C,
  11807. (__mmask16) __U,
  11808. _MM_FROUND_CUR_DIRECTION);
  11809. }
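/* Descriptive note (not in the original header): the plain, _mask and _maskz
   fmsubadd forms above have no dedicated builtin; they negate __C and reuse
   the vfmaddsub builtins.  The _mask3 form needs its own vfmsubadd builtin
   because its masked-off lanes must pass through the original, un-negated __C.
   Semantically, fmaddsub subtracts __C in the even-indexed elements and adds
   it in the odd-indexed ones, while fmsubadd does the opposite, matching the
   alternating pattern of the earlier addsub instructions commonly used for
   interleaved complex arithmetic.  */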
  11810. extern __inline __m512d
  11811. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11812. _mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
  11813. {
  11814. return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
  11815. (__v8df) __B,
  11816. (__v8df) __C,
  11817. (__mmask8) -1,
  11818. _MM_FROUND_CUR_DIRECTION);
  11819. }
  11820. extern __inline __m512d
  11821. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11822. _mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
  11823. {
  11824. return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
  11825. (__v8df) __B,
  11826. (__v8df) __C,
  11827. (__mmask8) __U,
  11828. _MM_FROUND_CUR_DIRECTION);
  11829. }
  11830. extern __inline __m512d
  11831. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11832. _mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
  11833. {
  11834. return (__m512d) __builtin_ia32_vfnmaddpd512_mask3 ((__v8df) __A,
  11835. (__v8df) __B,
  11836. (__v8df) __C,
  11837. (__mmask8) __U,
  11838. _MM_FROUND_CUR_DIRECTION);
  11839. }
  11840. extern __inline __m512d
  11841. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11842. _mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
  11843. {
  11844. return (__m512d) __builtin_ia32_vfnmaddpd512_maskz ((__v8df) __A,
  11845. (__v8df) __B,
  11846. (__v8df) __C,
  11847. (__mmask8) __U,
  11848. _MM_FROUND_CUR_DIRECTION);
  11849. }
  11850. extern __inline __m512
  11851. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11852. _mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
  11853. {
  11854. return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
  11855. (__v16sf) __B,
  11856. (__v16sf) __C,
  11857. (__mmask16) -1,
  11858. _MM_FROUND_CUR_DIRECTION);
  11859. }
  11860. extern __inline __m512
  11861. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11862. _mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
  11863. {
  11864. return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
  11865. (__v16sf) __B,
  11866. (__v16sf) __C,
  11867. (__mmask16) __U,
  11868. _MM_FROUND_CUR_DIRECTION);
  11869. }
  11870. extern __inline __m512
  11871. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11872. _mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
  11873. {
  11874. return (__m512) __builtin_ia32_vfnmaddps512_mask3 ((__v16sf) __A,
  11875. (__v16sf) __B,
  11876. (__v16sf) __C,
  11877. (__mmask16) __U,
  11878. _MM_FROUND_CUR_DIRECTION);
  11879. }
  11880. extern __inline __m512
  11881. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11882. _mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
  11883. {
  11884. return (__m512) __builtin_ia32_vfnmaddps512_maskz ((__v16sf) __A,
  11885. (__v16sf) __B,
  11886. (__v16sf) __C,
  11887. (__mmask16) __U,
  11888. _MM_FROUND_CUR_DIRECTION);
  11889. }
  11890. extern __inline __m512d
  11891. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11892. _mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
  11893. {
  11894. return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
  11895. (__v8df) __B,
  11896. (__v8df) __C,
  11897. (__mmask8) -1,
  11898. _MM_FROUND_CUR_DIRECTION);
  11899. }
  11900. extern __inline __m512d
  11901. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11902. _mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
  11903. {
  11904. return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
  11905. (__v8df) __B,
  11906. (__v8df) __C,
  11907. (__mmask8) __U,
  11908. _MM_FROUND_CUR_DIRECTION);
  11909. }
  11910. extern __inline __m512d
  11911. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11912. _mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
  11913. {
  11914. return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
  11915. (__v8df) __B,
  11916. (__v8df) __C,
  11917. (__mmask8) __U,
  11918. _MM_FROUND_CUR_DIRECTION);
  11919. }
  11920. extern __inline __m512d
  11921. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11922. _mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
  11923. {
  11924. return (__m512d) __builtin_ia32_vfnmsubpd512_maskz ((__v8df) __A,
  11925. (__v8df) __B,
  11926. (__v8df) __C,
  11927. (__mmask8) __U,
  11928. _MM_FROUND_CUR_DIRECTION);
  11929. }
  11930. extern __inline __m512
  11931. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11932. _mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
  11933. {
  11934. return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
  11935. (__v16sf) __B,
  11936. (__v16sf) __C,
  11937. (__mmask16) -1,
  11938. _MM_FROUND_CUR_DIRECTION);
  11939. }
  11940. extern __inline __m512
  11941. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11942. _mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
  11943. {
  11944. return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
  11945. (__v16sf) __B,
  11946. (__v16sf) __C,
  11947. (__mmask16) __U,
  11948. _MM_FROUND_CUR_DIRECTION);
  11949. }
  11950. extern __inline __m512
  11951. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11952. _mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
  11953. {
  11954. return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
  11955. (__v16sf) __B,
  11956. (__v16sf) __C,
  11957. (__mmask16) __U,
  11958. _MM_FROUND_CUR_DIRECTION);
  11959. }
  11960. extern __inline __m512
  11961. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11962. _mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
  11963. {
  11964. return (__m512) __builtin_ia32_vfnmsubps512_maskz ((__v16sf) __A,
  11965. (__v16sf) __B,
  11966. (__v16sf) __C,
  11967. (__mmask16) __U,
  11968. _MM_FROUND_CUR_DIRECTION);
  11969. }
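/* Descriptive note (not in the original header): the "n" in fnmadd/fnmsub
   negates the product, not the addend: fnmadd computes -(__A*__B) + __C and
   fnmsub computes -(__A*__B) - __C, each with a single rounding.  */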
  11970. extern __inline __m256i
  11971. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11972. _mm512_cvttpd_epi32 (__m512d __A)
  11973. {
  11974. return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
  11975. (__v8si)
  11976. _mm256_undefined_si256 (),
  11977. (__mmask8) -1,
  11978. _MM_FROUND_CUR_DIRECTION);
  11979. }
  11980. extern __inline __m256i
  11981. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11982. _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
  11983. {
  11984. return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
  11985. (__v8si) __W,
  11986. (__mmask8) __U,
  11987. _MM_FROUND_CUR_DIRECTION);
  11988. }
  11989. extern __inline __m256i
  11990. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  11991. _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
  11992. {
  11993. return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
  11994. (__v8si)
  11995. _mm256_setzero_si256 (),
  11996. (__mmask8) __U,
  11997. _MM_FROUND_CUR_DIRECTION);
  11998. }
  11999. extern __inline __m256i
  12000. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12001. _mm512_cvttpd_epu32 (__m512d __A)
  12002. {
  12003. return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
  12004. (__v8si)
  12005. _mm256_undefined_si256 (),
  12006. (__mmask8) -1,
  12007. _MM_FROUND_CUR_DIRECTION);
  12008. }
  12009. extern __inline __m256i
  12010. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12011. _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
  12012. {
  12013. return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
  12014. (__v8si) __W,
  12015. (__mmask8) __U,
  12016. _MM_FROUND_CUR_DIRECTION);
  12017. }
  12018. extern __inline __m256i
  12019. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12020. _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
  12021. {
  12022. return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
  12023. (__v8si)
  12024. _mm256_setzero_si256 (),
  12025. (__mmask8) __U,
  12026. _MM_FROUND_CUR_DIRECTION);
  12027. }
  12028. extern __inline __m256i
  12029. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12030. _mm512_cvtpd_epi32 (__m512d __A)
  12031. {
  12032. return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
  12033. (__v8si)
  12034. _mm256_undefined_si256 (),
  12035. (__mmask8) -1,
  12036. _MM_FROUND_CUR_DIRECTION);
  12037. }
  12038. extern __inline __m256i
  12039. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12040. _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
  12041. {
  12042. return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
  12043. (__v8si) __W,
  12044. (__mmask8) __U,
  12045. _MM_FROUND_CUR_DIRECTION);
  12046. }
  12047. extern __inline __m256i
  12048. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12049. _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
  12050. {
  12051. return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
  12052. (__v8si)
  12053. _mm256_setzero_si256 (),
  12054. (__mmask8) __U,
  12055. _MM_FROUND_CUR_DIRECTION);
  12056. }
  12057. extern __inline __m256i
  12058. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12059. _mm512_cvtpd_epu32 (__m512d __A)
  12060. {
  12061. return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
  12062. (__v8si)
  12063. _mm256_undefined_si256 (),
  12064. (__mmask8) -1,
  12065. _MM_FROUND_CUR_DIRECTION);
  12066. }
  12067. extern __inline __m256i
  12068. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12069. _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
  12070. {
  12071. return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
  12072. (__v8si) __W,
  12073. (__mmask8) __U,
  12074. _MM_FROUND_CUR_DIRECTION);
  12075. }
  12076. extern __inline __m256i
  12077. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12078. _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
  12079. {
  12080. return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
  12081. (__v8si)
  12082. _mm256_setzero_si256 (),
  12083. (__mmask8) __U,
  12084. _MM_FROUND_CUR_DIRECTION);
  12085. }
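/* Descriptive note (not in the original header): the pd -> 32-bit integer
   conversions above narrow eight doubles to eight 32-bit lanes in a __m256i;
   both a truncating (cvtt*) and a rounding (cvt*) form are provided, a
   distinction illustrated after the ps conversions below.  */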
  12086. extern __inline __m512i
  12087. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12088. _mm512_cvttps_epi32 (__m512 __A)
  12089. {
  12090. return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
  12091. (__v16si)
  12092. _mm512_undefined_epi32 (),
  12093. (__mmask16) -1,
  12094. _MM_FROUND_CUR_DIRECTION);
  12095. }
  12096. extern __inline __m512i
  12097. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12098. _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
  12099. {
  12100. return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
  12101. (__v16si) __W,
  12102. (__mmask16) __U,
  12103. _MM_FROUND_CUR_DIRECTION);
  12104. }
  12105. extern __inline __m512i
  12106. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12107. _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
  12108. {
  12109. return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
  12110. (__v16si)
  12111. _mm512_setzero_si512 (),
  12112. (__mmask16) __U,
  12113. _MM_FROUND_CUR_DIRECTION);
  12114. }
  12115. extern __inline __m512i
  12116. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12117. _mm512_cvttps_epu32 (__m512 __A)
  12118. {
  12119. return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
  12120. (__v16si)
  12121. _mm512_undefined_epi32 (),
  12122. (__mmask16) -1,
  12123. _MM_FROUND_CUR_DIRECTION);
  12124. }
  12125. extern __inline __m512i
  12126. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12127. _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
  12128. {
  12129. return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
  12130. (__v16si) __W,
  12131. (__mmask16) __U,
  12132. _MM_FROUND_CUR_DIRECTION);
  12133. }
  12134. extern __inline __m512i
  12135. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12136. _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
  12137. {
  12138. return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
  12139. (__v16si)
  12140. _mm512_setzero_si512 (),
  12141. (__mmask16) __U,
  12142. _MM_FROUND_CUR_DIRECTION);
  12143. }
  12144. extern __inline __m512i
  12145. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12146. _mm512_cvtps_epi32 (__m512 __A)
  12147. {
  12148. return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
  12149. (__v16si)
  12150. _mm512_undefined_epi32 (),
  12151. (__mmask16) -1,
  12152. _MM_FROUND_CUR_DIRECTION);
  12153. }
  12154. extern __inline __m512i
  12155. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12156. _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
  12157. {
  12158. return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
  12159. (__v16si) __W,
  12160. (__mmask16) __U,
  12161. _MM_FROUND_CUR_DIRECTION);
  12162. }
  12163. extern __inline __m512i
  12164. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12165. _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
  12166. {
  12167. return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
  12168. (__v16si)
  12169. _mm512_setzero_si512 (),
  12170. (__mmask16) __U,
  12171. _MM_FROUND_CUR_DIRECTION);
  12172. }
  12173. extern __inline __m512i
  12174. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12175. _mm512_cvtps_epu32 (__m512 __A)
  12176. {
  12177. return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
  12178. (__v16si)
  12179. _mm512_undefined_epi32 (),
  12180. (__mmask16) -1,
  12181. _MM_FROUND_CUR_DIRECTION);
  12182. }
  12183. extern __inline __m512i
  12184. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12185. _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
  12186. {
  12187. return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
  12188. (__v16si) __W,
  12189. (__mmask16) __U,
  12190. _MM_FROUND_CUR_DIRECTION);
  12191. }
  12192. extern __inline __m512i
  12193. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12194. _mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
  12195. {
  12196. return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
  12197. (__v16si)
  12198. _mm512_setzero_si512 (),
  12199. (__mmask16) __U,
  12200. _MM_FROUND_CUR_DIRECTION);
  12201. }
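/* Illustrative sketch (not in the original header): the cvtt* conversions
   truncate toward zero, while the cvt* conversions round according to the
   current MXCSR rounding mode (round-to-nearest-even by default), which is
   what _MM_FROUND_CUR_DIRECTION requests.  Hypothetical example:

     __m512  x = _mm512_set1_ps (2.7f);

     __m512i t = _mm512_cvttps_epi32 (x);   // every lane is 2
     __m512i r = _mm512_cvtps_epi32 (x);    // every lane is 3 under the default mode

   The epu32 variants differ only in treating the destination lanes as
   unsigned 32-bit integers.  */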
  12202. extern __inline double
  12203. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12204. _mm512_cvtsd_f64 (__m512d __A)
  12205. {
  12206. return __A[0];
  12207. }
  12208. extern __inline float
  12209. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12210. _mm512_cvtss_f32 (__m512 __A)
  12211. {
  12212. return __A[0];
  12213. }
  12214. #ifdef __x86_64__
  12215. extern __inline __m128
  12216. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12217. _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
  12218. {
  12219. return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
  12220. _MM_FROUND_CUR_DIRECTION);
  12221. }
  12222. extern __inline __m128d
  12223. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12224. _mm_cvtu64_sd (__m128d __A, unsigned long long __B)
  12225. {
  12226. return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
  12227. _MM_FROUND_CUR_DIRECTION);
  12228. }
  12229. #endif
  12230. extern __inline __m128
  12231. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12232. _mm_cvtu32_ss (__m128 __A, unsigned __B)
  12233. {
  12234. return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
  12235. _MM_FROUND_CUR_DIRECTION);
  12236. }
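/* Descriptive note (not in the original header): these scalar conversions
   write the converted unsigned integer into element 0 of the result and copy
   the remaining elements from __A unchanged.  The 64-bit variants are guarded
   by __x86_64__ because a 64-bit general-purpose source register is only
   available in 64-bit mode.  */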
  12237. extern __inline __m512
  12238. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12239. _mm512_cvtepi32_ps (__m512i __A)
  12240. {
  12241. return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
  12242. (__v16sf)
  12243. _mm512_undefined_ps (),
  12244. (__mmask16) -1,
  12245. _MM_FROUND_CUR_DIRECTION);
  12246. }
  12247. extern __inline __m512
  12248. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12249. _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
  12250. {
  12251. return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
  12252. (__v16sf) __W,
  12253. (__mmask16) __U,
  12254. _MM_FROUND_CUR_DIRECTION);
  12255. }
  12256. extern __inline __m512
  12257. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12258. _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
  12259. {
  12260. return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
  12261. (__v16sf)
  12262. _mm512_setzero_ps (),
  12263. (__mmask16) __U,
  12264. _MM_FROUND_CUR_DIRECTION);
  12265. }
  12266. extern __inline __m512
  12267. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12268. _mm512_cvtepu32_ps (__m512i __A)
  12269. {
  12270. return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
  12271. (__v16sf)
  12272. _mm512_undefined_ps (),
  12273. (__mmask16) -1,
  12274. _MM_FROUND_CUR_DIRECTION);
  12275. }
  12276. extern __inline __m512
  12277. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12278. _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
  12279. {
  12280. return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
  12281. (__v16sf) __W,
  12282. (__mmask16) __U,
  12283. _MM_FROUND_CUR_DIRECTION);
  12284. }
  12285. extern __inline __m512
  12286. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12287. _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
  12288. {
  12289. return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
  12290. (__v16sf)
  12291. _mm512_setzero_ps (),
  12292. (__mmask16) __U,
  12293. _MM_FROUND_CUR_DIRECTION);
  12294. }
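/* Illustrative sketch (not in the original header): cvtepi32_ps and
   cvtepu32_ps differ only in whether the 32-bit source elements are read as
   signed or unsigned, which matters once the top bit is set.  Hypothetical
   example:

     __m512i bits = _mm512_set1_epi32 (-1);   // 0xFFFFFFFF in every lane

     __m512 s = _mm512_cvtepi32_ps (bits);    // every lane is -1.0f
     __m512 u = _mm512_cvtepu32_ps (bits);    // every lane is 2^32 - 1,
                                              // which rounds to 4294967296.0f
   */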
  12295. #ifdef __OPTIMIZE__
  12296. extern __inline __m512d
  12297. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12298. _mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm)
  12299. {
  12300. return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
  12301. (__v8df) __B,
  12302. (__v8di) __C,
  12303. __imm,
  12304. (__mmask8) -1,
  12305. _MM_FROUND_CUR_DIRECTION);
  12306. }
  12307. extern __inline __m512d
  12308. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12309. _mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
  12310. __m512i __C, const int __imm)
  12311. {
  12312. return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
  12313. (__v8df) __B,
  12314. (__v8di) __C,
  12315. __imm,
  12316. (__mmask8) __U,
  12317. _MM_FROUND_CUR_DIRECTION);
  12318. }
  12319. extern __inline __m512d
  12320. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12321. _mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
  12322. __m512i __C, const int __imm)
  12323. {
  12324. return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
  12325. (__v8df) __B,
  12326. (__v8di) __C,
  12327. __imm,
  12328. (__mmask8) __U,
  12329. _MM_FROUND_CUR_DIRECTION);
  12330. }
  12331. extern __inline __m512
  12332. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12333. _mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm)
  12334. {
  12335. return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
  12336. (__v16sf) __B,
  12337. (__v16si) __C,
  12338. __imm,
  12339. (__mmask16) -1,
  12340. _MM_FROUND_CUR_DIRECTION);
  12341. }
  12342. extern __inline __m512
  12343. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12344. _mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
  12345. __m512i __C, const int __imm)
  12346. {
  12347. return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
  12348. (__v16sf) __B,
  12349. (__v16si) __C,
  12350. __imm,
  12351. (__mmask16) __U,
  12352. _MM_FROUND_CUR_DIRECTION);
  12353. }
  12354. extern __inline __m512
  12355. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12356. _mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
  12357. __m512i __C, const int __imm)
  12358. {
  12359. return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
  12360. (__v16sf) __B,
  12361. (__v16si) __C,
  12362. __imm,
  12363. (__mmask16) __U,
  12364. _MM_FROUND_CUR_DIRECTION);
  12365. }
  12366. extern __inline __m128d
  12367. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12368. _mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
  12369. {
  12370. return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
  12371. (__v2df) __B,
  12372. (__v2di) __C, __imm,
  12373. (__mmask8) -1,
  12374. _MM_FROUND_CUR_DIRECTION);
  12375. }
  12376. extern __inline __m128d
  12377. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12378. _mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
  12379. __m128i __C, const int __imm)
  12380. {
  12381. return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
  12382. (__v2df) __B,
  12383. (__v2di) __C, __imm,
  12384. (__mmask8) __U,
  12385. _MM_FROUND_CUR_DIRECTION);
  12386. }
  12387. extern __inline __m128d
  12388. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12389. _mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
  12390. __m128i __C, const int __imm)
  12391. {
  12392. return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
  12393. (__v2df) __B,
  12394. (__v2di) __C,
  12395. __imm,
  12396. (__mmask8) __U,
  12397. _MM_FROUND_CUR_DIRECTION);
  12398. }
  12399. extern __inline __m128
  12400. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12401. _mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
  12402. {
  12403. return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
  12404. (__v4sf) __B,
  12405. (__v4si) __C, __imm,
  12406. (__mmask8) -1,
  12407. _MM_FROUND_CUR_DIRECTION);
  12408. }
  12409. extern __inline __m128
  12410. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12411. _mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
  12412. __m128i __C, const int __imm)
  12413. {
  12414. return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
  12415. (__v4sf) __B,
  12416. (__v4si) __C, __imm,
  12417. (__mmask8) __U,
  12418. _MM_FROUND_CUR_DIRECTION);
  12419. }
  12420. extern __inline __m128
  12421. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12422. _mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
  12423. __m128i __C, const int __imm)
  12424. {
  12425. return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
  12426. (__v4sf) __B,
  12427. (__v4si) __C, __imm,
  12428. (__mmask8) __U,
  12429. _MM_FROUND_CUR_DIRECTION);
  12430. }
  12431. #else
  12432. #define _mm512_fixupimm_pd(X, Y, Z, C) \
  12433. ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
  12434. (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
  12435. (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
  12436. #define _mm512_mask_fixupimm_pd(X, U, Y, Z, C) \
  12437. ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
  12438. (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
  12439. (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
  12440. #define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C) \
  12441. ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
  12442. (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
  12443. (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
  12444. #define _mm512_fixupimm_ps(X, Y, Z, C) \
  12445. ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
  12446. (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
  12447. (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
  12448. #define _mm512_mask_fixupimm_ps(X, U, Y, Z, C) \
  12449. ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
  12450. (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
  12451. (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
  12452. #define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C) \
  12453. ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
  12454. (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
  12455. (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
  12456. #define _mm_fixupimm_sd(X, Y, Z, C) \
  12457. ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
  12458. (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
  12459. (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
  12460. #define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \
  12461. ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
  12462. (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
  12463. (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
  12464. #define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \
  12465. ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
  12466. (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
  12467. (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
  12468. #define _mm_fixupimm_ss(X, Y, Z, C) \
  12469. ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
  12470. (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
  12471. (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
  12472. #define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \
  12473. ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
  12474. (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
  12475. (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
  12476. #define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \
  12477. ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
  12478. (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
  12479. (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
  12480. #endif
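/* Illustrative usage sketch, not part of the upstream header: a minimal call
   pattern for the fixup-immediate family above, assuming an AVX-512F target.
   Each element of the __m512i table packs eight 4-bit response tokens, one
   per input class (QNaN, SNaN, zero, +1, -Inf, +Inf, negative, positive),
   and the imm8 enables optional fault reporting (0 reports none).  As I read
   the SDM token table, response 0x8 means "produce +0.0", so the
   hypothetical helper below rewrites +Inf results to +0.0 and passes every
   other class through unchanged.

   #include <immintrin.h>

   static __m512d
   fixup_pos_inf_to_zero (__m512d result)        // hypothetical helper name
   {
     // Nibble 5 of each table element handles the +Inf class; 0x8 -> +0.0.
     const __m512i table = _mm512_set1_epi64 (0x8LL << (4 * 5));
     return _mm512_fixupimm_pd (result, result, table, 0);
   }
*/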
  12481. #ifdef __x86_64__
  12482. extern __inline unsigned long long
  12483. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12484. _mm_cvtss_u64 (__m128 __A)
  12485. {
  12486. return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
  12487. __A,
  12488. _MM_FROUND_CUR_DIRECTION);
  12489. }
  12490. extern __inline unsigned long long
  12491. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12492. _mm_cvttss_u64 (__m128 __A)
  12493. {
  12494. return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
  12495. __A,
  12496. _MM_FROUND_CUR_DIRECTION);
  12497. }
  12498. extern __inline long long
  12499. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12500. _mm_cvttss_i64 (__m128 __A)
  12501. {
  12502. return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
  12503. _MM_FROUND_CUR_DIRECTION);
  12504. }
  12505. #endif /* __x86_64__ */
  12506. extern __inline unsigned
  12507. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12508. _mm_cvtss_u32 (__m128 __A)
  12509. {
  12510. return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
  12511. _MM_FROUND_CUR_DIRECTION);
  12512. }
  12513. extern __inline unsigned
  12514. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12515. _mm_cvttss_u32 (__m128 __A)
  12516. {
  12517. return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
  12518. _MM_FROUND_CUR_DIRECTION);
  12519. }
  12520. extern __inline int
  12521. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12522. _mm_cvttss_i32 (__m128 __A)
  12523. {
  12524. return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
  12525. _MM_FROUND_CUR_DIRECTION);
  12526. }
  12527. #ifdef __x86_64__
  12528. extern __inline unsigned long long
  12529. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12530. _mm_cvtsd_u64 (__m128d __A)
  12531. {
  12532. return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
  12533. __A,
  12534. _MM_FROUND_CUR_DIRECTION);
  12535. }
  12536. extern __inline unsigned long long
  12537. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12538. _mm_cvttsd_u64 (__m128d __A)
  12539. {
  12540. return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
  12541. __A,
  12542. _MM_FROUND_CUR_DIRECTION);
  12543. }
  12544. extern __inline long long
  12545. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12546. _mm_cvttsd_i64 (__m128d __A)
  12547. {
  12548. return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
  12549. _MM_FROUND_CUR_DIRECTION);
  12550. }
  12551. #endif /* __x86_64__ */
  12552. extern __inline unsigned
  12553. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12554. _mm_cvtsd_u32 (__m128d __A)
  12555. {
  12556. return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
  12557. _MM_FROUND_CUR_DIRECTION);
  12558. }
  12559. extern __inline unsigned
  12560. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12561. _mm_cvttsd_u32 (__m128d __A)
  12562. {
  12563. return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
  12564. _MM_FROUND_CUR_DIRECTION);
  12565. }
  12566. extern __inline int
  12567. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12568. _mm_cvttsd_i32 (__m128d __A)
  12569. {
  12570. return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
  12571. _MM_FROUND_CUR_DIRECTION);
  12572. }
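/* Illustrative usage sketch, not part of the upstream header: the
   _mm_cvt*ss/sd_u32|u64 intrinsics above honour the current MXCSR rounding
   mode, while the _mm_cvtt* ("truncating") forms always chop toward zero.
   Assuming the default round-to-nearest-even mode:

   #include <immintrin.h>
   #include <stdio.h>

   int
   main (void)
   {
     unsigned r = _mm_cvtss_u32 (_mm_set_ss (2.7f));   // rounded  -> 3
     unsigned t = _mm_cvttss_u32 (_mm_set_ss (2.7f));  // truncated -> 2
     printf ("%u %u\n", r, t);
     return 0;
   }
*/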
  12573. extern __inline __m512d
  12574. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12575. _mm512_cvtps_pd (__m256 __A)
  12576. {
  12577. return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
  12578. (__v8df)
  12579. _mm512_undefined_pd (),
  12580. (__mmask8) -1,
  12581. _MM_FROUND_CUR_DIRECTION);
  12582. }
  12583. extern __inline __m512d
  12584. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12585. _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
  12586. {
  12587. return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
  12588. (__v8df) __W,
  12589. (__mmask8) __U,
  12590. _MM_FROUND_CUR_DIRECTION);
  12591. }
  12592. extern __inline __m512d
  12593. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12594. _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
  12595. {
  12596. return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
  12597. (__v8df)
  12598. _mm512_setzero_pd (),
  12599. (__mmask8) __U,
  12600. _MM_FROUND_CUR_DIRECTION);
  12601. }
  12602. extern __inline __m512
  12603. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12604. _mm512_cvtph_ps (__m256i __A)
  12605. {
  12606. return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
  12607. (__v16sf)
  12608. _mm512_undefined_ps (),
  12609. (__mmask16) -1,
  12610. _MM_FROUND_CUR_DIRECTION);
  12611. }
  12612. extern __inline __m512
  12613. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12614. _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
  12615. {
  12616. return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
  12617. (__v16sf) __W,
  12618. (__mmask16) __U,
  12619. _MM_FROUND_CUR_DIRECTION);
  12620. }
  12621. extern __inline __m512
  12622. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12623. _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
  12624. {
  12625. return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
  12626. (__v16sf)
  12627. _mm512_setzero_ps (),
  12628. (__mmask16) __U,
  12629. _MM_FROUND_CUR_DIRECTION);
  12630. }
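/* A minimal usage sketch (not from the upstream header): _mm512_cvtph_ps
   widens sixteen IEEE-754 binary16 values, stored as the sixteen 16-bit
   lanes of a __m256i, into a full __m512 of floats; the mask/maskz forms
   fill unselected lanes from __W or with zero.  The helper name load_half16
   is hypothetical.

   #include <immintrin.h>
   #include <stdint.h>

   static __m512
   load_half16 (const uint16_t *src)   // src points at 16 binary16 values
   {
     __m256i h = _mm256_loadu_si256 ((const __m256i *) src);
     return _mm512_cvtph_ps (h);
   }
*/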
  12631. extern __inline __m256
  12632. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12633. _mm512_cvtpd_ps (__m512d __A)
  12634. {
  12635. return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
  12636. (__v8sf)
  12637. _mm256_undefined_ps (),
  12638. (__mmask8) -1,
  12639. _MM_FROUND_CUR_DIRECTION);
  12640. }
  12641. extern __inline __m256
  12642. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12643. _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
  12644. {
  12645. return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
  12646. (__v8sf) __W,
  12647. (__mmask8) __U,
  12648. _MM_FROUND_CUR_DIRECTION);
  12649. }
  12650. extern __inline __m256
  12651. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12652. _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
  12653. {
  12654. return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
  12655. (__v8sf)
  12656. _mm256_setzero_ps (),
  12657. (__mmask8) __U,
  12658. _MM_FROUND_CUR_DIRECTION);
  12659. }
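/* A minimal usage sketch (not from the upstream header): the float<->double
   conversions above pair naturally for doing intermediate work in double
   precision.  Widening eight floats is exact; narrowing back rounds once,
   according to MXCSR.  square_in_double is a hypothetical helper.

   #include <immintrin.h>

   static __m256
   square_in_double (__m256 x)
   {
     __m512d wide = _mm512_cvtps_pd (x);   // exact widening
     wide = _mm512_mul_pd (wide, wide);    // work in double precision
     return _mm512_cvtpd_ps (wide);        // rounds once, per MXCSR
   }
*/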
  12660. #ifdef __OPTIMIZE__
  12661. extern __inline __m512
  12662. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12663. _mm512_getexp_ps (__m512 __A)
  12664. {
  12665. return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
  12666. (__v16sf)
  12667. _mm512_undefined_ps (),
  12668. (__mmask16) -1,
  12669. _MM_FROUND_CUR_DIRECTION);
  12670. }
  12671. extern __inline __m512
  12672. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12673. _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
  12674. {
  12675. return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
  12676. (__v16sf) __W,
  12677. (__mmask16) __U,
  12678. _MM_FROUND_CUR_DIRECTION);
  12679. }
  12680. extern __inline __m512
  12681. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12682. _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
  12683. {
  12684. return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
  12685. (__v16sf)
  12686. _mm512_setzero_ps (),
  12687. (__mmask16) __U,
  12688. _MM_FROUND_CUR_DIRECTION);
  12689. }
  12690. extern __inline __m512d
  12691. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12692. _mm512_getexp_pd (__m512d __A)
  12693. {
  12694. return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
  12695. (__v8df)
  12696. _mm512_undefined_pd (),
  12697. (__mmask8) -1,
  12698. _MM_FROUND_CUR_DIRECTION);
  12699. }
  12700. extern __inline __m512d
  12701. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12702. _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
  12703. {
  12704. return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
  12705. (__v8df) __W,
  12706. (__mmask8) __U,
  12707. _MM_FROUND_CUR_DIRECTION);
  12708. }
  12709. extern __inline __m512d
  12710. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12711. _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
  12712. {
  12713. return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
  12714. (__v8df)
  12715. _mm512_setzero_pd (),
  12716. (__mmask8) __U,
  12717. _MM_FROUND_CUR_DIRECTION);
  12718. }
  12719. extern __inline __m128
  12720. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12721. _mm_getexp_ss (__m128 __A, __m128 __B)
  12722. {
  12723. return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
  12724. (__v4sf) __B,
  12725. _MM_FROUND_CUR_DIRECTION);
  12726. }
  12727. extern __inline __m128
  12728. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12729. _mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  12730. {
  12731. return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
  12732. (__v4sf) __B,
  12733. (__v4sf) __W,
  12734. (__mmask8) __U,
  12735. _MM_FROUND_CUR_DIRECTION);
  12736. }
  12737. extern __inline __m128
  12738. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12739. _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
  12740. {
  12741. return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
  12742. (__v4sf) __B,
  12743. (__v4sf)
  12744. _mm_setzero_ps (),
  12745. (__mmask8) __U,
  12746. _MM_FROUND_CUR_DIRECTION);
  12747. }
  12748. extern __inline __m128d
  12749. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12750. _mm_getexp_sd (__m128d __A, __m128d __B)
  12751. {
  12752. return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
  12753. (__v2df) __B,
  12754. _MM_FROUND_CUR_DIRECTION);
  12755. }
  12756. extern __inline __m128d
  12757. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12758. _mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
  12759. {
  12760. return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
  12761. (__v2df) __B,
  12762. (__v2df) __W,
  12763. (__mmask8) __U,
  12764. _MM_FROUND_CUR_DIRECTION);
  12765. }
  12766. extern __inline __m128d
  12767. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12768. _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
  12769. {
  12770. return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
  12771. (__v2df) __B,
  12772. (__v2df)
  12773. _mm_setzero_pd (),
  12774. (__mmask8) __U,
  12775. _MM_FROUND_CUR_DIRECTION);
  12776. }
  12777. extern __inline __m512d
  12778. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12779. _mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
  12780. _MM_MANTISSA_SIGN_ENUM __C)
  12781. {
  12782. return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
  12783. (__C << 2) | __B,
  12784. _mm512_undefined_pd (),
  12785. (__mmask8) -1,
  12786. _MM_FROUND_CUR_DIRECTION);
  12787. }
  12788. extern __inline __m512d
  12789. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12790. _mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
  12791. _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
  12792. {
  12793. return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
  12794. (__C << 2) | __B,
  12795. (__v8df) __W, __U,
  12796. _MM_FROUND_CUR_DIRECTION);
  12797. }
  12798. extern __inline __m512d
  12799. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12800. _mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
  12801. _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
  12802. {
  12803. return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
  12804. (__C << 2) | __B,
  12805. (__v8df)
  12806. _mm512_setzero_pd (),
  12807. __U,
  12808. _MM_FROUND_CUR_DIRECTION);
  12809. }
  12810. extern __inline __m512
  12811. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12812. _mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
  12813. _MM_MANTISSA_SIGN_ENUM __C)
  12814. {
  12815. return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
  12816. (__C << 2) | __B,
  12817. _mm512_undefined_ps (),
  12818. (__mmask16) -1,
  12819. _MM_FROUND_CUR_DIRECTION);
  12820. }
  12821. extern __inline __m512
  12822. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12823. _mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
  12824. _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
  12825. {
  12826. return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
  12827. (__C << 2) | __B,
  12828. (__v16sf) __W, __U,
  12829. _MM_FROUND_CUR_DIRECTION);
  12830. }
  12831. extern __inline __m512
  12832. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12833. _mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
  12834. _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
  12835. {
  12836. return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
  12837. (__C << 2) | __B,
  12838. (__v16sf)
  12839. _mm512_setzero_ps (),
  12840. __U,
  12841. _MM_FROUND_CUR_DIRECTION);
  12842. }
  12843. extern __inline __m128d
  12844. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12845. _mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
  12846. _MM_MANTISSA_SIGN_ENUM __D)
  12847. {
  12848. return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
  12849. (__v2df) __B,
  12850. (__D << 2) | __C,
  12851. _MM_FROUND_CUR_DIRECTION);
  12852. }
  12853. extern __inline __m128d
  12854. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12855. _mm_mask_getmant_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
  12856. _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
  12857. {
  12858. return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
  12859. (__v2df) __B,
  12860. (__D << 2) | __C,
  12861. (__v2df) __W,
  12862. __U,
  12863. _MM_FROUND_CUR_DIRECTION);
  12864. }
  12865. extern __inline __m128d
  12866. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12867. _mm_maskz_getmant_sd (__mmask8 __U, __m128d __A, __m128d __B,
  12868. _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
  12869. {
  12870. return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
  12871. (__v2df) __B,
  12872. (__D << 2) | __C,
  12873. (__v2df)
  12874. _mm_setzero_pd(),
  12875. __U,
  12876. _MM_FROUND_CUR_DIRECTION);
  12877. }
  12878. extern __inline __m128
  12879. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12880. _mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
  12881. _MM_MANTISSA_SIGN_ENUM __D)
  12882. {
  12883. return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
  12884. (__v4sf) __B,
  12885. (__D << 2) | __C,
  12886. _MM_FROUND_CUR_DIRECTION);
  12887. }
  12888. extern __inline __m128
  12889. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12890. _mm_mask_getmant_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
  12891. _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
  12892. {
  12893. return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
  12894. (__v4sf) __B,
  12895. (__D << 2) | __C,
  12896. (__v4sf) __W,
  12897. __U,
  12898. _MM_FROUND_CUR_DIRECTION);
  12899. }
  12900. extern __inline __m128
  12901. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12902. _mm_maskz_getmant_ss (__mmask8 __U, __m128 __A, __m128 __B,
  12903. _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
  12904. {
  12905. return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
  12906. (__v4sf) __B,
  12907. (__D << 2) | __C,
  12908. (__v4sf)
  12909. _mm_setzero_ps(),
  12910. __U,
  12911. _MM_FROUND_CUR_DIRECTION);
  12912. }
  12913. #else
  12914. #define _mm512_getmant_pd(X, B, C) \
  12915. ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
  12916. (int)(((C)<<2) | (B)), \
  12917. (__v8df)_mm512_undefined_pd(), \
  12918. (__mmask8)-1,\
  12919. _MM_FROUND_CUR_DIRECTION))
  12920. #define _mm512_mask_getmant_pd(W, U, X, B, C) \
  12921. ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
  12922. (int)(((C)<<2) | (B)), \
  12923. (__v8df)(__m512d)(W), \
  12924. (__mmask8)(U),\
  12925. _MM_FROUND_CUR_DIRECTION))
  12926. #define _mm512_maskz_getmant_pd(U, X, B, C) \
  12927. ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
  12928. (int)(((C)<<2) | (B)), \
  12929. (__v8df)_mm512_setzero_pd(), \
  12930. (__mmask8)(U),\
  12931. _MM_FROUND_CUR_DIRECTION))
  12932. #define _mm512_getmant_ps(X, B, C) \
  12933. ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
  12934. (int)(((C)<<2) | (B)), \
  12935. (__v16sf)_mm512_undefined_ps(), \
  12936. (__mmask16)-1,\
  12937. _MM_FROUND_CUR_DIRECTION))
  12938. #define _mm512_mask_getmant_ps(W, U, X, B, C) \
  12939. ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
  12940. (int)(((C)<<2) | (B)), \
  12941. (__v16sf)(__m512)(W), \
  12942. (__mmask16)(U),\
  12943. _MM_FROUND_CUR_DIRECTION))
  12944. #define _mm512_maskz_getmant_ps(U, X, B, C) \
  12945. ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
  12946. (int)(((C)<<2) | (B)), \
  12947. (__v16sf)_mm512_setzero_ps(), \
  12948. (__mmask16)(U),\
  12949. _MM_FROUND_CUR_DIRECTION))
  12950. #define _mm_getmant_sd(X, Y, C, D) \
  12951. ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \
  12952. (__v2df)(__m128d)(Y), \
  12953. (int)(((D)<<2) | (C)), \
  12954. _MM_FROUND_CUR_DIRECTION))
  12955. #define _mm_mask_getmant_sd(W, U, X, Y, C, D) \
  12956. ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \
  12957. (__v2df)(__m128d)(Y), \
  12958. (int)(((D)<<2) | (C)), \
  12959. (__v2df)(__m128d)(W), \
  12960. (__mmask8)(U),\
  12961. _MM_FROUND_CUR_DIRECTION))
  12962. #define _mm_maskz_getmant_sd(U, X, Y, C, D) \
  12963. ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \
  12964. (__v2df)(__m128d)(Y), \
  12965. (int)(((D)<<2) | (C)), \
  12966. (__v2df)_mm_setzero_pd(), \
  12967. (__mmask8)(U),\
  12968. _MM_FROUND_CUR_DIRECTION))
  12969. #define _mm_getmant_ss(X, Y, C, D) \
  12970. ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \
  12971. (__v4sf)(__m128)(Y), \
  12972. (int)(((D)<<2) | (C)), \
  12973. _MM_FROUND_CUR_DIRECTION))
  12974. #define _mm_mask_getmant_ss(W, U, X, Y, C, D) \
  12975. ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \
  12976. (__v4sf)(__m128)(Y), \
  12977. (int)(((D)<<2) | (C)), \
  12978. (__v4sf)(__m128)(W), \
  12979. (__mmask8)(U),\
  12980. _MM_FROUND_CUR_DIRECTION))
  12981. #define _mm_maskz_getmant_ss(U, X, Y, C, D) \
  12982. ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \
  12983. (__v4sf)(__m128)(Y), \
  12984. (int)(((D)<<2) | (C)), \
  12985. (__v4sf)_mm_setzero_ps(), \
  12986. (__mmask8)(U),\
  12987. _MM_FROUND_CUR_DIRECTION))
  12988. #define _mm_getexp_ss(A, B) \
  12989. ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
  12990. _MM_FROUND_CUR_DIRECTION))
  12991. #define _mm_mask_getexp_ss(W, U, A, B) \
  12992. (__m128)__builtin_ia32_getexpss_mask_round(A, B, W, U,\
  12993. _MM_FROUND_CUR_DIRECTION)
  12994. #define _mm_maskz_getexp_ss(U, A, B) \
  12995. (__m128)__builtin_ia32_getexpss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U,\
  12996. _MM_FROUND_CUR_DIRECTION)
  12997. #define _mm_getexp_sd(A, B) \
  12998. ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B),\
  12999. _MM_FROUND_CUR_DIRECTION))
  13000. #define _mm_mask_getexp_sd(W, U, A, B) \
  13001. (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, W, U,\
  13002. _MM_FROUND_CUR_DIRECTION)
  13003. #define _mm_maskz_getexp_sd(U, A, B) \
  13004. (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U,\
  13005. _MM_FROUND_CUR_DIRECTION)
  13006. #define _mm512_getexp_ps(A) \
  13007. ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
  13008. (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))
  13009. #define _mm512_mask_getexp_ps(W, U, A) \
  13010. ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
  13011. (__v16sf)(__m512)(W), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
  13012. #define _mm512_maskz_getexp_ps(U, A) \
  13013. ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
  13014. (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
  13015. #define _mm512_getexp_pd(A) \
  13016. ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
  13017. (__v8df)_mm512_undefined_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
  13018. #define _mm512_mask_getexp_pd(W, U, A) \
  13019. ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
  13020. (__v8df)(__m512d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
  13021. #define _mm512_maskz_getexp_pd(U, A) \
  13022. ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
  13023. (__v8df)_mm512_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
  13024. #endif
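/* Illustrative usage sketch, not part of the upstream header: getmant and
   getexp together give a vectorised frexp-style decomposition.  With
   _MM_MANT_NORM_1_2 the mantissa lands in [1,2) (sign taken from the source
   via _MM_MANT_SIGN_src) and getexp returns floor(log2|x|) as a double, so
   for normal non-zero inputs x == mant * 2^exp.  frexp_like is a
   hypothetical helper name.

   #include <immintrin.h>

   static void
   frexp_like (__m512d x, __m512d *mant, __m512d *exp2)
   {
     *mant = _mm512_getmant_pd (x, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
     *exp2 = _mm512_getexp_pd (x);    // e.g. 48.0 -> mant 1.5, exp 5.0
   }
*/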
  13025. #ifdef __OPTIMIZE__
  13026. extern __inline __m512
  13027. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13028. _mm512_roundscale_ps (__m512 __A, const int __imm)
  13029. {
  13030. return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
  13031. (__v16sf)
  13032. _mm512_undefined_ps (),
  13033. -1,
  13034. _MM_FROUND_CUR_DIRECTION);
  13035. }
  13036. extern __inline __m512
  13037. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13038. _mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C,
  13039. const int __imm)
  13040. {
  13041. return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
  13042. (__v16sf) __A,
  13043. (__mmask16) __B,
  13044. _MM_FROUND_CUR_DIRECTION);
  13045. }
  13046. extern __inline __m512
  13047. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13048. _mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
  13049. {
  13050. return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
  13051. __imm,
  13052. (__v16sf)
  13053. _mm512_setzero_ps (),
  13054. (__mmask16) __A,
  13055. _MM_FROUND_CUR_DIRECTION);
  13056. }
  13057. extern __inline __m512d
  13058. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13059. _mm512_roundscale_pd (__m512d __A, const int __imm)
  13060. {
  13061. return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
  13062. (__v8df)
  13063. _mm512_undefined_pd (),
  13064. -1,
  13065. _MM_FROUND_CUR_DIRECTION);
  13066. }
  13067. extern __inline __m512d
  13068. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13069. _mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
  13070. const int __imm)
  13071. {
  13072. return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
  13073. (__v8df) __A,
  13074. (__mmask8) __B,
  13075. _MM_FROUND_CUR_DIRECTION);
  13076. }
  13077. extern __inline __m512d
  13078. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13079. _mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
  13080. {
  13081. return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
  13082. __imm,
  13083. (__v8df)
  13084. _mm512_setzero_pd (),
  13085. (__mmask8) __A,
  13086. _MM_FROUND_CUR_DIRECTION);
  13087. }
  13088. extern __inline __m128
  13089. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13090. _mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
  13091. {
  13092. return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
  13093. (__v4sf) __B, __imm,
  13094. _MM_FROUND_CUR_DIRECTION);
  13095. }
  13096. extern __inline __m128d
  13097. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13098. _mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
  13099. {
  13100. return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
  13101. (__v2df) __B, __imm,
  13102. _MM_FROUND_CUR_DIRECTION);
  13103. }
  13104. #else
  13105. #define _mm512_roundscale_ps(A, B) \
  13106. ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
  13107. (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
  13108. #define _mm512_mask_roundscale_ps(A, B, C, D) \
  13109. ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \
  13110. (int)(D), \
  13111. (__v16sf)(__m512)(A), \
  13112. (__mmask16)(B), _MM_FROUND_CUR_DIRECTION))
  13113. #define _mm512_maskz_roundscale_ps(A, B, C) \
  13114. ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \
  13115. (int)(C), \
  13116. (__v16sf)_mm512_setzero_ps(),\
  13117. (__mmask16)(A), _MM_FROUND_CUR_DIRECTION))
  13118. #define _mm512_roundscale_pd(A, B) \
  13119. ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
  13120. (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
  13121. #define _mm512_mask_roundscale_pd(A, B, C, D) \
  13122. ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \
  13123. (int)(D), \
  13124. (__v8df)(__m512d)(A), \
  13125. (__mmask8)(B), _MM_FROUND_CUR_DIRECTION))
  13126. #define _mm512_maskz_roundscale_pd(A, B, C) \
  13127. ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \
  13128. (int)(C), \
  13129. (__v8df)_mm512_setzero_pd(),\
  13130. (__mmask8)(A), _MM_FROUND_CUR_DIRECTION))
  13131. #define _mm_roundscale_ss(A, B, C) \
  13132. ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A), \
  13133. (__v4sf)(__m128)(B), (int)(C), _MM_FROUND_CUR_DIRECTION))
  13134. #define _mm_roundscale_sd(A, B, C) \
  13135. ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A), \
  13136. (__v2df)(__m128d)(B), (int)(C), _MM_FROUND_CUR_DIRECTION))
  13137. #endif
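/* Illustrative usage sketch, not part of the upstream header: for the
   roundscale intrinsics the upper nibble of the immediate is the scale M
   (round to a multiple of 2^-M) and the low bits select the rounding mode,
   so the call below rounds every element down to a multiple of 0.5
   (1.7 -> 1.5, -0.3 -> -0.5).  floor_to_half is a hypothetical helper.

   #include <immintrin.h>

   static __m512
   floor_to_half (__m512 x)
   {
     return _mm512_roundscale_ps (x, (1 << 4) | _MM_FROUND_TO_NEG_INF);
   }
*/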
  13138. #ifdef __OPTIMIZE__
  13139. extern __inline __mmask8
  13140. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13141. _mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
  13142. {
  13143. return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
  13144. (__v8df) __Y, __P,
  13145. (__mmask8) -1,
  13146. _MM_FROUND_CUR_DIRECTION);
  13147. }
  13148. extern __inline __mmask16
  13149. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13150. _mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P)
  13151. {
  13152. return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
  13153. (__v16sf) __Y, __P,
  13154. (__mmask16) -1,
  13155. _MM_FROUND_CUR_DIRECTION);
  13156. }
  13157. extern __inline __mmask16
  13158. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13159. _mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P)
  13160. {
  13161. return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
  13162. (__v16sf) __Y, __P,
  13163. (__mmask16) __U,
  13164. _MM_FROUND_CUR_DIRECTION);
  13165. }
  13166. extern __inline __mmask8
  13167. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13168. _mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
  13169. {
  13170. return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
  13171. (__v8df) __Y, __P,
  13172. (__mmask8) __U,
  13173. _MM_FROUND_CUR_DIRECTION);
  13174. }
  13175. extern __inline __mmask8
  13176. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13177. _mm512_cmpeq_pd_mask (__m512d __X, __m512d __Y)
  13178. {
  13179. return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
  13180. (__v8df) __Y, _CMP_EQ_OQ,
  13181. (__mmask8) -1,
  13182. _MM_FROUND_CUR_DIRECTION);
  13183. }
  13184. extern __inline __mmask8
  13185. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13186. _mm512_mask_cmpeq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
  13187. {
  13188. return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
  13189. (__v8df) __Y, _CMP_EQ_OQ,
  13190. (__mmask8) __U,
  13191. _MM_FROUND_CUR_DIRECTION);
  13192. }
  13193. extern __inline __mmask8
  13194. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13195. _mm512_cmplt_pd_mask (__m512d __X, __m512d __Y)
  13196. {
  13197. return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
  13198. (__v8df) __Y, _CMP_LT_OS,
  13199. (__mmask8) -1,
  13200. _MM_FROUND_CUR_DIRECTION);
  13201. }
  13202. extern __inline __mmask8
  13203. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13204. _mm512_mask_cmplt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
  13205. {
  13206. return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
  13207. (__v8df) __Y, _CMP_LT_OS,
  13208. (__mmask8) __U,
  13209. _MM_FROUND_CUR_DIRECTION);
  13210. }
  13211. extern __inline __mmask8
  13212. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13213. _mm512_cmple_pd_mask (__m512d __X, __m512d __Y)
  13214. {
  13215. return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
  13216. (__v8df) __Y, _CMP_LE_OS,
  13217. (__mmask8) -1,
  13218. _MM_FROUND_CUR_DIRECTION);
  13219. }
  13220. extern __inline __mmask8
  13221. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13222. _mm512_mask_cmple_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
  13223. {
  13224. return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
  13225. (__v8df) __Y, _CMP_LE_OS,
  13226. (__mmask8) __U,
  13227. _MM_FROUND_CUR_DIRECTION);
  13228. }
  13229. extern __inline __mmask8
  13230. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13231. _mm512_cmpunord_pd_mask (__m512d __X, __m512d __Y)
  13232. {
  13233. return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
  13234. (__v8df) __Y, _CMP_UNORD_Q,
  13235. (__mmask8) -1,
  13236. _MM_FROUND_CUR_DIRECTION);
  13237. }
  13238. extern __inline __mmask8
  13239. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13240. _mm512_mask_cmpunord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
  13241. {
  13242. return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
  13243. (__v8df) __Y, _CMP_UNORD_Q,
  13244. (__mmask8) __U,
  13245. _MM_FROUND_CUR_DIRECTION);
  13246. }
  13247. extern __inline __mmask8
  13248. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13249. _mm512_cmpneq_pd_mask (__m512d __X, __m512d __Y)
  13250. {
  13251. return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
  13252. (__v8df) __Y, _CMP_NEQ_UQ,
  13253. (__mmask8) -1,
  13254. _MM_FROUND_CUR_DIRECTION);
  13255. }
  13256. extern __inline __mmask8
  13257. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13258. _mm512_mask_cmpneq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
  13259. {
  13260. return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
  13261. (__v8df) __Y, _CMP_NEQ_UQ,
  13262. (__mmask8) __U,
  13263. _MM_FROUND_CUR_DIRECTION);
  13264. }
  13265. extern __inline __mmask8
  13266. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13267. _mm512_cmpnlt_pd_mask (__m512d __X, __m512d __Y)
  13268. {
  13269. return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
  13270. (__v8df) __Y, _CMP_NLT_US,
  13271. (__mmask8) -1,
  13272. _MM_FROUND_CUR_DIRECTION);
  13273. }
  13274. extern __inline __mmask8
  13275. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13276. _mm512_mask_cmpnlt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
  13277. {
  13278. return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
  13279. (__v8df) __Y, _CMP_NLT_US,
  13280. (__mmask8) __U,
  13281. _MM_FROUND_CUR_DIRECTION);
  13282. }
  13283. extern __inline __mmask8
  13284. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13285. _mm512_cmpnle_pd_mask (__m512d __X, __m512d __Y)
  13286. {
  13287. return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
  13288. (__v8df) __Y, _CMP_NLE_US,
  13289. (__mmask8) -1,
  13290. _MM_FROUND_CUR_DIRECTION);
  13291. }
  13292. extern __inline __mmask8
  13293. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13294. _mm512_mask_cmpnle_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
  13295. {
  13296. return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
  13297. (__v8df) __Y, _CMP_NLE_US,
  13298. (__mmask8) __U,
  13299. _MM_FROUND_CUR_DIRECTION);
  13300. }
  13301. extern __inline __mmask8
  13302. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13303. _mm512_cmpord_pd_mask (__m512d __X, __m512d __Y)
  13304. {
  13305. return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
  13306. (__v8df) __Y, _CMP_ORD_Q,
  13307. (__mmask8) -1,
  13308. _MM_FROUND_CUR_DIRECTION);
  13309. }
  13310. extern __inline __mmask8
  13311. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13312. _mm512_mask_cmpord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
  13313. {
  13314. return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
  13315. (__v8df) __Y, _CMP_ORD_Q,
  13316. (__mmask8) __U,
  13317. _MM_FROUND_CUR_DIRECTION);
  13318. }
  13319. extern __inline __mmask16
  13320. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13321. _mm512_cmpeq_ps_mask (__m512 __X, __m512 __Y)
  13322. {
  13323. return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
  13324. (__v16sf) __Y, _CMP_EQ_OQ,
  13325. (__mmask16) -1,
  13326. _MM_FROUND_CUR_DIRECTION);
  13327. }
  13328. extern __inline __mmask16
  13329. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13330. _mm512_mask_cmpeq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
  13331. {
  13332. return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
  13333. (__v16sf) __Y, _CMP_EQ_OQ,
  13334. (__mmask16) __U,
  13335. _MM_FROUND_CUR_DIRECTION);
  13336. }
  13337. extern __inline __mmask16
  13338. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13339. _mm512_cmplt_ps_mask (__m512 __X, __m512 __Y)
  13340. {
  13341. return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
  13342. (__v16sf) __Y, _CMP_LT_OS,
  13343. (__mmask16) -1,
  13344. _MM_FROUND_CUR_DIRECTION);
  13345. }
  13346. extern __inline __mmask16
  13347. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13348. _mm512_mask_cmplt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
  13349. {
  13350. return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
  13351. (__v16sf) __Y, _CMP_LT_OS,
  13352. (__mmask16) __U,
  13353. _MM_FROUND_CUR_DIRECTION);
  13354. }
  13355. extern __inline __mmask16
  13356. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13357. _mm512_cmple_ps_mask (__m512 __X, __m512 __Y)
  13358. {
  13359. return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
  13360. (__v16sf) __Y, _CMP_LE_OS,
  13361. (__mmask16) -1,
  13362. _MM_FROUND_CUR_DIRECTION);
  13363. }
  13364. extern __inline __mmask16
  13365. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13366. _mm512_mask_cmple_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
  13367. {
  13368. return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
  13369. (__v16sf) __Y, _CMP_LE_OS,
  13370. (__mmask16) __U,
  13371. _MM_FROUND_CUR_DIRECTION);
  13372. }
  13373. extern __inline __mmask16
  13374. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13375. _mm512_cmpunord_ps_mask (__m512 __X, __m512 __Y)
  13376. {
  13377. return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
  13378. (__v16sf) __Y, _CMP_UNORD_Q,
  13379. (__mmask16) -1,
  13380. _MM_FROUND_CUR_DIRECTION);
  13381. }
  13382. extern __inline __mmask16
  13383. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13384. _mm512_mask_cmpunord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
  13385. {
  13386. return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
  13387. (__v16sf) __Y, _CMP_UNORD_Q,
  13388. (__mmask16) __U,
  13389. _MM_FROUND_CUR_DIRECTION);
  13390. }
  13391. extern __inline __mmask16
  13392. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13393. _mm512_cmpneq_ps_mask (__m512 __X, __m512 __Y)
  13394. {
  13395. return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
  13396. (__v16sf) __Y, _CMP_NEQ_UQ,
  13397. (__mmask16) -1,
  13398. _MM_FROUND_CUR_DIRECTION);
  13399. }
  13400. extern __inline __mmask16
  13401. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13402. _mm512_mask_cmpneq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
  13403. {
  13404. return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
  13405. (__v16sf) __Y, _CMP_NEQ_UQ,
  13406. (__mmask16) __U,
  13407. _MM_FROUND_CUR_DIRECTION);
  13408. }
  13409. extern __inline __mmask16
  13410. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13411. _mm512_cmpnlt_ps_mask (__m512 __X, __m512 __Y)
  13412. {
  13413. return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
  13414. (__v16sf) __Y, _CMP_NLT_US,
  13415. (__mmask16) -1,
  13416. _MM_FROUND_CUR_DIRECTION);
  13417. }
  13418. extern __inline __mmask16
  13419. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13420. _mm512_mask_cmpnlt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
  13421. {
  13422. return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
  13423. (__v16sf) __Y, _CMP_NLT_US,
  13424. (__mmask16) __U,
  13425. _MM_FROUND_CUR_DIRECTION);
  13426. }
  13427. extern __inline __mmask16
  13428. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13429. _mm512_cmpnle_ps_mask (__m512 __X, __m512 __Y)
  13430. {
  13431. return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
  13432. (__v16sf) __Y, _CMP_NLE_US,
  13433. (__mmask16) -1,
  13434. _MM_FROUND_CUR_DIRECTION);
  13435. }
  13436. extern __inline __mmask16
  13437. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13438. _mm512_mask_cmpnle_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
  13439. {
  13440. return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
  13441. (__v16sf) __Y, _CMP_NLE_US,
  13442. (__mmask16) __U,
  13443. _MM_FROUND_CUR_DIRECTION);
  13444. }
  13445. extern __inline __mmask16
  13446. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13447. _mm512_cmpord_ps_mask (__m512 __X, __m512 __Y)
  13448. {
  13449. return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
  13450. (__v16sf) __Y, _CMP_ORD_Q,
  13451. (__mmask16) -1,
  13452. _MM_FROUND_CUR_DIRECTION);
  13453. }
  13454. extern __inline __mmask16
  13455. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13456. _mm512_mask_cmpord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
  13457. {
  13458. return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
  13459. (__v16sf) __Y, _CMP_ORD_Q,
  13460. (__mmask16) __U,
  13461. _MM_FROUND_CUR_DIRECTION);
  13462. }
  13463. extern __inline __mmask8
  13464. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13465. _mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
  13466. {
  13467. return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
  13468. (__v2df) __Y, __P,
  13469. (__mmask8) -1,
  13470. _MM_FROUND_CUR_DIRECTION);
  13471. }
  13472. extern __inline __mmask8
  13473. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13474. _mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
  13475. {
  13476. return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
  13477. (__v2df) __Y, __P,
  13478. (__mmask8) __M,
  13479. _MM_FROUND_CUR_DIRECTION);
  13480. }
  13481. extern __inline __mmask8
  13482. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13483. _mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
  13484. {
  13485. return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
  13486. (__v4sf) __Y, __P,
  13487. (__mmask8) -1,
  13488. _MM_FROUND_CUR_DIRECTION);
  13489. }
  13490. extern __inline __mmask8
  13491. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13492. _mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
  13493. {
  13494. return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
  13495. (__v4sf) __Y, __P,
  13496. (__mmask8) __M,
  13497. _MM_FROUND_CUR_DIRECTION);
  13498. }
  13499. #else
  13500. #define _mm512_cmp_pd_mask(X, Y, P) \
  13501. ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
  13502. (__v8df)(__m512d)(Y), (int)(P),\
  13503. (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
  13504. #define _mm512_cmp_ps_mask(X, Y, P) \
  13505. ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
  13506. (__v16sf)(__m512)(Y), (int)(P),\
  13507. (__mmask16)-1,_MM_FROUND_CUR_DIRECTION))
  13508. #define _mm512_mask_cmp_pd_mask(M, X, Y, P) \
  13509. ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
  13510. (__v8df)(__m512d)(Y), (int)(P),\
  13511. (__mmask8)M, _MM_FROUND_CUR_DIRECTION))
  13512. #define _mm512_mask_cmp_ps_mask(M, X, Y, P) \
  13513. ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
  13514. (__v16sf)(__m512)(Y), (int)(P),\
  13515. (__mmask16)M,_MM_FROUND_CUR_DIRECTION))
  13516. #define _mm_cmp_sd_mask(X, Y, P) \
  13517. ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
  13518. (__v2df)(__m128d)(Y), (int)(P),\
  13519. (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
  13520. #define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  13521. ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
  13522. (__v2df)(__m128d)(Y), (int)(P),\
  13523. M,_MM_FROUND_CUR_DIRECTION))
  13524. #define _mm_cmp_ss_mask(X, Y, P) \
  13525. ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
  13526. (__v4sf)(__m128)(Y), (int)(P), \
  13527. (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
  13528. #define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  13529. ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
  13530. (__v4sf)(__m128)(Y), (int)(P), \
  13531. M,_MM_FROUND_CUR_DIRECTION))
  13532. #endif
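/* A minimal usage sketch (not from the upstream header): the vector
   comparisons above return a bitmask with one bit per lane, which composes
   directly with the mask parameters of other intrinsics or with ordinary bit
   twiddling.  count_below is a hypothetical helper.

   #include <immintrin.h>

   static int
   count_below (__m512d v, __m512d limit)
   {
     __mmask8 m = _mm512_cmp_pd_mask (v, limit, _CMP_LT_OQ);
     return __builtin_popcount ((unsigned) m);   // lanes where v < limit
   }
*/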
  13533. extern __inline __mmask16
  13534. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13535. _mm512_kmov (__mmask16 __A)
  13536. {
  13537. return __builtin_ia32_kmovw (__A);
  13538. }
  13539. extern __inline __m512
  13540. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13541. _mm512_castpd_ps (__m512d __A)
  13542. {
  13543. return (__m512) (__A);
  13544. }
  13545. extern __inline __m512i
  13546. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13547. _mm512_castpd_si512 (__m512d __A)
  13548. {
  13549. return (__m512i) (__A);
  13550. }
  13551. extern __inline __m512d
  13552. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13553. _mm512_castps_pd (__m512 __A)
  13554. {
  13555. return (__m512d) (__A);
  13556. }
  13557. extern __inline __m512i
  13558. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13559. _mm512_castps_si512 (__m512 __A)
  13560. {
  13561. return (__m512i) (__A);
  13562. }
  13563. extern __inline __m512
  13564. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13565. _mm512_castsi512_ps (__m512i __A)
  13566. {
  13567. return (__m512) (__A);
  13568. }
  13569. extern __inline __m512d
  13570. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13571. _mm512_castsi512_pd (__m512i __A)
  13572. {
  13573. return (__m512d) (__A);
  13574. }
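/* Illustrative usage sketch, not part of the upstream header: the six cast
   intrinsics above only reinterpret the 512 bits; no instruction is emitted
   and no value conversion happens.  That makes bit-level tricks on
   floating-point data cheap, e.g. negating by toggling the sign bit.
   negate_via_bits is a hypothetical helper.

   #include <immintrin.h>

   static __m512d
   negate_via_bits (__m512d x)
   {
     __m512i bits = _mm512_castpd_si512 (x);
     bits = _mm512_xor_si512 (bits,
                              _mm512_set1_epi64 ((long long) 0x8000000000000000ULL));
     return _mm512_castsi512_pd (bits);
   }
*/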
  13575. extern __inline __m128d
  13576. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13577. _mm512_castpd512_pd128 (__m512d __A)
  13578. {
  13579. return (__m128d)_mm512_extractf32x4_ps((__m512)__A, 0);
  13580. }
  13581. extern __inline __m128
  13582. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13583. _mm512_castps512_ps128 (__m512 __A)
  13584. {
  13585. return _mm512_extractf32x4_ps(__A, 0);
  13586. }
  13587. extern __inline __m128i
  13588. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13589. _mm512_castsi512_si128 (__m512i __A)
  13590. {
  13591. return (__m128i)_mm512_extracti32x4_epi32((__m512i)__A, 0);
  13592. }
  13593. extern __inline __m256d
  13594. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13595. _mm512_castpd512_pd256 (__m512d __A)
  13596. {
  13597. return _mm512_extractf64x4_pd(__A, 0);
  13598. }
  13599. extern __inline __m256
  13600. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13601. _mm512_castps512_ps256 (__m512 __A)
  13602. {
  13603. return (__m256)_mm512_extractf64x4_pd((__m512d)__A, 0);
  13604. }
  13605. extern __inline __m256i
  13606. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13607. _mm512_castsi512_si256 (__m512i __A)
  13608. {
  13609. return (__m256i)_mm512_extractf64x4_pd((__m512d)__A, 0);
  13610. }
  13611. extern __inline __m512d
  13612. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13613. _mm512_castpd128_pd512 (__m128d __A)
  13614. {
  13615. return (__m512d) __builtin_ia32_pd512_pd((__m128d)__A);
  13616. }
  13617. extern __inline __m512
  13618. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13619. _mm512_castps128_ps512 (__m128 __A)
  13620. {
  13621. return (__m512) __builtin_ia32_ps512_ps((__m128)__A);
  13622. }
  13623. extern __inline __m512i
  13624. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13625. _mm512_castsi128_si512 (__m128i __A)
  13626. {
  13627. return (__m512i) __builtin_ia32_si512_si((__v4si)__A);
  13628. }
  13629. extern __inline __m512d
  13630. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13631. _mm512_castpd256_pd512 (__m256d __A)
  13632. {
  13633. return __builtin_ia32_pd512_256pd (__A);
  13634. }
  13635. extern __inline __m512
  13636. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13637. _mm512_castps256_ps512 (__m256 __A)
  13638. {
  13639. return __builtin_ia32_ps512_256ps (__A);
  13640. }
  13641. extern __inline __m512i
  13642. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13643. _mm512_castsi256_si512 (__m256i __A)
  13644. {
  13645. return (__m512i)__builtin_ia32_si512_256si ((__v8si)__A);
  13646. }
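/* Illustrative usage sketch, not part of the upstream header: per the Intel
   intrinsics guide the widening casts above (128/256 -> 512) leave the upper
   lanes undefined, so they are only safe when later code overwrites or
   ignores those lanes; the narrowing casts simply keep the low part.  When a
   zeroed upper portion is required, inserting into a cleared register is the
   explicit alternative.  zero_extend128 is a hypothetical helper.

   #include <immintrin.h>

   static __m512
   zero_extend128 (__m128 x)
   {
     return _mm512_insertf32x4 (_mm512_setzero_ps (), x, 0);
   }
*/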
  13647. extern __inline __mmask16
  13648. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13649. _mm512_cmpeq_epu32_mask (__m512i __A, __m512i __B)
  13650. {
  13651. return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
  13652. (__v16si) __B, 0,
  13653. (__mmask16) -1);
  13654. }
  13655. extern __inline __mmask16
  13656. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13657. _mm512_mask_cmpeq_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
  13658. {
  13659. return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
  13660. (__v16si) __B, 0, __U);
  13661. }
  13662. extern __inline __mmask8
  13663. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13664. _mm512_mask_cmpeq_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
  13665. {
  13666. return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
  13667. (__v8di) __B, 0, __U);
  13668. }
  13669. extern __inline __mmask8
  13670. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13671. _mm512_cmpeq_epu64_mask (__m512i __A, __m512i __B)
  13672. {
  13673. return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
  13674. (__v8di) __B, 0,
  13675. (__mmask8) -1);
  13676. }
  13677. extern __inline __mmask16
  13678. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13679. _mm512_cmpgt_epu32_mask (__m512i __A, __m512i __B)
  13680. {
  13681. return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
  13682. (__v16si) __B, 6,
  13683. (__mmask16) -1);
  13684. }
  13685. extern __inline __mmask16
  13686. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13687. _mm512_mask_cmpgt_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
  13688. {
  13689. return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
  13690. (__v16si) __B, 6, __U);
  13691. }
  13692. extern __inline __mmask8
  13693. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13694. _mm512_mask_cmpgt_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
  13695. {
  13696. return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
  13697. (__v8di) __B, 6, __U);
  13698. }
  13699. extern __inline __mmask8
  13700. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13701. _mm512_cmpgt_epu64_mask (__m512i __A, __m512i __B)
  13702. {
  13703. return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
  13704. (__v8di) __B, 6,
  13705. (__mmask8) -1);
  13706. }
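/* A minimal usage sketch (not from the upstream header): the epu32/epu64
   comparisons above use unsigned ordering, in contrast to the signed
   epi32/epi64 forms defined elsewhere in this file.  compare_demo is a
   hypothetical helper showing the difference for 0xFFFFFFFF.

   #include <immintrin.h>

   static void
   compare_demo (void)
   {
     __m512i a = _mm512_set1_epi32 (-1);            // 0xFFFFFFFF in every lane
     __m512i b = _mm512_set1_epi32 (1);
     __mmask16 u = _mm512_cmpgt_epu32_mask (a, b);  // 0xFFFF: max unsigned > 1
     __mmask16 s = _mm512_cmpgt_epi32_mask (a, b);  // 0x0000: -1 < 1 signed
     (void) u;
     (void) s;
   }
*/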
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __v8si __T1 = (__v8si) _mm512_extracti64x4_epi64 (__A, 1); \
  __v8si __T2 = (__v8si) _mm512_extracti64x4_epi64 (__A, 0); \
  __m256i __T3 = (__m256i) (__T1 op __T2); \
  __v4si __T4 = (__v4si) _mm256_extracti128_si256 (__T3, 1); \
  __v4si __T5 = (__v4si) _mm256_extracti128_si256 (__T3, 0); \
  __v4si __T6 = __T4 op __T5; \
  __v4si __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 }); \
  __v4si __T8 = __T6 op __T7; \
  return __T8[0] op __T8[1]

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_add_epi32 (__m512i __A)
{
  __MM512_REDUCE_OP (+);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_mul_epi32 (__m512i __A)
{
  __MM512_REDUCE_OP (*);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_and_epi32 (__m512i __A)
{
  __MM512_REDUCE_OP (&);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_or_epi32 (__m512i __A)
{
  __MM512_REDUCE_OP (|);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_add_epi32 (__mmask16 __U, __m512i __A)
{
  __A = _mm512_maskz_mov_epi32 (__U, __A);
  __MM512_REDUCE_OP (+);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_mul_epi32 (__mmask16 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (1), __U, __A);
  __MM512_REDUCE_OP (*);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_and_epi32 (__mmask16 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (~0), __U, __A);
  __MM512_REDUCE_OP (&);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_or_epi32 (__mmask16 __U, __m512i __A)
{
  __A = _mm512_maskz_mov_epi32 (__U, __A);
  __MM512_REDUCE_OP (|);
}
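
/* min/max have no infix operator, so this variant of __MM512_REDUCE_OP
   pastes the operation name (min_epi32, max_epu32, ...) into the matching
   _mm256_ and _mm_ intrinsic names with the ## operator instead of applying
   `op' directly.  The masked forms seed the masked-off lanes with the
   neutral element of the comparison: INT_MAX for signed min, INT_MIN for
   signed max, all-ones for unsigned min and 0 for unsigned max.

   Illustrative sketch (hypothetical values, -mavx512f assumed):

     __m512i v  = _mm512_set_epi32 (15, 14, 13, 12, 11, 10, 9, 8,
                                    7, 6, 5, 4, 3, 2, 1, 0);
     int     lo = _mm512_mask_reduce_min_epi32 (0xff00, v);   -> 8  */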
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __m256i __T1 = (__m256i) _mm512_extracti64x4_epi64 (__A, 1); \
  __m256i __T2 = (__m256i) _mm512_extracti64x4_epi64 (__A, 0); \
  __m256i __T3 = _mm256_##op (__T1, __T2); \
  __m128i __T4 = (__m128i) _mm256_extracti128_si256 (__T3, 1); \
  __m128i __T5 = (__m128i) _mm256_extracti128_si256 (__T3, 0); \
  __m128i __T6 = _mm_##op (__T4, __T5); \
  __m128i __T7 = (__m128i) __builtin_shuffle ((__v4si) __T6, \
                                              (__v4si) { 2, 3, 0, 1 }); \
  __m128i __T8 = _mm_##op (__T6, __T7); \
  __m128i __T9 = (__m128i) __builtin_shuffle ((__v4si) __T8, \
                                              (__v4si) { 1, 0, 1, 0 }); \
  __v4si __T10 = (__v4si) _mm_##op (__T8, __T9); \
  return __T10[0]

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_min_epi32 (__m512i __A)
{
  __MM512_REDUCE_OP (min_epi32);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_max_epi32 (__m512i __A)
{
  __MM512_REDUCE_OP (max_epi32);
}

extern __inline unsigned int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_min_epu32 (__m512i __A)
{
  __MM512_REDUCE_OP (min_epu32);
}

extern __inline unsigned int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_max_epu32 (__m512i __A)
{
  __MM512_REDUCE_OP (max_epu32);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_min_epi32 (__mmask16 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (__INT_MAX__), __U, __A);
  __MM512_REDUCE_OP (min_epi32);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_max_epi32 (__mmask16 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (-__INT_MAX__ - 1), __U, __A);
  __MM512_REDUCE_OP (max_epi32);
}

extern __inline unsigned int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_min_epu32 (__mmask16 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (~0), __U, __A);
  __MM512_REDUCE_OP (min_epu32);
}

extern __inline unsigned int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_max_epu32 (__mmask16 __U, __m512i __A)
{
  __A = _mm512_maskz_mov_epi32 (__U, __A);
  __MM512_REDUCE_OP (max_epu32);
}
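
/* Single-precision reductions reuse the same halving pattern.  The 256-bit
   halves are extracted through _mm512_extractf64x4_pd plus casts, presumably
   because a 256-bit single-precision extract is not available in plain
   AVX512F.  Note that add/mul combine the lanes as a tree, so for floating
   point the result may differ in the last bits from a strict left-to-right
   scalar loop.

   Sketch (assuming -mavx512f):

     __m512 v = _mm512_set1_ps (0.5f);
     float  s = _mm512_reduce_add_ps (v);   -> 16 * 0.5f == 8.0f  */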
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __m256 __T1 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
  __m256 __T2 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
  __m256 __T3 = __T1 op __T2; \
  __m128 __T4 = _mm256_extractf128_ps (__T3, 1); \
  __m128 __T5 = _mm256_extractf128_ps (__T3, 0); \
  __m128 __T6 = __T4 op __T5; \
  __m128 __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 }); \
  __m128 __T8 = __T6 op __T7; \
  return __T8[0] op __T8[1]

extern __inline float
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_add_ps (__m512 __A)
{
  __MM512_REDUCE_OP (+);
}

extern __inline float
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_mul_ps (__m512 __A)
{
  __MM512_REDUCE_OP (*);
}

extern __inline float
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_add_ps (__mmask16 __U, __m512 __A)
{
  __A = _mm512_maskz_mov_ps (__U, __A);
  __MM512_REDUCE_OP (+);
}

extern __inline float
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_mul_ps (__mmask16 __U, __m512 __A)
{
  __A = _mm512_mask_mov_ps (_mm512_set1_ps (1.0f), __U, __A);
  __MM512_REDUCE_OP (*);
}
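
/* min_ps/max_ps reductions: as with the integer min/max above, the operation
   name is pasted into _mm256_ and _mm_ intrinsic names.  The masked forms
   fill the masked-off lanes with +infinity (for min) or -infinity (for max)
   so they can never win the comparison.

   Sketch (assuming -mavx512f):

     __m512 v = _mm512_set1_ps (2.0f);
     float  m = _mm512_mask_reduce_max_ps (0x000f, v);   -> 2.0f  */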
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __m256 __T1 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
  __m256 __T2 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
  __m256 __T3 = _mm256_##op (__T1, __T2); \
  __m128 __T4 = _mm256_extractf128_ps (__T3, 1); \
  __m128 __T5 = _mm256_extractf128_ps (__T3, 0); \
  __m128 __T6 = _mm_##op (__T4, __T5); \
  __m128 __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 }); \
  __m128 __T8 = _mm_##op (__T6, __T7); \
  __m128 __T9 = __builtin_shuffle (__T8, (__v4si) { 1, 0, 1, 0 }); \
  __m128 __T10 = _mm_##op (__T8, __T9); \
  return __T10[0]

extern __inline float
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_min_ps (__m512 __A)
{
  __MM512_REDUCE_OP (min_ps);
}

extern __inline float
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_max_ps (__m512 __A)
{
  __MM512_REDUCE_OP (max_ps);
}

extern __inline float
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_min_ps (__mmask16 __U, __m512 __A)
{
  __A = _mm512_mask_mov_ps (_mm512_set1_ps (__builtin_inff ()), __U, __A);
  __MM512_REDUCE_OP (min_ps);
}

extern __inline float
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_max_ps (__mmask16 __U, __m512 __A)
{
  __A = _mm512_mask_mov_ps (_mm512_set1_ps (-__builtin_inff ()), __U, __A);
  __MM512_REDUCE_OP (max_ps);
}
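
/* 64-bit integer reductions follow the same halving pattern, but with only
   two lanes left after the 128-bit step no final shuffle is needed and the
   macro ends with __T6[0] op __T6[1].  Identity elements for the masked
   forms are as before (0, 1, all-ones, 0).

   Sketch (assuming -mavx512f):

     __m512i   v = _mm512_set1_epi64 (5);
     long long s = _mm512_reduce_add_epi64 (v);   -> 8 * 5 == 40  */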
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __v4di __T1 = (__v4di) _mm512_extracti64x4_epi64 (__A, 1); \
  __v4di __T2 = (__v4di) _mm512_extracti64x4_epi64 (__A, 0); \
  __m256i __T3 = (__m256i) (__T1 op __T2); \
  __v2di __T4 = (__v2di) _mm256_extracti128_si256 (__T3, 1); \
  __v2di __T5 = (__v2di) _mm256_extracti128_si256 (__T3, 0); \
  __v2di __T6 = __T4 op __T5; \
  return __T6[0] op __T6[1]

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_add_epi64 (__m512i __A)
{
  __MM512_REDUCE_OP (+);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_mul_epi64 (__m512i __A)
{
  __MM512_REDUCE_OP (*);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_and_epi64 (__m512i __A)
{
  __MM512_REDUCE_OP (&);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_or_epi64 (__m512i __A)
{
  __MM512_REDUCE_OP (|);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_add_epi64 (__mmask8 __U, __m512i __A)
{
  __A = _mm512_maskz_mov_epi64 (__U, __A);
  __MM512_REDUCE_OP (+);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_mul_epi64 (__mmask8 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (1LL), __U, __A);
  __MM512_REDUCE_OP (*);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_and_epi64 (__mmask8 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (~0LL), __U, __A);
  __MM512_REDUCE_OP (&);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_or_epi64 (__mmask8 __U, __m512i __A)
{
  __A = _mm512_maskz_mov_epi64 (__U, __A);
  __MM512_REDUCE_OP (|);
}
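
/* 64-bit min/max reductions cannot drop down to 256/128-bit vectors here:
   _mm256_min_epi64 and friends appear to require AVX512VL, which plain
   AVX512F does not guarantee.  This variant therefore stays in the 512-bit
   register throughout: _mm512_shuffle_i64x2 with control 0x4e swaps the two
   256-bit halves, and the __builtin_shuffle steps swap 128-bit and then
   64-bit neighbours, so after three combine steps every lane holds the
   final result and element 0 is returned.

   Sketch (assuming -mavx512f):

     __m512i v = _mm512_set_epi64 (7, 6, 5, 4, 3, 2, 1, 0);
     unsigned long long hi = _mm512_reduce_max_epu64 (v);   -> 7  */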
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __m512i __T1 = _mm512_shuffle_i64x2 (__A, __A, 0x4e); \
  __m512i __T2 = _mm512_##op (__A, __T1); \
  __m512i __T3 \
    = (__m512i) __builtin_shuffle ((__v8di) __T2, \
                                   (__v8di) { 2, 3, 0, 1, 6, 7, 4, 5 }); \
  __m512i __T4 = _mm512_##op (__T2, __T3); \
  __m512i __T5 \
    = (__m512i) __builtin_shuffle ((__v8di) __T4, \
                                   (__v8di) { 1, 0, 3, 2, 5, 4, 7, 6 }); \
  __v8di __T6 = (__v8di) _mm512_##op (__T4, __T5); \
  return __T6[0]

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_min_epi64 (__m512i __A)
{
  __MM512_REDUCE_OP (min_epi64);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_max_epi64 (__m512i __A)
{
  __MM512_REDUCE_OP (max_epi64);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_min_epi64 (__mmask8 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (__LONG_LONG_MAX__),
                               __U, __A);
  __MM512_REDUCE_OP (min_epi64);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_max_epi64 (__mmask8 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (-__LONG_LONG_MAX__ - 1),
                               __U, __A);
  __MM512_REDUCE_OP (max_epi64);
}

extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_min_epu64 (__m512i __A)
{
  __MM512_REDUCE_OP (min_epu64);
}

extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_max_epu64 (__m512i __A)
{
  __MM512_REDUCE_OP (max_epu64);
}

extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_min_epu64 (__mmask8 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (~0LL), __U, __A);
  __MM512_REDUCE_OP (min_epu64);
}

extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_max_epu64 (__mmask8 __U, __m512i __A)
{
  __A = _mm512_maskz_mov_epi64 (__U, __A);
  __MM512_REDUCE_OP (max_epu64);
}
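
/* Double-precision add/mul reductions need only two halving steps
   (8 -> 4 -> 2 lanes) before the final scalar combine.  As with the float
   versions, the combination order is a tree, not left-to-right.

   Sketch (assuming -mavx512f):

     __m512d v = _mm512_set1_pd (1.25);
     double  s = _mm512_reduce_add_pd (v);   -> 8 * 1.25 == 10.0  */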
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __m256d __T1 = (__m256d) _mm512_extractf64x4_pd (__A, 1); \
  __m256d __T2 = (__m256d) _mm512_extractf64x4_pd (__A, 0); \
  __m256d __T3 = __T1 op __T2; \
  __m128d __T4 = _mm256_extractf128_pd (__T3, 1); \
  __m128d __T5 = _mm256_extractf128_pd (__T3, 0); \
  __m128d __T6 = __T4 op __T5; \
  return __T6[0] op __T6[1]

extern __inline double
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_add_pd (__m512d __A)
{
  __MM512_REDUCE_OP (+);
}

extern __inline double
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_mul_pd (__m512d __A)
{
  __MM512_REDUCE_OP (*);
}

extern __inline double
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_add_pd (__mmask8 __U, __m512d __A)
{
  __A = _mm512_maskz_mov_pd (__U, __A);
  __MM512_REDUCE_OP (+);
}

extern __inline double
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_mul_pd (__mmask8 __U, __m512d __A)
{
  __A = _mm512_mask_mov_pd (_mm512_set1_pd (1.0), __U, __A);
  __MM512_REDUCE_OP (*);
}
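
/* min_pd/max_pd reductions: the final step swaps the two remaining doubles
   with __builtin_shuffle and a __v2di index vector and combines once more.
   The masked forms fill masked-off lanes with +infinity (min) or -infinity
   (max).

   Sketch (assuming -mavx512f):

     __m512d v = _mm512_set1_pd (3.5);
     double  m = _mm512_mask_reduce_min_pd (0x0f, v);   -> 3.5  */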
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __m256d __T1 = (__m256d) _mm512_extractf64x4_pd (__A, 1); \
  __m256d __T2 = (__m256d) _mm512_extractf64x4_pd (__A, 0); \
  __m256d __T3 = _mm256_##op (__T1, __T2); \
  __m128d __T4 = _mm256_extractf128_pd (__T3, 1); \
  __m128d __T5 = _mm256_extractf128_pd (__T3, 0); \
  __m128d __T6 = _mm_##op (__T4, __T5); \
  __m128d __T7 = (__m128d) __builtin_shuffle (__T6, (__v2di) { 1, 0 }); \
  __m128d __T8 = _mm_##op (__T6, __T7); \
  return __T8[0]

extern __inline double
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_min_pd (__m512d __A)
{
  __MM512_REDUCE_OP (min_pd);
}

extern __inline double
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_max_pd (__m512d __A)
{
  __MM512_REDUCE_OP (max_pd);
}

extern __inline double
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_min_pd (__mmask8 __U, __m512d __A)
{
  __A = _mm512_mask_mov_pd (_mm512_set1_pd (__builtin_inf ()), __U, __A);
  __MM512_REDUCE_OP (min_pd);
}

extern __inline double
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_max_pd (__mmask8 __U, __m512d __A)
{
  __A = _mm512_mask_mov_pd (_mm512_set1_pd (-__builtin_inf ()), __U, __A);
  __MM512_REDUCE_OP (max_pd);
}
#undef __MM512_REDUCE_OP

#ifdef __DISABLE_AVX512F__
#undef __DISABLE_AVX512F__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512F__ */

#endif /* _AVX512FINTRIN_H_INCLUDED */