avx512fintrin.h 513 KB

[The file body was not captured; only the viewer's line-number gutter (running to roughly 14,300 lines) came through, so the contents of the header are not reproduced here.]
430914310143111431214313143141431514316143171431814319143201432114322143231432414325143261432714328143291433014331143321433314334143351433614337143381433914340143411434214343143441434514346143471434814349143501435114352143531435414355143561435714358143591436014361143621436314364143651436614367143681436914370143711437214373143741437514376143771437814379143801438114382143831438414385143861438714388143891439014391143921439314394143951439614397143981439914400144011440214403144041440514406144071440814409144101441114412144131441414415144161441714418144191442014421144221442314424144251442614427144281442914430144311443214433144341443514436144371443814439144401444114442144431444414445144461444714448144491445014451144521445314454144551445614457144581445914460144611446214463144641446514466144671446814469144701447114472144731447414475144761447714478144791448014481144821448314484144851448614487144881448914490144911449214493144941449514496144971449814499145001450114502145031450414505145061450714508145091451014511145121451314514145151451614517145181451914520145211452214523145241452514526145271452814529145301453114532145331453414535145361453714538145391454014541145421454314544145451454614547145481454914550145511455214553145541455514556145571455814559145601456114562145631456414565145661456714568145691457014571145721457314574145751457614577145781457914580145811458214583145841458514586145871458814589145901459114592145931459414595145961459714598145991460014601146021460314604146051460614607146081460914610146111461214613146141461514616146171461814619146201462114622146231462414625146261462714628146291463014631146321463314634146351463614637146381463914640146411464214643146441464514646146471464814649146501465114652146531465414655146561465714658146591466014661146621466314664146651466614667146681466914670146711467214673146741467514676146771467814679146801468114682146831468414685146861468714688146891469014691146921469314694146951469614697146981469914700147011470214703147041470514706147071470814709147101471114712147131471414715147161471714718147191472014721147221472314724147251472614727147281472914730147311473214733147341473514736147371473814739147401474114742147431474414745147461474714748147491475014751147521475314754147551475614757147581475914760147611476214763147641476514766147671476814769147701477114772147731477414775147761477714778147791478014781147821478314784147851478614787147881478914790147911479214793147941479514796147971479814799148001480114802148031480414805148061480714808148091481014811148121481314814148151481614817148181481914820148211482214823148241482514826148271482814829148301483114832148331483414835148361483714838148391484014841148421484314844148451484614847148481484914850148511485214853148541485514856148571485814859148601486114862148631486414865148661486714868148691487014871148721487314874148751487614877148781487914880148811488214883148841488514886148871488814889148901489114892148931489414895148961489714898148991490014901149021490314904149051490614907149081490914910149111491214913149141491514916149171491814919149201492114922149231492414925149261492714928149291493014931149321493314934149351493614937149381493914940149411494214943149441494514946149471494814949149501495114952149531495414955149561495714958149591496014961149621496314964149651496614967149681496914970149711497214973149741497514976149771497814979149801498114982149831498414985149861498714988149891499014991149921499314994149951499614997149981499915000150011500215003150041500515006150071500815009150101501115012150131501415015150161501715018150191
502015021150221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211542215423154241542515426154271542815429154301543115432154331543415435154361543715438154391544015441154421544315444154451544615447154481544915450154511545215453154541545515456154571545815459154601546115462154631546415465154661546715468154691547015471154721547315474154751547615477154781547915480154811548215483154841548515486154871548815489154901549115492154931549415495154961549715498154991550015501155021550315504155051550615507155081550915510155111551215513155141551515516155171551815519155201552115522155231552415525155261552715528155291553015531155321553315534155351553615537155381553915540155411554215543155441554515546155471554815549155501555115552155531555415555155561555715558155591556015561155621556315564155651556615567155681556915570155711557215573155741557515576155771557815579155801558115582155831558415585155861558715588155891559015591155921559315594155951559615597155981559915600156011560215603156041560515606156071560815609156101561115612156131561415615156161561715618156191562015621156221562315624156251562615627156281562915630156311563215633156341563515636156371563815639156401564115642156431564415645156461564715648156491565015651156521565315654156551565615657156581565915660156611566215663156641566515666156671566815669156701567115672156731567415675156761567715678156791568015681156821568315684156851568615687156881568915690156911569215693156941569515696156971569815699157001570115702157031570415705157061570715708157091571015711157121571315714157151571615717157181571915720157211572215723157241572515726157271572815729157301
573115732157331573415735157361573715738157391574015741157421574315744157451574615747157481574915750157511575215753157541575515756157571575815759157601576115762157631576415765157661576715768157691577015771157721577315774157751577615777157781577915780157811578215783157841578515786157871578815789157901579115792157931579415795157961579715798157991580015801158021580315804158051580615807158081580915810158111581215813158141581515816158171581815819158201582115822158231582415825158261582715828158291583015831158321583315834158351583615837158381583915840158411584215843158441584515846158471584815849158501585115852158531585415855158561585715858158591586015861158621586315864158651586615867158681586915870158711587215873158741587515876158771587815879158801588115882158831588415885158861588715888158891589015891158921589315894158951589615897158981589915900159011590215903159041590515906159071590815909159101591115912159131591415915159161591715918159191592015921159221592315924159251592615927159281592915930159311593215933159341593515936159371593815939159401594115942159431594415945159461594715948159491595015951159521595315954159551595615957159581595915960159611596215963159641596515966159671596815969159701597115972159731597415975159761597715978159791598015981159821598315984159851598615987159881598915990159911599215993159941599515996159971599815999160001600116002160031600416005160061600716008160091601016011160121601316014160151601616017160181601916020160211602216023160241602516026160271602816029160301603116032160331603416035160361603716038160391604016041160421604316044160451604616047160481604916050160511605216053160541605516056160571605816059160601606116062160631606416065160661606716068160691607016071160721607316074160751607616077160781607916080160811608216083160841608516086160871608816089160901609116092160931609416095160961609716098160991610016101161021610316104161051610616107161081610916110161111611216113161141611516116161171611816119161201612116122161231612416125161261612716128161291613016131161321613316134161351613616137161381613916140161411614216143161441614516146161471614816149161501615116152161531615416155161561615716158161591616016161161621616316164161651616616167161681616916170161711617216173161741617516176161771617816179161801618116182161831618416185161861618716188161891619016191161921619316194161951619616197161981619916200162011620216203162041620516206162071620816209162101621116212162131621416215162161621716218162191622016221162221622316224162251622616227162281622916230162311623216233162341623516236162371623816239162401624116242162431624416245162461624716248162491625016251162521625316254162551625616257162581625916260162611626216263162641626516266162671626816269162701627116272162731627416275162761627716278162791628016281162821628316284162851628616287162881628916290162911629216293162941629516296162971629816299163001630116302163031630416305163061630716308163091631016311163121631316314163151631616317163181631916320163211632216323163241632516326163271632816329163301633116332163331633416335163361633716338163391634016341163421634316344163451634616347163481634916350163511635216353163541635516356163571635816359163601636116362163631636416365163661636716368163691637016371163721637316374163751637616377163781637916380163811638216383163841638516386163871638816389163901639116392163931639416395163961639716398163991640016401164021640316404164051640616407164081640916410164111641216413164141641516416164171641816419164201642116422164231642416425164261642716428164291643016431164321643316434164351643616437164381643916440164411
64421644316444164451644616447164481644916450164511645216453164541645516456164571645816459164601646116462164631646416465164661646716468164691647016471164721647316474
  1. /* Copyright (C) 2013-2022 Free Software Foundation, Inc.
  2. This file is part of GCC.
  3. GCC is free software; you can redistribute it and/or modify
  4. it under the terms of the GNU General Public License as published by
  5. the Free Software Foundation; either version 3, or (at your option)
  6. any later version.
  7. GCC is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU General Public License for more details.
  11. Under Section 7 of GPL version 3, you are granted additional
  12. permissions described in the GCC Runtime Library Exception, version
  13. 3.1, as published by the Free Software Foundation.
  14. You should have received a copy of the GNU General Public License and
  15. a copy of the GCC Runtime Library Exception along with this program;
  16. see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
  17. <http://www.gnu.org/licenses/>. */
  18. #ifndef _IMMINTRIN_H_INCLUDED
  19. #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
  20. #endif
  21. #ifndef _AVX512FINTRIN_H_INCLUDED
  22. #define _AVX512FINTRIN_H_INCLUDED
  23. #ifndef __AVX512F__
  24. #pragma GCC push_options
  25. #pragma GCC target("avx512f")
  26. #define __DISABLE_AVX512F__
  27. #endif /* __AVX512F__ */
  28. /* Internal data types for implementing the intrinsics. */
  29. typedef double __v8df __attribute__ ((__vector_size__ (64)));
  30. typedef float __v16sf __attribute__ ((__vector_size__ (64)));
  31. typedef long long __v8di __attribute__ ((__vector_size__ (64)));
  32. typedef unsigned long long __v8du __attribute__ ((__vector_size__ (64)));
  33. typedef int __v16si __attribute__ ((__vector_size__ (64)));
  34. typedef unsigned int __v16su __attribute__ ((__vector_size__ (64)));
  35. typedef short __v32hi __attribute__ ((__vector_size__ (64)));
  36. typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64)));
  37. typedef char __v64qi __attribute__ ((__vector_size__ (64)));
  38. typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64)));
  39. /* The Intel API is flexible enough that we must allow aliasing with other
  40. vector types, and their scalar components. */
  41. typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
  42. typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
  43. typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
  44. /* Unaligned version of the same type. */
  45. typedef float __m512_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
  46. typedef long long __m512i_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
  47. typedef double __m512d_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
  48. typedef unsigned char __mmask8;
  49. typedef unsigned short __mmask16;
  50. extern __inline __mmask16
  51. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  52. _mm512_int2mask (int __M)
  53. {
  54. return (__mmask16) __M;
  55. }
  56. extern __inline int
  57. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  58. _mm512_mask2int (__mmask16 __M)
  59. {
  60. return (int) __M;
  61. }
  62. extern __inline __m512i
  63. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  64. _mm512_set_epi64 (long long __A, long long __B, long long __C,
  65. long long __D, long long __E, long long __F,
  66. long long __G, long long __H)
  67. {
  68. return __extension__ (__m512i) (__v8di)
  69. { __H, __G, __F, __E, __D, __C, __B, __A };
  70. }
  71. /* Create the vector [A B C D E F G H I J K L M N O P]. */
  72. extern __inline __m512i
  73. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  74. _mm512_set_epi32 (int __A, int __B, int __C, int __D,
  75. int __E, int __F, int __G, int __H,
  76. int __I, int __J, int __K, int __L,
  77. int __M, int __N, int __O, int __P)
  78. {
  79. return __extension__ (__m512i)(__v16si)
  80. { __P, __O, __N, __M, __L, __K, __J, __I,
  81. __H, __G, __F, __E, __D, __C, __B, __A };
  82. }
  83. extern __inline __m512i
  84. __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  85. _mm512_set_epi16 (short __q31, short __q30, short __q29, short __q28,
  86. short __q27, short __q26, short __q25, short __q24,
  87. short __q23, short __q22, short __q21, short __q20,
  88. short __q19, short __q18, short __q17, short __q16,
  89. short __q15, short __q14, short __q13, short __q12,
  90. short __q11, short __q10, short __q09, short __q08,
  91. short __q07, short __q06, short __q05, short __q04,
  92. short __q03, short __q02, short __q01, short __q00)
  93. {
  94. return __extension__ (__m512i)(__v32hi){
  95. __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
  96. __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15,
  97. __q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23,
  98. __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31
  99. };
  100. }
  101. extern __inline __m512i
  102. __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  103. _mm512_set_epi8 (char __q63, char __q62, char __q61, char __q60,
  104. char __q59, char __q58, char __q57, char __q56,
  105. char __q55, char __q54, char __q53, char __q52,
  106. char __q51, char __q50, char __q49, char __q48,
  107. char __q47, char __q46, char __q45, char __q44,
  108. char __q43, char __q42, char __q41, char __q40,
  109. char __q39, char __q38, char __q37, char __q36,
  110. char __q35, char __q34, char __q33, char __q32,
  111. char __q31, char __q30, char __q29, char __q28,
  112. char __q27, char __q26, char __q25, char __q24,
  113. char __q23, char __q22, char __q21, char __q20,
  114. char __q19, char __q18, char __q17, char __q16,
  115. char __q15, char __q14, char __q13, char __q12,
  116. char __q11, char __q10, char __q09, char __q08,
  117. char __q07, char __q06, char __q05, char __q04,
  118. char __q03, char __q02, char __q01, char __q00)
  119. {
  120. return __extension__ (__m512i)(__v64qi){
  121. __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
  122. __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15,
  123. __q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23,
  124. __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31,
  125. __q32, __q33, __q34, __q35, __q36, __q37, __q38, __q39,
  126. __q40, __q41, __q42, __q43, __q44, __q45, __q46, __q47,
  127. __q48, __q49, __q50, __q51, __q52, __q53, __q54, __q55,
  128. __q56, __q57, __q58, __q59, __q60, __q61, __q62, __q63
  129. };
  130. }
  131. extern __inline __m512d
  132. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  133. _mm512_set_pd (double __A, double __B, double __C, double __D,
  134. double __E, double __F, double __G, double __H)
  135. {
  136. return __extension__ (__m512d)
  137. { __H, __G, __F, __E, __D, __C, __B, __A };
  138. }
  139. extern __inline __m512
  140. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  141. _mm512_set_ps (float __A, float __B, float __C, float __D,
  142. float __E, float __F, float __G, float __H,
  143. float __I, float __J, float __K, float __L,
  144. float __M, float __N, float __O, float __P)
  145. {
  146. return __extension__ (__m512)
  147. { __P, __O, __N, __M, __L, __K, __J, __I,
  148. __H, __G, __F, __E, __D, __C, __B, __A };
  149. }
  150. #define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \
  151. _mm512_set_epi64(e7,e6,e5,e4,e3,e2,e1,e0)
  152. #define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \
  153. e8,e9,e10,e11,e12,e13,e14,e15) \
  154. _mm512_set_epi32(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)
  155. #define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \
  156. _mm512_set_pd(e7,e6,e5,e4,e3,e2,e1,e0)
  157. #define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
  158. _mm512_set_ps(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)
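/* Editorial sketch (not part of the original GCC header): the _mm512_set_*
   intrinsics above take their arguments from the highest element down to
   element 0, while the _mm512_setr_* macros take them in ascending element
   ("reversed") order.  The helper name below is hypothetical and only
   illustrates that the two call forms build the same vector.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__example_set_vs_setr (void)
{
  /* Both vectors hold { 0, 1, 2, 3, 4, 5, 6, 7 } in element order;
     only the argument order differs between set and setr.  */
  __m512i __a = _mm512_set_epi64  (7, 6, 5, 4, 3, 2, 1, 0);
  __m512i __b = _mm512_setr_epi64 (0, 1, 2, 3, 4, 5, 6, 7);
  /* Element i of the sum is 2*i.  */
  return (__m512i) ((__v8du) __a + (__v8du) __b);
}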
  159. extern __inline __m512
  160. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  161. _mm512_undefined_ps (void)
  162. {
  163. __m512 __Y = __Y;
  164. return __Y;
  165. }
  166. #define _mm512_undefined _mm512_undefined_ps
  167. extern __inline __m512d
  168. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  169. _mm512_undefined_pd (void)
  170. {
  171. __m512d __Y = __Y;
  172. return __Y;
  173. }
  174. extern __inline __m512i
  175. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  176. _mm512_undefined_epi32 (void)
  177. {
  178. __m512i __Y = __Y;
  179. return __Y;
  180. }
  181. #define _mm512_undefined_si512 _mm512_undefined_epi32
  182. extern __inline __m512i
  183. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  184. _mm512_set1_epi8 (char __A)
  185. {
  186. return __extension__ (__m512i)(__v64qi)
  187. { __A, __A, __A, __A, __A, __A, __A, __A,
  188. __A, __A, __A, __A, __A, __A, __A, __A,
  189. __A, __A, __A, __A, __A, __A, __A, __A,
  190. __A, __A, __A, __A, __A, __A, __A, __A,
  191. __A, __A, __A, __A, __A, __A, __A, __A,
  192. __A, __A, __A, __A, __A, __A, __A, __A,
  193. __A, __A, __A, __A, __A, __A, __A, __A,
  194. __A, __A, __A, __A, __A, __A, __A, __A };
  195. }
  196. extern __inline __m512i
  197. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  198. _mm512_set1_epi16 (short __A)
  199. {
  200. return __extension__ (__m512i)(__v32hi)
  201. { __A, __A, __A, __A, __A, __A, __A, __A,
  202. __A, __A, __A, __A, __A, __A, __A, __A,
  203. __A, __A, __A, __A, __A, __A, __A, __A,
  204. __A, __A, __A, __A, __A, __A, __A, __A };
  205. }
  206. extern __inline __m512d
  207. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  208. _mm512_set1_pd (double __A)
  209. {
  210. return __extension__ (__m512d)(__v8df)
  211. { __A, __A, __A, __A, __A, __A, __A, __A };
  212. }
  213. extern __inline __m512
  214. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  215. _mm512_set1_ps (float __A)
  216. {
  217. return __extension__ (__m512)(__v16sf)
  218. { __A, __A, __A, __A, __A, __A, __A, __A,
  219. __A, __A, __A, __A, __A, __A, __A, __A };
  220. }
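/* Editorial sketch (not part of the original GCC header): the _mm512_set1_*
   intrinsics broadcast one scalar into every element of the 512-bit vector.
   The helper name below is hypothetical and only illustrates usage.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__example_broadcast_ps (void)
{
  /* All sixteen float lanes receive the same value.  */
  return _mm512_set1_ps (3.14159265f);
}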
  221. /* Create the vector [A B C D A B C D A B C D A B C D]. */
  222. extern __inline __m512i
  223. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  224. _mm512_set4_epi32 (int __A, int __B, int __C, int __D)
  225. {
  226. return __extension__ (__m512i)(__v16si)
  227. { __D, __C, __B, __A, __D, __C, __B, __A,
  228. __D, __C, __B, __A, __D, __C, __B, __A };
  229. }
  230. extern __inline __m512i
  231. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  232. _mm512_set4_epi64 (long long __A, long long __B, long long __C,
  233. long long __D)
  234. {
  235. return __extension__ (__m512i) (__v8di)
  236. { __D, __C, __B, __A, __D, __C, __B, __A };
  237. }
  238. extern __inline __m512d
  239. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  240. _mm512_set4_pd (double __A, double __B, double __C, double __D)
  241. {
  242. return __extension__ (__m512d)
  243. { __D, __C, __B, __A, __D, __C, __B, __A };
  244. }
  245. extern __inline __m512
  246. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  247. _mm512_set4_ps (float __A, float __B, float __C, float __D)
  248. {
  249. return __extension__ (__m512)
  250. { __D, __C, __B, __A, __D, __C, __B, __A,
  251. __D, __C, __B, __A, __D, __C, __B, __A };
  252. }
  253. #define _mm512_setr4_epi64(e0,e1,e2,e3) \
  254. _mm512_set4_epi64(e3,e2,e1,e0)
  255. #define _mm512_setr4_epi32(e0,e1,e2,e3) \
  256. _mm512_set4_epi32(e3,e2,e1,e0)
  257. #define _mm512_setr4_pd(e0,e1,e2,e3) \
  258. _mm512_set4_pd(e3,e2,e1,e0)
  259. #define _mm512_setr4_ps(e0,e1,e2,e3) \
  260. _mm512_set4_ps(e3,e2,e1,e0)
  261. extern __inline __m512
  262. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  263. _mm512_setzero_ps (void)
  264. {
  265. return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
  266. 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
  267. }
  268. extern __inline __m512
  269. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  270. _mm512_setzero (void)
  271. {
  272. return _mm512_setzero_ps ();
  273. }
  274. extern __inline __m512d
  275. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  276. _mm512_setzero_pd (void)
  277. {
  278. return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
  279. }
  280. extern __inline __m512i
  281. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  282. _mm512_setzero_epi32 (void)
  283. {
  284. return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
  285. }
  286. extern __inline __m512i
  287. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  288. _mm512_setzero_si512 (void)
  289. {
  290. return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
  291. }
  292. extern __inline __m512d
  293. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  294. _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
  295. {
  296. return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
  297. (__v8df) __W,
  298. (__mmask8) __U);
  299. }
  300. extern __inline __m512d
  301. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  302. _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
  303. {
  304. return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
  305. (__v8df)
  306. _mm512_setzero_pd (),
  307. (__mmask8) __U);
  308. }
  309. extern __inline __m512
  310. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  311. _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
  312. {
  313. return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
  314. (__v16sf) __W,
  315. (__mmask16) __U);
  316. }
  317. extern __inline __m512
  318. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  319. _mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
  320. {
  321. return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
  322. (__v16sf)
  323. _mm512_setzero_ps (),
  324. (__mmask16) __U);
  325. }
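/* Editorial sketch (not part of the original GCC header): the _mask_ move
   variants merge with an existing destination __W (lanes whose mask bit is
   0 keep __W), while the _maskz_ variants zero those lanes instead.  The
   helper name below is hypothetical and shows both forms side by side.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__example_mask_vs_maskz (__m512 __dst, __m512 __src)
{
  __mmask16 __k = (__mmask16) 0x00FF;   /* select the low 8 lanes */
  /* High lanes keep __dst.  */
  __m512 __merged = _mm512_mask_mov_ps (__dst, __k, __src);
  /* High lanes become 0.0f.  */
  __m512 __zeroed = _mm512_maskz_mov_ps (__k, __src);
  return (__m512) ((__v16sf) __merged + (__v16sf) __zeroed);
}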
  326. extern __inline __m512d
  327. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  328. _mm512_load_pd (void const *__P)
  329. {
  330. return *(__m512d *) __P;
  331. }
  332. extern __inline __m512d
  333. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  334. _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
  335. {
  336. return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
  337. (__v8df) __W,
  338. (__mmask8) __U);
  339. }
  340. extern __inline __m512d
  341. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  342. _mm512_maskz_load_pd (__mmask8 __U, void const *__P)
  343. {
  344. return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
  345. (__v8df)
  346. _mm512_setzero_pd (),
  347. (__mmask8) __U);
  348. }
  349. extern __inline void
  350. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  351. _mm512_store_pd (void *__P, __m512d __A)
  352. {
  353. *(__m512d *) __P = __A;
  354. }
  355. extern __inline void
  356. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  357. _mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
  358. {
  359. __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
  360. (__mmask8) __U);
  361. }
  362. extern __inline __m512
  363. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  364. _mm512_load_ps (void const *__P)
  365. {
  366. return *(__m512 *) __P;
  367. }
  368. extern __inline __m512
  369. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  370. _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
  371. {
  372. return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
  373. (__v16sf) __W,
  374. (__mmask16) __U);
  375. }
  376. extern __inline __m512
  377. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  378. _mm512_maskz_load_ps (__mmask16 __U, void const *__P)
  379. {
  380. return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
  381. (__v16sf)
  382. _mm512_setzero_ps (),
  383. (__mmask16) __U);
  384. }
  385. extern __inline void
  386. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  387. _mm512_store_ps (void *__P, __m512 __A)
  388. {
  389. *(__m512 *) __P = __A;
  390. }
  391. extern __inline void
  392. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  393. _mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
  394. {
  395. __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
  396. (__mmask16) __U);
  397. }
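/* Editorial sketch (not part of the original GCC header): _mm512_load_ps and
   _mm512_store_ps dereference a __m512 object, so the pointer they are given
   is assumed to be 64-byte aligned; the masked forms only read or write the
   lanes whose mask bit is set.  The helper name and buffer below are
   hypothetical.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__example_masked_store (float *__p, __m512 __v)
{
  /* __p is assumed to point at 64 bytes of 64-byte aligned storage.
     Only the even-numbered lanes (mask 0x5555) are written; the odd
     lanes of the destination memory are left untouched.  */
  _mm512_mask_store_ps (__p, (__mmask16) 0x5555, __v);
}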
  398. extern __inline __m512i
  399. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  400. _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
  401. {
  402. return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
  403. (__v8di) __W,
  404. (__mmask8) __U);
  405. }
  406. extern __inline __m512i
  407. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  408. _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
  409. {
  410. return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
  411. (__v8di)
  412. _mm512_setzero_si512 (),
  413. (__mmask8) __U);
  414. }
  415. extern __inline __m512i
  416. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  417. _mm512_load_epi64 (void const *__P)
  418. {
  419. return *(__m512i *) __P;
  420. }
  421. extern __inline __m512i
  422. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  423. _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
  424. {
  425. return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
  426. (__v8di) __W,
  427. (__mmask8) __U);
  428. }
  429. extern __inline __m512i
  430. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  431. _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
  432. {
  433. return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
  434. (__v8di)
  435. _mm512_setzero_si512 (),
  436. (__mmask8) __U);
  437. }
  438. extern __inline void
  439. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  440. _mm512_store_epi64 (void *__P, __m512i __A)
  441. {
  442. *(__m512i *) __P = __A;
  443. }
  444. extern __inline void
  445. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  446. _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
  447. {
  448. __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
  449. (__mmask8) __U);
  450. }
  451. extern __inline __m512i
  452. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  453. _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
  454. {
  455. return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
  456. (__v16si) __W,
  457. (__mmask16) __U);
  458. }
  459. extern __inline __m512i
  460. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  461. _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
  462. {
  463. return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
  464. (__v16si)
  465. _mm512_setzero_si512 (),
  466. (__mmask16) __U);
  467. }
  468. extern __inline __m512i
  469. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  470. _mm512_load_si512 (void const *__P)
  471. {
  472. return *(__m512i *) __P;
  473. }
  474. extern __inline __m512i
  475. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  476. _mm512_load_epi32 (void const *__P)
  477. {
  478. return *(__m512i *) __P;
  479. }
  480. extern __inline __m512i
  481. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  482. _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
  483. {
  484. return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
  485. (__v16si) __W,
  486. (__mmask16) __U);
  487. }
  488. extern __inline __m512i
  489. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  490. _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
  491. {
  492. return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
  493. (__v16si)
  494. _mm512_setzero_si512 (),
  495. (__mmask16) __U);
  496. }
  497. extern __inline void
  498. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  499. _mm512_store_si512 (void *__P, __m512i __A)
  500. {
  501. *(__m512i *) __P = __A;
  502. }
  503. extern __inline void
  504. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  505. _mm512_store_epi32 (void *__P, __m512i __A)
  506. {
  507. *(__m512i *) __P = __A;
  508. }
  509. extern __inline void
  510. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  511. _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
  512. {
  513. __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
  514. (__mmask16) __U);
  515. }
  516. extern __inline __m512i
  517. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  518. _mm512_mullo_epi32 (__m512i __A, __m512i __B)
  519. {
  520. return (__m512i) ((__v16su) __A * (__v16su) __B);
  521. }
  522. extern __inline __m512i
  523. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  524. _mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
  525. {
  526. return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
  527. (__v16si) __B,
  528. (__v16si)
  529. _mm512_setzero_si512 (),
  530. __M);
  531. }
  532. extern __inline __m512i
  533. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  534. _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
  535. {
  536. return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
  537. (__v16si) __B,
  538. (__v16si) __W, __M);
  539. }
  540. extern __inline __m512i
  541. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  542. _mm512_mullox_epi64 (__m512i __A, __m512i __B)
  543. {
  544. return (__m512i) ((__v8du) __A * (__v8du) __B);
  545. }
  546. extern __inline __m512i
  547. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  548. _mm512_mask_mullox_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
  549. {
  550. return _mm512_mask_mov_epi64 (__W, __M, _mm512_mullox_epi64 (__A, __B));
  551. }
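/* Editorial sketch (not part of the original GCC header): _mm512_mullo_epi32
   keeps the low 32 bits of each 32x32 product, while _mm512_mullox_epi64
   appears to be a GCC-specific helper for a full low 64x64 multiply per lane
   (AVX-512F has no single instruction for it, so the compiler expands the
   plain vector multiply above).  The helper name below is hypothetical.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__example_mullo (__m512i __a, __m512i __b)
{
  /* Sixteen per-lane 32-bit products, each truncated to 32 bits.  */
  return _mm512_mullo_epi32 (__a, __b);
}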
  552. extern __inline __m512i
  553. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  554. _mm512_sllv_epi32 (__m512i __X, __m512i __Y)
  555. {
  556. return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
  557. (__v16si) __Y,
  558. (__v16si)
  559. _mm512_undefined_epi32 (),
  560. (__mmask16) -1);
  561. }
  562. extern __inline __m512i
  563. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  564. _mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
  565. {
  566. return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
  567. (__v16si) __Y,
  568. (__v16si) __W,
  569. (__mmask16) __U);
  570. }
  571. extern __inline __m512i
  572. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  573. _mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
  574. {
  575. return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
  576. (__v16si) __Y,
  577. (__v16si)
  578. _mm512_setzero_si512 (),
  579. (__mmask16) __U);
  580. }
  581. extern __inline __m512i
  582. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  583. _mm512_srav_epi32 (__m512i __X, __m512i __Y)
  584. {
  585. return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
  586. (__v16si) __Y,
  587. (__v16si)
  588. _mm512_undefined_epi32 (),
  589. (__mmask16) -1);
  590. }
  591. extern __inline __m512i
  592. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  593. _mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
  594. {
  595. return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
  596. (__v16si) __Y,
  597. (__v16si) __W,
  598. (__mmask16) __U);
  599. }
  600. extern __inline __m512i
  601. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  602. _mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
  603. {
  604. return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
  605. (__v16si) __Y,
  606. (__v16si)
  607. _mm512_setzero_si512 (),
  608. (__mmask16) __U);
  609. }
  610. extern __inline __m512i
  611. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  612. _mm512_srlv_epi32 (__m512i __X, __m512i __Y)
  613. {
  614. return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
  615. (__v16si) __Y,
  616. (__v16si)
  617. _mm512_undefined_epi32 (),
  618. (__mmask16) -1);
  619. }
  620. extern __inline __m512i
  621. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  622. _mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
  623. {
  624. return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
  625. (__v16si) __Y,
  626. (__v16si) __W,
  627. (__mmask16) __U);
  628. }
  629. extern __inline __m512i
  630. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  631. _mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
  632. {
  633. return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
  634. (__v16si) __Y,
  635. (__v16si)
  636. _mm512_setzero_si512 (),
  637. (__mmask16) __U);
  638. }
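/* Editorial sketch (not part of the original GCC header): the sllv/srav/srlv
   forms take a per-lane shift count in __Y (logical left, arithmetic right
   and logical right, respectively), unlike the slli/srai/srli forms further
   down, which apply one immediate count to every lane.  The helper name
   below is hypothetical.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__example_variable_shift (__m512i __x, __m512i __counts)
{
  /* Each 32-bit lane of __x is shifted left by the count held in the
     corresponding lane of __counts.  */
  return _mm512_sllv_epi32 (__x, __counts);
}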
  639. extern __inline __m512i
  640. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  641. _mm512_add_epi64 (__m512i __A, __m512i __B)
  642. {
  643. return (__m512i) ((__v8du) __A + (__v8du) __B);
  644. }
  645. extern __inline __m512i
  646. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  647. _mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
  648. {
  649. return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
  650. (__v8di) __B,
  651. (__v8di) __W,
  652. (__mmask8) __U);
  653. }
  654. extern __inline __m512i
  655. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  656. _mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
  657. {
  658. return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
  659. (__v8di) __B,
  660. (__v8di)
  661. _mm512_setzero_si512 (),
  662. (__mmask8) __U);
  663. }
  664. extern __inline __m512i
  665. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  666. _mm512_sub_epi64 (__m512i __A, __m512i __B)
  667. {
  668. return (__m512i) ((__v8du) __A - (__v8du) __B);
  669. }
  670. extern __inline __m512i
  671. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  672. _mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
  673. {
  674. return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
  675. (__v8di) __B,
  676. (__v8di) __W,
  677. (__mmask8) __U);
  678. }
  679. extern __inline __m512i
  680. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  681. _mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
  682. {
  683. return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
  684. (__v8di) __B,
  685. (__v8di)
  686. _mm512_setzero_si512 (),
  687. (__mmask8) __U);
  688. }
  689. extern __inline __m512i
  690. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  691. _mm512_sllv_epi64 (__m512i __X, __m512i __Y)
  692. {
  693. return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
  694. (__v8di) __Y,
  695. (__v8di)
  696. _mm512_undefined_pd (),
  697. (__mmask8) -1);
  698. }
  699. extern __inline __m512i
  700. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  701. _mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
  702. {
  703. return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
  704. (__v8di) __Y,
  705. (__v8di) __W,
  706. (__mmask8) __U);
  707. }
  708. extern __inline __m512i
  709. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  710. _mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
  711. {
  712. return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
  713. (__v8di) __Y,
  714. (__v8di)
  715. _mm512_setzero_si512 (),
  716. (__mmask8) __U);
  717. }
  718. extern __inline __m512i
  719. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  720. _mm512_srav_epi64 (__m512i __X, __m512i __Y)
  721. {
  722. return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
  723. (__v8di) __Y,
  724. (__v8di)
  725. _mm512_undefined_epi32 (),
  726. (__mmask8) -1);
  727. }
  728. extern __inline __m512i
  729. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  730. _mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
  731. {
  732. return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
  733. (__v8di) __Y,
  734. (__v8di) __W,
  735. (__mmask8) __U);
  736. }
  737. extern __inline __m512i
  738. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  739. _mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
  740. {
  741. return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
  742. (__v8di) __Y,
  743. (__v8di)
  744. _mm512_setzero_si512 (),
  745. (__mmask8) __U);
  746. }
  747. extern __inline __m512i
  748. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  749. _mm512_srlv_epi64 (__m512i __X, __m512i __Y)
  750. {
  751. return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
  752. (__v8di) __Y,
  753. (__v8di)
  754. _mm512_undefined_epi32 (),
  755. (__mmask8) -1);
  756. }
  757. extern __inline __m512i
  758. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  759. _mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
  760. {
  761. return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
  762. (__v8di) __Y,
  763. (__v8di) __W,
  764. (__mmask8) __U);
  765. }
  766. extern __inline __m512i
  767. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  768. _mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
  769. {
  770. return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
  771. (__v8di) __Y,
  772. (__v8di)
  773. _mm512_setzero_si512 (),
  774. (__mmask8) __U);
  775. }
  776. extern __inline __m512i
  777. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  778. _mm512_add_epi32 (__m512i __A, __m512i __B)
  779. {
  780. return (__m512i) ((__v16su) __A + (__v16su) __B);
  781. }
  782. extern __inline __m512i
  783. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  784. _mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
  785. {
  786. return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
  787. (__v16si) __B,
  788. (__v16si) __W,
  789. (__mmask16) __U);
  790. }
  791. extern __inline __m512i
  792. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  793. _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
  794. {
  795. return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
  796. (__v16si) __B,
  797. (__v16si)
  798. _mm512_setzero_si512 (),
  799. (__mmask16) __U);
  800. }
  801. extern __inline __m512i
  802. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  803. _mm512_mul_epi32 (__m512i __X, __m512i __Y)
  804. {
  805. return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
  806. (__v16si) __Y,
  807. (__v8di)
  808. _mm512_undefined_epi32 (),
  809. (__mmask8) -1);
  810. }
  811. extern __inline __m512i
  812. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  813. _mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
  814. {
  815. return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
  816. (__v16si) __Y,
  817. (__v8di) __W, __M);
  818. }
  819. extern __inline __m512i
  820. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  821. _mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
  822. {
  823. return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
  824. (__v16si) __Y,
  825. (__v8di)
  826. _mm512_setzero_si512 (),
  827. __M);
  828. }
  829. extern __inline __m512i
  830. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  831. _mm512_sub_epi32 (__m512i __A, __m512i __B)
  832. {
  833. return (__m512i) ((__v16su) __A - (__v16su) __B);
  834. }
  835. extern __inline __m512i
  836. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  837. _mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
  838. {
  839. return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
  840. (__v16si) __B,
  841. (__v16si) __W,
  842. (__mmask16) __U);
  843. }
  844. extern __inline __m512i
  845. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  846. _mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
  847. {
  848. return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
  849. (__v16si) __B,
  850. (__v16si)
  851. _mm512_setzero_si512 (),
  852. (__mmask16) __U);
  853. }
  854. extern __inline __m512i
  855. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  856. _mm512_mul_epu32 (__m512i __X, __m512i __Y)
  857. {
  858. return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
  859. (__v16si) __Y,
  860. (__v8di)
  861. _mm512_undefined_epi32 (),
  862. (__mmask8) -1);
  863. }
  864. extern __inline __m512i
  865. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  866. _mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
  867. {
  868. return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
  869. (__v16si) __Y,
  870. (__v8di) __W, __M);
  871. }
  872. extern __inline __m512i
  873. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  874. _mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
  875. {
  876. return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
  877. (__v16si) __Y,
  878. (__v8di)
  879. _mm512_setzero_si512 (),
  880. __M);
  881. }
  882. #ifdef __OPTIMIZE__
  883. extern __inline __m512i
  884. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  885. _mm512_slli_epi64 (__m512i __A, unsigned int __B)
  886. {
  887. return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
  888. (__v8di)
  889. _mm512_undefined_epi32 (),
  890. (__mmask8) -1);
  891. }
  892. extern __inline __m512i
  893. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  894. _mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
  895. unsigned int __B)
  896. {
  897. return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
  898. (__v8di) __W,
  899. (__mmask8) __U);
  900. }
  901. extern __inline __m512i
  902. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  903. _mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
  904. {
  905. return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
  906. (__v8di)
  907. _mm512_setzero_si512 (),
  908. (__mmask8) __U);
  909. }
  910. #else
  911. #define _mm512_slli_epi64(X, C) \
  912. ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
  913. (__v8di)(__m512i)_mm512_undefined_epi32 (),\
  914. (__mmask8)-1))
  915. #define _mm512_mask_slli_epi64(W, U, X, C) \
  916. ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
  917. (__v8di)(__m512i)(W),\
  918. (__mmask8)(U)))
  919. #define _mm512_maskz_slli_epi64(U, X, C) \
  920. ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
  921. (__v8di)(__m512i)_mm512_setzero_si512 (),\
  922. (__mmask8)(U)))
  923. #endif
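/* Editorial note (not part of the original GCC header): the immediate-count
   shifts above are inline functions only when __OPTIMIZE__ is defined;
   without optimization they are macros, so the count still reaches the
   builtin as a compile-time constant.  Either way the count argument is
   expected to be a constant expression, as in the hypothetical helper
   below.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__example_slli_epi64 (__m512i __x)
{
  /* Shift every 64-bit lane left by one bit.  */
  return _mm512_slli_epi64 (__x, 1);
}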
  924. extern __inline __m512i
  925. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  926. _mm512_sll_epi64 (__m512i __A, __m128i __B)
  927. {
  928. return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
  929. (__v2di) __B,
  930. (__v8di)
  931. _mm512_undefined_epi32 (),
  932. (__mmask8) -1);
  933. }
  934. extern __inline __m512i
  935. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  936. _mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
  937. {
  938. return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
  939. (__v2di) __B,
  940. (__v8di) __W,
  941. (__mmask8) __U);
  942. }
  943. extern __inline __m512i
  944. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  945. _mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
  946. {
  947. return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
  948. (__v2di) __B,
  949. (__v8di)
  950. _mm512_setzero_si512 (),
  951. (__mmask8) __U);
  952. }
  953. #ifdef __OPTIMIZE__
  954. extern __inline __m512i
  955. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  956. _mm512_srli_epi64 (__m512i __A, unsigned int __B)
  957. {
  958. return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
  959. (__v8di)
  960. _mm512_undefined_epi32 (),
  961. (__mmask8) -1);
  962. }
  963. extern __inline __m512i
  964. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  965. _mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
  966. __m512i __A, unsigned int __B)
  967. {
  968. return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
  969. (__v8di) __W,
  970. (__mmask8) __U);
  971. }
  972. extern __inline __m512i
  973. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  974. _mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
  975. {
  976. return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
  977. (__v8di)
  978. _mm512_setzero_si512 (),
  979. (__mmask8) __U);
  980. }
  981. #else
  982. #define _mm512_srli_epi64(X, C) \
  983. ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
  984. (__v8di)(__m512i)_mm512_undefined_epi32 (),\
  985. (__mmask8)-1))
  986. #define _mm512_mask_srli_epi64(W, U, X, C) \
  987. ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
  988. (__v8di)(__m512i)(W),\
  989. (__mmask8)(U)))
  990. #define _mm512_maskz_srli_epi64(U, X, C) \
  991. ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
  992. (__v8di)(__m512i)_mm512_setzero_si512 (),\
  993. (__mmask8)(U)))
  994. #endif
  995. extern __inline __m512i
  996. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  997. _mm512_srl_epi64 (__m512i __A, __m128i __B)
  998. {
  999. return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
  1000. (__v2di) __B,
  1001. (__v8di)
  1002. _mm512_undefined_epi32 (),
  1003. (__mmask8) -1);
  1004. }
  1005. extern __inline __m512i
  1006. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1007. _mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
  1008. {
  1009. return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
  1010. (__v2di) __B,
  1011. (__v8di) __W,
  1012. (__mmask8) __U);
  1013. }
  1014. extern __inline __m512i
  1015. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1016. _mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
  1017. {
  1018. return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
  1019. (__v2di) __B,
  1020. (__v8di)
  1021. _mm512_setzero_si512 (),
  1022. (__mmask8) __U);
  1023. }
  1024. #ifdef __OPTIMIZE__
  1025. extern __inline __m512i
  1026. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1027. _mm512_srai_epi64 (__m512i __A, unsigned int __B)
  1028. {
  1029. return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
  1030. (__v8di)
  1031. _mm512_undefined_epi32 (),
  1032. (__mmask8) -1);
  1033. }
  1034. extern __inline __m512i
  1035. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1036. _mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
  1037. unsigned int __B)
  1038. {
  1039. return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
  1040. (__v8di) __W,
  1041. (__mmask8) __U);
  1042. }
  1043. extern __inline __m512i
  1044. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1045. _mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
  1046. {
  1047. return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
  1048. (__v8di)
  1049. _mm512_setzero_si512 (),
  1050. (__mmask8) __U);
  1051. }
  1052. #else
  1053. #define _mm512_srai_epi64(X, C) \
  1054. ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
  1055. (__v8di)(__m512i)_mm512_undefined_epi32 (),\
  1056. (__mmask8)-1))
  1057. #define _mm512_mask_srai_epi64(W, U, X, C) \
  1058. ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
  1059. (__v8di)(__m512i)(W),\
  1060. (__mmask8)(U)))
  1061. #define _mm512_maskz_srai_epi64(U, X, C) \
  1062. ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
  1063. (__v8di)(__m512i)_mm512_setzero_si512 (),\
  1064. (__mmask8)(U)))
  1065. #endif
  1066. extern __inline __m512i
  1067. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1068. _mm512_sra_epi64 (__m512i __A, __m128i __B)
  1069. {
  1070. return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
  1071. (__v2di) __B,
  1072. (__v8di)
  1073. _mm512_undefined_epi32 (),
  1074. (__mmask8) -1);
  1075. }
  1076. extern __inline __m512i
  1077. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1078. _mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
  1079. {
  1080. return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
  1081. (__v2di) __B,
  1082. (__v8di) __W,
  1083. (__mmask8) __U);
  1084. }
  1085. extern __inline __m512i
  1086. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1087. _mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
  1088. {
  1089. return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
  1090. (__v2di) __B,
  1091. (__v8di)
  1092. _mm512_setzero_si512 (),
  1093. (__mmask8) __U);
  1094. }
  1095. #ifdef __OPTIMIZE__
  1096. extern __inline __m512i
  1097. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1098. _mm512_slli_epi32 (__m512i __A, unsigned int __B)
  1099. {
  1100. return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
  1101. (__v16si)
  1102. _mm512_undefined_epi32 (),
  1103. (__mmask16) -1);
  1104. }
  1105. extern __inline __m512i
  1106. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1107. _mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
  1108. unsigned int __B)
  1109. {
  1110. return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
  1111. (__v16si) __W,
  1112. (__mmask16) __U);
  1113. }
  1114. extern __inline __m512i
  1115. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1116. _mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
  1117. {
  1118. return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
  1119. (__v16si)
  1120. _mm512_setzero_si512 (),
  1121. (__mmask16) __U);
  1122. }
  1123. #else
  1124. #define _mm512_slli_epi32(X, C) \
  1125. ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
  1126. (__v16si)(__m512i)_mm512_undefined_epi32 (),\
  1127. (__mmask16)-1))
  1128. #define _mm512_mask_slli_epi32(W, U, X, C) \
  1129. ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
  1130. (__v16si)(__m512i)(W),\
  1131. (__mmask16)(U)))
  1132. #define _mm512_maskz_slli_epi32(U, X, C) \
  1133. ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
  1134. (__v16si)(__m512i)_mm512_setzero_si512 (),\
  1135. (__mmask16)(U)))
  1136. #endif
  1137. extern __inline __m512i
  1138. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1139. _mm512_sll_epi32 (__m512i __A, __m128i __B)
  1140. {
  1141. return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
  1142. (__v4si) __B,
  1143. (__v16si)
  1144. _mm512_undefined_epi32 (),
  1145. (__mmask16) -1);
  1146. }
  1147. extern __inline __m512i
  1148. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1149. _mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
  1150. {
  1151. return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
  1152. (__v4si) __B,
  1153. (__v16si) __W,
  1154. (__mmask16) __U);
  1155. }
  1156. extern __inline __m512i
  1157. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1158. _mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
  1159. {
  1160. return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
  1161. (__v4si) __B,
  1162. (__v16si)
  1163. _mm512_setzero_si512 (),
  1164. (__mmask16) __U);
  1165. }
  1166. #ifdef __OPTIMIZE__
  1167. extern __inline __m512i
  1168. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1169. _mm512_srli_epi32 (__m512i __A, unsigned int __B)
  1170. {
  1171. return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
  1172. (__v16si)
  1173. _mm512_undefined_epi32 (),
  1174. (__mmask16) -1);
  1175. }
  1176. extern __inline __m512i
  1177. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1178. _mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
  1179. __m512i __A, unsigned int __B)
  1180. {
  1181. return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
  1182. (__v16si) __W,
  1183. (__mmask16) __U);
  1184. }
  1185. extern __inline __m512i
  1186. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1187. _mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
  1188. {
  1189. return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
  1190. (__v16si)
  1191. _mm512_setzero_si512 (),
  1192. (__mmask16) __U);
  1193. }
  1194. #else
  1195. #define _mm512_srli_epi32(X, C) \
  1196. ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
  1197. (__v16si)(__m512i)_mm512_undefined_epi32 (),\
  1198. (__mmask16)-1))
  1199. #define _mm512_mask_srli_epi32(W, U, X, C) \
  1200. ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
  1201. (__v16si)(__m512i)(W),\
  1202. (__mmask16)(U)))
  1203. #define _mm512_maskz_srli_epi32(U, X, C) \
  1204. ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
  1205. (__v16si)(__m512i)_mm512_setzero_si512 (),\
  1206. (__mmask16)(U)))
  1207. #endif
  1208. extern __inline __m512i
  1209. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1210. _mm512_srl_epi32 (__m512i __A, __m128i __B)
  1211. {
  1212. return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
  1213. (__v4si) __B,
  1214. (__v16si)
  1215. _mm512_undefined_epi32 (),
  1216. (__mmask16) -1);
  1217. }
  1218. extern __inline __m512i
  1219. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1220. _mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
  1221. {
  1222. return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
  1223. (__v4si) __B,
  1224. (__v16si) __W,
  1225. (__mmask16) __U);
  1226. }
  1227. extern __inline __m512i
  1228. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1229. _mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
  1230. {
  1231. return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
  1232. (__v4si) __B,
  1233. (__v16si)
  1234. _mm512_setzero_si512 (),
  1235. (__mmask16) __U);
  1236. }
  1237. #ifdef __OPTIMIZE__
  1238. extern __inline __m512i
  1239. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1240. _mm512_srai_epi32 (__m512i __A, unsigned int __B)
  1241. {
  1242. return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
  1243. (__v16si)
  1244. _mm512_undefined_epi32 (),
  1245. (__mmask16) -1);
  1246. }
  1247. extern __inline __m512i
  1248. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1249. _mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
  1250. unsigned int __B)
  1251. {
  1252. return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
  1253. (__v16si) __W,
  1254. (__mmask16) __U);
  1255. }
  1256. extern __inline __m512i
  1257. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1258. _mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
  1259. {
  1260. return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
  1261. (__v16si)
  1262. _mm512_setzero_si512 (),
  1263. (__mmask16) __U);
  1264. }
  1265. #else
  1266. #define _mm512_srai_epi32(X, C) \
  1267. ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
  1268. (__v16si)(__m512i)_mm512_undefined_epi32 (),\
  1269. (__mmask16)-1))
  1270. #define _mm512_mask_srai_epi32(W, U, X, C) \
  1271. ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
  1272. (__v16si)(__m512i)(W),\
  1273. (__mmask16)(U)))
  1274. #define _mm512_maskz_srai_epi32(U, X, C) \
  1275. ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
  1276. (__v16si)(__m512i)_mm512_setzero_si512 (),\
  1277. (__mmask16)(U)))
  1278. #endif
  1279. extern __inline __m512i
  1280. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1281. _mm512_sra_epi32 (__m512i __A, __m128i __B)
  1282. {
  1283. return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
  1284. (__v4si) __B,
  1285. (__v16si)
  1286. _mm512_undefined_epi32 (),
  1287. (__mmask16) -1);
  1288. }
  1289. extern __inline __m512i
  1290. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1291. _mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
  1292. {
  1293. return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
  1294. (__v4si) __B,
  1295. (__v16si) __W,
  1296. (__mmask16) __U);
  1297. }
  1298. extern __inline __m512i
  1299. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1300. _mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
  1301. {
  1302. return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
  1303. (__v4si) __B,
  1304. (__v16si)
  1305. _mm512_setzero_si512 (),
  1306. (__mmask16) __U);
  1307. }
  1308. #ifdef __OPTIMIZE__
  1309. extern __inline __m128d
  1310. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1311. _mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
  1312. {
  1313. return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
  1314. (__v2df) __B,
  1315. __R);
  1316. }
  1317. extern __inline __m128d
  1318. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1319. _mm_mask_add_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
  1320. __m128d __B, const int __R)
  1321. {
  1322. return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
  1323. (__v2df) __B,
  1324. (__v2df) __W,
  1325. (__mmask8) __U, __R);
  1326. }
  1327. extern __inline __m128d
  1328. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1329. _mm_maskz_add_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
  1330. const int __R)
  1331. {
  1332. return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
  1333. (__v2df) __B,
  1334. (__v2df)
  1335. _mm_setzero_pd (),
  1336. (__mmask8) __U, __R);
  1337. }
  1338. extern __inline __m128
  1339. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1340. _mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
  1341. {
  1342. return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
  1343. (__v4sf) __B,
  1344. __R);
  1345. }
  1346. extern __inline __m128
  1347. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1348. _mm_mask_add_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
  1349. __m128 __B, const int __R)
  1350. {
  1351. return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
  1352. (__v4sf) __B,
  1353. (__v4sf) __W,
  1354. (__mmask8) __U, __R);
  1355. }
  1356. extern __inline __m128
  1357. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1358. _mm_maskz_add_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
  1359. const int __R)
  1360. {
  1361. return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
  1362. (__v4sf) __B,
  1363. (__v4sf)
  1364. _mm_setzero_ps (),
  1365. (__mmask8) __U, __R);
  1366. }
  1367. extern __inline __m128d
  1368. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1369. _mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
  1370. {
  1371. return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
  1372. (__v2df) __B,
  1373. __R);
  1374. }
  1375. extern __inline __m128d
  1376. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1377. _mm_mask_sub_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
  1378. __m128d __B, const int __R)
  1379. {
  1380. return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
  1381. (__v2df) __B,
  1382. (__v2df) __W,
  1383. (__mmask8) __U, __R);
  1384. }
  1385. extern __inline __m128d
  1386. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1387. _mm_maskz_sub_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
  1388. const int __R)
  1389. {
  1390. return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
  1391. (__v2df) __B,
  1392. (__v2df)
  1393. _mm_setzero_pd (),
  1394. (__mmask8) __U, __R);
  1395. }
  1396. extern __inline __m128
  1397. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1398. _mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
  1399. {
  1400. return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
  1401. (__v4sf) __B,
  1402. __R);
  1403. }
  1404. extern __inline __m128
  1405. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1406. _mm_mask_sub_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
  1407. __m128 __B, const int __R)
  1408. {
  1409. return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
  1410. (__v4sf) __B,
  1411. (__v4sf) __W,
  1412. (__mmask8) __U, __R);
  1413. }
  1414. extern __inline __m128
  1415. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1416. _mm_maskz_sub_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
  1417. const int __R)
  1418. {
  1419. return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
  1420. (__v4sf) __B,
  1421. (__v4sf)
  1422. _mm_setzero_ps (),
  1423. (__mmask8) __U, __R);
  1424. }
  1425. #else
  1426. #define _mm_add_round_sd(A, B, C) \
  1427. (__m128d)__builtin_ia32_addsd_round(A, B, C)
  1428. #define _mm_mask_add_round_sd(W, U, A, B, C) \
  1429. (__m128d)__builtin_ia32_addsd_mask_round(A, B, W, U, C)
  1430. #define _mm_maskz_add_round_sd(U, A, B, C) \
  1431. (__m128d)__builtin_ia32_addsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
  1432. #define _mm_add_round_ss(A, B, C) \
  1433. (__m128)__builtin_ia32_addss_round(A, B, C)
  1434. #define _mm_mask_add_round_ss(W, U, A, B, C) \
  1435. (__m128)__builtin_ia32_addss_mask_round(A, B, W, U, C)
  1436. #define _mm_maskz_add_round_ss(U, A, B, C) \
  1437. (__m128)__builtin_ia32_addss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
  1438. #define _mm_sub_round_sd(A, B, C) \
  1439. (__m128d)__builtin_ia32_subsd_round(A, B, C)
  1440. #define _mm_mask_sub_round_sd(W, U, A, B, C) \
  1441. (__m128d)__builtin_ia32_subsd_mask_round(A, B, W, U, C)
  1442. #define _mm_maskz_sub_round_sd(U, A, B, C) \
  1443. (__m128d)__builtin_ia32_subsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
  1444. #define _mm_sub_round_ss(A, B, C) \
  1445. (__m128)__builtin_ia32_subss_round(A, B, C)
  1446. #define _mm_mask_sub_round_ss(W, U, A, B, C) \
  1447. (__m128)__builtin_ia32_subss_mask_round(A, B, W, U, C)
  1448. #define _mm_maskz_sub_round_ss(U, A, B, C) \
  1449. (__m128)__builtin_ia32_subss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
  1450. #endif
  1451. /* Constant helper to represent the ternary logic operations among
  1452. vector A, B and C. */
  1453. typedef enum
  1454. {
  1455. _MM_TERNLOG_A = 0xF0,
  1456. _MM_TERNLOG_B = 0xCC,
  1457. _MM_TERNLOG_C = 0xAA
  1458. } _MM_TERNLOG_ENUM;
  1459. #ifdef __OPTIMIZE__
  1460. extern __inline __m512i
  1461. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1462. _mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C,
  1463. const int __imm)
  1464. {
  1465. return (__m512i)
  1466. __builtin_ia32_pternlogq512_mask ((__v8di) __A,
  1467. (__v8di) __B,
  1468. (__v8di) __C,
  1469. (unsigned char) __imm,
  1470. (__mmask8) -1);
  1471. }
  1472. extern __inline __m512i
  1473. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1474. _mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
  1475. __m512i __C, const int __imm)
  1476. {
  1477. return (__m512i)
  1478. __builtin_ia32_pternlogq512_mask ((__v8di) __A,
  1479. (__v8di) __B,
  1480. (__v8di) __C,
  1481. (unsigned char) __imm,
  1482. (__mmask8) __U);
  1483. }
  1484. extern __inline __m512i
  1485. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1486. _mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
  1487. __m512i __C, const int __imm)
  1488. {
  1489. return (__m512i)
  1490. __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
  1491. (__v8di) __B,
  1492. (__v8di) __C,
  1493. (unsigned char) __imm,
  1494. (__mmask8) __U);
  1495. }
  1496. extern __inline __m512i
  1497. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1498. _mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C,
  1499. const int __imm)
  1500. {
  1501. return (__m512i)
  1502. __builtin_ia32_pternlogd512_mask ((__v16si) __A,
  1503. (__v16si) __B,
  1504. (__v16si) __C,
  1505. (unsigned char) __imm,
  1506. (__mmask16) -1);
  1507. }
  1508. extern __inline __m512i
  1509. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1510. _mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
  1511. __m512i __C, const int __imm)
  1512. {
  1513. return (__m512i)
  1514. __builtin_ia32_pternlogd512_mask ((__v16si) __A,
  1515. (__v16si) __B,
  1516. (__v16si) __C,
  1517. (unsigned char) __imm,
  1518. (__mmask16) __U);
  1519. }
  1520. extern __inline __m512i
  1521. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1522. _mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
  1523. __m512i __C, const int __imm)
  1524. {
  1525. return (__m512i)
  1526. __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
  1527. (__v16si) __B,
  1528. (__v16si) __C,
  1529. (unsigned char) __imm,
  1530. (__mmask16) __U);
  1531. }
  1532. #else
  1533. #define _mm512_ternarylogic_epi64(A, B, C, I) \
  1534. ((__m512i) \
  1535. __builtin_ia32_pternlogq512_mask ((__v8di) (__m512i) (A), \
  1536. (__v8di) (__m512i) (B), \
  1537. (__v8di) (__m512i) (C), \
  1538. (unsigned char) (I), \
  1539. (__mmask8) -1))
  1540. #define _mm512_mask_ternarylogic_epi64(A, U, B, C, I) \
  1541. ((__m512i) \
  1542. __builtin_ia32_pternlogq512_mask ((__v8di) (__m512i) (A), \
  1543. (__v8di) (__m512i) (B), \
  1544. (__v8di) (__m512i) (C), \
  1545. (unsigned char)(I), \
  1546. (__mmask8) (U)))
  1547. #define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I) \
  1548. ((__m512i) \
  1549. __builtin_ia32_pternlogq512_maskz ((__v8di) (__m512i) (A), \
  1550. (__v8di) (__m512i) (B), \
  1551. (__v8di) (__m512i) (C), \
  1552. (unsigned char) (I), \
  1553. (__mmask8) (U)))
  1554. #define _mm512_ternarylogic_epi32(A, B, C, I) \
  1555. ((__m512i) \
  1556. __builtin_ia32_pternlogd512_mask ((__v16si) (__m512i) (A), \
  1557. (__v16si) (__m512i) (B), \
  1558. (__v16si) (__m512i) (C), \
  1559. (unsigned char) (I), \
  1560. (__mmask16) -1))
  1561. #define _mm512_mask_ternarylogic_epi32(A, U, B, C, I) \
  1562. ((__m512i) \
  1563. __builtin_ia32_pternlogd512_mask ((__v16si) (__m512i) (A), \
  1564. (__v16si) (__m512i) (B), \
  1565. (__v16si) (__m512i) (C), \
  1566. (unsigned char) (I), \
  1567. (__mmask16) (U)))
  1568. #define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I) \
  1569. ((__m512i) \
  1570. __builtin_ia32_pternlogd512_maskz ((__v16si) (__m512i) (A), \
  1571. (__v16si) (__m512i) (B), \
  1572. (__v16si) (__m512i) (C), \
  1573. (unsigned char) (I), \
  1574. (__mmask16) (U)))
  1575. #endif
  1576. extern __inline __m512d
  1577. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1578. _mm512_rcp14_pd (__m512d __A)
  1579. {
  1580. return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
  1581. (__v8df)
  1582. _mm512_undefined_pd (),
  1583. (__mmask8) -1);
  1584. }
  1585. extern __inline __m512d
  1586. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1587. _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
  1588. {
  1589. return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
  1590. (__v8df) __W,
  1591. (__mmask8) __U);
  1592. }
  1593. extern __inline __m512d
  1594. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1595. _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
  1596. {
  1597. return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
  1598. (__v8df)
  1599. _mm512_setzero_pd (),
  1600. (__mmask8) __U);
  1601. }
  1602. extern __inline __m512
  1603. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1604. _mm512_rcp14_ps (__m512 __A)
  1605. {
  1606. return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
  1607. (__v16sf)
  1608. _mm512_undefined_ps (),
  1609. (__mmask16) -1);
  1610. }
  1611. extern __inline __m512
  1612. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1613. _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
  1614. {
  1615. return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
  1616. (__v16sf) __W,
  1617. (__mmask16) __U);
  1618. }
  1619. extern __inline __m512
  1620. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1621. _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
  1622. {
  1623. return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
  1624. (__v16sf)
  1625. _mm512_setzero_ps (),
  1626. (__mmask16) __U);
  1627. }
  1628. extern __inline __m128d
  1629. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1630. _mm_rcp14_sd (__m128d __A, __m128d __B)
  1631. {
  1632. return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
  1633. (__v2df) __A);
  1634. }
  1635. extern __inline __m128d
  1636. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1637. _mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
  1638. {
  1639. return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
  1640. (__v2df) __A,
  1641. (__v2df) __W,
  1642. (__mmask8) __U);
  1643. }
  1644. extern __inline __m128d
  1645. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1646. _mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
  1647. {
  1648. return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
  1649. (__v2df) __A,
  1650. (__v2df) _mm_setzero_ps (),
  1651. (__mmask8) __U);
  1652. }
  1653. extern __inline __m128
  1654. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1655. _mm_rcp14_ss (__m128 __A, __m128 __B)
  1656. {
  1657. return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
  1658. (__v4sf) __A);
  1659. }
  1660. extern __inline __m128
  1661. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1662. _mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  1663. {
  1664. return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
  1665. (__v4sf) __A,
  1666. (__v4sf) __W,
  1667. (__mmask8) __U);
  1668. }
  1669. extern __inline __m128
  1670. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1671. _mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
  1672. {
  1673. return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
  1674. (__v4sf) __A,
  1675. (__v4sf) _mm_setzero_ps (),
  1676. (__mmask8) __U);
  1677. }
  1678. extern __inline __m512d
  1679. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1680. _mm512_rsqrt14_pd (__m512d __A)
  1681. {
  1682. return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
  1683. (__v8df)
  1684. _mm512_undefined_pd (),
  1685. (__mmask8) -1);
  1686. }
  1687. extern __inline __m512d
  1688. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1689. _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
  1690. {
  1691. return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
  1692. (__v8df) __W,
  1693. (__mmask8) __U);
  1694. }
  1695. extern __inline __m512d
  1696. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1697. _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
  1698. {
  1699. return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
  1700. (__v8df)
  1701. _mm512_setzero_pd (),
  1702. (__mmask8) __U);
  1703. }
  1704. extern __inline __m512
  1705. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1706. _mm512_rsqrt14_ps (__m512 __A)
  1707. {
  1708. return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
  1709. (__v16sf)
  1710. _mm512_undefined_ps (),
  1711. (__mmask16) -1);
  1712. }
  1713. extern __inline __m512
  1714. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1715. _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
  1716. {
  1717. return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
  1718. (__v16sf) __W,
  1719. (__mmask16) __U);
  1720. }
  1721. extern __inline __m512
  1722. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1723. _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
  1724. {
  1725. return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
  1726. (__v16sf)
  1727. _mm512_setzero_ps (),
  1728. (__mmask16) __U);
  1729. }
  1730. extern __inline __m128d
  1731. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1732. _mm_rsqrt14_sd (__m128d __A, __m128d __B)
  1733. {
  1734. return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
  1735. (__v2df) __A);
  1736. }
  1737. extern __inline __m128d
  1738. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1739. _mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
  1740. {
  1741. return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
  1742. (__v2df) __A,
  1743. (__v2df) __W,
  1744. (__mmask8) __U);
  1745. }
  1746. extern __inline __m128d
  1747. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1748. _mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
  1749. {
  1750. return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
  1751. (__v2df) __A,
  1752. (__v2df) _mm_setzero_pd (),
  1753. (__mmask8) __U);
  1754. }
  1755. extern __inline __m128
  1756. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1757. _mm_rsqrt14_ss (__m128 __A, __m128 __B)
  1758. {
  1759. return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
  1760. (__v4sf) __A);
  1761. }
  1762. extern __inline __m128
  1763. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1764. _mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  1765. {
  1766. return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
  1767. (__v4sf) __A,
  1768. (__v4sf) __W,
  1769. (__mmask8) __U);
  1770. }
  1771. extern __inline __m128
  1772. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1773. _mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
  1774. {
  1775. return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
  1776. (__v4sf) __A,
  1777. (__v4sf) _mm_setzero_ps (),
  1778. (__mmask8) __U);
  1779. }
  1780. #ifdef __OPTIMIZE__
  1781. extern __inline __m512d
  1782. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1783. _mm512_sqrt_round_pd (__m512d __A, const int __R)
  1784. {
  1785. return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
  1786. (__v8df)
  1787. _mm512_undefined_pd (),
  1788. (__mmask8) -1, __R);
  1789. }
  1790. extern __inline __m512d
  1791. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1792. _mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
  1793. const int __R)
  1794. {
  1795. return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
  1796. (__v8df) __W,
  1797. (__mmask8) __U, __R);
  1798. }
  1799. extern __inline __m512d
  1800. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1801. _mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
  1802. {
  1803. return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
  1804. (__v8df)
  1805. _mm512_setzero_pd (),
  1806. (__mmask8) __U, __R);
  1807. }
  1808. extern __inline __m512
  1809. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1810. _mm512_sqrt_round_ps (__m512 __A, const int __R)
  1811. {
  1812. return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
  1813. (__v16sf)
  1814. _mm512_undefined_ps (),
  1815. (__mmask16) -1, __R);
  1816. }
  1817. extern __inline __m512
  1818. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1819. _mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
  1820. {
  1821. return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
  1822. (__v16sf) __W,
  1823. (__mmask16) __U, __R);
  1824. }
  1825. extern __inline __m512
  1826. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1827. _mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
  1828. {
  1829. return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
  1830. (__v16sf)
  1831. _mm512_setzero_ps (),
  1832. (__mmask16) __U, __R);
  1833. }
  1834. extern __inline __m128d
  1835. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1836. _mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
  1837. {
  1838. return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
  1839. (__v2df) __A,
  1840. (__v2df)
  1841. _mm_setzero_pd (),
  1842. (__mmask8) -1, __R);
  1843. }
  1844. extern __inline __m128d
  1845. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1846. _mm_mask_sqrt_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
  1847. const int __R)
  1848. {
  1849. return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
  1850. (__v2df) __A,
  1851. (__v2df) __W,
  1852. (__mmask8) __U, __R);
  1853. }
  1854. extern __inline __m128d
  1855. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1856. _mm_maskz_sqrt_round_sd (__mmask8 __U, __m128d __A, __m128d __B, const int __R)
  1857. {
  1858. return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
  1859. (__v2df) __A,
  1860. (__v2df)
  1861. _mm_setzero_pd (),
  1862. (__mmask8) __U, __R);
  1863. }
  1864. extern __inline __m128
  1865. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1866. _mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
  1867. {
  1868. return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
  1869. (__v4sf) __A,
  1870. (__v4sf)
  1871. _mm_setzero_ps (),
  1872. (__mmask8) -1, __R);
  1873. }
  1874. extern __inline __m128
  1875. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1876. _mm_mask_sqrt_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
  1877. const int __R)
  1878. {
  1879. return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
  1880. (__v4sf) __A,
  1881. (__v4sf) __W,
  1882. (__mmask8) __U, __R);
  1883. }
  1884. extern __inline __m128
  1885. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1886. _mm_maskz_sqrt_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
  1887. {
  1888. return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
  1889. (__v4sf) __A,
  1890. (__v4sf)
  1891. _mm_setzero_ps (),
  1892. (__mmask8) __U, __R);
  1893. }
  1894. #else
  1895. #define _mm512_sqrt_round_pd(A, C) \
  1896. (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, C)
  1897. #define _mm512_mask_sqrt_round_pd(W, U, A, C) \
  1898. (__m512d)__builtin_ia32_sqrtpd512_mask(A, W, U, C)
  1899. #define _mm512_maskz_sqrt_round_pd(U, A, C) \
  1900. (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
  1901. #define _mm512_sqrt_round_ps(A, C) \
  1902. (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_undefined_ps(), -1, C)
  1903. #define _mm512_mask_sqrt_round_ps(W, U, A, C) \
  1904. (__m512)__builtin_ia32_sqrtps512_mask(A, W, U, C)
  1905. #define _mm512_maskz_sqrt_round_ps(U, A, C) \
  1906. (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
  1907. #define _mm_sqrt_round_sd(A, B, C) \
  1908. (__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, \
  1909. (__v2df) _mm_setzero_pd (), -1, C)
  1910. #define _mm_mask_sqrt_round_sd(W, U, A, B, C) \
  1911. (__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, W, U, C)
  1912. #define _mm_maskz_sqrt_round_sd(U, A, B, C) \
  1913. (__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, \
  1914. (__v2df) _mm_setzero_pd (), U, C)
  1915. #define _mm_sqrt_round_ss(A, B, C) \
  1916. (__m128)__builtin_ia32_sqrtss_mask_round (B, A, \
  1917. (__v4sf) _mm_setzero_ps (), -1, C)
  1918. #define _mm_mask_sqrt_round_ss(W, U, A, B, C) \
  1919. (__m128)__builtin_ia32_sqrtss_mask_round (B, A, W, U, C)
  1920. #define _mm_maskz_sqrt_round_ss(U, A, B, C) \
  1921. (__m128)__builtin_ia32_sqrtss_mask_round (B, A, \
  1922. (__v4sf) _mm_setzero_ps (), U, C)
  1923. #endif
  1924. #define _mm_mask_sqrt_sd(W, U, A, B) \
  1925. _mm_mask_sqrt_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
  1926. #define _mm_maskz_sqrt_sd(U, A, B) \
  1927. _mm_maskz_sqrt_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
  1928. #define _mm_mask_sqrt_ss(W, U, A, B) \
  1929. _mm_mask_sqrt_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
  1930. #define _mm_maskz_sqrt_ss(U, A, B) \
  1931. _mm_maskz_sqrt_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
  1932. extern __inline __m512i
  1933. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1934. _mm512_cvtepi8_epi32 (__m128i __A)
  1935. {
  1936. return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
  1937. (__v16si)
  1938. _mm512_undefined_epi32 (),
  1939. (__mmask16) -1);
  1940. }
  1941. extern __inline __m512i
  1942. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1943. _mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
  1944. {
  1945. return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
  1946. (__v16si) __W,
  1947. (__mmask16) __U);
  1948. }
  1949. extern __inline __m512i
  1950. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1951. _mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
  1952. {
  1953. return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
  1954. (__v16si)
  1955. _mm512_setzero_si512 (),
  1956. (__mmask16) __U);
  1957. }
  1958. extern __inline __m512i
  1959. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1960. _mm512_cvtepi8_epi64 (__m128i __A)
  1961. {
  1962. return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
  1963. (__v8di)
  1964. _mm512_undefined_epi32 (),
  1965. (__mmask8) -1);
  1966. }
  1967. extern __inline __m512i
  1968. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1969. _mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
  1970. {
  1971. return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
  1972. (__v8di) __W,
  1973. (__mmask8) __U);
  1974. }
  1975. extern __inline __m512i
  1976. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1977. _mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
  1978. {
  1979. return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
  1980. (__v8di)
  1981. _mm512_setzero_si512 (),
  1982. (__mmask8) __U);
  1983. }
  1984. extern __inline __m512i
  1985. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1986. _mm512_cvtepi16_epi32 (__m256i __A)
  1987. {
  1988. return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
  1989. (__v16si)
  1990. _mm512_undefined_epi32 (),
  1991. (__mmask16) -1);
  1992. }
  1993. extern __inline __m512i
  1994. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  1995. _mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
  1996. {
  1997. return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
  1998. (__v16si) __W,
  1999. (__mmask16) __U);
  2000. }
  2001. extern __inline __m512i
  2002. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2003. _mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
  2004. {
  2005. return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
  2006. (__v16si)
  2007. _mm512_setzero_si512 (),
  2008. (__mmask16) __U);
  2009. }
  2010. extern __inline __m512i
  2011. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2012. _mm512_cvtepi16_epi64 (__m128i __A)
  2013. {
  2014. return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
  2015. (__v8di)
  2016. _mm512_undefined_epi32 (),
  2017. (__mmask8) -1);
  2018. }
  2019. extern __inline __m512i
  2020. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2021. _mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
  2022. {
  2023. return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
  2024. (__v8di) __W,
  2025. (__mmask8) __U);
  2026. }
  2027. extern __inline __m512i
  2028. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2029. _mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
  2030. {
  2031. return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
  2032. (__v8di)
  2033. _mm512_setzero_si512 (),
  2034. (__mmask8) __U);
  2035. }
  2036. extern __inline __m512i
  2037. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2038. _mm512_cvtepi32_epi64 (__m256i __X)
  2039. {
  2040. return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
  2041. (__v8di)
  2042. _mm512_undefined_epi32 (),
  2043. (__mmask8) -1);
  2044. }
  2045. extern __inline __m512i
  2046. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2047. _mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
  2048. {
  2049. return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
  2050. (__v8di) __W,
  2051. (__mmask8) __U);
  2052. }
  2053. extern __inline __m512i
  2054. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2055. _mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
  2056. {
  2057. return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
  2058. (__v8di)
  2059. _mm512_setzero_si512 (),
  2060. (__mmask8) __U);
  2061. }
  2062. extern __inline __m512i
  2063. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2064. _mm512_cvtepu8_epi32 (__m128i __A)
  2065. {
  2066. return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
  2067. (__v16si)
  2068. _mm512_undefined_epi32 (),
  2069. (__mmask16) -1);
  2070. }
  2071. extern __inline __m512i
  2072. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2073. _mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
  2074. {
  2075. return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
  2076. (__v16si) __W,
  2077. (__mmask16) __U);
  2078. }
  2079. extern __inline __m512i
  2080. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2081. _mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
  2082. {
  2083. return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
  2084. (__v16si)
  2085. _mm512_setzero_si512 (),
  2086. (__mmask16) __U);
  2087. }
  2088. extern __inline __m512i
  2089. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2090. _mm512_cvtepu8_epi64 (__m128i __A)
  2091. {
  2092. return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
  2093. (__v8di)
  2094. _mm512_undefined_epi32 (),
  2095. (__mmask8) -1);
  2096. }
  2097. extern __inline __m512i
  2098. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2099. _mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
  2100. {
  2101. return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
  2102. (__v8di) __W,
  2103. (__mmask8) __U);
  2104. }
  2105. extern __inline __m512i
  2106. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2107. _mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
  2108. {
  2109. return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
  2110. (__v8di)
  2111. _mm512_setzero_si512 (),
  2112. (__mmask8) __U);
  2113. }
  2114. extern __inline __m512i
  2115. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2116. _mm512_cvtepu16_epi32 (__m256i __A)
  2117. {
  2118. return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
  2119. (__v16si)
  2120. _mm512_undefined_epi32 (),
  2121. (__mmask16) -1);
  2122. }
  2123. extern __inline __m512i
  2124. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2125. _mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
  2126. {
  2127. return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
  2128. (__v16si) __W,
  2129. (__mmask16) __U);
  2130. }
  2131. extern __inline __m512i
  2132. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2133. _mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
  2134. {
  2135. return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
  2136. (__v16si)
  2137. _mm512_setzero_si512 (),
  2138. (__mmask16) __U);
  2139. }
  2140. extern __inline __m512i
  2141. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2142. _mm512_cvtepu16_epi64 (__m128i __A)
  2143. {
  2144. return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
  2145. (__v8di)
  2146. _mm512_undefined_epi32 (),
  2147. (__mmask8) -1);
  2148. }
  2149. extern __inline __m512i
  2150. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2151. _mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
  2152. {
  2153. return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
  2154. (__v8di) __W,
  2155. (__mmask8) __U);
  2156. }
  2157. extern __inline __m512i
  2158. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2159. _mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
  2160. {
  2161. return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
  2162. (__v8di)
  2163. _mm512_setzero_si512 (),
  2164. (__mmask8) __U);
  2165. }
  2166. extern __inline __m512i
  2167. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2168. _mm512_cvtepu32_epi64 (__m256i __X)
  2169. {
  2170. return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
  2171. (__v8di)
  2172. _mm512_undefined_epi32 (),
  2173. (__mmask8) -1);
  2174. }
  2175. extern __inline __m512i
  2176. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2177. _mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
  2178. {
  2179. return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
  2180. (__v8di) __W,
  2181. (__mmask8) __U);
  2182. }
  2183. extern __inline __m512i
  2184. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2185. _mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
  2186. {
  2187. return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
  2188. (__v8di)
  2189. _mm512_setzero_si512 (),
  2190. (__mmask8) __U);
  2191. }
  2192. #ifdef __OPTIMIZE__
  2193. extern __inline __m512d
  2194. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2195. _mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
  2196. {
  2197. return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
  2198. (__v8df) __B,
  2199. (__v8df)
  2200. _mm512_undefined_pd (),
  2201. (__mmask8) -1, __R);
  2202. }
  2203. extern __inline __m512d
  2204. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2205. _mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
  2206. __m512d __B, const int __R)
  2207. {
  2208. return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
  2209. (__v8df) __B,
  2210. (__v8df) __W,
  2211. (__mmask8) __U, __R);
  2212. }
  2213. extern __inline __m512d
  2214. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2215. _mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
  2216. const int __R)
  2217. {
  2218. return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
  2219. (__v8df) __B,
  2220. (__v8df)
  2221. _mm512_setzero_pd (),
  2222. (__mmask8) __U, __R);
  2223. }
  2224. extern __inline __m512
  2225. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2226. _mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
  2227. {
  2228. return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
  2229. (__v16sf) __B,
  2230. (__v16sf)
  2231. _mm512_undefined_ps (),
  2232. (__mmask16) -1, __R);
  2233. }
  2234. extern __inline __m512
  2235. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2236. _mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
  2237. __m512 __B, const int __R)
  2238. {
  2239. return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
  2240. (__v16sf) __B,
  2241. (__v16sf) __W,
  2242. (__mmask16) __U, __R);
  2243. }
  2244. extern __inline __m512
  2245. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2246. _mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
  2247. {
  2248. return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
  2249. (__v16sf) __B,
  2250. (__v16sf)
  2251. _mm512_setzero_ps (),
  2252. (__mmask16) __U, __R);
  2253. }
  2254. extern __inline __m512d
  2255. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2256. _mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
  2257. {
  2258. return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
  2259. (__v8df) __B,
  2260. (__v8df)
  2261. _mm512_undefined_pd (),
  2262. (__mmask8) -1, __R);
  2263. }
  2264. extern __inline __m512d
  2265. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2266. _mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
  2267. __m512d __B, const int __R)
  2268. {
  2269. return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
  2270. (__v8df) __B,
  2271. (__v8df) __W,
  2272. (__mmask8) __U, __R);
  2273. }
  2274. extern __inline __m512d
  2275. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2276. _mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
  2277. const int __R)
  2278. {
  2279. return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
  2280. (__v8df) __B,
  2281. (__v8df)
  2282. _mm512_setzero_pd (),
  2283. (__mmask8) __U, __R);
  2284. }
  2285. extern __inline __m512
  2286. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2287. _mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
  2288. {
  2289. return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
  2290. (__v16sf) __B,
  2291. (__v16sf)
  2292. _mm512_undefined_ps (),
  2293. (__mmask16) -1, __R);
  2294. }
  2295. extern __inline __m512
  2296. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2297. _mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
  2298. __m512 __B, const int __R)
  2299. {
  2300. return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
  2301. (__v16sf) __B,
  2302. (__v16sf) __W,
  2303. (__mmask16) __U, __R);
  2304. }
  2305. extern __inline __m512
  2306. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2307. _mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
  2308. {
  2309. return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
  2310. (__v16sf) __B,
  2311. (__v16sf)
  2312. _mm512_setzero_ps (),
  2313. (__mmask16) __U, __R);
  2314. }
  2315. #else
  2316. #define _mm512_add_round_pd(A, B, C) \
  2317. (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
  2318. #define _mm512_mask_add_round_pd(W, U, A, B, C) \
  2319. (__m512d)__builtin_ia32_addpd512_mask(A, B, W, U, C)
  2320. #define _mm512_maskz_add_round_pd(U, A, B, C) \
  2321. (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
  2322. #define _mm512_add_round_ps(A, B, C) \
  2323. (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
  2324. #define _mm512_mask_add_round_ps(W, U, A, B, C) \
  2325. (__m512)__builtin_ia32_addps512_mask(A, B, W, U, C)
  2326. #define _mm512_maskz_add_round_ps(U, A, B, C) \
  2327. (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
  2328. #define _mm512_sub_round_pd(A, B, C) \
  2329. (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
  2330. #define _mm512_mask_sub_round_pd(W, U, A, B, C) \
  2331. (__m512d)__builtin_ia32_subpd512_mask(A, B, W, U, C)
  2332. #define _mm512_maskz_sub_round_pd(U, A, B, C) \
  2333. (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
  2334. #define _mm512_sub_round_ps(A, B, C) \
  2335. (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
  2336. #define _mm512_mask_sub_round_ps(W, U, A, B, C) \
  2337. (__m512)__builtin_ia32_subps512_mask(A, B, W, U, C)
  2338. #define _mm512_maskz_sub_round_ps(U, A, B, C) \
  2339. (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
  2340. #endif
  2341. #ifdef __OPTIMIZE__
  2342. extern __inline __m512d
  2343. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2344. _mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
  2345. {
  2346. return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
  2347. (__v8df) __B,
  2348. (__v8df)
  2349. _mm512_undefined_pd (),
  2350. (__mmask8) -1, __R);
  2351. }
  2352. extern __inline __m512d
  2353. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2354. _mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
  2355. __m512d __B, const int __R)
  2356. {
  2357. return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
  2358. (__v8df) __B,
  2359. (__v8df) __W,
  2360. (__mmask8) __U, __R);
  2361. }
  2362. extern __inline __m512d
  2363. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2364. _mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
  2365. const int __R)
  2366. {
  2367. return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
  2368. (__v8df) __B,
  2369. (__v8df)
  2370. _mm512_setzero_pd (),
  2371. (__mmask8) __U, __R);
  2372. }
  2373. extern __inline __m512
  2374. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2375. _mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
  2376. {
  2377. return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
  2378. (__v16sf) __B,
  2379. (__v16sf)
  2380. _mm512_undefined_ps (),
  2381. (__mmask16) -1, __R);
  2382. }
  2383. extern __inline __m512
  2384. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2385. _mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
  2386. __m512 __B, const int __R)
  2387. {
  2388. return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
  2389. (__v16sf) __B,
  2390. (__v16sf) __W,
  2391. (__mmask16) __U, __R);
  2392. }
  2393. extern __inline __m512
  2394. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2395. _mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
  2396. {
  2397. return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
  2398. (__v16sf) __B,
  2399. (__v16sf)
  2400. _mm512_setzero_ps (),
  2401. (__mmask16) __U, __R);
  2402. }
  2403. extern __inline __m512d
  2404. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2405. _mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
  2406. {
  2407. return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
  2408. (__v8df) __V,
  2409. (__v8df)
  2410. _mm512_undefined_pd (),
  2411. (__mmask8) -1, __R);
  2412. }
  2413. extern __inline __m512d
  2414. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2415. _mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
  2416. __m512d __V, const int __R)
  2417. {
  2418. return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
  2419. (__v8df) __V,
  2420. (__v8df) __W,
  2421. (__mmask8) __U, __R);
  2422. }
  2423. extern __inline __m512d
  2424. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2425. _mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
  2426. const int __R)
  2427. {
  2428. return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
  2429. (__v8df) __V,
  2430. (__v8df)
  2431. _mm512_setzero_pd (),
  2432. (__mmask8) __U, __R);
  2433. }
  2434. extern __inline __m512
  2435. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2436. _mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
  2437. {
  2438. return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
  2439. (__v16sf) __B,
  2440. (__v16sf)
  2441. _mm512_undefined_ps (),
  2442. (__mmask16) -1, __R);
  2443. }
  2444. extern __inline __m512
  2445. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2446. _mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
  2447. __m512 __B, const int __R)
  2448. {
  2449. return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
  2450. (__v16sf) __B,
  2451. (__v16sf) __W,
  2452. (__mmask16) __U, __R);
  2453. }
  2454. extern __inline __m512
  2455. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2456. _mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
  2457. {
  2458. return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
  2459. (__v16sf) __B,
  2460. (__v16sf)
  2461. _mm512_setzero_ps (),
  2462. (__mmask16) __U, __R);
  2463. }
  2464. extern __inline __m128d
  2465. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2466. _mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
  2467. {
  2468. return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
  2469. (__v2df) __B,
  2470. __R);
  2471. }
  2472. extern __inline __m128d
  2473. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2474. _mm_mask_mul_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
  2475. __m128d __B, const int __R)
  2476. {
  2477. return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
  2478. (__v2df) __B,
  2479. (__v2df) __W,
  2480. (__mmask8) __U, __R);
  2481. }
  2482. extern __inline __m128d
  2483. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2484. _mm_maskz_mul_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
  2485. const int __R)
  2486. {
  2487. return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
  2488. (__v2df) __B,
  2489. (__v2df)
  2490. _mm_setzero_pd (),
  2491. (__mmask8) __U, __R);
  2492. }
  2493. extern __inline __m128
  2494. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2495. _mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
  2496. {
  2497. return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
  2498. (__v4sf) __B,
  2499. __R);
  2500. }
  2501. extern __inline __m128
  2502. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2503. _mm_mask_mul_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
  2504. __m128 __B, const int __R)
  2505. {
  2506. return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
  2507. (__v4sf) __B,
  2508. (__v4sf) __W,
  2509. (__mmask8) __U, __R);
  2510. }
  2511. extern __inline __m128
  2512. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2513. _mm_maskz_mul_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
  2514. const int __R)
  2515. {
  2516. return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
  2517. (__v4sf) __B,
  2518. (__v4sf)
  2519. _mm_setzero_ps (),
  2520. (__mmask8) __U, __R);
  2521. }
  2522. extern __inline __m128d
  2523. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2524. _mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
  2525. {
  2526. return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
  2527. (__v2df) __B,
  2528. __R);
  2529. }
  2530. extern __inline __m128d
  2531. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2532. _mm_mask_div_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
  2533. __m128d __B, const int __R)
  2534. {
  2535. return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
  2536. (__v2df) __B,
  2537. (__v2df) __W,
  2538. (__mmask8) __U, __R);
  2539. }
  2540. extern __inline __m128d
  2541. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2542. _mm_maskz_div_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
  2543. const int __R)
  2544. {
  2545. return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
  2546. (__v2df) __B,
  2547. (__v2df)
  2548. _mm_setzero_pd (),
  2549. (__mmask8) __U, __R);
  2550. }
  2551. extern __inline __m128
  2552. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2553. _mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
  2554. {
  2555. return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
  2556. (__v4sf) __B,
  2557. __R);
  2558. }
  2559. extern __inline __m128
  2560. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2561. _mm_mask_div_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
  2562. __m128 __B, const int __R)
  2563. {
  2564. return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
  2565. (__v4sf) __B,
  2566. (__v4sf) __W,
  2567. (__mmask8) __U, __R);
  2568. }
  2569. extern __inline __m128
  2570. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2571. _mm_maskz_div_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
  2572. const int __R)
  2573. {
  2574. return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
  2575. (__v4sf) __B,
  2576. (__v4sf)
  2577. _mm_setzero_ps (),
  2578. (__mmask8) __U, __R);
  2579. }
  2580. #else
  2581. #define _mm512_mul_round_pd(A, B, C) \
  2582. (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
  2583. #define _mm512_mask_mul_round_pd(W, U, A, B, C) \
  2584. (__m512d)__builtin_ia32_mulpd512_mask(A, B, W, U, C)
  2585. #define _mm512_maskz_mul_round_pd(U, A, B, C) \
  2586. (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
  2587. #define _mm512_mul_round_ps(A, B, C) \
  2588. (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
  2589. #define _mm512_mask_mul_round_ps(W, U, A, B, C) \
  2590. (__m512)__builtin_ia32_mulps512_mask(A, B, W, U, C)
  2591. #define _mm512_maskz_mul_round_ps(U, A, B, C) \
  2592. (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
  2593. #define _mm512_div_round_pd(A, B, C) \
  2594. (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
  2595. #define _mm512_mask_div_round_pd(W, U, A, B, C) \
  2596. (__m512d)__builtin_ia32_divpd512_mask(A, B, W, U, C)
  2597. #define _mm512_maskz_div_round_pd(U, A, B, C) \
  2598. (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
  2599. #define _mm512_div_round_ps(A, B, C) \
  2600. (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
  2601. #define _mm512_mask_div_round_ps(W, U, A, B, C) \
  2602. (__m512)__builtin_ia32_divps512_mask(A, B, W, U, C)
  2603. #define _mm512_maskz_div_round_ps(U, A, B, C) \
  2604. (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
  2605. #define _mm_mul_round_sd(A, B, C) \
  2606. (__m128d)__builtin_ia32_mulsd_round(A, B, C)
  2607. #define _mm_mask_mul_round_sd(W, U, A, B, C) \
  2608. (__m128d)__builtin_ia32_mulsd_mask_round(A, B, W, U, C)
  2609. #define _mm_maskz_mul_round_sd(U, A, B, C) \
  2610. (__m128d)__builtin_ia32_mulsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
  2611. #define _mm_mul_round_ss(A, B, C) \
  2612. (__m128)__builtin_ia32_mulss_round(A, B, C)
  2613. #define _mm_mask_mul_round_ss(W, U, A, B, C) \
  2614. (__m128)__builtin_ia32_mulss_mask_round(A, B, W, U, C)
  2615. #define _mm_maskz_mul_round_ss(U, A, B, C) \
  2616. (__m128)__builtin_ia32_mulss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
  2617. #define _mm_div_round_sd(A, B, C) \
  2618. (__m128d)__builtin_ia32_divsd_round(A, B, C)
  2619. #define _mm_mask_div_round_sd(W, U, A, B, C) \
  2620. (__m128d)__builtin_ia32_divsd_mask_round(A, B, W, U, C)
  2621. #define _mm_maskz_div_round_sd(U, A, B, C) \
  2622. (__m128d)__builtin_ia32_divsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
  2623. #define _mm_div_round_ss(A, B, C) \
  2624. (__m128)__builtin_ia32_divss_round(A, B, C)
  2625. #define _mm_mask_div_round_ss(W, U, A, B, C) \
  2626. (__m128)__builtin_ia32_divss_mask_round(A, B, W, U, C)
  2627. #define _mm_maskz_div_round_ss(U, A, B, C) \
  2628. (__m128)__builtin_ia32_divss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
  2629. #endif
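/* Packed double- and single-precision maximum and minimum.  Here __R
   carries the SAE (suppress-all-exceptions) control rather than a rounding
   mode, since max/min do not round.  */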
  2630. #ifdef __OPTIMIZE__
  2631. extern __inline __m512d
  2632. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2633. _mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
  2634. {
  2635. return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
  2636. (__v8df) __B,
  2637. (__v8df)
  2638. _mm512_undefined_pd (),
  2639. (__mmask8) -1, __R);
  2640. }
  2641. extern __inline __m512d
  2642. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2643. _mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
  2644. __m512d __B, const int __R)
  2645. {
  2646. return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
  2647. (__v8df) __B,
  2648. (__v8df) __W,
  2649. (__mmask8) __U, __R);
  2650. }
  2651. extern __inline __m512d
  2652. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2653. _mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
  2654. const int __R)
  2655. {
  2656. return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
  2657. (__v8df) __B,
  2658. (__v8df)
  2659. _mm512_setzero_pd (),
  2660. (__mmask8) __U, __R);
  2661. }
  2662. extern __inline __m512
  2663. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2664. _mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
  2665. {
  2666. return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
  2667. (__v16sf) __B,
  2668. (__v16sf)
  2669. _mm512_undefined_ps (),
  2670. (__mmask16) -1, __R);
  2671. }
  2672. extern __inline __m512
  2673. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2674. _mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
  2675. __m512 __B, const int __R)
  2676. {
  2677. return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
  2678. (__v16sf) __B,
  2679. (__v16sf) __W,
  2680. (__mmask16) __U, __R);
  2681. }
  2682. extern __inline __m512
  2683. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2684. _mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
  2685. {
  2686. return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
  2687. (__v16sf) __B,
  2688. (__v16sf)
  2689. _mm512_setzero_ps (),
  2690. (__mmask16) __U, __R);
  2691. }
  2692. extern __inline __m512d
  2693. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2694. _mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
  2695. {
  2696. return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
  2697. (__v8df) __B,
  2698. (__v8df)
  2699. _mm512_undefined_pd (),
  2700. (__mmask8) -1, __R);
  2701. }
  2702. extern __inline __m512d
  2703. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2704. _mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
  2705. __m512d __B, const int __R)
  2706. {
  2707. return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
  2708. (__v8df) __B,
  2709. (__v8df) __W,
  2710. (__mmask8) __U, __R);
  2711. }
  2712. extern __inline __m512d
  2713. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2714. _mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
  2715. const int __R)
  2716. {
  2717. return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
  2718. (__v8df) __B,
  2719. (__v8df)
  2720. _mm512_setzero_pd (),
  2721. (__mmask8) __U, __R);
  2722. }
  2723. extern __inline __m512
  2724. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2725. _mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
  2726. {
  2727. return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
  2728. (__v16sf) __B,
  2729. (__v16sf)
  2730. _mm512_undefined_ps (),
  2731. (__mmask16) -1, __R);
  2732. }
  2733. extern __inline __m512
  2734. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2735. _mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
  2736. __m512 __B, const int __R)
  2737. {
  2738. return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
  2739. (__v16sf) __B,
  2740. (__v16sf) __W,
  2741. (__mmask16) __U, __R);
  2742. }
  2743. extern __inline __m512
  2744. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2745. _mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
  2746. {
  2747. return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
  2748. (__v16sf) __B,
  2749. (__v16sf)
  2750. _mm512_setzero_ps (),
  2751. (__mmask16) __U, __R);
  2752. }
  2753. #else
  2754. #define _mm512_max_round_pd(A, B, R) \
  2755. (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R)
  2756. #define _mm512_mask_max_round_pd(W, U, A, B, R) \
  2757. (__m512d)__builtin_ia32_maxpd512_mask(A, B, W, U, R)
  2758. #define _mm512_maskz_max_round_pd(U, A, B, R) \
  2759. (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R)
  2760. #define _mm512_max_round_ps(A, B, R) \
2761. (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R)
  2762. #define _mm512_mask_max_round_ps(W, U, A, B, R) \
  2763. (__m512)__builtin_ia32_maxps512_mask(A, B, W, U, R)
  2764. #define _mm512_maskz_max_round_ps(U, A, B, R) \
  2765. (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R)
  2766. #define _mm512_min_round_pd(A, B, R) \
  2767. (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R)
  2768. #define _mm512_mask_min_round_pd(W, U, A, B, R) \
  2769. (__m512d)__builtin_ia32_minpd512_mask(A, B, W, U, R)
  2770. #define _mm512_maskz_min_round_pd(U, A, B, R) \
  2771. (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R)
  2772. #define _mm512_min_round_ps(A, B, R) \
  2773. (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R)
  2774. #define _mm512_mask_min_round_ps(W, U, A, B, R) \
  2775. (__m512)__builtin_ia32_minps512_mask(A, B, W, U, R)
  2776. #define _mm512_maskz_min_round_ps(U, A, B, R) \
  2777. (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R)
  2778. #endif
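/* VSCALEFPD/VSCALEFPS and their scalar forms: each result element is
   __A * 2^floor(__B) for the corresponding source elements, computed
   under the rounding mode __R.  */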
  2779. #ifdef __OPTIMIZE__
  2780. extern __inline __m512d
  2781. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2782. _mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
  2783. {
  2784. return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
  2785. (__v8df) __B,
  2786. (__v8df)
  2787. _mm512_undefined_pd (),
  2788. (__mmask8) -1, __R);
  2789. }
  2790. extern __inline __m512d
  2791. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2792. _mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
  2793. __m512d __B, const int __R)
  2794. {
  2795. return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
  2796. (__v8df) __B,
  2797. (__v8df) __W,
  2798. (__mmask8) __U, __R);
  2799. }
  2800. extern __inline __m512d
  2801. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2802. _mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
  2803. const int __R)
  2804. {
  2805. return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
  2806. (__v8df) __B,
  2807. (__v8df)
  2808. _mm512_setzero_pd (),
  2809. (__mmask8) __U, __R);
  2810. }
  2811. extern __inline __m512
  2812. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2813. _mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
  2814. {
  2815. return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
  2816. (__v16sf) __B,
  2817. (__v16sf)
  2818. _mm512_undefined_ps (),
  2819. (__mmask16) -1, __R);
  2820. }
  2821. extern __inline __m512
  2822. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2823. _mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
  2824. __m512 __B, const int __R)
  2825. {
  2826. return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
  2827. (__v16sf) __B,
  2828. (__v16sf) __W,
  2829. (__mmask16) __U, __R);
  2830. }
  2831. extern __inline __m512
  2832. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2833. _mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
  2834. const int __R)
  2835. {
  2836. return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
  2837. (__v16sf) __B,
  2838. (__v16sf)
  2839. _mm512_setzero_ps (),
  2840. (__mmask16) __U, __R);
  2841. }
  2842. extern __inline __m128d
  2843. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2844. _mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
  2845. {
  2846. return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
  2847. (__v2df) __B,
  2848. (__v2df)
  2849. _mm_setzero_pd (),
  2850. (__mmask8) -1, __R);
  2851. }
  2852. extern __inline __m128d
  2853. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2854. _mm_mask_scalef_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
  2855. const int __R)
  2856. {
  2857. return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
  2858. (__v2df) __B,
  2859. (__v2df) __W,
  2860. (__mmask8) __U, __R);
  2861. }
  2862. extern __inline __m128d
  2863. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2864. _mm_maskz_scalef_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
  2865. const int __R)
  2866. {
  2867. return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
  2868. (__v2df) __B,
  2869. (__v2df)
  2870. _mm_setzero_pd (),
  2871. (__mmask8) __U, __R);
  2872. }
  2873. extern __inline __m128
  2874. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2875. _mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
  2876. {
  2877. return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
  2878. (__v4sf) __B,
  2879. (__v4sf)
  2880. _mm_setzero_ps (),
  2881. (__mmask8) -1, __R);
  2882. }
  2883. extern __inline __m128
  2884. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2885. _mm_mask_scalef_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
  2886. const int __R)
  2887. {
  2888. return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
  2889. (__v4sf) __B,
  2890. (__v4sf) __W,
  2891. (__mmask8) __U, __R);
  2892. }
  2893. extern __inline __m128
  2894. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2895. _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
  2896. {
  2897. return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
  2898. (__v4sf) __B,
  2899. (__v4sf)
  2900. _mm_setzero_ps (),
  2901. (__mmask8) __U, __R);
  2902. }
  2903. #else
  2904. #define _mm512_scalef_round_pd(A, B, C) \
  2905. ((__m512d) \
  2906. __builtin_ia32_scalefpd512_mask((A), (B), \
  2907. (__v8df) _mm512_undefined_pd(), \
  2908. -1, (C)))
  2909. #define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
  2910. ((__m512d) __builtin_ia32_scalefpd512_mask((A), (B), (W), (U), (C)))
  2911. #define _mm512_maskz_scalef_round_pd(U, A, B, C) \
  2912. ((__m512d) \
  2913. __builtin_ia32_scalefpd512_mask((A), (B), \
  2914. (__v8df) _mm512_setzero_pd(), \
  2915. (U), (C)))
  2916. #define _mm512_scalef_round_ps(A, B, C) \
  2917. ((__m512) \
  2918. __builtin_ia32_scalefps512_mask((A), (B), \
  2919. (__v16sf) _mm512_undefined_ps(), \
  2920. -1, (C)))
  2921. #define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
  2922. ((__m512) __builtin_ia32_scalefps512_mask((A), (B), (W), (U), (C)))
  2923. #define _mm512_maskz_scalef_round_ps(U, A, B, C) \
  2924. ((__m512) \
  2925. __builtin_ia32_scalefps512_mask((A), (B), \
  2926. (__v16sf) _mm512_setzero_ps(), \
  2927. (U), (C)))
  2928. #define _mm_scalef_round_sd(A, B, C) \
  2929. ((__m128d) \
  2930. __builtin_ia32_scalefsd_mask_round ((A), (B), \
  2931. (__v2df) _mm_undefined_pd (), \
  2932. -1, (C)))
  2933. #define _mm_scalef_round_ss(A, B, C) \
  2934. ((__m128) \
  2935. __builtin_ia32_scalefss_mask_round ((A), (B), \
  2936. (__v4sf) _mm_undefined_ps (), \
  2937. -1, (C)))
  2938. #define _mm_mask_scalef_round_sd(W, U, A, B, C) \
  2939. ((__m128d) \
  2940. __builtin_ia32_scalefsd_mask_round ((A), (B), (W), (U), (C)))
  2941. #define _mm_mask_scalef_round_ss(W, U, A, B, C) \
  2942. ((__m128) \
  2943. __builtin_ia32_scalefss_mask_round ((A), (B), (W), (U), (C)))
  2944. #define _mm_maskz_scalef_round_sd(U, A, B, C) \
  2945. ((__m128d) \
  2946. __builtin_ia32_scalefsd_mask_round ((A), (B), \
  2947. (__v2df) _mm_setzero_pd (), \
  2948. (U), (C)))
  2949. #define _mm_maskz_scalef_round_ss(U, A, B, C) \
  2950. ((__m128) \
  2951. __builtin_ia32_scalefss_mask_round ((A), (B), \
  2952. (__v4sf) _mm_setzero_ps (), \
  2953. (U), (C)))
  2954. #endif
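/* Non-rounding scalar scalef entry points, expressed through the _round
   forms with _MM_FROUND_CUR_DIRECTION.  */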
  2955. #define _mm_mask_scalef_sd(W, U, A, B) \
  2956. _mm_mask_scalef_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
  2957. #define _mm_maskz_scalef_sd(U, A, B) \
  2958. _mm_maskz_scalef_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
  2959. #define _mm_mask_scalef_ss(W, U, A, B) \
  2960. _mm_mask_scalef_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
  2961. #define _mm_maskz_scalef_ss(U, A, B) \
  2962. _mm_maskz_scalef_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
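/* Fused multiply-add family with explicit rounding: fmadd computes A*B+C,
   fmsub computes A*B-C, fmaddsub subtracts in even-indexed and adds in
   odd-indexed elements, fmsubadd does the reverse, and the fnm* forms
   negate the product.  The _mask variants keep unselected elements from
   the first operand, _mask3 keeps them from the addend __C, and _maskz
   zeroes them.  */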
  2963. #ifdef __OPTIMIZE__
  2964. extern __inline __m512d
  2965. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2966. _mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
  2967. {
  2968. return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
  2969. (__v8df) __B,
  2970. (__v8df) __C,
  2971. (__mmask8) -1, __R);
  2972. }
  2973. extern __inline __m512d
  2974. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2975. _mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
  2976. __m512d __C, const int __R)
  2977. {
  2978. return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
  2979. (__v8df) __B,
  2980. (__v8df) __C,
  2981. (__mmask8) __U, __R);
  2982. }
  2983. extern __inline __m512d
  2984. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2985. _mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
  2986. __mmask8 __U, const int __R)
  2987. {
  2988. return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
  2989. (__v8df) __B,
  2990. (__v8df) __C,
  2991. (__mmask8) __U, __R);
  2992. }
  2993. extern __inline __m512d
  2994. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  2995. _mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
  2996. __m512d __C, const int __R)
  2997. {
  2998. return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
  2999. (__v8df) __B,
  3000. (__v8df) __C,
  3001. (__mmask8) __U, __R);
  3002. }
  3003. extern __inline __m512
  3004. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3005. _mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
  3006. {
  3007. return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
  3008. (__v16sf) __B,
  3009. (__v16sf) __C,
  3010. (__mmask16) -1, __R);
  3011. }
  3012. extern __inline __m512
  3013. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3014. _mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
  3015. __m512 __C, const int __R)
  3016. {
  3017. return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
  3018. (__v16sf) __B,
  3019. (__v16sf) __C,
  3020. (__mmask16) __U, __R);
  3021. }
  3022. extern __inline __m512
  3023. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3024. _mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
  3025. __mmask16 __U, const int __R)
  3026. {
  3027. return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
  3028. (__v16sf) __B,
  3029. (__v16sf) __C,
  3030. (__mmask16) __U, __R);
  3031. }
  3032. extern __inline __m512
  3033. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3034. _mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
  3035. __m512 __C, const int __R)
  3036. {
  3037. return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
  3038. (__v16sf) __B,
  3039. (__v16sf) __C,
  3040. (__mmask16) __U, __R);
  3041. }
  3042. extern __inline __m512d
  3043. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3044. _mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
  3045. {
  3046. return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
  3047. (__v8df) __B,
  3048. (__v8df) __C,
  3049. (__mmask8) -1, __R);
  3050. }
  3051. extern __inline __m512d
  3052. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3053. _mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
  3054. __m512d __C, const int __R)
  3055. {
  3056. return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
  3057. (__v8df) __B,
  3058. (__v8df) __C,
  3059. (__mmask8) __U, __R);
  3060. }
  3061. extern __inline __m512d
  3062. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3063. _mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
  3064. __mmask8 __U, const int __R)
  3065. {
  3066. return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
  3067. (__v8df) __B,
  3068. (__v8df) __C,
  3069. (__mmask8) __U, __R);
  3070. }
  3071. extern __inline __m512d
  3072. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3073. _mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
  3074. __m512d __C, const int __R)
  3075. {
  3076. return (__m512d) __builtin_ia32_vfmsubpd512_maskz ((__v8df) __A,
  3077. (__v8df) __B,
  3078. (__v8df) __C,
  3079. (__mmask8) __U, __R);
  3080. }
  3081. extern __inline __m512
  3082. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3083. _mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
  3084. {
  3085. return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
  3086. (__v16sf) __B,
  3087. (__v16sf) __C,
  3088. (__mmask16) -1, __R);
  3089. }
  3090. extern __inline __m512
  3091. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3092. _mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
  3093. __m512 __C, const int __R)
  3094. {
  3095. return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
  3096. (__v16sf) __B,
  3097. (__v16sf) __C,
  3098. (__mmask16) __U, __R);
  3099. }
  3100. extern __inline __m512
  3101. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3102. _mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
  3103. __mmask16 __U, const int __R)
  3104. {
  3105. return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
  3106. (__v16sf) __B,
  3107. (__v16sf) __C,
  3108. (__mmask16) __U, __R);
  3109. }
  3110. extern __inline __m512
  3111. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3112. _mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
  3113. __m512 __C, const int __R)
  3114. {
  3115. return (__m512) __builtin_ia32_vfmsubps512_maskz ((__v16sf) __A,
  3116. (__v16sf) __B,
  3117. (__v16sf) __C,
  3118. (__mmask16) __U, __R);
  3119. }
  3120. extern __inline __m512d
  3121. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3122. _mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
  3123. {
  3124. return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
  3125. (__v8df) __B,
  3126. (__v8df) __C,
  3127. (__mmask8) -1, __R);
  3128. }
  3129. extern __inline __m512d
  3130. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3131. _mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
  3132. __m512d __C, const int __R)
  3133. {
  3134. return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
  3135. (__v8df) __B,
  3136. (__v8df) __C,
  3137. (__mmask8) __U, __R);
  3138. }
  3139. extern __inline __m512d
  3140. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3141. _mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
  3142. __mmask8 __U, const int __R)
  3143. {
  3144. return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
  3145. (__v8df) __B,
  3146. (__v8df) __C,
  3147. (__mmask8) __U, __R);
  3148. }
  3149. extern __inline __m512d
  3150. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3151. _mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
  3152. __m512d __C, const int __R)
  3153. {
  3154. return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
  3155. (__v8df) __B,
  3156. (__v8df) __C,
  3157. (__mmask8) __U, __R);
  3158. }
  3159. extern __inline __m512
  3160. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3161. _mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
  3162. {
  3163. return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
  3164. (__v16sf) __B,
  3165. (__v16sf) __C,
  3166. (__mmask16) -1, __R);
  3167. }
  3168. extern __inline __m512
  3169. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3170. _mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
  3171. __m512 __C, const int __R)
  3172. {
  3173. return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
  3174. (__v16sf) __B,
  3175. (__v16sf) __C,
  3176. (__mmask16) __U, __R);
  3177. }
  3178. extern __inline __m512
  3179. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3180. _mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
  3181. __mmask16 __U, const int __R)
  3182. {
  3183. return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
  3184. (__v16sf) __B,
  3185. (__v16sf) __C,
  3186. (__mmask16) __U, __R);
  3187. }
  3188. extern __inline __m512
  3189. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3190. _mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
  3191. __m512 __C, const int __R)
  3192. {
  3193. return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
  3194. (__v16sf) __B,
  3195. (__v16sf) __C,
  3196. (__mmask16) __U, __R);
  3197. }
  3198. extern __inline __m512d
  3199. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3200. _mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
  3201. {
  3202. return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
  3203. (__v8df) __B,
  3204. -(__v8df) __C,
  3205. (__mmask8) -1, __R);
  3206. }
  3207. extern __inline __m512d
  3208. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3209. _mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
  3210. __m512d __C, const int __R)
  3211. {
  3212. return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
  3213. (__v8df) __B,
  3214. -(__v8df) __C,
  3215. (__mmask8) __U, __R);
  3216. }
  3217. extern __inline __m512d
  3218. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3219. _mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
  3220. __mmask8 __U, const int __R)
  3221. {
  3222. return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
  3223. (__v8df) __B,
  3224. (__v8df) __C,
  3225. (__mmask8) __U, __R);
  3226. }
  3227. extern __inline __m512d
  3228. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3229. _mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
  3230. __m512d __C, const int __R)
  3231. {
  3232. return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
  3233. (__v8df) __B,
  3234. -(__v8df) __C,
  3235. (__mmask8) __U, __R);
  3236. }
  3237. extern __inline __m512
  3238. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3239. _mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
  3240. {
  3241. return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
  3242. (__v16sf) __B,
  3243. -(__v16sf) __C,
  3244. (__mmask16) -1, __R);
  3245. }
  3246. extern __inline __m512
  3247. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3248. _mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
  3249. __m512 __C, const int __R)
  3250. {
  3251. return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
  3252. (__v16sf) __B,
  3253. -(__v16sf) __C,
  3254. (__mmask16) __U, __R);
  3255. }
  3256. extern __inline __m512
  3257. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3258. _mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
  3259. __mmask16 __U, const int __R)
  3260. {
  3261. return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
  3262. (__v16sf) __B,
  3263. (__v16sf) __C,
  3264. (__mmask16) __U, __R);
  3265. }
  3266. extern __inline __m512
  3267. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3268. _mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
  3269. __m512 __C, const int __R)
  3270. {
  3271. return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
  3272. (__v16sf) __B,
  3273. -(__v16sf) __C,
  3274. (__mmask16) __U, __R);
  3275. }
  3276. extern __inline __m512d
  3277. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3278. _mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
  3279. {
  3280. return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
  3281. (__v8df) __B,
  3282. (__v8df) __C,
  3283. (__mmask8) -1, __R);
  3284. }
  3285. extern __inline __m512d
  3286. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3287. _mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
  3288. __m512d __C, const int __R)
  3289. {
  3290. return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
  3291. (__v8df) __B,
  3292. (__v8df) __C,
  3293. (__mmask8) __U, __R);
  3294. }
  3295. extern __inline __m512d
  3296. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3297. _mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
  3298. __mmask8 __U, const int __R)
  3299. {
  3300. return (__m512d) __builtin_ia32_vfnmaddpd512_mask3 ((__v8df) __A,
  3301. (__v8df) __B,
  3302. (__v8df) __C,
  3303. (__mmask8) __U, __R);
  3304. }
  3305. extern __inline __m512d
  3306. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3307. _mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
  3308. __m512d __C, const int __R)
  3309. {
  3310. return (__m512d) __builtin_ia32_vfnmaddpd512_maskz ((__v8df) __A,
  3311. (__v8df) __B,
  3312. (__v8df) __C,
  3313. (__mmask8) __U, __R);
  3314. }
  3315. extern __inline __m512
  3316. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3317. _mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
  3318. {
  3319. return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
  3320. (__v16sf) __B,
  3321. (__v16sf) __C,
  3322. (__mmask16) -1, __R);
  3323. }
  3324. extern __inline __m512
  3325. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3326. _mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
  3327. __m512 __C, const int __R)
  3328. {
  3329. return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
  3330. (__v16sf) __B,
  3331. (__v16sf) __C,
  3332. (__mmask16) __U, __R);
  3333. }
  3334. extern __inline __m512
  3335. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3336. _mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
  3337. __mmask16 __U, const int __R)
  3338. {
  3339. return (__m512) __builtin_ia32_vfnmaddps512_mask3 ((__v16sf) __A,
  3340. (__v16sf) __B,
  3341. (__v16sf) __C,
  3342. (__mmask16) __U, __R);
  3343. }
  3344. extern __inline __m512
  3345. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3346. _mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
  3347. __m512 __C, const int __R)
  3348. {
  3349. return (__m512) __builtin_ia32_vfnmaddps512_maskz ((__v16sf) __A,
  3350. (__v16sf) __B,
  3351. (__v16sf) __C,
  3352. (__mmask16) __U, __R);
  3353. }
  3354. extern __inline __m512d
  3355. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3356. _mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
  3357. {
  3358. return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
  3359. (__v8df) __B,
  3360. (__v8df) __C,
  3361. (__mmask8) -1, __R);
  3362. }
  3363. extern __inline __m512d
  3364. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3365. _mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
  3366. __m512d __C, const int __R)
  3367. {
  3368. return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
  3369. (__v8df) __B,
  3370. (__v8df) __C,
  3371. (__mmask8) __U, __R);
  3372. }
  3373. extern __inline __m512d
  3374. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3375. _mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
  3376. __mmask8 __U, const int __R)
  3377. {
  3378. return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
  3379. (__v8df) __B,
  3380. (__v8df) __C,
  3381. (__mmask8) __U, __R);
  3382. }
  3383. extern __inline __m512d
  3384. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3385. _mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
  3386. __m512d __C, const int __R)
  3387. {
  3388. return (__m512d) __builtin_ia32_vfnmsubpd512_maskz ((__v8df) __A,
  3389. (__v8df) __B,
  3390. (__v8df) __C,
  3391. (__mmask8) __U, __R);
  3392. }
  3393. extern __inline __m512
  3394. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3395. _mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
  3396. {
  3397. return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
  3398. (__v16sf) __B,
  3399. (__v16sf) __C,
  3400. (__mmask16) -1, __R);
  3401. }
  3402. extern __inline __m512
  3403. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3404. _mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
  3405. __m512 __C, const int __R)
  3406. {
  3407. return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
  3408. (__v16sf) __B,
  3409. (__v16sf) __C,
  3410. (__mmask16) __U, __R);
  3411. }
  3412. extern __inline __m512
  3413. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3414. _mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
  3415. __mmask16 __U, const int __R)
  3416. {
  3417. return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
  3418. (__v16sf) __B,
  3419. (__v16sf) __C,
  3420. (__mmask16) __U, __R);
  3421. }
  3422. extern __inline __m512
  3423. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3424. _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
  3425. __m512 __C, const int __R)
  3426. {
  3427. return (__m512) __builtin_ia32_vfnmsubps512_maskz ((__v16sf) __A,
  3428. (__v16sf) __B,
  3429. (__v16sf) __C,
  3430. (__mmask16) __U, __R);
  3431. }
  3432. #else
  3433. #define _mm512_fmadd_round_pd(A, B, C, R) \
  3434. (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, -1, R)
  3435. #define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  3436. (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, U, R)
  3437. #define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  3438. (__m512d)__builtin_ia32_vfmaddpd512_mask3(A, B, C, U, R)
  3439. #define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  3440. (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, C, U, R)
  3441. #define _mm512_fmadd_round_ps(A, B, C, R) \
  3442. (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, -1, R)
  3443. #define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  3444. (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, U, R)
  3445. #define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  3446. (__m512)__builtin_ia32_vfmaddps512_mask3(A, B, C, U, R)
  3447. #define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  3448. (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, C, U, R)
  3449. #define _mm512_fmsub_round_pd(A, B, C, R) \
  3450. (__m512d)__builtin_ia32_vfmsubpd512_mask(A, B, C, -1, R)
  3451. #define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  3452. (__m512d)__builtin_ia32_vfmsubpd512_mask(A, B, C, U, R)
  3453. #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  3454. (__m512d)__builtin_ia32_vfmsubpd512_mask3(A, B, C, U, R)
  3455. #define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  3456. (__m512d)__builtin_ia32_vfmsubpd512_maskz(A, B, C, U, R)
  3457. #define _mm512_fmsub_round_ps(A, B, C, R) \
  3458. (__m512)__builtin_ia32_vfmsubps512_mask(A, B, C, -1, R)
  3459. #define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  3460. (__m512)__builtin_ia32_vfmsubps512_mask(A, B, C, U, R)
  3461. #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  3462. (__m512)__builtin_ia32_vfmsubps512_mask3(A, B, C, U, R)
  3463. #define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  3464. (__m512)__builtin_ia32_vfmsubps512_maskz(A, B, C, U, R)
  3465. #define _mm512_fmaddsub_round_pd(A, B, C, R) \
  3466. (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, -1, R)
  3467. #define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  3468. (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, U, R)
  3469. #define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  3470. (__m512d)__builtin_ia32_vfmaddsubpd512_mask3(A, B, C, U, R)
  3471. #define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  3472. (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, C, U, R)
  3473. #define _mm512_fmaddsub_round_ps(A, B, C, R) \
  3474. (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, -1, R)
  3475. #define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  3476. (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, U, R)
  3477. #define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  3478. (__m512)__builtin_ia32_vfmaddsubps512_mask3(A, B, C, U, R)
  3479. #define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  3480. (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, C, U, R)
  3481. #define _mm512_fmsubadd_round_pd(A, B, C, R) \
  3482. (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), -1, R)
  3483. #define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  3484. (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), U, R)
  3485. #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  3486. (__m512d)__builtin_ia32_vfmsubaddpd512_mask3(A, B, C, U, R)
  3487. #define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  3488. (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, -(C), U, R)
  3489. #define _mm512_fmsubadd_round_ps(A, B, C, R) \
  3490. (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), -1, R)
  3491. #define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  3492. (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), U, R)
  3493. #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  3494. (__m512)__builtin_ia32_vfmsubaddps512_mask3(A, B, C, U, R)
  3495. #define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  3496. (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, -(C), U, R)
  3497. #define _mm512_fnmadd_round_pd(A, B, C, R) \
  3498. (__m512d)__builtin_ia32_vfnmaddpd512_mask(A, B, C, -1, R)
  3499. #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  3500. (__m512d)__builtin_ia32_vfnmaddpd512_mask(A, B, C, U, R)
  3501. #define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  3502. (__m512d)__builtin_ia32_vfnmaddpd512_mask3(A, B, C, U, R)
  3503. #define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  3504. (__m512d)__builtin_ia32_vfnmaddpd512_maskz(A, B, C, U, R)
  3505. #define _mm512_fnmadd_round_ps(A, B, C, R) \
  3506. (__m512)__builtin_ia32_vfnmaddps512_mask(A, B, C, -1, R)
  3507. #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  3508. (__m512)__builtin_ia32_vfnmaddps512_mask(A, B, C, U, R)
  3509. #define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  3510. (__m512)__builtin_ia32_vfnmaddps512_mask3(A, B, C, U, R)
  3511. #define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  3512. (__m512)__builtin_ia32_vfnmaddps512_maskz(A, B, C, U, R)
  3513. #define _mm512_fnmsub_round_pd(A, B, C, R) \
  3514. (__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, -1, R)
  3515. #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  3516. (__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, U, R)
  3517. #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  3518. (__m512d)__builtin_ia32_vfnmsubpd512_mask3(A, B, C, U, R)
  3519. #define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  3520. (__m512d)__builtin_ia32_vfnmsubpd512_maskz(A, B, C, U, R)
  3521. #define _mm512_fnmsub_round_ps(A, B, C, R) \
  3522. (__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, -1, R)
  3523. #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  3524. (__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, U, R)
  3525. #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  3526. (__m512)__builtin_ia32_vfnmsubps512_mask3(A, B, C, U, R)
  3527. #define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  3528. (__m512)__builtin_ia32_vfnmsubps512_maskz(A, B, C, U, R)
  3529. #endif
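/* Packed 64-bit and 32-bit integer absolute value.  */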
  3530. extern __inline __m512i
  3531. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3532. _mm512_abs_epi64 (__m512i __A)
  3533. {
  3534. return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
  3535. (__v8di)
  3536. _mm512_undefined_epi32 (),
  3537. (__mmask8) -1);
  3538. }
  3539. extern __inline __m512i
  3540. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3541. _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
  3542. {
  3543. return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
  3544. (__v8di) __W,
  3545. (__mmask8) __U);
  3546. }
  3547. extern __inline __m512i
  3548. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3549. _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
  3550. {
  3551. return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
  3552. (__v8di)
  3553. _mm512_setzero_si512 (),
  3554. (__mmask8) __U);
  3555. }
  3556. extern __inline __m512i
  3557. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3558. _mm512_abs_epi32 (__m512i __A)
  3559. {
  3560. return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
  3561. (__v16si)
  3562. _mm512_undefined_epi32 (),
  3563. (__mmask16) -1);
  3564. }
  3565. extern __inline __m512i
  3566. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3567. _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
  3568. {
  3569. return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
  3570. (__v16si) __W,
  3571. (__mmask16) __U);
  3572. }
  3573. extern __inline __m512i
  3574. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3575. _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
  3576. {
  3577. return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
  3578. (__v16si)
  3579. _mm512_setzero_si512 (),
  3580. (__mmask16) __U);
  3581. }
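/* Broadcast the lowest element of a 128-bit source to every element of
   the 512-bit destination.  */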
  3582. extern __inline __m512
  3583. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3584. _mm512_broadcastss_ps (__m128 __A)
  3585. {
  3586. return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
  3587. (__v16sf)
  3588. _mm512_undefined_ps (),
  3589. (__mmask16) -1);
  3590. }
  3591. extern __inline __m512
  3592. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3593. _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
  3594. {
  3595. return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
  3596. (__v16sf) __O, __M);
  3597. }
  3598. extern __inline __m512
  3599. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3600. _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
  3601. {
  3602. return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
  3603. (__v16sf)
  3604. _mm512_setzero_ps (),
  3605. __M);
  3606. }
  3607. extern __inline __m512d
  3608. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3609. _mm512_broadcastsd_pd (__m128d __A)
  3610. {
  3611. return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
  3612. (__v8df)
  3613. _mm512_undefined_pd (),
  3614. (__mmask8) -1);
  3615. }
  3616. extern __inline __m512d
  3617. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3618. _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
  3619. {
  3620. return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
  3621. (__v8df) __O, __M);
  3622. }
  3623. extern __inline __m512d
  3624. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3625. _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
  3626. {
  3627. return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
  3628. (__v8df)
  3629. _mm512_setzero_pd (),
  3630. __M);
  3631. }
  3632. extern __inline __m512i
  3633. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3634. _mm512_broadcastd_epi32 (__m128i __A)
  3635. {
  3636. return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
  3637. (__v16si)
  3638. _mm512_undefined_epi32 (),
  3639. (__mmask16) -1);
  3640. }
  3641. extern __inline __m512i
  3642. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3643. _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
  3644. {
  3645. return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
  3646. (__v16si) __O, __M);
  3647. }
  3648. extern __inline __m512i
  3649. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3650. _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
  3651. {
  3652. return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
  3653. (__v16si)
  3654. _mm512_setzero_si512 (),
  3655. __M);
  3656. }
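/* Broadcast an integer scalar: the unmasked set1 forms are plain vector
   initializers, while the masked forms go through the GPR broadcast
   builtins.  */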
  3657. extern __inline __m512i
  3658. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3659. _mm512_set1_epi32 (int __A)
  3660. {
  3661. return (__m512i)(__v16si)
  3662. { __A, __A, __A, __A, __A, __A, __A, __A,
  3663. __A, __A, __A, __A, __A, __A, __A, __A };
  3664. }
  3665. extern __inline __m512i
  3666. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3667. _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
  3668. {
  3669. return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
  3670. __M);
  3671. }
  3672. extern __inline __m512i
  3673. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3674. _mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
  3675. {
  3676. return (__m512i)
  3677. __builtin_ia32_pbroadcastd512_gpr_mask (__A,
  3678. (__v16si) _mm512_setzero_si512 (),
  3679. __M);
  3680. }
  3681. extern __inline __m512i
  3682. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3683. _mm512_broadcastq_epi64 (__m128i __A)
  3684. {
  3685. return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
  3686. (__v8di)
  3687. _mm512_undefined_epi32 (),
  3688. (__mmask8) -1);
  3689. }
  3690. extern __inline __m512i
  3691. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3692. _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
  3693. {
  3694. return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
  3695. (__v8di) __O, __M);
  3696. }
  3697. extern __inline __m512i
  3698. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3699. _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
  3700. {
  3701. return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
  3702. (__v8di)
  3703. _mm512_setzero_si512 (),
  3704. __M);
  3705. }
  3706. extern __inline __m512i
  3707. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3708. _mm512_set1_epi64 (long long __A)
  3709. {
  3710. return (__m512i)(__v8di) { __A, __A, __A, __A, __A, __A, __A, __A };
  3711. }
  3712. extern __inline __m512i
  3713. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3714. _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
  3715. {
  3716. return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
  3717. __M);
  3718. }
  3719. extern __inline __m512i
  3720. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3721. _mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
  3722. {
  3723. return (__m512i)
  3724. __builtin_ia32_pbroadcastq512_gpr_mask (__A,
  3725. (__v8di) _mm512_setzero_si512 (),
  3726. __M);
  3727. }
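/* Broadcast a 128-bit block of four 32-bit elements, or a 256-bit block
   of four 64-bit elements, across the 512-bit destination.  */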
  3728. extern __inline __m512
  3729. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3730. _mm512_broadcast_f32x4 (__m128 __A)
  3731. {
  3732. return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
  3733. (__v16sf)
  3734. _mm512_undefined_ps (),
  3735. (__mmask16) -1);
  3736. }
  3737. extern __inline __m512
  3738. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3739. _mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
  3740. {
  3741. return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
  3742. (__v16sf) __O,
  3743. __M);
  3744. }
  3745. extern __inline __m512
  3746. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3747. _mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
  3748. {
  3749. return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
  3750. (__v16sf)
  3751. _mm512_setzero_ps (),
  3752. __M);
  3753. }
  3754. extern __inline __m512i
  3755. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3756. _mm512_broadcast_i32x4 (__m128i __A)
  3757. {
  3758. return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
  3759. (__v16si)
  3760. _mm512_undefined_epi32 (),
  3761. (__mmask16) -1);
  3762. }
  3763. extern __inline __m512i
  3764. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3765. _mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
  3766. {
  3767. return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
  3768. (__v16si) __O,
  3769. __M);
  3770. }
  3771. extern __inline __m512i
  3772. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3773. _mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
  3774. {
  3775. return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
  3776. (__v16si)
  3777. _mm512_setzero_si512 (),
  3778. __M);
  3779. }
  3780. extern __inline __m512d
  3781. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3782. _mm512_broadcast_f64x4 (__m256d __A)
  3783. {
  3784. return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
  3785. (__v8df)
  3786. _mm512_undefined_pd (),
  3787. (__mmask8) -1);
  3788. }
  3789. extern __inline __m512d
  3790. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3791. _mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
  3792. {
  3793. return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
  3794. (__v8df) __O,
  3795. __M);
  3796. }
  3797. extern __inline __m512d
  3798. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3799. _mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
  3800. {
  3801. return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
  3802. (__v8df)
  3803. _mm512_setzero_pd (),
  3804. __M);
  3805. }
  3806. extern __inline __m512i
  3807. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3808. _mm512_broadcast_i64x4 (__m256i __A)
  3809. {
  3810. return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
  3811. (__v8di)
  3812. _mm512_undefined_epi32 (),
  3813. (__mmask8) -1);
  3814. }
  3815. extern __inline __m512i
  3816. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3817. _mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
  3818. {
  3819. return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
  3820. (__v8di) __O,
  3821. __M);
  3822. }
  3823. extern __inline __m512i
  3824. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3825. _mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
  3826. {
  3827. return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
  3828. (__v8di)
  3829. _mm512_setzero_si512 (),
  3830. __M);
  3831. }
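/* Shuffle control for 32-bit elements: two bits per destination element,
   A = 0 through D = 3, with the first letter naming the source of the
   highest element.  Thus _MM_PERM_DCBA (0xE4) is the identity and
   _MM_PERM_ABCD (0x1B) reverses each group of four.  */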
  3832. typedef enum
  3833. {
  3834. _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
  3835. _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
  3836. _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
  3837. _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  3838. _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
  3839. _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
  3840. _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
  3841. _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  3842. _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
  3843. _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
  3844. _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
  3845. _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  3846. _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
  3847. _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
  3848. _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
  3849. _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  3850. _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
  3851. _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
  3852. _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
  3853. _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  3854. _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
  3855. _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
  3856. _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
  3857. _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  3858. _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
  3859. _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
  3860. _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
  3861. _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  3862. _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
  3863. _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
  3864. _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
  3865. _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  3866. _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
  3867. _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
  3868. _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
  3869. _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  3870. _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
  3871. _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
  3872. _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
  3873. _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  3874. _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
  3875. _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
  3876. _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
  3877. _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  3878. _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
  3879. _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
  3880. _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
  3881. _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  3882. _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
  3883. _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
  3884. _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
  3885. _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  3886. _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
  3887. _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
  3888. _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
  3889. _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  3890. _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
  3891. _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
  3892. _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
  3893. _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  3894. _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
  3895. _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
  3896. _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
  3897. _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  3898. _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
  3899. _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
  3900. _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
  3901. _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  3902. _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
  3903. _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
  3904. _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
  3905. _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  3906. _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
  3907. _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
  3908. _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
  3909. _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  3910. _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
  3911. _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
  3912. _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
  3913. _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  3914. _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
  3915. _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
  3916. _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
  3917. _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  3918. _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
  3919. _MM_PERM_DDDD = 0xFF
  3920. } _MM_PERM_ENUM;
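/* Usage sketch (illustrative, not part of the header; assumes <immintrin.h>
   and -mavx512f): each _MM_PERM_* constant packs four 2-bit selectors
   (A=0, B=1, C=2, D=3), with the first letter of the name occupying the two
   most-significant bits.  For _mm512_shuffle_epi32 below, the selectors pick
   32-bit elements within each 128-bit lane, so _MM_PERM_DCBA (0xE4) is the
   identity permutation and _MM_PERM_AAAA (0x00) broadcasts element 0 of
   every lane.

     __m512i id  = _mm512_shuffle_epi32 (v, _MM_PERM_DCBA);  // v unchanged
     __m512i rev = _mm512_shuffle_epi32 (v, _MM_PERM_ABCD);  // reverse each lane
     // v is any __m512i value (placeholder)
*/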
  3921. #ifdef __OPTIMIZE__
  3922. extern __inline __m512i
  3923. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3924. _mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
  3925. {
  3926. return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
  3927. __mask,
  3928. (__v16si)
  3929. _mm512_undefined_epi32 (),
  3930. (__mmask16) -1);
  3931. }
  3932. extern __inline __m512i
  3933. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3934. _mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
  3935. _MM_PERM_ENUM __mask)
  3936. {
  3937. return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
  3938. __mask,
  3939. (__v16si) __W,
  3940. (__mmask16) __U);
  3941. }
  3942. extern __inline __m512i
  3943. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3944. _mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
  3945. {
  3946. return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
  3947. __mask,
  3948. (__v16si)
  3949. _mm512_setzero_si512 (),
  3950. (__mmask16) __U);
  3951. }
  3952. extern __inline __m512i
  3953. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3954. _mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
  3955. {
  3956. return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
  3957. (__v8di) __B, __imm,
  3958. (__v8di)
  3959. _mm512_undefined_epi32 (),
  3960. (__mmask8) -1);
  3961. }
  3962. extern __inline __m512i
  3963. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3964. _mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
  3965. __m512i __B, const int __imm)
  3966. {
  3967. return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
  3968. (__v8di) __B, __imm,
  3969. (__v8di) __W,
  3970. (__mmask8) __U);
  3971. }
  3972. extern __inline __m512i
  3973. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3974. _mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
  3975. const int __imm)
  3976. {
  3977. return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
  3978. (__v8di) __B, __imm,
  3979. (__v8di)
  3980. _mm512_setzero_si512 (),
  3981. (__mmask8) __U);
  3982. }
  3983. extern __inline __m512i
  3984. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3985. _mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
  3986. {
  3987. return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
  3988. (__v16si) __B,
  3989. __imm,
  3990. (__v16si)
  3991. _mm512_undefined_epi32 (),
  3992. (__mmask16) -1);
  3993. }
  3994. extern __inline __m512i
  3995. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  3996. _mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
  3997. __m512i __B, const int __imm)
  3998. {
  3999. return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
  4000. (__v16si) __B,
  4001. __imm,
  4002. (__v16si) __W,
  4003. (__mmask16) __U);
  4004. }
  4005. extern __inline __m512i
  4006. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4007. _mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
  4008. const int __imm)
  4009. {
  4010. return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
  4011. (__v16si) __B,
  4012. __imm,
  4013. (__v16si)
  4014. _mm512_setzero_si512 (),
  4015. (__mmask16) __U);
  4016. }
  4017. extern __inline __m512d
  4018. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4019. _mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
  4020. {
  4021. return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
  4022. (__v8df) __B, __imm,
  4023. (__v8df)
  4024. _mm512_undefined_pd (),
  4025. (__mmask8) -1);
  4026. }
  4027. extern __inline __m512d
  4028. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4029. _mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
  4030. __m512d __B, const int __imm)
  4031. {
  4032. return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
  4033. (__v8df) __B, __imm,
  4034. (__v8df) __W,
  4035. (__mmask8) __U);
  4036. }
  4037. extern __inline __m512d
  4038. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4039. _mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
  4040. const int __imm)
  4041. {
  4042. return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
  4043. (__v8df) __B, __imm,
  4044. (__v8df)
  4045. _mm512_setzero_pd (),
  4046. (__mmask8) __U);
  4047. }
  4048. extern __inline __m512
  4049. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4050. _mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
  4051. {
  4052. return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
  4053. (__v16sf) __B, __imm,
  4054. (__v16sf)
  4055. _mm512_undefined_ps (),
  4056. (__mmask16) -1);
  4057. }
  4058. extern __inline __m512
  4059. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4060. _mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
  4061. __m512 __B, const int __imm)
  4062. {
  4063. return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
  4064. (__v16sf) __B, __imm,
  4065. (__v16sf) __W,
  4066. (__mmask16) __U);
  4067. }
  4068. extern __inline __m512
  4069. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4070. _mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
  4071. const int __imm)
  4072. {
  4073. return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
  4074. (__v16sf) __B, __imm,
  4075. (__v16sf)
  4076. _mm512_setzero_ps (),
  4077. (__mmask16) __U);
  4078. }
  4079. #else
  4080. #define _mm512_shuffle_epi32(X, C) \
  4081. ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
  4082. (__v16si)(__m512i)_mm512_undefined_epi32 (),\
  4083. (__mmask16)-1))
  4084. #define _mm512_mask_shuffle_epi32(W, U, X, C) \
  4085. ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
  4086. (__v16si)(__m512i)(W),\
  4087. (__mmask16)(U)))
  4088. #define _mm512_maskz_shuffle_epi32(U, X, C) \
  4089. ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
  4090. (__v16si)(__m512i)_mm512_setzero_si512 (),\
  4091. (__mmask16)(U)))
  4092. #define _mm512_shuffle_i64x2(X, Y, C) \
  4093. ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
  4094. (__v8di)(__m512i)(Y), (int)(C),\
  4095. (__v8di)(__m512i)_mm512_undefined_epi32 (),\
  4096. (__mmask8)-1))
  4097. #define _mm512_mask_shuffle_i64x2(W, U, X, Y, C) \
  4098. ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
  4099. (__v8di)(__m512i)(Y), (int)(C),\
  4100. (__v8di)(__m512i)(W),\
  4101. (__mmask8)(U)))
  4102. #define _mm512_maskz_shuffle_i64x2(U, X, Y, C) \
  4103. ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
  4104. (__v8di)(__m512i)(Y), (int)(C),\
  4105. (__v8di)(__m512i)_mm512_setzero_si512 (),\
  4106. (__mmask8)(U)))
  4107. #define _mm512_shuffle_i32x4(X, Y, C) \
  4108. ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
  4109. (__v16si)(__m512i)(Y), (int)(C),\
  4110. (__v16si)(__m512i)_mm512_undefined_epi32 (),\
  4111. (__mmask16)-1))
  4112. #define _mm512_mask_shuffle_i32x4(W, U, X, Y, C) \
  4113. ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
  4114. (__v16si)(__m512i)(Y), (int)(C),\
  4115. (__v16si)(__m512i)(W),\
  4116. (__mmask16)(U)))
  4117. #define _mm512_maskz_shuffle_i32x4(U, X, Y, C) \
  4118. ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
  4119. (__v16si)(__m512i)(Y), (int)(C),\
  4120. (__v16si)(__m512i)_mm512_setzero_si512 (),\
  4121. (__mmask16)(U)))
  4122. #define _mm512_shuffle_f64x2(X, Y, C) \
  4123. ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
  4124. (__v8df)(__m512d)(Y), (int)(C),\
  4125. (__v8df)(__m512d)_mm512_undefined_pd(),\
  4126. (__mmask8)-1))
  4127. #define _mm512_mask_shuffle_f64x2(W, U, X, Y, C) \
  4128. ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
  4129. (__v8df)(__m512d)(Y), (int)(C),\
  4130. (__v8df)(__m512d)(W),\
  4131. (__mmask8)(U)))
  4132. #define _mm512_maskz_shuffle_f64x2(U, X, Y, C) \
  4133. ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
  4134. (__v8df)(__m512d)(Y), (int)(C),\
  4135. (__v8df)(__m512d)_mm512_setzero_pd(),\
  4136. (__mmask8)(U)))
  4137. #define _mm512_shuffle_f32x4(X, Y, C) \
  4138. ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
  4139. (__v16sf)(__m512)(Y), (int)(C),\
  4140. (__v16sf)(__m512)_mm512_undefined_ps(),\
  4141. (__mmask16)-1))
  4142. #define _mm512_mask_shuffle_f32x4(W, U, X, Y, C) \
  4143. ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
  4144. (__v16sf)(__m512)(Y), (int)(C),\
  4145. (__v16sf)(__m512)(W),\
  4146. (__mmask16)(U)))
  4147. #define _mm512_maskz_shuffle_f32x4(U, X, Y, C) \
  4148. ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
  4149. (__v16sf)(__m512)(Y), (int)(C),\
  4150. (__v16sf)(__m512)_mm512_setzero_ps(),\
  4151. (__mmask16)(U)))
  4152. #endif
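/* Usage sketch (illustrative, not part of the header; assumes <immintrin.h>
   and -mavx512f): the shuffle_[if](32x4|64x2) intrinsics select whole 128-bit
   lanes; the two low selector fields of __imm pick lanes from the first
   operand, the two high fields pick lanes from the second, and __imm must be
   a compile-time constant (hence the macro forms when __OPTIMIZE__ is not
   defined).

     // result = { a lanes 0,1 , b lanes 0,1 }   (a, b are placeholder __m512 values)
     __m512 lo_ab = _mm512_shuffle_f32x4 (a, b, _MM_SHUFFLE (1, 0, 1, 0));
*/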
  4153. extern __inline __m512i
  4154. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4155. _mm512_rolv_epi32 (__m512i __A, __m512i __B)
  4156. {
  4157. return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
  4158. (__v16si) __B,
  4159. (__v16si)
  4160. _mm512_undefined_epi32 (),
  4161. (__mmask16) -1);
  4162. }
  4163. extern __inline __m512i
  4164. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4165. _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
  4166. {
  4167. return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
  4168. (__v16si) __B,
  4169. (__v16si) __W,
  4170. (__mmask16) __U);
  4171. }
  4172. extern __inline __m512i
  4173. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4174. _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
  4175. {
  4176. return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
  4177. (__v16si) __B,
  4178. (__v16si)
  4179. _mm512_setzero_si512 (),
  4180. (__mmask16) __U);
  4181. }
  4182. extern __inline __m512i
  4183. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4184. _mm512_rorv_epi32 (__m512i __A, __m512i __B)
  4185. {
  4186. return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
  4187. (__v16si) __B,
  4188. (__v16si)
  4189. _mm512_undefined_epi32 (),
  4190. (__mmask16) -1);
  4191. }
  4192. extern __inline __m512i
  4193. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4194. _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
  4195. {
  4196. return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
  4197. (__v16si) __B,
  4198. (__v16si) __W,
  4199. (__mmask16) __U);
  4200. }
  4201. extern __inline __m512i
  4202. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4203. _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
  4204. {
  4205. return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
  4206. (__v16si) __B,
  4207. (__v16si)
  4208. _mm512_setzero_si512 (),
  4209. (__mmask16) __U);
  4210. }
  4211. extern __inline __m512i
  4212. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4213. _mm512_rolv_epi64 (__m512i __A, __m512i __B)
  4214. {
  4215. return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
  4216. (__v8di) __B,
  4217. (__v8di)
  4218. _mm512_undefined_epi32 (),
  4219. (__mmask8) -1);
  4220. }
  4221. extern __inline __m512i
  4222. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4223. _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
  4224. {
  4225. return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
  4226. (__v8di) __B,
  4227. (__v8di) __W,
  4228. (__mmask8) __U);
  4229. }
  4230. extern __inline __m512i
  4231. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4232. _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
  4233. {
  4234. return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
  4235. (__v8di) __B,
  4236. (__v8di)
  4237. _mm512_setzero_si512 (),
  4238. (__mmask8) __U);
  4239. }
  4240. extern __inline __m512i
  4241. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4242. _mm512_rorv_epi64 (__m512i __A, __m512i __B)
  4243. {
  4244. return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
  4245. (__v8di) __B,
  4246. (__v8di)
  4247. _mm512_undefined_epi32 (),
  4248. (__mmask8) -1);
  4249. }
  4250. extern __inline __m512i
  4251. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4252. _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
  4253. {
  4254. return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
  4255. (__v8di) __B,
  4256. (__v8di) __W,
  4257. (__mmask8) __U);
  4258. }
  4259. extern __inline __m512i
  4260. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4261. _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
  4262. {
  4263. return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
  4264. (__v8di) __B,
  4265. (__v8di)
  4266. _mm512_setzero_si512 (),
  4267. (__mmask8) __U);
  4268. }
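/* Usage sketch (illustrative, not part of the header; assumes <immintrin.h>
   and -mavx512f): rolv/rorv rotate each element of the first operand by the
   count held in the matching element of the second operand (the count is
   taken modulo the element width).

     __m512i by8 = _mm512_set1_epi32 (8);
     __m512i r   = _mm512_rolv_epi32 (x, by8);                         // rotate left by 8
     __m512i z   = _mm512_maskz_rorv_epi32 ((__mmask16) 0x5555, x, by8);
     // z: even-indexed elements rotated right by 8, odd elements zeroed (x is a placeholder)
*/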
  4269. #ifdef __OPTIMIZE__
  4270. extern __inline __m256i
  4271. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4272. _mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
  4273. {
  4274. return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
  4275. (__v8si)
  4276. _mm256_undefined_si256 (),
  4277. (__mmask8) -1, __R);
  4278. }
  4279. extern __inline __m256i
  4280. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4281. _mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
  4282. const int __R)
  4283. {
  4284. return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
  4285. (__v8si) __W,
  4286. (__mmask8) __U, __R);
  4287. }
  4288. extern __inline __m256i
  4289. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4290. _mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
  4291. {
  4292. return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
  4293. (__v8si)
  4294. _mm256_setzero_si256 (),
  4295. (__mmask8) __U, __R);
  4296. }
  4297. extern __inline __m256i
  4298. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4299. _mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
  4300. {
  4301. return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
  4302. (__v8si)
  4303. _mm256_undefined_si256 (),
  4304. (__mmask8) -1, __R);
  4305. }
  4306. extern __inline __m256i
  4307. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4308. _mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
  4309. const int __R)
  4310. {
  4311. return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
  4312. (__v8si) __W,
  4313. (__mmask8) __U, __R);
  4314. }
  4315. extern __inline __m256i
  4316. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4317. _mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
  4318. {
  4319. return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
  4320. (__v8si)
  4321. _mm256_setzero_si256 (),
  4322. (__mmask8) __U, __R);
  4323. }
  4324. #else
  4325. #define _mm512_cvtt_roundpd_epi32(A, B) \
  4326. ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
  4327. #define _mm512_mask_cvtt_roundpd_epi32(W, U, A, B) \
  4328. ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)(W), U, B))
  4329. #define _mm512_maskz_cvtt_roundpd_epi32(U, A, B) \
  4330. ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
  4331. #define _mm512_cvtt_roundpd_epu32(A, B) \
  4332. ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
  4333. #define _mm512_mask_cvtt_roundpd_epu32(W, U, A, B) \
  4334. ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)(W), U, B))
  4335. #define _mm512_maskz_cvtt_roundpd_epu32(U, A, B) \
  4336. ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
  4337. #endif
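/* Usage sketch (illustrative, not part of the header; assumes <immintrin.h>
   and -mavx512f): the cvtt_* forms always truncate toward zero, so the extra
   argument is typically used only to suppress floating-point exceptions,
   e.g. _MM_FROUND_NO_EXC.

     __m256i t = _mm512_cvtt_roundpd_epi32 (d, _MM_FROUND_NO_EXC);   // d is a placeholder __m512d
*/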
  4338. #ifdef __OPTIMIZE__
  4339. extern __inline __m256i
  4340. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4341. _mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
  4342. {
  4343. return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
  4344. (__v8si)
  4345. _mm256_undefined_si256 (),
  4346. (__mmask8) -1, __R);
  4347. }
  4348. extern __inline __m256i
  4349. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4350. _mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
  4351. const int __R)
  4352. {
  4353. return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
  4354. (__v8si) __W,
  4355. (__mmask8) __U, __R);
  4356. }
  4357. extern __inline __m256i
  4358. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4359. _mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
  4360. {
  4361. return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
  4362. (__v8si)
  4363. _mm256_setzero_si256 (),
  4364. (__mmask8) __U, __R);
  4365. }
  4366. extern __inline __m256i
  4367. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4368. _mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
  4369. {
  4370. return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
  4371. (__v8si)
  4372. _mm256_undefined_si256 (),
  4373. (__mmask8) -1, __R);
  4374. }
  4375. extern __inline __m256i
  4376. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4377. _mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
  4378. const int __R)
  4379. {
  4380. return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
  4381. (__v8si) __W,
  4382. (__mmask8) __U, __R);
  4383. }
  4384. extern __inline __m256i
  4385. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4386. _mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
  4387. {
  4388. return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
  4389. (__v8si)
  4390. _mm256_setzero_si256 (),
  4391. (__mmask8) __U, __R);
  4392. }
  4393. #else
  4394. #define _mm512_cvt_roundpd_epi32(A, B) \
  4395. ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
  4396. #define _mm512_mask_cvt_roundpd_epi32(W, U, A, B) \
  4397. ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)(W), U, B))
  4398. #define _mm512_maskz_cvt_roundpd_epi32(U, A, B) \
  4399. ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
  4400. #define _mm512_cvt_roundpd_epu32(A, B) \
  4401. ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
  4402. #define _mm512_mask_cvt_roundpd_epu32(W, U, A, B) \
  4403. ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)(W), U, B))
  4404. #define _mm512_maskz_cvt_roundpd_epu32(U, A, B) \
  4405. ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
  4406. #endif
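/* Usage sketch (illustrative, not part of the header; assumes <immintrin.h>
   and -mavx512f): the non-truncating cvt_round* forms take an explicit
   rounding mode.

     __m256i n = _mm512_cvt_roundpd_epi32 (d, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
     __m256i f = _mm512_maskz_cvt_roundpd_epi32 ((__mmask8) 0x0F, d,
                                                 _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
     // f: low four results rounded toward -inf, high four elements zeroed (d is a placeholder)
*/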
  4407. #ifdef __OPTIMIZE__
  4408. extern __inline __m512i
  4409. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4410. _mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
  4411. {
  4412. return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
  4413. (__v16si)
  4414. _mm512_undefined_epi32 (),
  4415. (__mmask16) -1, __R);
  4416. }
  4417. extern __inline __m512i
  4418. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4419. _mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
  4420. const int __R)
  4421. {
  4422. return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
  4423. (__v16si) __W,
  4424. (__mmask16) __U, __R);
  4425. }
  4426. extern __inline __m512i
  4427. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4428. _mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
  4429. {
  4430. return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
  4431. (__v16si)
  4432. _mm512_setzero_si512 (),
  4433. (__mmask16) __U, __R);
  4434. }
  4435. extern __inline __m512i
  4436. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4437. _mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
  4438. {
  4439. return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
  4440. (__v16si)
  4441. _mm512_undefined_epi32 (),
  4442. (__mmask16) -1, __R);
  4443. }
  4444. extern __inline __m512i
  4445. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4446. _mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
  4447. const int __R)
  4448. {
  4449. return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
  4450. (__v16si) __W,
  4451. (__mmask16) __U, __R);
  4452. }
  4453. extern __inline __m512i
  4454. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4455. _mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
  4456. {
  4457. return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
  4458. (__v16si)
  4459. _mm512_setzero_si512 (),
  4460. (__mmask16) __U, __R);
  4461. }
  4462. #else
  4463. #define _mm512_cvtt_roundps_epi32(A, B) \
  4464. ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))
  4465. #define _mm512_mask_cvtt_roundps_epi32(W, U, A, B) \
  4466. ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)(W), U, B))
  4467. #define _mm512_maskz_cvtt_roundps_epi32(U, A, B) \
  4468. ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
  4469. #define _mm512_cvtt_roundps_epu32(A, B) \
  4470. ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))
  4471. #define _mm512_mask_cvtt_roundps_epu32(W, U, A, B) \
  4472. ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)(W), U, B))
  4473. #define _mm512_maskz_cvtt_roundps_epu32(U, A, B) \
  4474. ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
  4475. #endif
  4476. #ifdef __OPTIMIZE__
  4477. extern __inline __m512i
  4478. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4479. _mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
  4480. {
  4481. return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
  4482. (__v16si)
  4483. _mm512_undefined_epi32 (),
  4484. (__mmask16) -1, __R);
  4485. }
  4486. extern __inline __m512i
  4487. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4488. _mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
  4489. const int __R)
  4490. {
  4491. return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
  4492. (__v16si) __W,
  4493. (__mmask16) __U, __R);
  4494. }
  4495. extern __inline __m512i
  4496. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4497. _mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
  4498. {
  4499. return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
  4500. (__v16si)
  4501. _mm512_setzero_si512 (),
  4502. (__mmask16) __U, __R);
  4503. }
  4504. extern __inline __m512i
  4505. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4506. _mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
  4507. {
  4508. return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
  4509. (__v16si)
  4510. _mm512_undefined_epi32 (),
  4511. (__mmask16) -1, __R);
  4512. }
  4513. extern __inline __m512i
  4514. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4515. _mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
  4516. const int __R)
  4517. {
  4518. return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
  4519. (__v16si) __W,
  4520. (__mmask16) __U, __R);
  4521. }
  4522. extern __inline __m512i
  4523. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4524. _mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
  4525. {
  4526. return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
  4527. (__v16si)
  4528. _mm512_setzero_si512 (),
  4529. (__mmask16) __U, __R);
  4530. }
  4531. #else
  4532. #define _mm512_cvt_roundps_epi32(A, B) \
  4533. ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))
  4534. #define _mm512_mask_cvt_roundps_epi32(W, U, A, B) \
  4535. ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)(W), U, B))
  4536. #define _mm512_maskz_cvt_roundps_epi32(U, A, B) \
  4537. ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
  4538. #define _mm512_cvt_roundps_epu32(A, B) \
  4539. ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))
  4540. #define _mm512_mask_cvt_roundps_epu32(W, U, A, B) \
  4541. ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)(W), U, B))
  4542. #define _mm512_maskz_cvt_roundps_epu32(U, A, B) \
  4543. ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
  4544. #endif
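/* Usage sketch (illustrative, not part of the header; assumes <immintrin.h>
   and -mavx512f): the ps variants convert all sixteen single-precision
   elements at once; _MM_FROUND_CUR_DIRECTION uses the rounding mode currently
   set in MXCSR.

     __m512i q = _mm512_cvt_roundps_epi32 (s, _MM_FROUND_CUR_DIRECTION);   // s is a placeholder __m512
*/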
  4545. extern __inline __m128d
  4546. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4547. _mm_cvtu32_sd (__m128d __A, unsigned __B)
  4548. {
  4549. return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
  4550. }
  4551. #ifdef __x86_64__
  4552. #ifdef __OPTIMIZE__
  4553. extern __inline __m128d
  4554. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4555. _mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
  4556. {
  4557. return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
  4558. }
  4559. extern __inline __m128d
  4560. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4561. _mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
  4562. {
  4563. return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
  4564. }
  4565. extern __inline __m128d
  4566. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4567. _mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
  4568. {
  4569. return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
  4570. }
  4571. #else
  4572. #define _mm_cvt_roundu64_sd(A, B, C) \
  4573. (__m128d)__builtin_ia32_cvtusi2sd64(A, B, C)
  4574. #define _mm_cvt_roundi64_sd(A, B, C) \
  4575. (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C)
  4576. #define _mm_cvt_roundsi64_sd(A, B, C) \
  4577. (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C)
  4578. #endif
  4579. #endif
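/* Usage sketch (illustrative, not part of the header; assumes <immintrin.h>,
   -mavx512f, and __x86_64__ for the 64-bit forms): _mm_cvtu32_sd needs no
   rounding argument because every 32-bit integer is exactly representable in
   double, while the 64-bit conversions can round and therefore take one.
   The result holds the converted value in element 0 and copies element 1
   from __A.

     __m128d lo = _mm_cvtu32_sd (acc, 7u);                               // exact
     __m128d r  = _mm_cvt_roundu64_sd (acc, ~0ULL,
                                       _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
     // acc is a placeholder __m128d
*/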
  4580. #ifdef __OPTIMIZE__
  4581. extern __inline __m128
  4582. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4583. _mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
  4584. {
  4585. return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
  4586. }
  4587. extern __inline __m128
  4588. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4589. _mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
  4590. {
  4591. return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
  4592. }
  4593. extern __inline __m128
  4594. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4595. _mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
  4596. {
  4597. return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
  4598. }
  4599. #else
  4600. #define _mm_cvt_roundu32_ss(A, B, C) \
  4601. (__m128)__builtin_ia32_cvtusi2ss32(A, B, C)
  4602. #define _mm_cvt_roundi32_ss(A, B, C) \
  4603. (__m128)__builtin_ia32_cvtsi2ss32(A, B, C)
  4604. #define _mm_cvt_roundsi32_ss(A, B, C) \
  4605. (__m128)__builtin_ia32_cvtsi2ss32(A, B, C)
  4606. #endif
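/* Usage sketch (illustrative, not part of the header; assumes <immintrin.h>
   and -mavx512f): the ss forms write the converted value into element 0 and
   copy elements 1..3 from __A; the rounding argument must be a compile-time
   constant.

     __m128 r = _mm_cvt_roundu32_ss (acc, 7u, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
     // acc is a placeholder __m128
*/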
  4607. #ifdef __x86_64__
  4608. #ifdef __OPTIMIZE__
  4609. extern __inline __m128
  4610. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4611. _mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
  4612. {
  4613. return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
  4614. }
  4615. extern __inline __m128
  4616. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4617. _mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
  4618. {
  4619. return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
  4620. }
  4621. extern __inline __m128
  4622. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4623. _mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
  4624. {
  4625. return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
  4626. }
  4627. #else
  4628. #define _mm_cvt_roundu64_ss(A, B, C) \
  4629. (__m128)__builtin_ia32_cvtusi2ss64(A, B, C)
  4630. #define _mm_cvt_roundi64_ss(A, B, C) \
  4631. (__m128)__builtin_ia32_cvtsi2ss64(A, B, C)
  4632. #define _mm_cvt_roundsi64_ss(A, B, C) \
  4633. (__m128)__builtin_ia32_cvtsi2ss64(A, B, C)
  4634. #endif
  4635. #endif
  4636. extern __inline __m128i
  4637. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4638. _mm512_cvtepi32_epi8 (__m512i __A)
  4639. {
  4640. return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
  4641. (__v16qi)
  4642. _mm_undefined_si128 (),
  4643. (__mmask16) -1);
  4644. }
  4645. extern __inline void
  4646. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4647. _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
  4648. {
  4649. __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
  4650. }
  4651. extern __inline __m128i
  4652. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4653. _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
  4654. {
  4655. return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
  4656. (__v16qi) __O, __M);
  4657. }
  4658. extern __inline __m128i
  4659. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4660. _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
  4661. {
  4662. return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
  4663. (__v16qi)
  4664. _mm_setzero_si128 (),
  4665. __M);
  4666. }
  4667. extern __inline __m128i
  4668. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4669. _mm512_cvtsepi32_epi8 (__m512i __A)
  4670. {
  4671. return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
  4672. (__v16qi)
  4673. _mm_undefined_si128 (),
  4674. (__mmask16) -1);
  4675. }
  4676. extern __inline void
  4677. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4678. _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
  4679. {
  4680. __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
  4681. }
  4682. extern __inline __m128i
  4683. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4684. _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
  4685. {
  4686. return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
  4687. (__v16qi) __O, __M);
  4688. }
  4689. extern __inline __m128i
  4690. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4691. _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
  4692. {
  4693. return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
  4694. (__v16qi)
  4695. _mm_setzero_si128 (),
  4696. __M);
  4697. }
  4698. extern __inline __m128i
  4699. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4700. _mm512_cvtusepi32_epi8 (__m512i __A)
  4701. {
  4702. return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
  4703. (__v16qi)
  4704. _mm_undefined_si128 (),
  4705. (__mmask16) -1);
  4706. }
  4707. extern __inline void
  4708. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4709. _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
  4710. {
  4711. __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
  4712. }
  4713. extern __inline __m128i
  4714. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4715. _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
  4716. {
  4717. return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
  4718. (__v16qi) __O,
  4719. __M);
  4720. }
  4721. extern __inline __m128i
  4722. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4723. _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
  4724. {
  4725. return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
  4726. (__v16qi)
  4727. _mm_setzero_si128 (),
  4728. __M);
  4729. }
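/* Usage sketch (illustrative, not part of the header; assumes <immintrin.h>
   and -mavx512f): the three epi32->epi8 down-converts differ in how they
   narrow: cvtepi32_epi8 keeps the low byte, cvtsepi32_epi8 saturates to
   [-128, 127], and cvtusepi32_epi8 saturates to [0, 255]; the *_storeu_*
   forms write the sixteen narrowed bytes straight to (possibly unaligned)
   memory.

     __m128i t = _mm512_cvtepi32_epi8 (v);       // truncate
     __m128i s = _mm512_cvtsepi32_epi8 (v);      // signed saturation
     unsigned char buf[16];
     _mm512_mask_cvtusepi32_storeu_epi8 (buf, (__mmask16) 0xFFFF, v);   // v is a placeholder
*/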
  4730. extern __inline __m256i
  4731. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4732. _mm512_cvtepi32_epi16 (__m512i __A)
  4733. {
  4734. return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
  4735. (__v16hi)
  4736. _mm256_undefined_si256 (),
  4737. (__mmask16) -1);
  4738. }
  4739. extern __inline void
  4740. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4741. _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
  4742. {
  4743. __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
  4744. }
  4745. extern __inline __m256i
  4746. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4747. _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
  4748. {
  4749. return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
  4750. (__v16hi) __O, __M);
  4751. }
  4752. extern __inline __m256i
  4753. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4754. _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
  4755. {
  4756. return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
  4757. (__v16hi)
  4758. _mm256_setzero_si256 (),
  4759. __M);
  4760. }
  4761. extern __inline __m256i
  4762. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4763. _mm512_cvtsepi32_epi16 (__m512i __A)
  4764. {
  4765. return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
  4766. (__v16hi)
  4767. _mm256_undefined_si256 (),
  4768. (__mmask16) -1);
  4769. }
  4770. extern __inline void
  4771. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4772. _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
  4773. {
  4774. __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
  4775. }
  4776. extern __inline __m256i
  4777. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4778. _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
  4779. {
  4780. return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
  4781. (__v16hi) __O, __M);
  4782. }
  4783. extern __inline __m256i
  4784. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4785. _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
  4786. {
  4787. return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
  4788. (__v16hi)
  4789. _mm256_setzero_si256 (),
  4790. __M);
  4791. }
  4792. extern __inline __m256i
  4793. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4794. _mm512_cvtusepi32_epi16 (__m512i __A)
  4795. {
  4796. return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
  4797. (__v16hi)
  4798. _mm256_undefined_si256 (),
  4799. (__mmask16) -1);
  4800. }
  4801. extern __inline void
  4802. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4803. _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
  4804. {
  4805. __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
  4806. }
  4807. extern __inline __m256i
  4808. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4809. _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
  4810. {
  4811. return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
  4812. (__v16hi) __O,
  4813. __M);
  4814. }
  4815. extern __inline __m256i
  4816. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4817. _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
  4818. {
  4819. return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
  4820. (__v16hi)
  4821. _mm256_setzero_si256 (),
  4822. __M);
  4823. }
  4824. extern __inline __m256i
  4825. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4826. _mm512_cvtepi64_epi32 (__m512i __A)
  4827. {
  4828. return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
  4829. (__v8si)
  4830. _mm256_undefined_si256 (),
  4831. (__mmask8) -1);
  4832. }
  4833. extern __inline void
  4834. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4835. _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
  4836. {
  4837. __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
  4838. }
  4839. extern __inline __m256i
  4840. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4841. _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
  4842. {
  4843. return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
  4844. (__v8si) __O, __M);
  4845. }
  4846. extern __inline __m256i
  4847. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4848. _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
  4849. {
  4850. return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
  4851. (__v8si)
  4852. _mm256_setzero_si256 (),
  4853. __M);
  4854. }
  4855. extern __inline __m256i
  4856. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4857. _mm512_cvtsepi64_epi32 (__m512i __A)
  4858. {
  4859. return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
  4860. (__v8si)
  4861. _mm256_undefined_si256 (),
  4862. (__mmask8) -1);
  4863. }
  4864. extern __inline void
  4865. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4866. _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
  4867. {
  4868. __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
  4869. }
  4870. extern __inline __m256i
  4871. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4872. _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
  4873. {
  4874. return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
  4875. (__v8si) __O, __M);
  4876. }
  4877. extern __inline __m256i
  4878. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4879. _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
  4880. {
  4881. return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
  4882. (__v8si)
  4883. _mm256_setzero_si256 (),
  4884. __M);
  4885. }
  4886. extern __inline __m256i
  4887. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4888. _mm512_cvtusepi64_epi32 (__m512i __A)
  4889. {
  4890. return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
  4891. (__v8si)
  4892. _mm256_undefined_si256 (),
  4893. (__mmask8) -1);
  4894. }
  4895. extern __inline void
  4896. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4897. _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
  4898. {
  4899. __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
  4900. }
  4901. extern __inline __m256i
  4902. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4903. _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
  4904. {
  4905. return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
  4906. (__v8si) __O, __M);
  4907. }
  4908. extern __inline __m256i
  4909. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4910. _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
  4911. {
  4912. return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
  4913. (__v8si)
  4914. _mm256_setzero_si256 (),
  4915. __M);
  4916. }
  4917. extern __inline __m128i
  4918. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4919. _mm512_cvtepi64_epi16 (__m512i __A)
  4920. {
  4921. return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
  4922. (__v8hi)
  4923. _mm_undefined_si128 (),
  4924. (__mmask8) -1);
  4925. }
  4926. extern __inline void
  4927. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4928. _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
  4929. {
  4930. __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
  4931. }
  4932. extern __inline __m128i
  4933. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4934. _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
  4935. {
  4936. return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
  4937. (__v8hi) __O, __M);
  4938. }
  4939. extern __inline __m128i
  4940. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4941. _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
  4942. {
  4943. return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
  4944. (__v8hi)
  4945. _mm_setzero_si128 (),
  4946. __M);
  4947. }
  4948. extern __inline __m128i
  4949. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4950. _mm512_cvtsepi64_epi16 (__m512i __A)
  4951. {
  4952. return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
  4953. (__v8hi)
  4954. _mm_undefined_si128 (),
  4955. (__mmask8) -1);
  4956. }
  4957. extern __inline void
  4958. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4959. _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
  4960. {
  4961. __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
  4962. }
  4963. extern __inline __m128i
  4964. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4965. _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
  4966. {
  4967. return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
  4968. (__v8hi) __O, __M);
  4969. }
  4970. extern __inline __m128i
  4971. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4972. _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
  4973. {
  4974. return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
  4975. (__v8hi)
  4976. _mm_setzero_si128 (),
  4977. __M);
  4978. }
  4979. extern __inline __m128i
  4980. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4981. _mm512_cvtusepi64_epi16 (__m512i __A)
  4982. {
  4983. return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
  4984. (__v8hi)
  4985. _mm_undefined_si128 (),
  4986. (__mmask8) -1);
  4987. }
  4988. extern __inline void
  4989. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4990. _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
  4991. {
  4992. __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
  4993. }
  4994. extern __inline __m128i
  4995. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  4996. _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
  4997. {
  4998. return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
  4999. (__v8hi) __O, __M);
  5000. }
  5001. extern __inline __m128i
  5002. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5003. _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
  5004. {
  5005. return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
  5006. (__v8hi)
  5007. _mm_setzero_si128 (),
  5008. __M);
  5009. }
  5010. extern __inline __m128i
  5011. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5012. _mm512_cvtepi64_epi8 (__m512i __A)
  5013. {
  5014. return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
  5015. (__v16qi)
  5016. _mm_undefined_si128 (),
  5017. (__mmask8) -1);
  5018. }
  5019. extern __inline void
  5020. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5021. _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
  5022. {
  5023. __builtin_ia32_pmovqb512mem_mask ((unsigned long long *) __P,
  5024. (__v8di) __A, __M);
  5025. }
  5026. extern __inline __m128i
  5027. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5028. _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
  5029. {
  5030. return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
  5031. (__v16qi) __O, __M);
  5032. }
  5033. extern __inline __m128i
  5034. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5035. _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
  5036. {
  5037. return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
  5038. (__v16qi)
  5039. _mm_setzero_si128 (),
  5040. __M);
  5041. }
  5042. extern __inline __m128i
  5043. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5044. _mm512_cvtsepi64_epi8 (__m512i __A)
  5045. {
  5046. return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
  5047. (__v16qi)
  5048. _mm_undefined_si128 (),
  5049. (__mmask8) -1);
  5050. }
  5051. extern __inline void
  5052. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5053. _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
  5054. {
  5055. __builtin_ia32_pmovsqb512mem_mask ((unsigned long long *) __P, (__v8di) __A, __M);
  5056. }
  5057. extern __inline __m128i
  5058. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5059. _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
  5060. {
  5061. return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
  5062. (__v16qi) __O, __M);
  5063. }
  5064. extern __inline __m128i
  5065. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5066. _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
  5067. {
  5068. return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
  5069. (__v16qi)
  5070. _mm_setzero_si128 (),
  5071. __M);
  5072. }
  5073. extern __inline __m128i
  5074. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5075. _mm512_cvtusepi64_epi8 (__m512i __A)
  5076. {
  5077. return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
  5078. (__v16qi)
  5079. _mm_undefined_si128 (),
  5080. (__mmask8) -1);
  5081. }
  5082. extern __inline void
  5083. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5084. _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
  5085. {
  5086. __builtin_ia32_pmovusqb512mem_mask ((unsigned long long *) __P, (__v8di) __A, __M);
  5087. }
  5088. extern __inline __m128i
  5089. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5090. _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
  5091. {
  5092. return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
  5093. (__v16qi) __O,
  5094. __M);
  5095. }
  5096. extern __inline __m128i
  5097. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5098. _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
  5099. {
  5100. return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
  5101. (__v16qi)
  5102. _mm_setzero_si128 (),
  5103. __M);
  5104. }
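/* Usage sketch (illustrative, not part of the header; assumes <immintrin.h>
   and -mavx512f): the epi64 down-converts produce only eight narrow elements,
   so the storeu forms write 16 bytes for epi64->epi16 but just 8 bytes for
   epi64->epi8 (hence the unsigned long long * cast used by the builtins
   above).

     short w[8];
     _mm512_mask_cvtepi64_storeu_epi16 (w, (__mmask8) 0xFF, v);
     unsigned char b[8];
     _mm512_mask_cvtsepi64_storeu_epi8 (b, (__mmask8) 0xFF, v);     // v is a placeholder __m512i
*/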
  5105. extern __inline __m512d
  5106. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5107. _mm512_cvtepi32_pd (__m256i __A)
  5108. {
  5109. return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
  5110. (__v8df)
  5111. _mm512_undefined_pd (),
  5112. (__mmask8) -1);
  5113. }
  5114. extern __inline __m512d
  5115. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5116. _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
  5117. {
  5118. return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
  5119. (__v8df) __W,
  5120. (__mmask8) __U);
  5121. }
  5122. extern __inline __m512d
  5123. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5124. _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
  5125. {
  5126. return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
  5127. (__v8df)
  5128. _mm512_setzero_pd (),
  5129. (__mmask8) __U);
  5130. }
  5131. extern __inline __m512d
  5132. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5133. _mm512_cvtepu32_pd (__m256i __A)
  5134. {
  5135. return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
  5136. (__v8df)
  5137. _mm512_undefined_pd (),
  5138. (__mmask8) -1);
  5139. }
  5140. extern __inline __m512d
  5141. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5142. _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
  5143. {
  5144. return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
  5145. (__v8df) __W,
  5146. (__mmask8) __U);
  5147. }
  5148. extern __inline __m512d
  5149. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5150. _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
  5151. {
  5152. return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
  5153. (__v8df)
  5154. _mm512_setzero_pd (),
  5155. (__mmask8) __U);
  5156. }
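/* Usage sketch (illustrative, not part of the header; assumes <immintrin.h>
   and -mavx512f): these widen eight 32-bit integers to eight doubles exactly;
   the signed and unsigned forms differ only in how the source bits are
   interpreted.

     __m256i ones = _mm256_set1_epi32 (-1);
     __m512d s = _mm512_cvtepi32_pd (ones);     // eight copies of -1.0
     __m512d u = _mm512_cvtepu32_pd (ones);     // eight copies of 4294967295.0
*/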
  5157. #ifdef __OPTIMIZE__
  5158. extern __inline __m512
  5159. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5160. _mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
  5161. {
  5162. return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
  5163. (__v16sf)
  5164. _mm512_undefined_ps (),
  5165. (__mmask16) -1, __R);
  5166. }
  5167. extern __inline __m512
  5168. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5169. _mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
  5170. const int __R)
  5171. {
  5172. return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
  5173. (__v16sf) __W,
  5174. (__mmask16) __U, __R);
  5175. }
  5176. extern __inline __m512
  5177. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5178. _mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
  5179. {
  5180. return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
  5181. (__v16sf)
  5182. _mm512_setzero_ps (),
  5183. (__mmask16) __U, __R);
  5184. }
  5185. extern __inline __m512
  5186. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5187. _mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
  5188. {
  5189. return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
  5190. (__v16sf)
  5191. _mm512_undefined_ps (),
  5192. (__mmask16) -1, __R);
  5193. }
  5194. extern __inline __m512
  5195. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5196. _mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
  5197. const int __R)
  5198. {
  5199. return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
  5200. (__v16sf) __W,
  5201. (__mmask16) __U, __R);
  5202. }
  5203. extern __inline __m512
  5204. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5205. _mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
  5206. {
  5207. return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
  5208. (__v16sf)
  5209. _mm512_setzero_ps (),
  5210. (__mmask16) __U, __R);
  5211. }
  5212. #else
  5213. #define _mm512_cvt_roundepi32_ps(A, B) \
  5214. (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B)
  5215. #define _mm512_mask_cvt_roundepi32_ps(W, U, A, B) \
  5216. (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), W, U, B)
  5217. #define _mm512_maskz_cvt_roundepi32_ps(U, A, B) \
  5218. (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B)
  5219. #define _mm512_cvt_roundepu32_ps(A, B) \
  5220. (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B)
  5221. #define _mm512_mask_cvt_roundepu32_ps(W, U, A, B) \
  5222. (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), W, U, B)
  5223. #define _mm512_maskz_cvt_roundepu32_ps(U, A, B) \
  5224. (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B)
  5225. #endif
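/* Usage sketch (illustrative, not part of the header; assumes <immintrin.h>
   and -mavx512f): as with every mask_ variant in this header, elements whose
   mask bit is clear keep the value supplied in __W rather than being
   computed.

     __m512 dst = _mm512_set1_ps (0.0f);
     __m512 r = _mm512_mask_cvt_roundepi32_ps (dst, (__mmask16) 0x00FF, v,
                                               _MM_FROUND_CUR_DIRECTION);
     // low 8 elements converted from v, high 8 stay 0.0f   (v is a placeholder __m512i)
*/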
  5226. #ifdef __OPTIMIZE__
  5227. extern __inline __m256d
  5228. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5229. _mm512_extractf64x4_pd (__m512d __A, const int __imm)
  5230. {
  5231. return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
  5232. __imm,
  5233. (__v4df)
  5234. _mm256_undefined_pd (),
  5235. (__mmask8) -1);
  5236. }
  5237. extern __inline __m256d
  5238. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5239. _mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
  5240. const int __imm)
  5241. {
  5242. return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
  5243. __imm,
  5244. (__v4df) __W,
  5245. (__mmask8) __U);
  5246. }
  5247. extern __inline __m256d
  5248. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5249. _mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
  5250. {
  5251. return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
  5252. __imm,
  5253. (__v4df)
  5254. _mm256_setzero_pd (),
  5255. (__mmask8) __U);
  5256. }
  5257. extern __inline __m128
  5258. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5259. _mm512_extractf32x4_ps (__m512 __A, const int __imm)
  5260. {
  5261. return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
  5262. __imm,
  5263. (__v4sf)
  5264. _mm_undefined_ps (),
  5265. (__mmask8) -1);
  5266. }
  5267. extern __inline __m128
  5268. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5269. _mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
  5270. const int __imm)
  5271. {
  5272. return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
  5273. __imm,
  5274. (__v4sf) __W,
  5275. (__mmask8) __U);
  5276. }
  5277. extern __inline __m128
  5278. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5279. _mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
  5280. {
  5281. return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
  5282. __imm,
  5283. (__v4sf)
  5284. _mm_setzero_ps (),
  5285. (__mmask8) __U);
  5286. }
  5287. extern __inline __m256i
  5288. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5289. _mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
  5290. {
  5291. return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
  5292. __imm,
  5293. (__v4di)
  5294. _mm256_undefined_si256 (),
  5295. (__mmask8) -1);
  5296. }
  5297. extern __inline __m256i
  5298. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5299. _mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
  5300. const int __imm)
  5301. {
  5302. return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
  5303. __imm,
  5304. (__v4di) __W,
  5305. (__mmask8) __U);
  5306. }
  5307. extern __inline __m256i
  5308. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5309. _mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
  5310. {
  5311. return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
  5312. __imm,
  5313. (__v4di)
  5314. _mm256_setzero_si256 (),
  5315. (__mmask8) __U);
  5316. }
  5317. extern __inline __m128i
  5318. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5319. _mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
  5320. {
  5321. return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
  5322. __imm,
  5323. (__v4si)
  5324. _mm_undefined_si128 (),
  5325. (__mmask8) -1);
  5326. }
  5327. extern __inline __m128i
  5328. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5329. _mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
  5330. const int __imm)
  5331. {
  5332. return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
  5333. __imm,
  5334. (__v4si) __W,
  5335. (__mmask8) __U);
  5336. }
  5337. extern __inline __m128i
  5338. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5339. _mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
  5340. {
  5341. return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
  5342. __imm,
  5343. (__v4si)
  5344. _mm_setzero_si128 (),
  5345. (__mmask8) __U);
  5346. }
  5347. #else
  5348. #define _mm512_extractf64x4_pd(X, C) \
  5349. ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
  5350. (int) (C),\
  5351. (__v4df)(__m256d)_mm256_undefined_pd(),\
  5352. (__mmask8)-1))
  5353. #define _mm512_mask_extractf64x4_pd(W, U, X, C) \
  5354. ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
  5355. (int) (C),\
  5356. (__v4df)(__m256d)(W),\
  5357. (__mmask8)(U)))
  5358. #define _mm512_maskz_extractf64x4_pd(U, X, C) \
  5359. ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
  5360. (int) (C),\
  5361. (__v4df)(__m256d)_mm256_setzero_pd(),\
  5362. (__mmask8)(U)))
  5363. #define _mm512_extractf32x4_ps(X, C) \
  5364. ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
  5365. (int) (C),\
  5366. (__v4sf)(__m128)_mm_undefined_ps(),\
  5367. (__mmask8)-1))
  5368. #define _mm512_mask_extractf32x4_ps(W, U, X, C) \
  5369. ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
  5370. (int) (C),\
  5371. (__v4sf)(__m128)(W),\
  5372. (__mmask8)(U)))
  5373. #define _mm512_maskz_extractf32x4_ps(U, X, C) \
  5374. ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
  5375. (int) (C),\
  5376. (__v4sf)(__m128)_mm_setzero_ps(),\
  5377. (__mmask8)(U)))
  5378. #define _mm512_extracti64x4_epi64(X, C) \
  5379. ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
  5380. (int) (C),\
  5381. (__v4di)(__m256i)_mm256_undefined_si256 (),\
  5382. (__mmask8)-1))
  5383. #define _mm512_mask_extracti64x4_epi64(W, U, X, C) \
  5384. ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
  5385. (int) (C),\
  5386. (__v4di)(__m256i)(W),\
  5387. (__mmask8)(U)))
  5388. #define _mm512_maskz_extracti64x4_epi64(U, X, C) \
  5389. ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
  5390. (int) (C),\
  5391. (__v4di)(__m256i)_mm256_setzero_si256 (),\
  5392. (__mmask8)(U)))
  5393. #define _mm512_extracti32x4_epi32(X, C) \
  5394. ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
  5395. (int) (C),\
  5396. (__v4si)(__m128i)_mm_undefined_si128 (),\
  5397. (__mmask8)-1))
  5398. #define _mm512_mask_extracti32x4_epi32(W, U, X, C) \
  5399. ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
  5400. (int) (C),\
  5401. (__v4si)(__m128i)(W),\
  5402. (__mmask8)(U)))
  5403. #define _mm512_maskz_extracti32x4_epi32(U, X, C) \
  5404. ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
  5405. (int) (C),\
  5406. (__v4si)(__m128i)_mm_setzero_si128 (),\
  5407. (__mmask8)(U)))
  5408. #endif
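/* Usage sketch (illustrative, not part of the original header): pulling a
   128-bit or 256-bit lane out of a 512-bit register.  The immediate selects
   which lane and must be a compile-time constant.  Hypothetical names:

     __m512d v8  = _mm512_set1_pd (2.0);
     __m256d hi4 = _mm512_extractf64x4_pd (v8, 1);      // elements 4..7
     __m512i w16 = _mm512_set1_epi32 (7);
     __m128i q4  = _mm512_extracti32x4_epi32 (w16, 3);  // elements 12..15  */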
  5409. #ifdef __OPTIMIZE__
  5410. extern __inline __m512i
  5411. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5412. _mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
  5413. {
  5414. return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
  5415. (__v4si) __B,
  5416. __imm,
  5417. (__v16si) __A, -1);
  5418. }
  5419. extern __inline __m512
  5420. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5421. _mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
  5422. {
  5423. return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
  5424. (__v4sf) __B,
  5425. __imm,
  5426. (__v16sf) __A, -1);
  5427. }
  5428. extern __inline __m512i
  5429. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5430. _mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
  5431. {
  5432. return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
  5433. (__v4di) __B,
  5434. __imm,
  5435. (__v8di)
  5436. _mm512_undefined_epi32 (),
  5437. (__mmask8) -1);
  5438. }
  5439. extern __inline __m512i
  5440. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5441. _mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
  5442. __m256i __B, const int __imm)
  5443. {
  5444. return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
  5445. (__v4di) __B,
  5446. __imm,
  5447. (__v8di) __W,
  5448. (__mmask8) __U);
  5449. }
  5450. extern __inline __m512i
  5451. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5452. _mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
  5453. const int __imm)
  5454. {
  5455. return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
  5456. (__v4di) __B,
  5457. __imm,
  5458. (__v8di)
  5459. _mm512_setzero_si512 (),
  5460. (__mmask8) __U);
  5461. }
  5462. extern __inline __m512d
  5463. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5464. _mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
  5465. {
  5466. return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
  5467. (__v4df) __B,
  5468. __imm,
  5469. (__v8df)
  5470. _mm512_undefined_pd (),
  5471. (__mmask8) -1);
  5472. }
  5473. extern __inline __m512d
  5474. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5475. _mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
  5476. __m256d __B, const int __imm)
  5477. {
  5478. return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
  5479. (__v4df) __B,
  5480. __imm,
  5481. (__v8df) __W,
  5482. (__mmask8) __U);
  5483. }
  5484. extern __inline __m512d
  5485. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5486. _mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
  5487. const int __imm)
  5488. {
  5489. return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
  5490. (__v4df) __B,
  5491. __imm,
  5492. (__v8df)
  5493. _mm512_setzero_pd (),
  5494. (__mmask8) __U);
  5495. }
  5496. #else
  5497. #define _mm512_insertf32x4(X, Y, C) \
  5498. ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
  5499. (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (X), (__mmask16)(-1)))
  5500. #define _mm512_inserti32x4(X, Y, C) \
  5501. ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
  5502. (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (X), (__mmask16)(-1)))
  5503. #define _mm512_insertf64x4(X, Y, C) \
  5504. ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
  5505. (__v4df)(__m256d) (Y), (int) (C), \
  5506. (__v8df)(__m512d)_mm512_undefined_pd(), \
  5507. (__mmask8)-1))
  5508. #define _mm512_mask_insertf64x4(W, U, X, Y, C) \
  5509. ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
  5510. (__v4df)(__m256d) (Y), (int) (C), \
  5511. (__v8df)(__m512d)(W), \
  5512. (__mmask8)(U)))
  5513. #define _mm512_maskz_insertf64x4(U, X, Y, C) \
  5514. ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
  5515. (__v4df)(__m256d) (Y), (int) (C), \
  5516. (__v8df)(__m512d)_mm512_setzero_pd(), \
  5517. (__mmask8)(U)))
  5518. #define _mm512_inserti64x4(X, Y, C) \
  5519. ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
  5520. (__v4di)(__m256i) (Y), (int) (C), \
  5521. (__v8di)(__m512i)_mm512_undefined_epi32 (), \
  5522. (__mmask8)-1))
  5523. #define _mm512_mask_inserti64x4(W, U, X, Y, C) \
  5524. ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
  5525. (__v4di)(__m256i) (Y), (int) (C),\
  5526. (__v8di)(__m512i)(W),\
  5527. (__mmask8)(U)))
  5528. #define _mm512_maskz_inserti64x4(U, X, Y, C) \
  5529. ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
  5530. (__v4di)(__m256i) (Y), (int) (C), \
  5531. (__v8di)(__m512i)_mm512_setzero_si512 (), \
  5532. (__mmask8)(U)))
  5533. #endif
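/* Usage sketch (illustrative, not part of the original header): the insert
   forms are the inverse of the extracts above, overwriting one lane of a
   512-bit value.  Hypothetical names:

     __m512d dst = _mm512_setzero_pd ();
     __m256d src = _mm256_set1_pd (1.5);
     dst = _mm512_insertf64x4 (dst, src, 1);   // upper four doubles become 1.5  */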
  5534. extern __inline __m512d
  5535. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5536. _mm512_loadu_pd (void const *__P)
  5537. {
  5538. return *(__m512d_u *)__P;
  5539. }
  5540. extern __inline __m512d
  5541. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5542. _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
  5543. {
  5544. return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
  5545. (__v8df) __W,
  5546. (__mmask8) __U);
  5547. }
  5548. extern __inline __m512d
  5549. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5550. _mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
  5551. {
  5552. return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
  5553. (__v8df)
  5554. _mm512_setzero_pd (),
  5555. (__mmask8) __U);
  5556. }
  5557. extern __inline void
  5558. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5559. _mm512_storeu_pd (void *__P, __m512d __A)
  5560. {
  5561. *(__m512d_u *)__P = __A;
  5562. }
  5563. extern __inline void
  5564. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5565. _mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
  5566. {
  5567. __builtin_ia32_storeupd512_mask ((double *) __P, (__v8df) __A,
  5568. (__mmask8) __U);
  5569. }
  5570. extern __inline __m512
  5571. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5572. _mm512_loadu_ps (void const *__P)
  5573. {
  5574. return *(__m512_u *)__P;
  5575. }
  5576. extern __inline __m512
  5577. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5578. _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
  5579. {
  5580. return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
  5581. (__v16sf) __W,
  5582. (__mmask16) __U);
  5583. }
  5584. extern __inline __m512
  5585. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5586. _mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
  5587. {
  5588. return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
  5589. (__v16sf)
  5590. _mm512_setzero_ps (),
  5591. (__mmask16) __U);
  5592. }
  5593. extern __inline void
  5594. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5595. _mm512_storeu_ps (void *__P, __m512 __A)
  5596. {
  5597. *(__m512_u *)__P = __A;
  5598. }
  5599. extern __inline void
  5600. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5601. _mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
  5602. {
  5603. __builtin_ia32_storeups512_mask ((float *) __P, (__v16sf) __A,
  5604. (__mmask16) __U);
  5605. }
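/* Usage sketch (illustrative, not part of the original header): masked
   unaligned loads/stores never touch memory belonging to masked-off
   elements, so they are convenient for array tails.  'buf' and 'n' are
   hypothetical.

     float buf[100];
     __mmask16 tail = (__mmask16) ((1u << (n % 16)) - 1);
     __m512 v = _mm512_maskz_loadu_ps (tail, buf + (n & ~15u));
     _mm512_mask_storeu_ps (buf + (n & ~15u), tail, v);  */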
  5606. extern __inline __m128
  5607. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5608. _mm_mask_load_ss (__m128 __W, __mmask8 __U, const float *__P)
  5609. {
  5610. return (__m128) __builtin_ia32_loadss_mask (__P, (__v4sf) __W, __U);
  5611. }
  5612. extern __inline __m128
  5613. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5614. _mm_maskz_load_ss (__mmask8 __U, const float *__P)
  5615. {
  5616. return (__m128) __builtin_ia32_loadss_mask (__P, (__v4sf) _mm_setzero_ps (),
  5617. __U);
  5618. }
  5619. extern __inline __m128d
  5620. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5621. _mm_mask_load_sd (__m128d __W, __mmask8 __U, const double *__P)
  5622. {
  5623. return (__m128d) __builtin_ia32_loadsd_mask (__P, (__v2df) __W, __U);
  5624. }
  5625. extern __inline __m128d
  5626. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5627. _mm_maskz_load_sd (__mmask8 __U, const double *__P)
  5628. {
  5629. return (__m128d) __builtin_ia32_loadsd_mask (__P, (__v2df) _mm_setzero_pd (),
  5630. __U);
  5631. }
  5632. extern __inline __m128
  5633. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5634. _mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  5635. {
  5636. return (__m128) __builtin_ia32_movess_mask ((__v4sf) __A, (__v4sf) __B,
  5637. (__v4sf) __W, __U);
  5638. }
  5639. extern __inline __m128
  5640. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5641. _mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
  5642. {
  5643. return (__m128) __builtin_ia32_movess_mask ((__v4sf) __A, (__v4sf) __B,
  5644. (__v4sf) _mm_setzero_ps (), __U);
  5645. }
  5646. extern __inline __m128d
  5647. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5648. _mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
  5649. {
  5650. return (__m128d) __builtin_ia32_movesd_mask ((__v2df) __A, (__v2df) __B,
  5651. (__v2df) __W, __U);
  5652. }
  5653. extern __inline __m128d
  5654. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5655. _mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
  5656. {
  5657. return (__m128d) __builtin_ia32_movesd_mask ((__v2df) __A, (__v2df) __B,
  5658. (__v2df) _mm_setzero_pd (),
  5659. __U);
  5660. }
  5661. extern __inline void
  5662. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5663. _mm_mask_store_ss (float *__P, __mmask8 __U, __m128 __A)
  5664. {
  5665. __builtin_ia32_storess_mask (__P, (__v4sf) __A, (__mmask8) __U);
  5666. }
  5667. extern __inline void
  5668. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5669. _mm_mask_store_sd (double *__P, __mmask8 __U, __m128d __A)
  5670. {
  5671. __builtin_ia32_storesd_mask (__P, (__v2df) __A, (__mmask8) __U);
  5672. }
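/* Usage sketch (illustrative, not part of the original header): the scalar
   load/move/store forms operate only on element 0 under a 1-bit mask,
   zeroing or passing through the remaining elements.  Hypothetical names:

     double d = 42.0;
     __m128d s = _mm_maskz_load_sd (1, &d);   // { 42.0, 0.0 }
     _mm_mask_store_sd (&d, 0, s);            // mask bit clear: no store  */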
  5673. extern __inline __m512i
  5674. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5675. _mm512_loadu_epi64 (void const *__P)
  5676. {
  5677. return *(__m512i_u *) __P;
  5678. }
  5679. extern __inline __m512i
  5680. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5681. _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
  5682. {
  5683. return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
  5684. (__v8di) __W,
  5685. (__mmask8) __U);
  5686. }
  5687. extern __inline __m512i
  5688. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5689. _mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
  5690. {
  5691. return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
  5692. (__v8di)
  5693. _mm512_setzero_si512 (),
  5694. (__mmask8) __U);
  5695. }
  5696. extern __inline void
  5697. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5698. _mm512_storeu_epi64 (void *__P, __m512i __A)
  5699. {
  5700. *(__m512i_u *) __P = (__m512i_u) __A;
  5701. }
  5702. extern __inline void
  5703. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5704. _mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
  5705. {
  5706. __builtin_ia32_storedqudi512_mask ((long long *) __P, (__v8di) __A,
  5707. (__mmask8) __U);
  5708. }
  5709. extern __inline __m512i
  5710. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5711. _mm512_loadu_si512 (void const *__P)
  5712. {
  5713. return *(__m512i_u *)__P;
  5714. }
  5715. extern __inline __m512i
  5716. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5717. _mm512_loadu_epi32 (void const *__P)
  5718. {
  5719. return *(__m512i_u *) __P;
  5720. }
  5721. extern __inline __m512i
  5722. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5723. _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
  5724. {
  5725. return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
  5726. (__v16si) __W,
  5727. (__mmask16) __U);
  5728. }
  5729. extern __inline __m512i
  5730. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5731. _mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
  5732. {
  5733. return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
  5734. (__v16si)
  5735. _mm512_setzero_si512 (),
  5736. (__mmask16) __U);
  5737. }
  5738. extern __inline void
  5739. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5740. _mm512_storeu_si512 (void *__P, __m512i __A)
  5741. {
  5742. *(__m512i_u *)__P = __A;
  5743. }
  5744. extern __inline void
  5745. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5746. _mm512_storeu_epi32 (void *__P, __m512i __A)
  5747. {
  5748. *(__m512i_u *) __P = (__m512i_u) __A;
  5749. }
  5750. extern __inline void
  5751. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5752. _mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
  5753. {
  5754. __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
  5755. (__mmask16) __U);
  5756. }
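/* Usage sketch (illustrative, not part of the original header):
   _mm512_loadu_si512 / _mm512_storeu_si512 are the generic unaligned integer
   forms; the _epi32/_epi64 variants differ only in the element size used for
   masking.  Hypothetical names:

     int src[16], dst[16];
     __m512i t = _mm512_loadu_si512 (src);
     _mm512_mask_storeu_epi32 (dst, (__mmask16) 0xAAAA, t);  // odd elements only  */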
  5757. extern __inline __m512d
  5758. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5759. _mm512_permutevar_pd (__m512d __A, __m512i __C)
  5760. {
  5761. return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
  5762. (__v8di) __C,
  5763. (__v8df)
  5764. _mm512_undefined_pd (),
  5765. (__mmask8) -1);
  5766. }
  5767. extern __inline __m512d
  5768. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5769. _mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
  5770. {
  5771. return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
  5772. (__v8di) __C,
  5773. (__v8df) __W,
  5774. (__mmask8) __U);
  5775. }
  5776. extern __inline __m512d
  5777. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5778. _mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
  5779. {
  5780. return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
  5781. (__v8di) __C,
  5782. (__v8df)
  5783. _mm512_setzero_pd (),
  5784. (__mmask8) __U);
  5785. }
  5786. extern __inline __m512
  5787. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5788. _mm512_permutevar_ps (__m512 __A, __m512i __C)
  5789. {
  5790. return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
  5791. (__v16si) __C,
  5792. (__v16sf)
  5793. _mm512_undefined_ps (),
  5794. (__mmask16) -1);
  5795. }
  5796. extern __inline __m512
  5797. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5798. _mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
  5799. {
  5800. return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
  5801. (__v16si) __C,
  5802. (__v16sf) __W,
  5803. (__mmask16) __U);
  5804. }
  5805. extern __inline __m512
  5806. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5807. _mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
  5808. {
  5809. return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
  5810. (__v16si) __C,
  5811. (__v16sf)
  5812. _mm512_setzero_ps (),
  5813. (__mmask16) __U);
  5814. }
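/* Usage sketch (illustrative, not part of the original header): permutevar
   shuffles each 128-bit lane independently, using the low bits of the
   corresponding control element.  Hypothetical names:

     __m512  data = _mm512_set1_ps (1.0f);
     __m512i ctrl = _mm512_set1_epi32 (3);             // every element takes
     __m512  pick = _mm512_permutevar_ps (data, ctrl); // lane element 3  */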
  5815. extern __inline __m512i
  5816. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5817. _mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
  5818. {
  5819. return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
  5820. /* idx */ ,
  5821. (__v8di) __A,
  5822. (__v8di) __B,
  5823. (__mmask8) -1);
  5824. }
  5825. extern __inline __m512i
  5826. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5827. _mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
  5828. __m512i __B)
  5829. {
  5830. return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
  5831. /* idx */ ,
  5832. (__v8di) __A,
  5833. (__v8di) __B,
  5834. (__mmask8) __U);
  5835. }
  5836. extern __inline __m512i
  5837. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5838. _mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
  5839. __mmask8 __U, __m512i __B)
  5840. {
  5841. return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
  5842. (__v8di) __I
  5843. /* idx */ ,
  5844. (__v8di) __B,
  5845. (__mmask8) __U);
  5846. }
  5847. extern __inline __m512i
  5848. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5849. _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
  5850. __m512i __I, __m512i __B)
  5851. {
  5852. return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
  5853. /* idx */ ,
  5854. (__v8di) __A,
  5855. (__v8di) __B,
  5856. (__mmask8) __U);
  5857. }
  5858. extern __inline __m512i
  5859. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5860. _mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
  5861. {
  5862. return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
  5863. /* idx */ ,
  5864. (__v16si) __A,
  5865. (__v16si) __B,
  5866. (__mmask16) -1);
  5867. }
  5868. extern __inline __m512i
  5869. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5870. _mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
  5871. __m512i __I, __m512i __B)
  5872. {
  5873. return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
  5874. /* idx */ ,
  5875. (__v16si) __A,
  5876. (__v16si) __B,
  5877. (__mmask16) __U);
  5878. }
  5879. extern __inline __m512i
  5880. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5881. _mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
  5882. __mmask16 __U, __m512i __B)
  5883. {
  5884. return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
  5885. (__v16si) __I
  5886. /* idx */ ,
  5887. (__v16si) __B,
  5888. (__mmask16) __U);
  5889. }
  5890. extern __inline __m512i
  5891. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5892. _mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
  5893. __m512i __I, __m512i __B)
  5894. {
  5895. return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
  5896. /* idx */ ,
  5897. (__v16si) __A,
  5898. (__v16si) __B,
  5899. (__mmask16) __U);
  5900. }
  5901. extern __inline __m512d
  5902. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5903. _mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
  5904. {
  5905. return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
  5906. /* idx */ ,
  5907. (__v8df) __A,
  5908. (__v8df) __B,
  5909. (__mmask8) -1);
  5910. }
  5911. extern __inline __m512d
  5912. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5913. _mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
  5914. __m512d __B)
  5915. {
  5916. return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
  5917. /* idx */ ,
  5918. (__v8df) __A,
  5919. (__v8df) __B,
  5920. (__mmask8) __U);
  5921. }
  5922. extern __inline __m512d
  5923. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5924. _mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
  5925. __m512d __B)
  5926. {
  5927. return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
  5928. (__v8di) __I
  5929. /* idx */ ,
  5930. (__v8df) __B,
  5931. (__mmask8) __U);
  5932. }
  5933. extern __inline __m512d
  5934. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5935. _mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
  5936. __m512d __B)
  5937. {
  5938. return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
  5939. /* idx */ ,
  5940. (__v8df) __A,
  5941. (__v8df) __B,
  5942. (__mmask8) __U);
  5943. }
  5944. extern __inline __m512
  5945. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5946. _mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
  5947. {
  5948. return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
  5949. /* idx */ ,
  5950. (__v16sf) __A,
  5951. (__v16sf) __B,
  5952. (__mmask16) -1);
  5953. }
  5954. extern __inline __m512
  5955. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5956. _mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
  5957. {
  5958. return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
  5959. /* idx */ ,
  5960. (__v16sf) __A,
  5961. (__v16sf) __B,
  5962. (__mmask16) __U);
  5963. }
  5964. extern __inline __m512
  5965. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5966. _mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
  5967. __m512 __B)
  5968. {
  5969. return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
  5970. (__v16si) __I
  5971. /* idx */ ,
  5972. (__v16sf) __B,
  5973. (__mmask16) __U);
  5974. }
  5975. extern __inline __m512
  5976. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5977. _mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
  5978. __m512 __B)
  5979. {
  5980. return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
  5981. /* idx */ ,
  5982. (__v16sf) __A,
  5983. (__v16sf) __B,
  5984. (__mmask16) __U);
  5985. }
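/* Usage sketch (illustrative, not part of the original header): permutex2var
   selects elements from the concatenation of two source vectors, so it can
   act as a 32-entry table lookup.  'table' is a hypothetical int[32].

     __m512i lo  = _mm512_loadu_si512 (table);        // entries  0..15
     __m512i hi  = _mm512_loadu_si512 (table + 16);   // entries 16..31
     __m512i idx = _mm512_set1_epi32 (17);            // bit 4 selects 'hi'
     __m512i val = _mm512_permutex2var_epi32 (lo, idx, hi);  */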
  5986. #ifdef __OPTIMIZE__
  5987. extern __inline __m512d
  5988. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5989. _mm512_permute_pd (__m512d __X, const int __C)
  5990. {
  5991. return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
  5992. (__v8df)
  5993. _mm512_undefined_pd (),
  5994. (__mmask8) -1);
  5995. }
  5996. extern __inline __m512d
  5997. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  5998. _mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C)
  5999. {
  6000. return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
  6001. (__v8df) __W,
  6002. (__mmask8) __U);
  6003. }
  6004. extern __inline __m512d
  6005. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6006. _mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
  6007. {
  6008. return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
  6009. (__v8df)
  6010. _mm512_setzero_pd (),
  6011. (__mmask8) __U);
  6012. }
  6013. extern __inline __m512
  6014. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6015. _mm512_permute_ps (__m512 __X, const int __C)
  6016. {
  6017. return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
  6018. (__v16sf)
  6019. _mm512_undefined_ps (),
  6020. (__mmask16) -1);
  6021. }
  6022. extern __inline __m512
  6023. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6024. _mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C)
  6025. {
  6026. return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
  6027. (__v16sf) __W,
  6028. (__mmask16) __U);
  6029. }
  6030. extern __inline __m512
  6031. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6032. _mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
  6033. {
  6034. return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
  6035. (__v16sf)
  6036. _mm512_setzero_ps (),
  6037. (__mmask16) __U);
  6038. }
  6039. #else
  6040. #define _mm512_permute_pd(X, C) \
  6041. ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
  6042. (__v8df)(__m512d)_mm512_undefined_pd(),\
  6043. (__mmask8)(-1)))
  6044. #define _mm512_mask_permute_pd(W, U, X, C) \
  6045. ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
  6046. (__v8df)(__m512d)(W), \
  6047. (__mmask8)(U)))
  6048. #define _mm512_maskz_permute_pd(U, X, C) \
  6049. ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
  6050. (__v8df)(__m512d)_mm512_setzero_pd(), \
  6051. (__mmask8)(U)))
  6052. #define _mm512_permute_ps(X, C) \
  6053. ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
  6054. (__v16sf)(__m512)_mm512_undefined_ps(),\
  6055. (__mmask16)(-1)))
  6056. #define _mm512_mask_permute_ps(W, U, X, C) \
  6057. ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
  6058. (__v16sf)(__m512)(W), \
  6059. (__mmask16)(U)))
  6060. #define _mm512_maskz_permute_ps(U, X, C) \
  6061. ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
  6062. (__v16sf)(__m512)_mm512_setzero_ps(), \
  6063. (__mmask16)(U)))
  6064. #endif
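/* Usage sketch (illustrative, not part of the original header):
   _mm512_permute_pd / _mm512_permute_ps take an immediate control and, like
   VPERMILPD/VPERMILPS, shuffle within each 128-bit lane.  Hypothetical names:

     __m512d x  = _mm512_set1_pd (1.0);
     __m512d sw = _mm512_permute_pd (x, 0x55);   // swap the pair in every lane  */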
  6065. #ifdef __OPTIMIZE__
  6066. extern __inline __m512i
  6067. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6068. _mm512_permutex_epi64 (__m512i __X, const int __I)
  6069. {
  6070. return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
  6071. (__v8di)
  6072. _mm512_undefined_epi32 (),
  6073. (__mmask8) (-1));
  6074. }
  6075. extern __inline __m512i
  6076. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6077. _mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
  6078. __m512i __X, const int __I)
  6079. {
  6080. return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
  6081. (__v8di) __W,
  6082. (__mmask8) __M);
  6083. }
  6084. extern __inline __m512i
  6085. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6086. _mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
  6087. {
  6088. return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
  6089. (__v8di)
  6090. _mm512_setzero_si512 (),
  6091. (__mmask8) __M);
  6092. }
  6093. extern __inline __m512d
  6094. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6095. _mm512_permutex_pd (__m512d __X, const int __M)
  6096. {
  6097. return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
  6098. (__v8df)
  6099. _mm512_undefined_pd (),
  6100. (__mmask8) -1);
  6101. }
  6102. extern __inline __m512d
  6103. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6104. _mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
  6105. {
  6106. return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
  6107. (__v8df) __W,
  6108. (__mmask8) __U);
  6109. }
  6110. extern __inline __m512d
  6111. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6112. _mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
  6113. {
  6114. return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
  6115. (__v8df)
  6116. _mm512_setzero_pd (),
  6117. (__mmask8) __U);
  6118. }
  6119. #else
  6120. #define _mm512_permutex_pd(X, M) \
  6121. ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
  6122. (__v8df)(__m512d)_mm512_undefined_pd(),\
  6123. (__mmask8)-1))
  6124. #define _mm512_mask_permutex_pd(W, U, X, M) \
  6125. ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
  6126. (__v8df)(__m512d)(W), (__mmask8)(U)))
  6127. #define _mm512_maskz_permutex_pd(U, X, M) \
  6128. ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
  6129. (__v8df)(__m512d)_mm512_setzero_pd(),\
  6130. (__mmask8)(U)))
  6131. #define _mm512_permutex_epi64(X, I) \
  6132. ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
  6133. (int)(I), \
  6134. (__v8di)(__m512i) \
  6135. (_mm512_undefined_epi32 ()),\
  6136. (__mmask8)(-1)))
  6137. #define _mm512_maskz_permutex_epi64(M, X, I) \
  6138. ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
  6139. (int)(I), \
  6140. (__v8di)(__m512i) \
  6141. (_mm512_setzero_si512 ()),\
  6142. (__mmask8)(M)))
  6143. #define _mm512_mask_permutex_epi64(W, M, X, I) \
  6144. ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
  6145. (int)(I), \
  6146. (__v8di)(__m512i)(W), \
  6147. (__mmask8)(M)))
  6148. #endif
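/* Usage sketch (illustrative, not part of the original header): permutex
   reorders 64-bit elements within each 256-bit half using an immediate with
   the _MM_SHUFFLE encoding.  Hypothetical names:

     __m512i q   = _mm512_set1_epi64 (5);
     __m512i rev = _mm512_permutex_epi64 (q, _MM_SHUFFLE (0, 1, 2, 3));  */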
  6149. extern __inline __m512i
  6150. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6151. _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
  6152. {
  6153. return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
  6154. (__v8di) __X,
  6155. (__v8di)
  6156. _mm512_setzero_si512 (),
  6157. __M);
  6158. }
  6159. extern __inline __m512i
  6160. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6161. _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
  6162. {
  6163. return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
  6164. (__v8di) __X,
  6165. (__v8di)
  6166. _mm512_undefined_epi32 (),
  6167. (__mmask8) -1);
  6168. }
  6169. extern __inline __m512i
  6170. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6171. _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
  6172. __m512i __Y)
  6173. {
  6174. return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
  6175. (__v8di) __X,
  6176. (__v8di) __W,
  6177. __M);
  6178. }
  6179. extern __inline __m512i
  6180. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6181. _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
  6182. {
  6183. return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
  6184. (__v16si) __X,
  6185. (__v16si)
  6186. _mm512_setzero_si512 (),
  6187. __M);
  6188. }
  6189. extern __inline __m512i
  6190. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6191. _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
  6192. {
  6193. return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
  6194. (__v16si) __X,
  6195. (__v16si)
  6196. _mm512_undefined_epi32 (),
  6197. (__mmask16) -1);
  6198. }
  6199. extern __inline __m512i
  6200. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6201. _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
  6202. __m512i __Y)
  6203. {
  6204. return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
  6205. (__v16si) __X,
  6206. (__v16si) __W,
  6207. __M);
  6208. }
  6209. extern __inline __m512d
  6210. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6211. _mm512_permutexvar_pd (__m512i __X, __m512d __Y)
  6212. {
  6213. return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
  6214. (__v8di) __X,
  6215. (__v8df)
  6216. _mm512_undefined_pd (),
  6217. (__mmask8) -1);
  6218. }
  6219. extern __inline __m512d
  6220. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6221. _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
  6222. {
  6223. return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
  6224. (__v8di) __X,
  6225. (__v8df) __W,
  6226. (__mmask8) __U);
  6227. }
  6228. extern __inline __m512d
  6229. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6230. _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
  6231. {
  6232. return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
  6233. (__v8di) __X,
  6234. (__v8df)
  6235. _mm512_setzero_pd (),
  6236. (__mmask8) __U);
  6237. }
  6238. extern __inline __m512
  6239. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6240. _mm512_permutexvar_ps (__m512i __X, __m512 __Y)
  6241. {
  6242. return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
  6243. (__v16si) __X,
  6244. (__v16sf)
  6245. _mm512_undefined_ps (),
  6246. (__mmask16) -1);
  6247. }
  6248. extern __inline __m512
  6249. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6250. _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
  6251. {
  6252. return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
  6253. (__v16si) __X,
  6254. (__v16sf) __W,
  6255. (__mmask16) __U);
  6256. }
  6257. extern __inline __m512
  6258. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6259. _mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
  6260. {
  6261. return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
  6262. (__v16si) __X,
  6263. (__v16sf)
  6264. _mm512_setzero_ps (),
  6265. (__mmask16) __U);
  6266. }
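/* Usage sketch (illustrative, not part of the original header): permutexvar
   permutes across the full 512-bit register, taking indices from the first
   argument and data from the second.  Hypothetical names:

     __m512i idx  = _mm512_set_epi32 (0, 1, 2, 3, 4, 5, 6, 7,
                                      8, 9, 10, 11, 12, 13, 14, 15);
     __m512  data = _mm512_set1_ps (2.0f);
     __m512  rev  = _mm512_permutexvar_ps (idx, data);   // full reversal  */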
  6267. #ifdef __OPTIMIZE__
  6268. extern __inline __m512
  6269. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6270. _mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
  6271. {
  6272. return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
  6273. (__v16sf) __V, __imm,
  6274. (__v16sf)
  6275. _mm512_undefined_ps (),
  6276. (__mmask16) -1);
  6277. }
  6278. extern __inline __m512
  6279. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6280. _mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
  6281. __m512 __V, const int __imm)
  6282. {
  6283. return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
  6284. (__v16sf) __V, __imm,
  6285. (__v16sf) __W,
  6286. (__mmask16) __U);
  6287. }
  6288. extern __inline __m512
  6289. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6290. _mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
  6291. {
  6292. return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
  6293. (__v16sf) __V, __imm,
  6294. (__v16sf)
  6295. _mm512_setzero_ps (),
  6296. (__mmask16) __U);
  6297. }
  6298. extern __inline __m512d
  6299. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6300. _mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
  6301. {
  6302. return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
  6303. (__v8df) __V, __imm,
  6304. (__v8df)
  6305. _mm512_undefined_pd (),
  6306. (__mmask8) -1);
  6307. }
  6308. extern __inline __m512d
  6309. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6310. _mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
  6311. __m512d __V, const int __imm)
  6312. {
  6313. return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
  6314. (__v8df) __V, __imm,
  6315. (__v8df) __W,
  6316. (__mmask8) __U);
  6317. }
  6318. extern __inline __m512d
  6319. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6320. _mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
  6321. const int __imm)
  6322. {
  6323. return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
  6324. (__v8df) __V, __imm,
  6325. (__v8df)
  6326. _mm512_setzero_pd (),
  6327. (__mmask8) __U);
  6328. }
  6329. extern __inline __m512d
  6330. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6331. _mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
  6332. const int __imm, const int __R)
  6333. {
  6334. return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
  6335. (__v8df) __B,
  6336. (__v8di) __C,
  6337. __imm,
  6338. (__mmask8) -1, __R);
  6339. }
  6340. extern __inline __m512d
  6341. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6342. _mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
  6343. __m512i __C, const int __imm, const int __R)
  6344. {
  6345. return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
  6346. (__v8df) __B,
  6347. (__v8di) __C,
  6348. __imm,
  6349. (__mmask8) __U, __R);
  6350. }
  6351. extern __inline __m512d
  6352. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6353. _mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
  6354. __m512i __C, const int __imm, const int __R)
  6355. {
  6356. return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
  6357. (__v8df) __B,
  6358. (__v8di) __C,
  6359. __imm,
  6360. (__mmask8) __U, __R);
  6361. }
  6362. extern __inline __m512
  6363. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6364. _mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
  6365. const int __imm, const int __R)
  6366. {
  6367. return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
  6368. (__v16sf) __B,
  6369. (__v16si) __C,
  6370. __imm,
  6371. (__mmask16) -1, __R);
  6372. }
  6373. extern __inline __m512
  6374. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6375. _mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
  6376. __m512i __C, const int __imm, const int __R)
  6377. {
  6378. return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
  6379. (__v16sf) __B,
  6380. (__v16si) __C,
  6381. __imm,
  6382. (__mmask16) __U, __R);
  6383. }
  6384. extern __inline __m512
  6385. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6386. _mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
  6387. __m512i __C, const int __imm, const int __R)
  6388. {
  6389. return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
  6390. (__v16sf) __B,
  6391. (__v16si) __C,
  6392. __imm,
  6393. (__mmask16) __U, __R);
  6394. }
  6395. extern __inline __m128d
  6396. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6397. _mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
  6398. const int __imm, const int __R)
  6399. {
  6400. return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
  6401. (__v2df) __B,
  6402. (__v2di) __C, __imm,
  6403. (__mmask8) -1, __R);
  6404. }
  6405. extern __inline __m128d
  6406. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6407. _mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
  6408. __m128i __C, const int __imm, const int __R)
  6409. {
  6410. return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
  6411. (__v2df) __B,
  6412. (__v2di) __C, __imm,
  6413. (__mmask8) __U, __R);
  6414. }
  6415. extern __inline __m128d
  6416. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6417. _mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
  6418. __m128i __C, const int __imm, const int __R)
  6419. {
  6420. return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
  6421. (__v2df) __B,
  6422. (__v2di) __C,
  6423. __imm,
  6424. (__mmask8) __U, __R);
  6425. }
  6426. extern __inline __m128
  6427. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6428. _mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
  6429. const int __imm, const int __R)
  6430. {
  6431. return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
  6432. (__v4sf) __B,
  6433. (__v4si) __C, __imm,
  6434. (__mmask8) -1, __R);
  6435. }
  6436. extern __inline __m128
  6437. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6438. _mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
  6439. __m128i __C, const int __imm, const int __R)
  6440. {
  6441. return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
  6442. (__v4sf) __B,
  6443. (__v4si) __C, __imm,
  6444. (__mmask8) __U, __R);
  6445. }
  6446. extern __inline __m128
  6447. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6448. _mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
  6449. __m128i __C, const int __imm, const int __R)
  6450. {
  6451. return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
  6452. (__v4sf) __B,
  6453. (__v4si) __C, __imm,
  6454. (__mmask8) __U, __R);
  6455. }
  6456. #else
  6457. #define _mm512_shuffle_pd(X, Y, C) \
  6458. ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
  6459. (__v8df)(__m512d)(Y), (int)(C),\
  6460. (__v8df)(__m512d)_mm512_undefined_pd(),\
  6461. (__mmask8)-1))
  6462. #define _mm512_mask_shuffle_pd(W, U, X, Y, C) \
  6463. ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
  6464. (__v8df)(__m512d)(Y), (int)(C),\
  6465. (__v8df)(__m512d)(W),\
  6466. (__mmask8)(U)))
  6467. #define _mm512_maskz_shuffle_pd(U, X, Y, C) \
  6468. ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
  6469. (__v8df)(__m512d)(Y), (int)(C),\
  6470. (__v8df)(__m512d)_mm512_setzero_pd(),\
  6471. (__mmask8)(U)))
  6472. #define _mm512_shuffle_ps(X, Y, C) \
  6473. ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
  6474. (__v16sf)(__m512)(Y), (int)(C),\
  6475. (__v16sf)(__m512)_mm512_undefined_ps(),\
  6476. (__mmask16)-1))
  6477. #define _mm512_mask_shuffle_ps(W, U, X, Y, C) \
  6478. ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
  6479. (__v16sf)(__m512)(Y), (int)(C),\
  6480. (__v16sf)(__m512)(W),\
  6481. (__mmask16)(U)))
  6482. #define _mm512_maskz_shuffle_ps(U, X, Y, C) \
  6483. ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
  6484. (__v16sf)(__m512)(Y), (int)(C),\
  6485. (__v16sf)(__m512)_mm512_setzero_ps(),\
  6486. (__mmask16)(U)))
  6487. #define _mm512_fixupimm_round_pd(X, Y, Z, C, R) \
  6488. ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
  6489. (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
  6490. (__mmask8)(-1), (R)))
  6491. #define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R) \
  6492. ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
  6493. (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
  6494. (__mmask8)(U), (R)))
  6495. #define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, R) \
  6496. ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
  6497. (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
  6498. (__mmask8)(U), (R)))
  6499. #define _mm512_fixupimm_round_ps(X, Y, Z, C, R) \
  6500. ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
  6501. (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
  6502. (__mmask16)(-1), (R)))
  6503. #define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R) \
  6504. ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
  6505. (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
  6506. (__mmask16)(U), (R)))
  6507. #define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R) \
  6508. ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
  6509. (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
  6510. (__mmask16)(U), (R)))
  6511. #define _mm_fixupimm_round_sd(X, Y, Z, C, R) \
  6512. ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
  6513. (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
  6514. (__mmask8)(-1), (R)))
  6515. #define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R) \
  6516. ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
  6517. (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
  6518. (__mmask8)(U), (R)))
  6519. #define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R) \
  6520. ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
  6521. (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
  6522. (__mmask8)(U), (R)))
  6523. #define _mm_fixupimm_round_ss(X, Y, Z, C, R) \
  6524. ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
  6525. (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
  6526. (__mmask8)(-1), (R)))
  6527. #define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R) \
  6528. ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
  6529. (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
  6530. (__mmask8)(U), (R)))
  6531. #define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R) \
  6532. ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
  6533. (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
  6534. (__mmask8)(U), (R)))
  6535. #endif
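/* Usage sketch (illustrative, not part of the original header):
   _mm512_shuffle_ps uses the classic SSE _MM_SHUFFLE encoding within each
   128-bit lane, taking two elements from the first operand and two from the
   second.  Hypothetical names:

     __m512 a = _mm512_set1_ps (1.0f);
     __m512 b = _mm512_set1_ps (2.0f);
     __m512 c = _mm512_shuffle_ps (a, b, _MM_SHUFFLE (3, 2, 1, 0));
     // per lane: a0, a1, b2, b3  */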
  6536. extern __inline __m512
  6537. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6538. _mm512_movehdup_ps (__m512 __A)
  6539. {
  6540. return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
  6541. (__v16sf)
  6542. _mm512_undefined_ps (),
  6543. (__mmask16) -1);
  6544. }
  6545. extern __inline __m512
  6546. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6547. _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
  6548. {
  6549. return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
  6550. (__v16sf) __W,
  6551. (__mmask16) __U);
  6552. }
  6553. extern __inline __m512
  6554. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6555. _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
  6556. {
  6557. return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
  6558. (__v16sf)
  6559. _mm512_setzero_ps (),
  6560. (__mmask16) __U);
  6561. }
  6562. extern __inline __m512
  6563. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6564. _mm512_moveldup_ps (__m512 __A)
  6565. {
  6566. return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
  6567. (__v16sf)
  6568. _mm512_undefined_ps (),
  6569. (__mmask16) -1);
  6570. }
  6571. extern __inline __m512
  6572. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6573. _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
  6574. {
  6575. return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
  6576. (__v16sf) __W,
  6577. (__mmask16) __U);
  6578. }
  6579. extern __inline __m512
  6580. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6581. _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
  6582. {
  6583. return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
  6584. (__v16sf)
  6585. _mm512_setzero_ps (),
  6586. (__mmask16) __U);
  6587. }
  6588. extern __inline __m512i
  6589. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6590. _mm512_or_si512 (__m512i __A, __m512i __B)
  6591. {
  6592. return (__m512i) ((__v16su) __A | (__v16su) __B);
  6593. }
  6594. extern __inline __m512i
  6595. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6596. _mm512_or_epi32 (__m512i __A, __m512i __B)
  6597. {
  6598. return (__m512i) ((__v16su) __A | (__v16su) __B);
  6599. }
  6600. extern __inline __m512i
  6601. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6602. _mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
  6603. {
  6604. return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
  6605. (__v16si) __B,
  6606. (__v16si) __W,
  6607. (__mmask16) __U);
  6608. }
  6609. extern __inline __m512i
  6610. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6611. _mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
  6612. {
  6613. return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
  6614. (__v16si) __B,
  6615. (__v16si)
  6616. _mm512_setzero_si512 (),
  6617. (__mmask16) __U);
  6618. }
  6619. extern __inline __m512i
  6620. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6621. _mm512_or_epi64 (__m512i __A, __m512i __B)
  6622. {
  6623. return (__m512i) ((__v8du) __A | (__v8du) __B);
  6624. }
  6625. extern __inline __m512i
  6626. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6627. _mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
  6628. {
  6629. return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
  6630. (__v8di) __B,
  6631. (__v8di) __W,
  6632. (__mmask8) __U);
  6633. }
  6634. extern __inline __m512i
  6635. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6636. _mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
  6637. {
  6638. return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
  6639. (__v8di) __B,
  6640. (__v8di)
  6641. _mm512_setzero_si512 (),
  6642. (__mmask8) __U);
  6643. }
  6644. extern __inline __m512i
  6645. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6646. _mm512_xor_si512 (__m512i __A, __m512i __B)
  6647. {
  6648. return (__m512i) ((__v16su) __A ^ (__v16su) __B);
  6649. }
  6650. extern __inline __m512i
  6651. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6652. _mm512_xor_epi32 (__m512i __A, __m512i __B)
  6653. {
  6654. return (__m512i) ((__v16su) __A ^ (__v16su) __B);
  6655. }
  6656. extern __inline __m512i
  6657. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6658. _mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
  6659. {
  6660. return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
  6661. (__v16si) __B,
  6662. (__v16si) __W,
  6663. (__mmask16) __U);
  6664. }
  6665. extern __inline __m512i
  6666. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6667. _mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
  6668. {
  6669. return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
  6670. (__v16si) __B,
  6671. (__v16si)
  6672. _mm512_setzero_si512 (),
  6673. (__mmask16) __U);
  6674. }
  6675. extern __inline __m512i
  6676. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6677. _mm512_xor_epi64 (__m512i __A, __m512i __B)
  6678. {
  6679. return (__m512i) ((__v8du) __A ^ (__v8du) __B);
  6680. }
  6681. extern __inline __m512i
  6682. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6683. _mm512_mask_xor_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
  6684. {
  6685. return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
  6686. (__v8di) __B,
  6687. (__v8di) __W,
  6688. (__mmask8) __U);
  6689. }
  6690. extern __inline __m512i
  6691. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6692. _mm512_maskz_xor_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
  6693. {
  6694. return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
  6695. (__v8di) __B,
  6696. (__v8di)
  6697. _mm512_setzero_si512 (),
  6698. (__mmask8) __U);
  6699. }
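/* Rotate packed 32-bit and 64-bit integers left (vprold/vprolq) or
   right (vprord/vprorq) by an immediate count.  The count must be a
   compile-time constant, so these are inline functions only when
   __OPTIMIZE__ guarantees it can be folded; otherwise they are
   provided as macros that pass the count straight to the builtin.  */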
  6700. #ifdef __OPTIMIZE__
  6701. extern __inline __m512i
  6702. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6703. _mm512_rol_epi32 (__m512i __A, const int __B)
  6704. {
  6705. return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
  6706. (__v16si)
  6707. _mm512_undefined_epi32 (),
  6708. (__mmask16) -1);
  6709. }
  6710. extern __inline __m512i
  6711. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6712. _mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
  6713. {
  6714. return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
  6715. (__v16si) __W,
  6716. (__mmask16) __U);
  6717. }
  6718. extern __inline __m512i
  6719. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6720. _mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
  6721. {
  6722. return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
  6723. (__v16si)
  6724. _mm512_setzero_si512 (),
  6725. (__mmask16) __U);
  6726. }
  6727. extern __inline __m512i
  6728. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6729. _mm512_ror_epi32 (__m512i __A, int __B)
  6730. {
  6731. return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
  6732. (__v16si)
  6733. _mm512_undefined_epi32 (),
  6734. (__mmask16) -1);
  6735. }
  6736. extern __inline __m512i
  6737. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6738. _mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B)
  6739. {
  6740. return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
  6741. (__v16si) __W,
  6742. (__mmask16) __U);
  6743. }
  6744. extern __inline __m512i
  6745. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6746. _mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B)
  6747. {
  6748. return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
  6749. (__v16si)
  6750. _mm512_setzero_si512 (),
  6751. (__mmask16) __U);
  6752. }
  6753. extern __inline __m512i
  6754. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6755. _mm512_rol_epi64 (__m512i __A, const int __B)
  6756. {
  6757. return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
  6758. (__v8di)
  6759. _mm512_undefined_epi32 (),
  6760. (__mmask8) -1);
  6761. }
  6762. extern __inline __m512i
  6763. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6764. _mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
  6765. {
  6766. return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
  6767. (__v8di) __W,
  6768. (__mmask8) __U);
  6769. }
  6770. extern __inline __m512i
  6771. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6772. _mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
  6773. {
  6774. return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
  6775. (__v8di)
  6776. _mm512_setzero_si512 (),
  6777. (__mmask8) __U);
  6778. }
  6779. extern __inline __m512i
  6780. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6781. _mm512_ror_epi64 (__m512i __A, int __B)
  6782. {
  6783. return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
  6784. (__v8di)
  6785. _mm512_undefined_epi32 (),
  6786. (__mmask8) -1);
  6787. }
  6788. extern __inline __m512i
  6789. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6790. _mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B)
  6791. {
  6792. return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
  6793. (__v8di) __W,
  6794. (__mmask8) __U);
  6795. }
  6796. extern __inline __m512i
  6797. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6798. _mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
  6799. {
  6800. return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
  6801. (__v8di)
  6802. _mm512_setzero_si512 (),
  6803. (__mmask8) __U);
  6804. }
  6805. #else
  6806. #define _mm512_rol_epi32(A, B) \
  6807. ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
  6808. (int)(B), \
  6809. (__v16si)_mm512_undefined_epi32 (), \
  6810. (__mmask16)(-1)))
  6811. #define _mm512_mask_rol_epi32(W, U, A, B) \
  6812. ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
  6813. (int)(B), \
  6814. (__v16si)(__m512i)(W), \
  6815. (__mmask16)(U)))
  6816. #define _mm512_maskz_rol_epi32(U, A, B) \
  6817. ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
  6818. (int)(B), \
  6819. (__v16si)_mm512_setzero_si512 (), \
  6820. (__mmask16)(U)))
  6821. #define _mm512_ror_epi32(A, B) \
  6822. ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
  6823. (int)(B), \
  6824. (__v16si)_mm512_undefined_epi32 (), \
  6825. (__mmask16)(-1)))
  6826. #define _mm512_mask_ror_epi32(W, U, A, B) \
  6827. ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
  6828. (int)(B), \
  6829. (__v16si)(__m512i)(W), \
  6830. (__mmask16)(U)))
  6831. #define _mm512_maskz_ror_epi32(U, A, B) \
  6832. ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
  6833. (int)(B), \
  6834. (__v16si)_mm512_setzero_si512 (), \
  6835. (__mmask16)(U)))
  6836. #define _mm512_rol_epi64(A, B) \
  6837. ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
  6838. (int)(B), \
  6839. (__v8di)_mm512_undefined_epi32 (), \
  6840. (__mmask8)(-1)))
  6841. #define _mm512_mask_rol_epi64(W, U, A, B) \
  6842. ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
  6843. (int)(B), \
  6844. (__v8di)(__m512i)(W), \
  6845. (__mmask8)(U)))
  6846. #define _mm512_maskz_rol_epi64(U, A, B) \
  6847. ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
  6848. (int)(B), \
  6849. (__v8di)_mm512_setzero_si512 (), \
  6850. (__mmask8)(U)))
  6851. #define _mm512_ror_epi64(A, B) \
  6852. ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
  6853. (int)(B), \
  6854. (__v8di)_mm512_undefined_epi32 (), \
  6855. (__mmask8)(-1)))
  6856. #define _mm512_mask_ror_epi64(W, U, A, B) \
  6857. ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
  6858. (int)(B), \
  6859. (__v8di)(__m512i)(W), \
  6860. (__mmask8)(U)))
  6861. #define _mm512_maskz_ror_epi64(U, A, B) \
  6862. ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
  6863. (int)(B), \
  6864. (__v8di)_mm512_setzero_si512 (), \
  6865. (__mmask8)(U)))
  6866. #endif
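/* Bitwise AND of the full register and of packed 32-bit and 64-bit
   integers (vpandd/vpandq), with merge- and zero-masking variants.  */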
  6867. extern __inline __m512i
  6868. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6869. _mm512_and_si512 (__m512i __A, __m512i __B)
  6870. {
  6871. return (__m512i) ((__v16su) __A & (__v16su) __B);
  6872. }
  6873. extern __inline __m512i
  6874. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6875. _mm512_and_epi32 (__m512i __A, __m512i __B)
  6876. {
  6877. return (__m512i) ((__v16su) __A & (__v16su) __B);
  6878. }
  6879. extern __inline __m512i
  6880. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6881. _mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
  6882. {
  6883. return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
  6884. (__v16si) __B,
  6885. (__v16si) __W,
  6886. (__mmask16) __U);
  6887. }
  6888. extern __inline __m512i
  6889. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6890. _mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
  6891. {
  6892. return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
  6893. (__v16si) __B,
  6894. (__v16si)
  6895. _mm512_setzero_si512 (),
  6896. (__mmask16) __U);
  6897. }
  6898. extern __inline __m512i
  6899. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6900. _mm512_and_epi64 (__m512i __A, __m512i __B)
  6901. {
  6902. return (__m512i) ((__v8du) __A & (__v8du) __B);
  6903. }
  6904. extern __inline __m512i
  6905. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6906. _mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
  6907. {
  6908. return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
  6909. (__v8di) __B,
  6910. (__v8di) __W, __U);
  6911. }
  6912. extern __inline __m512i
  6913. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6914. _mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
  6915. {
  6916. return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
  6917. (__v8di) __B,
  6918. (__v8di)
6919. _mm512_setzero_si512 (),
  6920. __U);
  6921. }
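/* Bitwise AND-NOT (vpandnd/vpandnq): each result element is
   (~__A) & __B, with merge- and zero-masking variants.  */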
  6922. extern __inline __m512i
  6923. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6924. _mm512_andnot_si512 (__m512i __A, __m512i __B)
  6925. {
  6926. return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
  6927. (__v16si) __B,
  6928. (__v16si)
  6929. _mm512_undefined_epi32 (),
  6930. (__mmask16) -1);
  6931. }
  6932. extern __inline __m512i
  6933. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6934. _mm512_andnot_epi32 (__m512i __A, __m512i __B)
  6935. {
  6936. return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
  6937. (__v16si) __B,
  6938. (__v16si)
  6939. _mm512_undefined_epi32 (),
  6940. (__mmask16) -1);
  6941. }
  6942. extern __inline __m512i
  6943. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6944. _mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
  6945. {
  6946. return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
  6947. (__v16si) __B,
  6948. (__v16si) __W,
  6949. (__mmask16) __U);
  6950. }
  6951. extern __inline __m512i
  6952. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6953. _mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
  6954. {
  6955. return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
  6956. (__v16si) __B,
  6957. (__v16si)
  6958. _mm512_setzero_si512 (),
  6959. (__mmask16) __U);
  6960. }
  6961. extern __inline __m512i
  6962. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6963. _mm512_andnot_epi64 (__m512i __A, __m512i __B)
  6964. {
  6965. return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
  6966. (__v8di) __B,
  6967. (__v8di)
  6968. _mm512_undefined_epi32 (),
  6969. (__mmask8) -1);
  6970. }
  6971. extern __inline __m512i
  6972. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6973. _mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
  6974. {
  6975. return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
  6976. (__v8di) __B,
  6977. (__v8di) __W, __U);
  6978. }
  6979. extern __inline __m512i
  6980. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6981. _mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
  6982. {
  6983. return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
  6984. (__v8di) __B,
  6985. (__v8di)
6986. _mm512_setzero_si512 (),
  6987. __U);
  6988. }
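/* Logical test instructions (vptestmd/vptestmq, vptestnmd/vptestnmq).
   Bit i of the result mask is set when element i of __A & __B is
   non-zero (testm) or zero (testnm); the _mm512_mask_* forms
   additionally AND the result with __U.  */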
  6989. extern __inline __mmask16
  6990. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6991. _mm512_test_epi32_mask (__m512i __A, __m512i __B)
  6992. {
  6993. return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
  6994. (__v16si) __B,
  6995. (__mmask16) -1);
  6996. }
  6997. extern __inline __mmask16
  6998. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  6999. _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
  7000. {
  7001. return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
  7002. (__v16si) __B, __U);
  7003. }
  7004. extern __inline __mmask8
  7005. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7006. _mm512_test_epi64_mask (__m512i __A, __m512i __B)
  7007. {
  7008. return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
  7009. (__v8di) __B,
  7010. (__mmask8) -1);
  7011. }
  7012. extern __inline __mmask8
  7013. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7014. _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
  7015. {
  7016. return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
  7017. }
  7018. extern __inline __mmask16
  7019. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7020. _mm512_testn_epi32_mask (__m512i __A, __m512i __B)
  7021. {
  7022. return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
  7023. (__v16si) __B,
  7024. (__mmask16) -1);
  7025. }
  7026. extern __inline __mmask16
  7027. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7028. _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
  7029. {
  7030. return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
  7031. (__v16si) __B, __U);
  7032. }
  7033. extern __inline __mmask8
  7034. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7035. _mm512_testn_epi64_mask (__m512i __A, __m512i __B)
  7036. {
  7037. return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
  7038. (__v8di) __B,
  7039. (__mmask8) -1);
  7040. }
  7041. extern __inline __mmask8
  7042. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7043. _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
  7044. {
  7045. return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
  7046. (__v8di) __B, __U);
  7047. }
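/* Absolute value of packed floats/doubles, implemented by clearing the
   sign bit with a bitwise AND against 0x7fffffff or
   0x7fffffffffffffff.  */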
  7048. extern __inline __m512
  7049. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7050. _mm512_abs_ps (__m512 __A)
  7051. {
  7052. return (__m512) _mm512_and_epi32 ((__m512i) __A,
  7053. _mm512_set1_epi32 (0x7fffffff));
  7054. }
  7055. extern __inline __m512
  7056. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7057. _mm512_mask_abs_ps (__m512 __W, __mmask16 __U, __m512 __A)
  7058. {
  7059. return (__m512) _mm512_mask_and_epi32 ((__m512i) __W, __U, (__m512i) __A,
  7060. _mm512_set1_epi32 (0x7fffffff));
  7061. }
  7062. extern __inline __m512d
  7063. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7064. _mm512_abs_pd (__m512d __A)
  7065. {
  7066. return (__m512d) _mm512_and_epi64 ((__m512i) __A,
  7067. _mm512_set1_epi64 (0x7fffffffffffffffLL));
  7068. }
  7069. extern __inline __m512d
  7070. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7071. _mm512_mask_abs_pd (__m512d __W, __mmask8 __U, __m512d __A)
  7072. {
  7073. return (__m512d)
  7074. _mm512_mask_and_epi64 ((__m512i) __W, __U, (__m512i) __A,
  7075. _mm512_set1_epi64 (0x7fffffffffffffffLL));
  7076. }
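/* Interleave (unpack) the high or low halves of each 128-bit lane for
   packed 32-bit and 64-bit integers (vpunpckhdq/vpunpckhqdq,
   vpunpckldq/vpunpcklqdq), with masking variants.  */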
  7077. extern __inline __m512i
  7078. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7079. _mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
  7080. {
  7081. return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
  7082. (__v16si) __B,
  7083. (__v16si)
  7084. _mm512_undefined_epi32 (),
  7085. (__mmask16) -1);
  7086. }
  7087. extern __inline __m512i
  7088. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7089. _mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
  7090. __m512i __B)
  7091. {
  7092. return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
  7093. (__v16si) __B,
  7094. (__v16si) __W,
  7095. (__mmask16) __U);
  7096. }
  7097. extern __inline __m512i
  7098. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7099. _mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
  7100. {
  7101. return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
  7102. (__v16si) __B,
  7103. (__v16si)
  7104. _mm512_setzero_si512 (),
  7105. (__mmask16) __U);
  7106. }
  7107. extern __inline __m512i
  7108. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7109. _mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
  7110. {
  7111. return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
  7112. (__v8di) __B,
  7113. (__v8di)
  7114. _mm512_undefined_epi32 (),
  7115. (__mmask8) -1);
  7116. }
  7117. extern __inline __m512i
  7118. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7119. _mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
  7120. {
  7121. return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
  7122. (__v8di) __B,
  7123. (__v8di) __W,
  7124. (__mmask8) __U);
  7125. }
  7126. extern __inline __m512i
  7127. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7128. _mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
  7129. {
  7130. return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
  7131. (__v8di) __B,
  7132. (__v8di)
  7133. _mm512_setzero_si512 (),
  7134. (__mmask8) __U);
  7135. }
  7136. extern __inline __m512i
  7137. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7138. _mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
  7139. {
  7140. return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
  7141. (__v16si) __B,
  7142. (__v16si)
  7143. _mm512_undefined_epi32 (),
  7144. (__mmask16) -1);
  7145. }
  7146. extern __inline __m512i
  7147. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7148. _mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
  7149. __m512i __B)
  7150. {
  7151. return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
  7152. (__v16si) __B,
  7153. (__v16si) __W,
  7154. (__mmask16) __U);
  7155. }
  7156. extern __inline __m512i
  7157. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7158. _mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
  7159. {
  7160. return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
  7161. (__v16si) __B,
  7162. (__v16si)
  7163. _mm512_setzero_si512 (),
  7164. (__mmask16) __U);
  7165. }
  7166. extern __inline __m512i
  7167. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7168. _mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
  7169. {
  7170. return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
  7171. (__v8di) __B,
  7172. (__v8di)
  7173. _mm512_undefined_epi32 (),
  7174. (__mmask8) -1);
  7175. }
  7176. extern __inline __m512i
  7177. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7178. _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
  7179. {
  7180. return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
  7181. (__v8di) __B,
  7182. (__v8di) __W,
  7183. (__mmask8) __U);
  7184. }
  7185. extern __inline __m512i
  7186. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7187. _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
  7188. {
  7189. return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
  7190. (__v8di) __B,
  7191. (__v8di)
  7192. _mm512_setzero_si512 (),
  7193. (__mmask8) __U);
  7194. }
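/* Scalar conversions from the low SS/SD element to signed and unsigned
   integers with an explicit rounding mode __R (e.g.
   _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC).  The _cvtt_ forms
   always truncate and take __R only so exceptions can be suppressed.
   The 64-bit destinations exist only on x86_64, and __R must be a
   compile-time constant, hence the macro fallback when __OPTIMIZE__ is
   not defined.  */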
  7195. #ifdef __x86_64__
  7196. #ifdef __OPTIMIZE__
  7197. extern __inline unsigned long long
  7198. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7199. _mm_cvt_roundss_u64 (__m128 __A, const int __R)
  7200. {
  7201. return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R);
  7202. }
  7203. extern __inline long long
  7204. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7205. _mm_cvt_roundss_si64 (__m128 __A, const int __R)
  7206. {
  7207. return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
  7208. }
  7209. extern __inline long long
  7210. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7211. _mm_cvt_roundss_i64 (__m128 __A, const int __R)
  7212. {
  7213. return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
  7214. }
  7215. extern __inline unsigned long long
  7216. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7217. _mm_cvtt_roundss_u64 (__m128 __A, const int __R)
  7218. {
  7219. return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R);
  7220. }
  7221. extern __inline long long
  7222. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7223. _mm_cvtt_roundss_i64 (__m128 __A, const int __R)
  7224. {
  7225. return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
  7226. }
  7227. extern __inline long long
  7228. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7229. _mm_cvtt_roundss_si64 (__m128 __A, const int __R)
  7230. {
  7231. return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
  7232. }
  7233. #else
  7234. #define _mm_cvt_roundss_u64(A, B) \
  7235. ((unsigned long long)__builtin_ia32_vcvtss2usi64(A, B))
  7236. #define _mm_cvt_roundss_si64(A, B) \
  7237. ((long long)__builtin_ia32_vcvtss2si64(A, B))
  7238. #define _mm_cvt_roundss_i64(A, B) \
  7239. ((long long)__builtin_ia32_vcvtss2si64(A, B))
  7240. #define _mm_cvtt_roundss_u64(A, B) \
  7241. ((unsigned long long)__builtin_ia32_vcvttss2usi64(A, B))
  7242. #define _mm_cvtt_roundss_i64(A, B) \
  7243. ((long long)__builtin_ia32_vcvttss2si64(A, B))
  7244. #define _mm_cvtt_roundss_si64(A, B) \
  7245. ((long long)__builtin_ia32_vcvttss2si64(A, B))
  7246. #endif
  7247. #endif
  7248. #ifdef __OPTIMIZE__
  7249. extern __inline unsigned
  7250. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7251. _mm_cvt_roundss_u32 (__m128 __A, const int __R)
  7252. {
  7253. return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R);
  7254. }
  7255. extern __inline int
  7256. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7257. _mm_cvt_roundss_si32 (__m128 __A, const int __R)
  7258. {
  7259. return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
  7260. }
  7261. extern __inline int
  7262. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7263. _mm_cvt_roundss_i32 (__m128 __A, const int __R)
  7264. {
  7265. return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
  7266. }
  7267. extern __inline unsigned
  7268. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7269. _mm_cvtt_roundss_u32 (__m128 __A, const int __R)
  7270. {
  7271. return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R);
  7272. }
  7273. extern __inline int
  7274. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7275. _mm_cvtt_roundss_i32 (__m128 __A, const int __R)
  7276. {
  7277. return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
  7278. }
  7279. extern __inline int
  7280. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7281. _mm_cvtt_roundss_si32 (__m128 __A, const int __R)
  7282. {
  7283. return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
  7284. }
  7285. #else
  7286. #define _mm_cvt_roundss_u32(A, B) \
  7287. ((unsigned)__builtin_ia32_vcvtss2usi32(A, B))
  7288. #define _mm_cvt_roundss_si32(A, B) \
  7289. ((int)__builtin_ia32_vcvtss2si32(A, B))
  7290. #define _mm_cvt_roundss_i32(A, B) \
  7291. ((int)__builtin_ia32_vcvtss2si32(A, B))
  7292. #define _mm_cvtt_roundss_u32(A, B) \
  7293. ((unsigned)__builtin_ia32_vcvttss2usi32(A, B))
  7294. #define _mm_cvtt_roundss_si32(A, B) \
  7295. ((int)__builtin_ia32_vcvttss2si32(A, B))
  7296. #define _mm_cvtt_roundss_i32(A, B) \
  7297. ((int)__builtin_ia32_vcvttss2si32(A, B))
  7298. #endif
  7299. #ifdef __x86_64__
  7300. #ifdef __OPTIMIZE__
  7301. extern __inline unsigned long long
  7302. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7303. _mm_cvt_roundsd_u64 (__m128d __A, const int __R)
  7304. {
  7305. return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R);
  7306. }
  7307. extern __inline long long
  7308. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7309. _mm_cvt_roundsd_si64 (__m128d __A, const int __R)
  7310. {
  7311. return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
  7312. }
  7313. extern __inline long long
  7314. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7315. _mm_cvt_roundsd_i64 (__m128d __A, const int __R)
  7316. {
  7317. return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
  7318. }
  7319. extern __inline unsigned long long
  7320. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7321. _mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
  7322. {
  7323. return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R);
  7324. }
  7325. extern __inline long long
  7326. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7327. _mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
  7328. {
  7329. return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
  7330. }
  7331. extern __inline long long
  7332. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7333. _mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
  7334. {
  7335. return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
  7336. }
  7337. #else
  7338. #define _mm_cvt_roundsd_u64(A, B) \
  7339. ((unsigned long long)__builtin_ia32_vcvtsd2usi64(A, B))
  7340. #define _mm_cvt_roundsd_si64(A, B) \
  7341. ((long long)__builtin_ia32_vcvtsd2si64(A, B))
  7342. #define _mm_cvt_roundsd_i64(A, B) \
  7343. ((long long)__builtin_ia32_vcvtsd2si64(A, B))
  7344. #define _mm_cvtt_roundsd_u64(A, B) \
  7345. ((unsigned long long)__builtin_ia32_vcvttsd2usi64(A, B))
  7346. #define _mm_cvtt_roundsd_si64(A, B) \
  7347. ((long long)__builtin_ia32_vcvttsd2si64(A, B))
  7348. #define _mm_cvtt_roundsd_i64(A, B) \
  7349. ((long long)__builtin_ia32_vcvttsd2si64(A, B))
  7350. #endif
  7351. #endif
  7352. #ifdef __OPTIMIZE__
  7353. extern __inline unsigned
  7354. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7355. _mm_cvt_roundsd_u32 (__m128d __A, const int __R)
  7356. {
  7357. return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R);
  7358. }
  7359. extern __inline int
  7360. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7361. _mm_cvt_roundsd_si32 (__m128d __A, const int __R)
  7362. {
  7363. return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
  7364. }
  7365. extern __inline int
  7366. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7367. _mm_cvt_roundsd_i32 (__m128d __A, const int __R)
  7368. {
  7369. return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
  7370. }
  7371. extern __inline unsigned
  7372. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7373. _mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
  7374. {
  7375. return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R);
  7376. }
  7377. extern __inline int
  7378. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7379. _mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
  7380. {
  7381. return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
  7382. }
  7383. extern __inline int
  7384. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7385. _mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
  7386. {
  7387. return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
  7388. }
  7389. #else
  7390. #define _mm_cvt_roundsd_u32(A, B) \
  7391. ((unsigned)__builtin_ia32_vcvtsd2usi32(A, B))
  7392. #define _mm_cvt_roundsd_si32(A, B) \
  7393. ((int)__builtin_ia32_vcvtsd2si32(A, B))
  7394. #define _mm_cvt_roundsd_i32(A, B) \
  7395. ((int)__builtin_ia32_vcvtsd2si32(A, B))
  7396. #define _mm_cvtt_roundsd_u32(A, B) \
  7397. ((unsigned)__builtin_ia32_vcvttsd2usi32(A, B))
  7398. #define _mm_cvtt_roundsd_si32(A, B) \
  7399. ((int)__builtin_ia32_vcvttsd2si32(A, B))
  7400. #define _mm_cvtt_roundsd_i32(A, B) \
  7401. ((int)__builtin_ia32_vcvttsd2si32(A, B))
  7402. #endif
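/* Duplicate the even-indexed double of each 128-bit lane into both
   elements of that lane (vmovddup), with masking variants.  */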
  7403. extern __inline __m512d
  7404. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7405. _mm512_movedup_pd (__m512d __A)
  7406. {
  7407. return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
  7408. (__v8df)
  7409. _mm512_undefined_pd (),
  7410. (__mmask8) -1);
  7411. }
  7412. extern __inline __m512d
  7413. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7414. _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
  7415. {
  7416. return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
  7417. (__v8df) __W,
  7418. (__mmask8) __U);
  7419. }
  7420. extern __inline __m512d
  7421. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7422. _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
  7423. {
  7424. return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
  7425. (__v8df)
  7426. _mm512_setzero_pd (),
  7427. (__mmask8) __U);
  7428. }
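/* Interleave the low or high elements of each 128-bit lane for packed
   doubles and floats (vunpcklpd/vunpckhpd, vunpckhps), with masking
   variants.  */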
  7429. extern __inline __m512d
  7430. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7431. _mm512_unpacklo_pd (__m512d __A, __m512d __B)
  7432. {
  7433. return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
  7434. (__v8df) __B,
  7435. (__v8df)
  7436. _mm512_undefined_pd (),
  7437. (__mmask8) -1);
  7438. }
  7439. extern __inline __m512d
  7440. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7441. _mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
  7442. {
  7443. return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
  7444. (__v8df) __B,
  7445. (__v8df) __W,
  7446. (__mmask8) __U);
  7447. }
  7448. extern __inline __m512d
  7449. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7450. _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
  7451. {
  7452. return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
  7453. (__v8df) __B,
  7454. (__v8df)
  7455. _mm512_setzero_pd (),
  7456. (__mmask8) __U);
  7457. }
  7458. extern __inline __m512d
  7459. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7460. _mm512_unpackhi_pd (__m512d __A, __m512d __B)
  7461. {
  7462. return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
  7463. (__v8df) __B,
  7464. (__v8df)
  7465. _mm512_undefined_pd (),
  7466. (__mmask8) -1);
  7467. }
  7468. extern __inline __m512d
  7469. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7470. _mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
  7471. {
  7472. return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
  7473. (__v8df) __B,
  7474. (__v8df) __W,
  7475. (__mmask8) __U);
  7476. }
  7477. extern __inline __m512d
  7478. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7479. _mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
  7480. {
  7481. return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
  7482. (__v8df) __B,
  7483. (__v8df)
  7484. _mm512_setzero_pd (),
  7485. (__mmask8) __U);
  7486. }
  7487. extern __inline __m512
  7488. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7489. _mm512_unpackhi_ps (__m512 __A, __m512 __B)
  7490. {
  7491. return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
  7492. (__v16sf) __B,
  7493. (__v16sf)
  7494. _mm512_undefined_ps (),
  7495. (__mmask16) -1);
  7496. }
  7497. extern __inline __m512
  7498. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7499. _mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
  7500. {
  7501. return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
  7502. (__v16sf) __B,
  7503. (__v16sf) __W,
  7504. (__mmask16) __U);
  7505. }
  7506. extern __inline __m512
  7507. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7508. _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
  7509. {
  7510. return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
  7511. (__v16sf) __B,
  7512. (__v16sf)
  7513. _mm512_setzero_ps (),
  7514. (__mmask16) __U);
  7515. }
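/* Packed conversions with an explicit rounding mode: float to double
   (vcvtps2pd), half precision to float (vcvtph2ps) and float to half
   precision (vcvtps2ph), where the immediate __I encodes the rounding
   control of the down-conversion.  */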
  7516. #ifdef __OPTIMIZE__
  7517. extern __inline __m512d
  7518. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7519. _mm512_cvt_roundps_pd (__m256 __A, const int __R)
  7520. {
  7521. return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
  7522. (__v8df)
  7523. _mm512_undefined_pd (),
  7524. (__mmask8) -1, __R);
  7525. }
  7526. extern __inline __m512d
  7527. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7528. _mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
  7529. const int __R)
  7530. {
  7531. return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
  7532. (__v8df) __W,
  7533. (__mmask8) __U, __R);
  7534. }
  7535. extern __inline __m512d
  7536. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7537. _mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
  7538. {
  7539. return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
  7540. (__v8df)
  7541. _mm512_setzero_pd (),
  7542. (__mmask8) __U, __R);
  7543. }
  7544. extern __inline __m512
  7545. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7546. _mm512_cvt_roundph_ps (__m256i __A, const int __R)
  7547. {
  7548. return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
  7549. (__v16sf)
  7550. _mm512_undefined_ps (),
  7551. (__mmask16) -1, __R);
  7552. }
  7553. extern __inline __m512
  7554. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7555. _mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
  7556. const int __R)
  7557. {
  7558. return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
  7559. (__v16sf) __W,
  7560. (__mmask16) __U, __R);
  7561. }
  7562. extern __inline __m512
  7563. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7564. _mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
  7565. {
  7566. return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
  7567. (__v16sf)
  7568. _mm512_setzero_ps (),
  7569. (__mmask16) __U, __R);
  7570. }
  7571. extern __inline __m256i
  7572. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7573. _mm512_cvt_roundps_ph (__m512 __A, const int __I)
  7574. {
  7575. return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
  7576. __I,
  7577. (__v16hi)
  7578. _mm256_undefined_si256 (),
  7579. -1);
  7580. }
  7581. extern __inline __m256i
  7582. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7583. _mm512_cvtps_ph (__m512 __A, const int __I)
  7584. {
  7585. return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
  7586. __I,
  7587. (__v16hi)
  7588. _mm256_undefined_si256 (),
  7589. -1);
  7590. }
  7591. extern __inline __m256i
  7592. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7593. _mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
  7594. const int __I)
  7595. {
  7596. return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
  7597. __I,
  7598. (__v16hi) __U,
  7599. (__mmask16) __W);
  7600. }
  7601. extern __inline __m256i
  7602. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7603. _mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I)
  7604. {
  7605. return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
  7606. __I,
  7607. (__v16hi) __U,
  7608. (__mmask16) __W);
  7609. }
  7610. extern __inline __m256i
  7611. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7612. _mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
  7613. {
  7614. return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
  7615. __I,
  7616. (__v16hi)
  7617. _mm256_setzero_si256 (),
  7618. (__mmask16) __W);
  7619. }
  7620. extern __inline __m256i
  7621. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7622. _mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
  7623. {
  7624. return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
  7625. __I,
  7626. (__v16hi)
  7627. _mm256_setzero_si256 (),
  7628. (__mmask16) __W);
  7629. }
  7630. #else
  7631. #define _mm512_cvt_roundps_pd(A, B) \
  7632. (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, B)
  7633. #define _mm512_mask_cvt_roundps_pd(W, U, A, B) \
  7634. (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)(W), U, B)
  7635. #define _mm512_maskz_cvt_roundps_pd(U, A, B) \
  7636. (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_setzero_pd(), U, B)
  7637. #define _mm512_cvt_roundph_ps(A, B) \
  7638. (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_undefined_ps(), -1, B)
  7639. #define _mm512_mask_cvt_roundph_ps(W, U, A, B) \
  7640. (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)(W), U, B)
  7641. #define _mm512_maskz_cvt_roundph_ps(U, A, B) \
  7642. (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_setzero_ps(), U, B)
  7643. #define _mm512_cvt_roundps_ph(A, I) \
  7644. ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), (int) (I),\
  7645. (__v16hi)_mm256_undefined_si256 (), -1))
  7646. #define _mm512_cvtps_ph(A, I) \
  7647. ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), (int) (I),\
  7648. (__v16hi)_mm256_undefined_si256 (), -1))
  7649. #define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
  7650. ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), (int) (I),\
  7651. (__v16hi)(__m256i)(U), (__mmask16) (W)))
  7652. #define _mm512_mask_cvtps_ph(U, W, A, I) \
  7653. ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), (int) (I),\
  7654. (__v16hi)(__m256i)(U), (__mmask16) (W)))
  7655. #define _mm512_maskz_cvt_roundps_ph(W, A, I) \
  7656. ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), (int) (I),\
  7657. (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
  7658. #define _mm512_maskz_cvtps_ph(W, A, I) \
  7659. ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), (int) (I),\
  7660. (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
  7661. #endif
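/* Conversions with explicit rounding from packed double to float
   (vcvtpd2ps) and between the scalar double and float elements
   (vcvtsd2ss/vcvtss2sd), followed by convenience macros that use the
   current rounding direction.  */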
  7662. #ifdef __OPTIMIZE__
  7663. extern __inline __m256
  7664. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7665. _mm512_cvt_roundpd_ps (__m512d __A, const int __R)
  7666. {
  7667. return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
  7668. (__v8sf)
  7669. _mm256_undefined_ps (),
  7670. (__mmask8) -1, __R);
  7671. }
  7672. extern __inline __m256
  7673. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7674. _mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
  7675. const int __R)
  7676. {
  7677. return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
  7678. (__v8sf) __W,
  7679. (__mmask8) __U, __R);
  7680. }
  7681. extern __inline __m256
  7682. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7683. _mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
  7684. {
  7685. return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
  7686. (__v8sf)
  7687. _mm256_setzero_ps (),
  7688. (__mmask8) __U, __R);
  7689. }
  7690. extern __inline __m128
  7691. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7692. _mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
  7693. {
  7694. return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
  7695. (__v2df) __B,
  7696. __R);
  7697. }
  7698. extern __inline __m128
  7699. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7700. _mm_mask_cvt_roundsd_ss (__m128 __W, __mmask8 __U, __m128 __A,
  7701. __m128d __B, const int __R)
  7702. {
  7703. return (__m128) __builtin_ia32_cvtsd2ss_mask_round ((__v4sf) __A,
  7704. (__v2df) __B,
  7705. (__v4sf) __W,
  7706. __U,
  7707. __R);
  7708. }
  7709. extern __inline __m128
  7710. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7711. _mm_maskz_cvt_roundsd_ss (__mmask8 __U, __m128 __A,
  7712. __m128d __B, const int __R)
  7713. {
  7714. return (__m128) __builtin_ia32_cvtsd2ss_mask_round ((__v4sf) __A,
  7715. (__v2df) __B,
7716. (__v4sf) _mm_setzero_ps (),
  7717. __U,
  7718. __R);
  7719. }
  7720. extern __inline __m128d
  7721. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7722. _mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
  7723. {
  7724. return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
  7725. (__v4sf) __B,
  7726. __R);
  7727. }
  7728. extern __inline __m128d
  7729. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7730. _mm_mask_cvt_roundss_sd (__m128d __W, __mmask8 __U, __m128d __A,
  7731. __m128 __B, const int __R)
  7732. {
  7733. return (__m128d) __builtin_ia32_cvtss2sd_mask_round ((__v2df) __A,
  7734. (__v4sf) __B,
  7735. (__v2df) __W,
  7736. __U,
  7737. __R);
  7738. }
  7739. extern __inline __m128d
  7740. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7741. _mm_maskz_cvt_roundss_sd (__mmask8 __U, __m128d __A,
  7742. __m128 __B, const int __R)
  7743. {
  7744. return (__m128d) __builtin_ia32_cvtss2sd_mask_round ((__v2df) __A,
  7745. (__v4sf) __B,
7746. (__v2df) _mm_setzero_pd (),
  7747. __U,
  7748. __R);
  7749. }
  7750. #else
  7751. #define _mm512_cvt_roundpd_ps(A, B) \
  7752. (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_undefined_ps(), -1, B)
  7753. #define _mm512_mask_cvt_roundpd_ps(W, U, A, B) \
  7754. (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)(W), U, B)
  7755. #define _mm512_maskz_cvt_roundpd_ps(U, A, B) \
  7756. (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_setzero_ps(), U, B)
  7757. #define _mm_cvt_roundsd_ss(A, B, C) \
  7758. (__m128)__builtin_ia32_cvtsd2ss_round(A, B, C)
  7759. #define _mm_mask_cvt_roundsd_ss(W, U, A, B, C) \
  7760. (__m128)__builtin_ia32_cvtsd2ss_mask_round ((A), (B), (W), (U), (C))
  7761. #define _mm_maskz_cvt_roundsd_ss(U, A, B, C) \
  7762. (__m128)__builtin_ia32_cvtsd2ss_mask_round ((A), (B), _mm_setzero_ps (), \
  7763. (U), (C))
  7764. #define _mm_cvt_roundss_sd(A, B, C) \
  7765. (__m128d)__builtin_ia32_cvtss2sd_round(A, B, C)
  7766. #define _mm_mask_cvt_roundss_sd(W, U, A, B, C) \
  7767. (__m128d)__builtin_ia32_cvtss2sd_mask_round ((A), (B), (W), (U), (C))
  7768. #define _mm_maskz_cvt_roundss_sd(U, A, B, C) \
  7769. (__m128d)__builtin_ia32_cvtss2sd_mask_round ((A), (B), _mm_setzero_pd (), \
  7770. (U), (C))
  7771. #endif
  7772. #define _mm_mask_cvtss_sd(W, U, A, B) \
  7773. _mm_mask_cvt_roundss_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
  7774. #define _mm_maskz_cvtss_sd(U, A, B) \
  7775. _mm_maskz_cvt_roundss_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
  7776. #define _mm_mask_cvtsd_ss(W, U, A, B) \
  7777. _mm_mask_cvt_roundsd_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
  7778. #define _mm_maskz_cvtsd_ss(U, A, B) \
  7779. _mm_maskz_cvt_roundsd_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
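/* Non-temporal (streaming) stores of a whole ZMM register and the
   corresponding streaming load (vmovntdq/vmovntps/vmovntpd/vmovntdqa).
   The address must be 64-byte aligned, and the accesses carry a
   non-temporal hint to reduce cache pollution.  */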
  7780. extern __inline void
  7781. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7782. _mm512_stream_si512 (__m512i * __P, __m512i __A)
  7783. {
  7784. __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
  7785. }
  7786. extern __inline void
  7787. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7788. _mm512_stream_ps (float *__P, __m512 __A)
  7789. {
  7790. __builtin_ia32_movntps512 (__P, (__v16sf) __A);
  7791. }
  7792. extern __inline void
  7793. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7794. _mm512_stream_pd (double *__P, __m512d __A)
  7795. {
  7796. __builtin_ia32_movntpd512 (__P, (__v8df) __A);
  7797. }
  7798. extern __inline __m512i
  7799. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7800. _mm512_stream_load_si512 (void *__P)
  7801. {
  7802. return __builtin_ia32_movntdqa512 ((__v8di *)__P);
  7803. }
  7804. /* Constants for mantissa extraction */
  7805. typedef enum
  7806. {
  7807. _MM_MANT_NORM_1_2, /* interval [1, 2) */
  7808. _MM_MANT_NORM_p5_2, /* interval [0.5, 2) */
  7809. _MM_MANT_NORM_p5_1, /* interval [0.5, 1) */
  7810. _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */
  7811. } _MM_MANTISSA_NORM_ENUM;
  7812. typedef enum
  7813. {
  7814. _MM_MANT_SIGN_src, /* sign = sign(SRC) */
  7815. _MM_MANT_SIGN_zero, /* sign = 0 */
  7816. _MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */
  7817. } _MM_MANTISSA_SIGN_ENUM;
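/* vgetexp* extracts the unbiased exponent of each element as a
   floating-point value; vgetmant* extracts the mantissa normalized to
   the interval selected by an _MM_MANTISSA_NORM_ENUM value, with the
   sign treatment selected by an _MM_MANTISSA_SIGN_ENUM value.  The two
   enums are packed into the builtin's immediate as (sign << 2) | norm.  */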
  7818. #ifdef __OPTIMIZE__
  7819. extern __inline __m128
  7820. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7821. _mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
  7822. {
  7823. return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
  7824. (__v4sf) __B,
  7825. __R);
  7826. }
  7827. extern __inline __m128
  7828. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7829. _mm_mask_getexp_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
  7830. __m128 __B, const int __R)
  7831. {
  7832. return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
  7833. (__v4sf) __B,
  7834. (__v4sf) __W,
  7835. (__mmask8) __U, __R);
  7836. }
  7837. extern __inline __m128
  7838. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7839. _mm_maskz_getexp_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
  7840. const int __R)
  7841. {
  7842. return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
  7843. (__v4sf) __B,
  7844. (__v4sf)
  7845. _mm_setzero_ps (),
  7846. (__mmask8) __U, __R);
  7847. }
  7848. extern __inline __m128d
  7849. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7850. _mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
  7851. {
  7852. return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
  7853. (__v2df) __B,
  7854. __R);
  7855. }
  7856. extern __inline __m128d
  7857. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7858. _mm_mask_getexp_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
  7859. __m128d __B, const int __R)
  7860. {
  7861. return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
  7862. (__v2df) __B,
  7863. (__v2df) __W,
  7864. (__mmask8) __U, __R);
  7865. }
  7866. extern __inline __m128d
  7867. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7868. _mm_maskz_getexp_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
  7869. const int __R)
  7870. {
  7871. return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
  7872. (__v2df) __B,
  7873. (__v2df)
  7874. _mm_setzero_pd (),
  7875. (__mmask8) __U, __R);
  7876. }
  7877. extern __inline __m512
  7878. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7879. _mm512_getexp_round_ps (__m512 __A, const int __R)
  7880. {
  7881. return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
  7882. (__v16sf)
  7883. _mm512_undefined_ps (),
  7884. (__mmask16) -1, __R);
  7885. }
  7886. extern __inline __m512
  7887. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7888. _mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
  7889. const int __R)
  7890. {
  7891. return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
  7892. (__v16sf) __W,
  7893. (__mmask16) __U, __R);
  7894. }
  7895. extern __inline __m512
  7896. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7897. _mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R)
  7898. {
  7899. return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
  7900. (__v16sf)
  7901. _mm512_setzero_ps (),
  7902. (__mmask16) __U, __R);
  7903. }
  7904. extern __inline __m512d
  7905. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7906. _mm512_getexp_round_pd (__m512d __A, const int __R)
  7907. {
  7908. return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
  7909. (__v8df)
  7910. _mm512_undefined_pd (),
  7911. (__mmask8) -1, __R);
  7912. }
  7913. extern __inline __m512d
  7914. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7915. _mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
  7916. const int __R)
  7917. {
  7918. return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
  7919. (__v8df) __W,
  7920. (__mmask8) __U, __R);
  7921. }
  7922. extern __inline __m512d
  7923. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7924. _mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R)
  7925. {
  7926. return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
  7927. (__v8df)
  7928. _mm512_setzero_pd (),
  7929. (__mmask8) __U, __R);
  7930. }
  7931. extern __inline __m512d
  7932. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7933. _mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
  7934. _MM_MANTISSA_SIGN_ENUM __C, const int __R)
  7935. {
  7936. return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
  7937. (__C << 2) | __B,
  7938. _mm512_undefined_pd (),
  7939. (__mmask8) -1, __R);
  7940. }
  7941. extern __inline __m512d
  7942. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7943. _mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
  7944. _MM_MANTISSA_NORM_ENUM __B,
  7945. _MM_MANTISSA_SIGN_ENUM __C, const int __R)
  7946. {
  7947. return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
  7948. (__C << 2) | __B,
  7949. (__v8df) __W, __U,
  7950. __R);
  7951. }
  7952. extern __inline __m512d
  7953. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7954. _mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A,
  7955. _MM_MANTISSA_NORM_ENUM __B,
  7956. _MM_MANTISSA_SIGN_ENUM __C, const int __R)
  7957. {
  7958. return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
  7959. (__C << 2) | __B,
  7960. (__v8df)
  7961. _mm512_setzero_pd (),
  7962. __U, __R);
  7963. }
  7964. extern __inline __m512
  7965. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7966. _mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
  7967. _MM_MANTISSA_SIGN_ENUM __C, const int __R)
  7968. {
  7969. return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
  7970. (__C << 2) | __B,
  7971. _mm512_undefined_ps (),
  7972. (__mmask16) -1, __R);
  7973. }
  7974. extern __inline __m512
  7975. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7976. _mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
  7977. _MM_MANTISSA_NORM_ENUM __B,
  7978. _MM_MANTISSA_SIGN_ENUM __C, const int __R)
  7979. {
  7980. return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
  7981. (__C << 2) | __B,
  7982. (__v16sf) __W, __U,
  7983. __R);
  7984. }
  7985. extern __inline __m512
  7986. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7987. _mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
  7988. _MM_MANTISSA_NORM_ENUM __B,
  7989. _MM_MANTISSA_SIGN_ENUM __C, const int __R)
  7990. {
  7991. return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
  7992. (__C << 2) | __B,
  7993. (__v16sf)
  7994. _mm512_setzero_ps (),
  7995. __U, __R);
  7996. }
  7997. extern __inline __m128d
  7998. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  7999. _mm_getmant_round_sd (__m128d __A, __m128d __B,
  8000. _MM_MANTISSA_NORM_ENUM __C,
  8001. _MM_MANTISSA_SIGN_ENUM __D, const int __R)
  8002. {
  8003. return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
  8004. (__v2df) __B,
  8005. (__D << 2) | __C,
  8006. __R);
  8007. }
  8008. extern __inline __m128d
  8009. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8010. _mm_mask_getmant_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
  8011. __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
  8012. _MM_MANTISSA_SIGN_ENUM __D, const int __R)
  8013. {
  8014. return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
  8015. (__v2df) __B,
  8016. (__D << 2) | __C,
  8017. (__v2df) __W,
  8018. __U, __R);
  8019. }
  8020. extern __inline __m128d
  8021. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8022. _mm_maskz_getmant_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
  8023. _MM_MANTISSA_NORM_ENUM __C,
  8024. _MM_MANTISSA_SIGN_ENUM __D, const int __R)
  8025. {
  8026. return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
  8027. (__v2df) __B,
  8028. (__D << 2) | __C,
  8029. (__v2df)
  8030. _mm_setzero_pd(),
  8031. __U, __R);
  8032. }
  8033. extern __inline __m128
  8034. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8035. _mm_getmant_round_ss (__m128 __A, __m128 __B,
  8036. _MM_MANTISSA_NORM_ENUM __C,
  8037. _MM_MANTISSA_SIGN_ENUM __D, const int __R)
  8038. {
  8039. return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
  8040. (__v4sf) __B,
  8041. (__D << 2) | __C,
  8042. __R);
  8043. }
  8044. extern __inline __m128
  8045. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8046. _mm_mask_getmant_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
  8047. __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
  8048. _MM_MANTISSA_SIGN_ENUM __D, const int __R)
  8049. {
  8050. return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
  8051. (__v4sf) __B,
  8052. (__D << 2) | __C,
  8053. (__v4sf) __W,
  8054. __U, __R);
  8055. }
  8056. extern __inline __m128
  8057. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8058. _mm_maskz_getmant_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
  8059. _MM_MANTISSA_NORM_ENUM __C,
  8060. _MM_MANTISSA_SIGN_ENUM __D, const int __R)
  8061. {
  8062. return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
  8063. (__v4sf) __B,
  8064. (__D << 2) | __C,
  8065. (__v4sf)
  8066. _mm_setzero_ps(),
  8067. __U, __R);
  8068. }
  8069. #else
  8070. #define _mm512_getmant_round_pd(X, B, C, R) \
  8071. ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
  8072. (int)(((C)<<2) | (B)), \
  8073. (__v8df)(__m512d)_mm512_undefined_pd(), \
  8074. (__mmask8)-1,\
  8075. (R)))
  8076. #define _mm512_mask_getmant_round_pd(W, U, X, B, C, R) \
  8077. ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
  8078. (int)(((C)<<2) | (B)), \
  8079. (__v8df)(__m512d)(W), \
  8080. (__mmask8)(U),\
  8081. (R)))
  8082. #define _mm512_maskz_getmant_round_pd(U, X, B, C, R) \
  8083. ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
  8084. (int)(((C)<<2) | (B)), \
  8085. (__v8df)(__m512d)_mm512_setzero_pd(), \
  8086. (__mmask8)(U),\
  8087. (R)))
  8088. #define _mm512_getmant_round_ps(X, B, C, R) \
  8089. ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
  8090. (int)(((C)<<2) | (B)), \
  8091. (__v16sf)(__m512)_mm512_undefined_ps(), \
  8092. (__mmask16)-1,\
  8093. (R)))
  8094. #define _mm512_mask_getmant_round_ps(W, U, X, B, C, R) \
  8095. ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
  8096. (int)(((C)<<2) | (B)), \
  8097. (__v16sf)(__m512)(W), \
  8098. (__mmask16)(U),\
  8099. (R)))
  8100. #define _mm512_maskz_getmant_round_ps(U, X, B, C, R) \
  8101. ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
  8102. (int)(((C)<<2) | (B)), \
  8103. (__v16sf)(__m512)_mm512_setzero_ps(), \
  8104. (__mmask16)(U),\
  8105. (R)))
  8106. #define _mm_getmant_round_sd(X, Y, C, D, R) \
  8107. ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \
  8108. (__v2df)(__m128d)(Y), \
  8109. (int)(((D)<<2) | (C)), \
  8110. (R)))
  8111. #define _mm_mask_getmant_round_sd(W, U, X, Y, C, D, R) \
  8112. ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \
  8113. (__v2df)(__m128d)(Y), \
  8114. (int)(((D)<<2) | (C)), \
  8115. (__v2df)(__m128d)(W), \
  8116. (__mmask8)(U),\
  8117. (R)))
  8118. #define _mm_maskz_getmant_round_sd(U, X, Y, C, D, R) \
  8119. ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \
  8120. (__v2df)(__m128d)(Y), \
  8121. (int)(((D)<<2) | (C)), \
  8122. (__v2df)(__m128d)_mm_setzero_pd(), \
  8123. (__mmask8)(U),\
  8124. (R)))
  8125. #define _mm_getmant_round_ss(X, Y, C, D, R) \
  8126. ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \
  8127. (__v4sf)(__m128)(Y), \
  8128. (int)(((D)<<2) | (C)), \
  8129. (R)))
  8130. #define _mm_mask_getmant_round_ss(W, U, X, Y, C, D, R) \
  8131. ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \
  8132. (__v4sf)(__m128)(Y), \
  8133. (int)(((D)<<2) | (C)), \
  8134. (__v4sf)(__m128)(W), \
  8135. (__mmask8)(U),\
  8136. (R)))
  8137. #define _mm_maskz_getmant_round_ss(U, X, Y, C, D, R) \
  8138. ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \
  8139. (__v4sf)(__m128)(Y), \
  8140. (int)(((D)<<2) | (C)), \
  8141. (__v4sf)(__m128)_mm_setzero_ps(), \
  8142. (__mmask8)(U),\
  8143. (R)))
  8144. #define _mm_getexp_round_ss(A, B, R) \
  8145. ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), R))
8146. #define _mm_mask_getexp_round_ss(W, U, A, B, C) \
8147. ((__m128)__builtin_ia32_getexpss_mask_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4sf)(__m128)(W), (__mmask8)(U), (C)))
8148. #define _mm_maskz_getexp_round_ss(U, A, B, C) \
8149. ((__m128)__builtin_ia32_getexpss_mask_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (C)))
  8150. #define _mm_getexp_round_sd(A, B, R) \
  8151. ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), R))
8152. #define _mm_mask_getexp_round_sd(W, U, A, B, C) \
8153. ((__m128d)__builtin_ia32_getexpsd_mask_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2df)(__m128d)(W), (__mmask8)(U), (C)))
8154. #define _mm_maskz_getexp_round_sd(U, A, B, C) \
8155. ((__m128d)__builtin_ia32_getexpsd_mask_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2df)_mm_setzero_pd(), (__mmask8)(U), (C)))
  8156. #define _mm512_getexp_round_ps(A, R) \
  8157. ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
  8158. (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, R))
  8159. #define _mm512_mask_getexp_round_ps(W, U, A, R) \
  8160. ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
  8161. (__v16sf)(__m512)(W), (__mmask16)(U), R))
  8162. #define _mm512_maskz_getexp_round_ps(U, A, R) \
  8163. ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
  8164. (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), R))
  8165. #define _mm512_getexp_round_pd(A, R) \
  8166. ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
  8167. (__v8df)_mm512_undefined_pd(), (__mmask8)-1, R))
  8168. #define _mm512_mask_getexp_round_pd(W, U, A, R) \
  8169. ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
  8170. (__v8df)(__m512d)(W), (__mmask8)(U), R))
  8171. #define _mm512_maskz_getexp_round_pd(U, A, R) \
  8172. ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
  8173. (__v8df)_mm512_setzero_pd(), (__mmask8)(U), R))
  8174. #endif
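/* Illustrative usage sketch (not part of the original header; the helper
   name is hypothetical and <immintrin.h> is assumed to be included):
   getmant/getexp decompose each element into mantissa and exponent, and
   the getmant immediate packs the sign-control enum in bits 3:2 above the
   normalization interval in bits 1:0, i.e. (__D << 2) | __C as above.  */
#if 0
static inline void
example_frexp_pd (const double *in, double *mant, double *exp8)
{
  __m512d v = _mm512_loadu_pd (in);
  /* Mantissa normalized to [1, 2), keeping the sign of the source.  */
  __m512d m = _mm512_getmant_round_pd (v, _MM_MANT_NORM_1_2,
				       _MM_MANT_SIGN_src,
				       _MM_FROUND_CUR_DIRECTION);
  /* Unbiased exponent as a double, so v == m * 2^e for normal inputs.  */
  __m512d e = _mm512_getexp_round_pd (v, _MM_FROUND_CUR_DIRECTION);
  _mm512_storeu_pd (mant, m);
  _mm512_storeu_pd (exp8, e);
}
#endif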
  8175. #ifdef __OPTIMIZE__
  8176. extern __inline __m512
  8177. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8178. _mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
  8179. {
  8180. return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
  8181. (__v16sf)
  8182. _mm512_undefined_ps (),
  8183. -1, __R);
  8184. }
  8185. extern __inline __m512
  8186. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8187. _mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
  8188. const int __imm, const int __R)
  8189. {
  8190. return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
  8191. (__v16sf) __A,
  8192. (__mmask16) __B, __R);
  8193. }
  8194. extern __inline __m512
  8195. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8196. _mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
  8197. const int __imm, const int __R)
  8198. {
  8199. return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
  8200. __imm,
  8201. (__v16sf)
  8202. _mm512_setzero_ps (),
  8203. (__mmask16) __A, __R);
  8204. }
  8205. extern __inline __m512d
  8206. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8207. _mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
  8208. {
  8209. return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
  8210. (__v8df)
  8211. _mm512_undefined_pd (),
  8212. -1, __R);
  8213. }
  8214. extern __inline __m512d
  8215. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8216. _mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
  8217. __m512d __C, const int __imm, const int __R)
  8218. {
  8219. return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
  8220. (__v8df) __A,
  8221. (__mmask8) __B, __R);
  8222. }
  8223. extern __inline __m512d
  8224. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8225. _mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
  8226. const int __imm, const int __R)
  8227. {
  8228. return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
  8229. __imm,
  8230. (__v8df)
  8231. _mm512_setzero_pd (),
  8232. (__mmask8) __A, __R);
  8233. }
  8234. extern __inline __m128
  8235. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8236. _mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm,
  8237. const int __R)
  8238. {
  8239. return (__m128)
  8240. __builtin_ia32_rndscaless_mask_round ((__v4sf) __A,
  8241. (__v4sf) __B, __imm,
  8242. (__v4sf)
  8243. _mm_setzero_ps (),
  8244. (__mmask8) -1,
  8245. __R);
  8246. }
  8247. extern __inline __m128
  8248. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8249. _mm_mask_roundscale_round_ss (__m128 __A, __mmask8 __B, __m128 __C,
  8250. __m128 __D, const int __imm, const int __R)
  8251. {
  8252. return (__m128)
  8253. __builtin_ia32_rndscaless_mask_round ((__v4sf) __C,
  8254. (__v4sf) __D, __imm,
  8255. (__v4sf) __A,
  8256. (__mmask8) __B,
  8257. __R);
  8258. }
  8259. extern __inline __m128
  8260. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8261. _mm_maskz_roundscale_round_ss (__mmask8 __A, __m128 __B, __m128 __C,
  8262. const int __imm, const int __R)
  8263. {
  8264. return (__m128)
  8265. __builtin_ia32_rndscaless_mask_round ((__v4sf) __B,
  8266. (__v4sf) __C, __imm,
  8267. (__v4sf)
  8268. _mm_setzero_ps (),
  8269. (__mmask8) __A,
  8270. __R);
  8271. }
  8272. extern __inline __m128d
  8273. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8274. _mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
  8275. const int __R)
  8276. {
  8277. return (__m128d)
  8278. __builtin_ia32_rndscalesd_mask_round ((__v2df) __A,
  8279. (__v2df) __B, __imm,
  8280. (__v2df)
  8281. _mm_setzero_pd (),
  8282. (__mmask8) -1,
  8283. __R);
  8284. }
  8285. extern __inline __m128d
  8286. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8287. _mm_mask_roundscale_round_sd (__m128d __A, __mmask8 __B, __m128d __C,
  8288. __m128d __D, const int __imm, const int __R)
  8289. {
  8290. return (__m128d)
  8291. __builtin_ia32_rndscalesd_mask_round ((__v2df) __C,
  8292. (__v2df) __D, __imm,
  8293. (__v2df) __A,
  8294. (__mmask8) __B,
  8295. __R);
  8296. }
  8297. extern __inline __m128d
  8298. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8299. _mm_maskz_roundscale_round_sd (__mmask8 __A, __m128d __B, __m128d __C,
  8300. const int __imm, const int __R)
  8301. {
  8302. return (__m128d)
  8303. __builtin_ia32_rndscalesd_mask_round ((__v2df) __B,
  8304. (__v2df) __C, __imm,
  8305. (__v2df)
  8306. _mm_setzero_pd (),
  8307. (__mmask8) __A,
  8308. __R);
  8309. }
  8310. #else
  8311. #define _mm512_roundscale_round_ps(A, B, R) \
  8312. ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
  8313. (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), R))
  8314. #define _mm512_mask_roundscale_round_ps(A, B, C, D, R) \
  8315. ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \
  8316. (int)(D), \
  8317. (__v16sf)(__m512)(A), \
  8318. (__mmask16)(B), R))
  8319. #define _mm512_maskz_roundscale_round_ps(A, B, C, R) \
  8320. ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \
  8321. (int)(C), \
  8322. (__v16sf)_mm512_setzero_ps(),\
  8323. (__mmask16)(A), R))
  8324. #define _mm512_roundscale_round_pd(A, B, R) \
  8325. ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
  8326. (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), R))
  8327. #define _mm512_mask_roundscale_round_pd(A, B, C, D, R) \
  8328. ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \
  8329. (int)(D), \
  8330. (__v8df)(__m512d)(A), \
  8331. (__mmask8)(B), R))
  8332. #define _mm512_maskz_roundscale_round_pd(A, B, C, R) \
  8333. ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \
  8334. (int)(C), \
  8335. (__v8df)_mm512_setzero_pd(),\
  8336. (__mmask8)(A), R))
  8337. #define _mm_roundscale_round_ss(A, B, I, R) \
  8338. ((__m128) \
  8339. __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A), \
  8340. (__v4sf) (__m128) (B), \
  8341. (int) (I), \
  8342. (__v4sf) _mm_setzero_ps (), \
  8343. (__mmask8) (-1), \
  8344. (int) (R)))
  8345. #define _mm_mask_roundscale_round_ss(A, U, B, C, I, R) \
  8346. ((__m128) \
  8347. __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (B), \
  8348. (__v4sf) (__m128) (C), \
  8349. (int) (I), \
  8350. (__v4sf) (__m128) (A), \
  8351. (__mmask8) (U), \
  8352. (int) (R)))
  8353. #define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \
  8354. ((__m128) \
  8355. __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A), \
  8356. (__v4sf) (__m128) (B), \
  8357. (int) (I), \
  8358. (__v4sf) _mm_setzero_ps (), \
  8359. (__mmask8) (U), \
  8360. (int) (R)))
  8361. #define _mm_roundscale_round_sd(A, B, I, R) \
  8362. ((__m128d) \
  8363. __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A), \
  8364. (__v2df) (__m128d) (B), \
  8365. (int) (I), \
  8366. (__v2df) _mm_setzero_pd (), \
  8367. (__mmask8) (-1), \
  8368. (int) (R)))
  8369. #define _mm_mask_roundscale_round_sd(A, U, B, C, I, R) \
  8370. ((__m128d) \
  8371. __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (B), \
  8372. (__v2df) (__m128d) (C), \
  8373. (int) (I), \
  8374. (__v2df) (__m128d) (A), \
  8375. (__mmask8) (U), \
  8376. (int) (R)))
  8377. #define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \
  8378. ((__m128d) \
  8379. __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A), \
  8380. (__v2df) (__m128d) (B), \
  8381. (int) (I), \
  8382. (__v2df) _mm_setzero_pd (), \
  8383. (__mmask8) (U), \
  8384. (int) (R)))
  8385. #endif
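/* Illustrative usage sketch (helper name hypothetical, <immintrin.h>
   assumed): the roundscale immediate keeps M fraction bits, with M in
   imm8 bits 7:4 and the rounding mode in the low bits; the final
   argument is the usual rounding/SAE override.  */
#if 0
static inline __m512
example_round_to_quarters (__m512 x)
{
  /* M = 2, round to nearest: snap each element to a multiple of 0.25.  */
  return _mm512_roundscale_round_ps (x, (2 << 4) | _MM_FROUND_TO_NEAREST_INT,
				     _MM_FROUND_CUR_DIRECTION);
}
#endif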
  8386. extern __inline __m512
  8387. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8388. _mm512_floor_ps (__m512 __A)
  8389. {
  8390. return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
  8391. _MM_FROUND_FLOOR,
  8392. (__v16sf) __A, -1,
  8393. _MM_FROUND_CUR_DIRECTION);
  8394. }
  8395. extern __inline __m512d
  8396. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8397. _mm512_floor_pd (__m512d __A)
  8398. {
  8399. return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
  8400. _MM_FROUND_FLOOR,
  8401. (__v8df) __A, -1,
  8402. _MM_FROUND_CUR_DIRECTION);
  8403. }
  8404. extern __inline __m512
  8405. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8406. _mm512_ceil_ps (__m512 __A)
  8407. {
  8408. return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
  8409. _MM_FROUND_CEIL,
  8410. (__v16sf) __A, -1,
  8411. _MM_FROUND_CUR_DIRECTION);
  8412. }
  8413. extern __inline __m512d
  8414. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8415. _mm512_ceil_pd (__m512d __A)
  8416. {
  8417. return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
  8418. _MM_FROUND_CEIL,
  8419. (__v8df) __A, -1,
  8420. _MM_FROUND_CUR_DIRECTION);
  8421. }
  8422. extern __inline __m512
  8423. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8424. _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
  8425. {
  8426. return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
  8427. _MM_FROUND_FLOOR,
  8428. (__v16sf) __W, __U,
  8429. _MM_FROUND_CUR_DIRECTION);
  8430. }
  8431. extern __inline __m512d
  8432. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8433. _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
  8434. {
  8435. return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
  8436. _MM_FROUND_FLOOR,
  8437. (__v8df) __W, __U,
  8438. _MM_FROUND_CUR_DIRECTION);
  8439. }
  8440. extern __inline __m512
  8441. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8442. _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
  8443. {
  8444. return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
  8445. _MM_FROUND_CEIL,
  8446. (__v16sf) __W, __U,
  8447. _MM_FROUND_CUR_DIRECTION);
  8448. }
  8449. extern __inline __m512d
  8450. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8451. _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
  8452. {
  8453. return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
  8454. _MM_FROUND_CEIL,
  8455. (__v8df) __W, __U,
  8456. _MM_FROUND_CUR_DIRECTION);
  8457. }
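/* Illustrative usage sketch (helper name hypothetical, <immintrin.h>
   assumed): the masked floor/ceil variants copy from __W wherever the
   mask bit is clear, so passing the source as __W rounds only the
   selected lanes.  */
#if 0
static inline __m512d
example_ceil_selected_lanes (__m512d x, __mmask8 which)
{
  /* Round up the lanes selected by WHICH; all other lanes keep X.  */
  return _mm512_mask_ceil_pd (x, which, x);
}
#endif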
  8458. #ifdef __OPTIMIZE__
  8459. extern __inline __m512i
  8460. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8461. _mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
  8462. {
  8463. return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
  8464. (__v16si) __B, __imm,
  8465. (__v16si)
  8466. _mm512_undefined_epi32 (),
  8467. (__mmask16) -1);
  8468. }
  8469. extern __inline __m512i
  8470. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8471. _mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
  8472. __m512i __B, const int __imm)
  8473. {
  8474. return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
  8475. (__v16si) __B, __imm,
  8476. (__v16si) __W,
  8477. (__mmask16) __U);
  8478. }
  8479. extern __inline __m512i
  8480. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8481. _mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
  8482. const int __imm)
  8483. {
  8484. return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
  8485. (__v16si) __B, __imm,
  8486. (__v16si)
  8487. _mm512_setzero_si512 (),
  8488. (__mmask16) __U);
  8489. }
  8490. extern __inline __m512i
  8491. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8492. _mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
  8493. {
  8494. return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
  8495. (__v8di) __B, __imm,
  8496. (__v8di)
  8497. _mm512_undefined_epi32 (),
  8498. (__mmask8) -1);
  8499. }
  8500. extern __inline __m512i
  8501. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8502. _mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
  8503. __m512i __B, const int __imm)
  8504. {
  8505. return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
  8506. (__v8di) __B, __imm,
  8507. (__v8di) __W,
  8508. (__mmask8) __U);
  8509. }
  8510. extern __inline __m512i
  8511. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8512. _mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
  8513. const int __imm)
  8514. {
  8515. return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
  8516. (__v8di) __B, __imm,
  8517. (__v8di)
  8518. _mm512_setzero_si512 (),
  8519. (__mmask8) __U);
  8520. }
  8521. #else
  8522. #define _mm512_alignr_epi32(X, Y, C) \
  8523. ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
  8524. (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_undefined_epi32 (),\
  8525. (__mmask16)-1))
  8526. #define _mm512_mask_alignr_epi32(W, U, X, Y, C) \
  8527. ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
  8528. (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)(W), \
  8529. (__mmask16)(U)))
  8530. #define _mm512_maskz_alignr_epi32(U, X, Y, C) \
  8531. ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
  8532. (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_setzero_si512 (),\
  8533. (__mmask16)(U)))
  8534. #define _mm512_alignr_epi64(X, Y, C) \
  8535. ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
  8536. (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_undefined_epi32 (), \
  8537. (__mmask8)-1))
  8538. #define _mm512_mask_alignr_epi64(W, U, X, Y, C) \
  8539. ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
  8540. (__v8di)(__m512i)(Y), (int)(C), (__v8di)(__m512i)(W), (__mmask8)(U)))
  8541. #define _mm512_maskz_alignr_epi64(U, X, Y, C) \
  8542. ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
  8543. (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_setzero_si512 (),\
  8544. (__mmask8)(U)))
  8545. #endif
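/* Illustrative usage sketch (helper name hypothetical, <immintrin.h>
   assumed): valignd/valignq concatenate the two sources (first operand
   on top), shift right by the immediate number of elements and keep the
   low 512 bits.  */
#if 0
static inline __m512i
example_shift_in_one_dword (__m512i hi, __m512i lo)
{
  /* Result is lo[1..15] followed by hi[0].  */
  return _mm512_alignr_epi32 (hi, lo, 1);
}
#endif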
  8546. extern __inline __mmask16
  8547. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8548. _mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
  8549. {
  8550. return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
  8551. (__v16si) __B,
  8552. (__mmask16) -1);
  8553. }
  8554. extern __inline __mmask16
  8555. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8556. _mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
  8557. {
  8558. return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
  8559. (__v16si) __B, __U);
  8560. }
  8561. extern __inline __mmask8
  8562. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8563. _mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
  8564. {
  8565. return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
  8566. (__v8di) __B, __U);
  8567. }
  8568. extern __inline __mmask8
  8569. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8570. _mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
  8571. {
  8572. return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
  8573. (__v8di) __B,
  8574. (__mmask8) -1);
  8575. }
  8576. extern __inline __mmask16
  8577. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8578. _mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
  8579. {
  8580. return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
  8581. (__v16si) __B,
  8582. (__mmask16) -1);
  8583. }
  8584. extern __inline __mmask16
  8585. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8586. _mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
  8587. {
  8588. return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
  8589. (__v16si) __B, __U);
  8590. }
  8591. extern __inline __mmask8
  8592. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8593. _mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
  8594. {
  8595. return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
  8596. (__v8di) __B, __U);
  8597. }
  8598. extern __inline __mmask8
  8599. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8600. _mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
  8601. {
  8602. return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
  8603. (__v8di) __B,
  8604. (__mmask8) -1);
  8605. }
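/* Illustrative usage sketch (helper name hypothetical, <immintrin.h>
   assumed): the comparison intrinsics return plain mask values, so their
   results combine with ordinary bit operations and feed back into masked
   operations such as _mm512_maskz_mov_epi32.  */
#if 0
static inline __m512i
example_clamp_negatives_to_zero (__m512i v)
{
  /* Lanes where 0 > v, i.e. the element is negative.  */
  __mmask16 neg = _mm512_cmpgt_epi32_mask (_mm512_setzero_si512 (), v);
  /* Keep the remaining lanes, zero the negative ones.  */
  return _mm512_maskz_mov_epi32 ((__mmask16) ~neg, v);
}
#endif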
  8606. extern __inline __mmask16
  8607. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8608. _mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y)
  8609. {
  8610. return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
  8611. (__v16si) __Y, 5,
  8612. (__mmask16) -1);
  8613. }
  8614. extern __inline __mmask16
  8615. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8616. _mm512_mask_cmpge_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
  8617. {
  8618. return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
  8619. (__v16si) __Y, 5,
  8620. (__mmask16) __M);
  8621. }
  8622. extern __inline __mmask16
  8623. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8624. _mm512_mask_cmpge_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
  8625. {
  8626. return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
  8627. (__v16si) __Y, 5,
  8628. (__mmask16) __M);
  8629. }
  8630. extern __inline __mmask16
  8631. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8632. _mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y)
  8633. {
  8634. return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
  8635. (__v16si) __Y, 5,
  8636. (__mmask16) -1);
  8637. }
  8638. extern __inline __mmask8
  8639. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8640. _mm512_mask_cmpge_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
  8641. {
  8642. return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
  8643. (__v8di) __Y, 5,
  8644. (__mmask8) __M);
  8645. }
  8646. extern __inline __mmask8
  8647. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8648. _mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y)
  8649. {
  8650. return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
  8651. (__v8di) __Y, 5,
  8652. (__mmask8) -1);
  8653. }
  8654. extern __inline __mmask8
  8655. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8656. _mm512_mask_cmpge_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
  8657. {
  8658. return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
  8659. (__v8di) __Y, 5,
  8660. (__mmask8) __M);
  8661. }
  8662. extern __inline __mmask8
  8663. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8664. _mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y)
  8665. {
  8666. return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
  8667. (__v8di) __Y, 5,
  8668. (__mmask8) -1);
  8669. }
  8670. extern __inline __mmask16
  8671. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8672. _mm512_mask_cmple_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
  8673. {
  8674. return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
  8675. (__v16si) __Y, 2,
  8676. (__mmask16) __M);
  8677. }
  8678. extern __inline __mmask16
  8679. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8680. _mm512_cmple_epi32_mask (__m512i __X, __m512i __Y)
  8681. {
  8682. return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
  8683. (__v16si) __Y, 2,
  8684. (__mmask16) -1);
  8685. }
  8686. extern __inline __mmask16
  8687. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8688. _mm512_mask_cmple_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
  8689. {
  8690. return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
  8691. (__v16si) __Y, 2,
  8692. (__mmask16) __M);
  8693. }
  8694. extern __inline __mmask16
  8695. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8696. _mm512_cmple_epu32_mask (__m512i __X, __m512i __Y)
  8697. {
  8698. return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
  8699. (__v16si) __Y, 2,
  8700. (__mmask16) -1);
  8701. }
  8702. extern __inline __mmask8
  8703. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8704. _mm512_mask_cmple_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
  8705. {
  8706. return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
  8707. (__v8di) __Y, 2,
  8708. (__mmask8) __M);
  8709. }
  8710. extern __inline __mmask8
  8711. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8712. _mm512_cmple_epi64_mask (__m512i __X, __m512i __Y)
  8713. {
  8714. return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
  8715. (__v8di) __Y, 2,
  8716. (__mmask8) -1);
  8717. }
  8718. extern __inline __mmask8
  8719. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8720. _mm512_mask_cmple_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
  8721. {
  8722. return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
  8723. (__v8di) __Y, 2,
  8724. (__mmask8) __M);
  8725. }
  8726. extern __inline __mmask8
  8727. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8728. _mm512_cmple_epu64_mask (__m512i __X, __m512i __Y)
  8729. {
  8730. return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
  8731. (__v8di) __Y, 2,
  8732. (__mmask8) -1);
  8733. }
  8734. extern __inline __mmask16
  8735. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8736. _mm512_mask_cmplt_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
  8737. {
  8738. return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
  8739. (__v16si) __Y, 1,
  8740. (__mmask16) __M);
  8741. }
  8742. extern __inline __mmask16
  8743. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8744. _mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y)
  8745. {
  8746. return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
  8747. (__v16si) __Y, 1,
  8748. (__mmask16) -1);
  8749. }
  8750. extern __inline __mmask16
  8751. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8752. _mm512_mask_cmplt_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
  8753. {
  8754. return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
  8755. (__v16si) __Y, 1,
  8756. (__mmask16) __M);
  8757. }
  8758. extern __inline __mmask16
  8759. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8760. _mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y)
  8761. {
  8762. return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
  8763. (__v16si) __Y, 1,
  8764. (__mmask16) -1);
  8765. }
  8766. extern __inline __mmask8
  8767. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8768. _mm512_mask_cmplt_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
  8769. {
  8770. return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
  8771. (__v8di) __Y, 1,
  8772. (__mmask8) __M);
  8773. }
  8774. extern __inline __mmask8
  8775. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8776. _mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y)
  8777. {
  8778. return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
  8779. (__v8di) __Y, 1,
  8780. (__mmask8) -1);
  8781. }
  8782. extern __inline __mmask8
  8783. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8784. _mm512_mask_cmplt_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
  8785. {
  8786. return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
  8787. (__v8di) __Y, 1,
  8788. (__mmask8) __M);
  8789. }
  8790. extern __inline __mmask8
  8791. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8792. _mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y)
  8793. {
  8794. return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
  8795. (__v8di) __Y, 1,
  8796. (__mmask8) -1);
  8797. }
  8798. extern __inline __mmask16
  8799. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8800. _mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y)
  8801. {
  8802. return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
  8803. (__v16si) __Y, 4,
  8804. (__mmask16) -1);
  8805. }
  8806. extern __inline __mmask16
  8807. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8808. _mm512_mask_cmpneq_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
  8809. {
  8810. return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
  8811. (__v16si) __Y, 4,
  8812. (__mmask16) __M);
  8813. }
  8814. extern __inline __mmask16
  8815. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8816. _mm512_mask_cmpneq_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
  8817. {
  8818. return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
  8819. (__v16si) __Y, 4,
  8820. (__mmask16) __M);
  8821. }
  8822. extern __inline __mmask16
  8823. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8824. _mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
  8825. {
  8826. return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
  8827. (__v16si) __Y, 4,
  8828. (__mmask16) -1);
  8829. }
  8830. extern __inline __mmask8
  8831. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8832. _mm512_mask_cmpneq_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
  8833. {
  8834. return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
  8835. (__v8di) __Y, 4,
  8836. (__mmask8) __M);
  8837. }
  8838. extern __inline __mmask8
  8839. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8840. _mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y)
  8841. {
  8842. return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
  8843. (__v8di) __Y, 4,
  8844. (__mmask8) -1);
  8845. }
  8846. extern __inline __mmask8
  8847. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8848. _mm512_mask_cmpneq_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
  8849. {
  8850. return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
  8851. (__v8di) __Y, 4,
  8852. (__mmask8) __M);
  8853. }
  8854. extern __inline __mmask8
  8855. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8856. _mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
  8857. {
  8858. return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
  8859. (__v8di) __Y, 4,
  8860. (__mmask8) -1);
  8861. }
  8862. #define _MM_CMPINT_EQ 0x0
  8863. #define _MM_CMPINT_LT 0x1
  8864. #define _MM_CMPINT_LE 0x2
  8865. #define _MM_CMPINT_UNUSED 0x3
  8866. #define _MM_CMPINT_NE 0x4
  8867. #define _MM_CMPINT_NLT 0x5
  8868. #define _MM_CMPINT_GE 0x5
  8869. #define _MM_CMPINT_NLE 0x6
  8870. #define _MM_CMPINT_GT 0x6
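/* Illustrative usage sketch (helper name hypothetical, <immintrin.h>
   assumed): the fixed comparisons above are the generic _mm512_cmp_*_mask
   intrinsics below specialised on these predicate codes (e.g. 5 is
   _MM_CMPINT_GE and 2 is _MM_CMPINT_LE).  */
#if 0
static inline __mmask16
example_in_range_mask (__m512i v, __m512i lo, __m512i hi)
{
  /* Set bit i when lo[i] <= v[i] <= hi[i].  */
  __mmask16 ge = _mm512_cmp_epi32_mask (v, lo, _MM_CMPINT_GE);
  __mmask16 le = _mm512_cmp_epi32_mask (v, hi, _MM_CMPINT_LE);
  return ge & le;
}
#endif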
  8871. #ifdef __OPTIMIZE__
  8872. extern __inline __mmask16
  8873. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8874. _kshiftli_mask16 (__mmask16 __A, unsigned int __B)
  8875. {
  8876. return (__mmask16) __builtin_ia32_kshiftlihi ((__mmask16) __A,
  8877. (__mmask8) __B);
  8878. }
  8879. extern __inline __mmask16
  8880. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8881. _kshiftri_mask16 (__mmask16 __A, unsigned int __B)
  8882. {
  8883. return (__mmask16) __builtin_ia32_kshiftrihi ((__mmask16) __A,
  8884. (__mmask8) __B);
  8885. }
  8886. extern __inline __mmask8
  8887. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8888. _mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
  8889. {
  8890. return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
  8891. (__v8di) __Y, __P,
  8892. (__mmask8) -1);
  8893. }
  8894. extern __inline __mmask16
  8895. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8896. _mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
  8897. {
  8898. return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
  8899. (__v16si) __Y, __P,
  8900. (__mmask16) -1);
  8901. }
  8902. extern __inline __mmask8
  8903. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8904. _mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
  8905. {
  8906. return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
  8907. (__v8di) __Y, __P,
  8908. (__mmask8) -1);
  8909. }
  8910. extern __inline __mmask16
  8911. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8912. _mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
  8913. {
  8914. return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
  8915. (__v16si) __Y, __P,
  8916. (__mmask16) -1);
  8917. }
  8918. extern __inline __mmask8
  8919. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8920. _mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
  8921. const int __R)
  8922. {
  8923. return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
  8924. (__v8df) __Y, __P,
  8925. (__mmask8) -1, __R);
  8926. }
  8927. extern __inline __mmask16
  8928. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8929. _mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R)
  8930. {
  8931. return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
  8932. (__v16sf) __Y, __P,
  8933. (__mmask16) -1, __R);
  8934. }
  8935. extern __inline __mmask8
  8936. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8937. _mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
  8938. const int __P)
  8939. {
  8940. return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
  8941. (__v8di) __Y, __P,
  8942. (__mmask8) __U);
  8943. }
  8944. extern __inline __mmask16
  8945. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8946. _mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
  8947. const int __P)
  8948. {
  8949. return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
  8950. (__v16si) __Y, __P,
  8951. (__mmask16) __U);
  8952. }
  8953. extern __inline __mmask8
  8954. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8955. _mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
  8956. const int __P)
  8957. {
  8958. return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
  8959. (__v8di) __Y, __P,
  8960. (__mmask8) __U);
  8961. }
  8962. extern __inline __mmask16
  8963. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8964. _mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
  8965. const int __P)
  8966. {
  8967. return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
  8968. (__v16si) __Y, __P,
  8969. (__mmask16) __U);
  8970. }
  8971. extern __inline __mmask8
  8972. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8973. _mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
  8974. const int __P, const int __R)
  8975. {
  8976. return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
  8977. (__v8df) __Y, __P,
  8978. (__mmask8) __U, __R);
  8979. }
  8980. extern __inline __mmask16
  8981. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8982. _mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
  8983. const int __P, const int __R)
  8984. {
  8985. return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
  8986. (__v16sf) __Y, __P,
  8987. (__mmask16) __U, __R);
  8988. }
  8989. extern __inline __mmask8
  8990. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8991. _mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
  8992. {
  8993. return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
  8994. (__v2df) __Y, __P,
  8995. (__mmask8) -1, __R);
  8996. }
  8997. extern __inline __mmask8
  8998. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  8999. _mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
  9000. const int __P, const int __R)
  9001. {
  9002. return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
  9003. (__v2df) __Y, __P,
  9004. (__mmask8) __M, __R);
  9005. }
  9006. extern __inline __mmask8
  9007. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9008. _mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
  9009. {
  9010. return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
  9011. (__v4sf) __Y, __P,
  9012. (__mmask8) -1, __R);
  9013. }
  9014. extern __inline __mmask8
  9015. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9016. _mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
  9017. const int __P, const int __R)
  9018. {
  9019. return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
  9020. (__v4sf) __Y, __P,
  9021. (__mmask8) __M, __R);
  9022. }
  9023. #else
  9024. #define _kshiftli_mask16(X, Y) \
  9025. ((__mmask16) __builtin_ia32_kshiftlihi ((__mmask16)(X), (__mmask8)(Y)))
  9026. #define _kshiftri_mask16(X, Y) \
  9027. ((__mmask16) __builtin_ia32_kshiftrihi ((__mmask16)(X), (__mmask8)(Y)))
  9028. #define _mm512_cmp_epi64_mask(X, Y, P) \
  9029. ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
  9030. (__v8di)(__m512i)(Y), (int)(P),\
  9031. (__mmask8)-1))
  9032. #define _mm512_cmp_epi32_mask(X, Y, P) \
  9033. ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
  9034. (__v16si)(__m512i)(Y), (int)(P), \
  9035. (__mmask16)-1))
  9036. #define _mm512_cmp_epu64_mask(X, Y, P) \
  9037. ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
  9038. (__v8di)(__m512i)(Y), (int)(P),\
  9039. (__mmask8)-1))
  9040. #define _mm512_cmp_epu32_mask(X, Y, P) \
  9041. ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
  9042. (__v16si)(__m512i)(Y), (int)(P), \
  9043. (__mmask16)-1))
  9044. #define _mm512_cmp_round_pd_mask(X, Y, P, R) \
  9045. ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
  9046. (__v8df)(__m512d)(Y), (int)(P),\
  9047. (__mmask8)-1, R))
  9048. #define _mm512_cmp_round_ps_mask(X, Y, P, R) \
  9049. ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
  9050. (__v16sf)(__m512)(Y), (int)(P),\
  9051. (__mmask16)-1, R))
  9052. #define _mm512_mask_cmp_epi64_mask(M, X, Y, P) \
  9053. ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
  9054. (__v8di)(__m512i)(Y), (int)(P),\
  9055. (__mmask8)(M)))
  9056. #define _mm512_mask_cmp_epi32_mask(M, X, Y, P) \
  9057. ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
  9058. (__v16si)(__m512i)(Y), (int)(P), \
  9059. (__mmask16)(M)))
  9060. #define _mm512_mask_cmp_epu64_mask(M, X, Y, P) \
  9061. ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
  9062. (__v8di)(__m512i)(Y), (int)(P),\
  9063. (__mmask8)(M)))
  9064. #define _mm512_mask_cmp_epu32_mask(M, X, Y, P) \
  9065. ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
  9066. (__v16si)(__m512i)(Y), (int)(P), \
  9067. (__mmask16)(M)))
  9068. #define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R) \
  9069. ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
  9070. (__v8df)(__m512d)(Y), (int)(P),\
  9071. (__mmask8)(M), R))
  9072. #define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R) \
  9073. ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
  9074. (__v16sf)(__m512)(Y), (int)(P),\
  9075. (__mmask16)(M), R))
  9076. #define _mm_cmp_round_sd_mask(X, Y, P, R) \
  9077. ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
  9078. (__v2df)(__m128d)(Y), (int)(P),\
  9079. (__mmask8)-1, R))
  9080. #define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
  9081. ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
  9082. (__v2df)(__m128d)(Y), (int)(P),\
9083. (__mmask8)(M), R))
  9084. #define _mm_cmp_round_ss_mask(X, Y, P, R) \
  9085. ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
  9086. (__v4sf)(__m128)(Y), (int)(P), \
  9087. (__mmask8)-1, R))
  9088. #define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
  9089. ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
  9090. (__v4sf)(__m128)(Y), (int)(P), \
9091. (__mmask8)(M), R))
  9092. #endif
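/* Illustrative usage sketch (helper name hypothetical, <immintrin.h>
   assumed): the mask-register shifts operate on the 16-bit predicate
   value itself, which is useful for repositioning per-lane masks without
   a trip through general registers.  */
#if 0
static inline __mmask16
example_shift_mask_up (__mmask16 m)
{
  /* Move every set bit four lanes up; bits shifted past lane 15 are lost.
     The shift count must be a compile-time constant.  */
  return _kshiftli_mask16 (m, 4);
}
#endif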
  9093. #ifdef __OPTIMIZE__
  9094. extern __inline __m512
  9095. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9096. _mm512_i32gather_ps (__m512i __index, void const *__addr, int __scale)
  9097. {
  9098. __m512 __v1_old = _mm512_undefined_ps ();
  9099. __mmask16 __mask = 0xFFFF;
  9100. return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
  9101. __addr,
  9102. (__v16si) __index,
  9103. __mask, __scale);
  9104. }
  9105. extern __inline __m512
  9106. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9107. _mm512_mask_i32gather_ps (__m512 __v1_old, __mmask16 __mask,
  9108. __m512i __index, void const *__addr, int __scale)
  9109. {
  9110. return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
  9111. __addr,
  9112. (__v16si) __index,
  9113. __mask, __scale);
  9114. }
  9115. extern __inline __m512d
  9116. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9117. _mm512_i32gather_pd (__m256i __index, void const *__addr, int __scale)
  9118. {
  9119. __m512d __v1_old = _mm512_undefined_pd ();
  9120. __mmask8 __mask = 0xFF;
  9121. return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
  9122. __addr,
  9123. (__v8si) __index, __mask,
  9124. __scale);
  9125. }
  9126. extern __inline __m512d
  9127. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9128. _mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
  9129. __m256i __index, void const *__addr, int __scale)
  9130. {
  9131. return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
  9132. __addr,
  9133. (__v8si) __index,
  9134. __mask, __scale);
  9135. }
  9136. extern __inline __m256
  9137. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9138. _mm512_i64gather_ps (__m512i __index, void const *__addr, int __scale)
  9139. {
  9140. __m256 __v1_old = _mm256_undefined_ps ();
  9141. __mmask8 __mask = 0xFF;
  9142. return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
  9143. __addr,
  9144. (__v8di) __index, __mask,
  9145. __scale);
  9146. }
  9147. extern __inline __m256
  9148. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9149. _mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
  9150. __m512i __index, void const *__addr, int __scale)
  9151. {
  9152. return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
  9153. __addr,
  9154. (__v8di) __index,
  9155. __mask, __scale);
  9156. }
  9157. extern __inline __m512d
  9158. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9159. _mm512_i64gather_pd (__m512i __index, void const *__addr, int __scale)
  9160. {
  9161. __m512d __v1_old = _mm512_undefined_pd ();
  9162. __mmask8 __mask = 0xFF;
  9163. return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
  9164. __addr,
  9165. (__v8di) __index, __mask,
  9166. __scale);
  9167. }
  9168. extern __inline __m512d
  9169. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9170. _mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
  9171. __m512i __index, void const *__addr, int __scale)
  9172. {
  9173. return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
  9174. __addr,
  9175. (__v8di) __index,
  9176. __mask, __scale);
  9177. }
  9178. extern __inline __m512i
  9179. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9180. _mm512_i32gather_epi32 (__m512i __index, void const *__addr, int __scale)
  9181. {
  9182. __m512i __v1_old = _mm512_undefined_epi32 ();
  9183. __mmask16 __mask = 0xFFFF;
  9184. return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
  9185. __addr,
  9186. (__v16si) __index,
  9187. __mask, __scale);
  9188. }
  9189. extern __inline __m512i
  9190. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9191. _mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
  9192. __m512i __index, void const *__addr, int __scale)
  9193. {
  9194. return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
  9195. __addr,
  9196. (__v16si) __index,
  9197. __mask, __scale);
  9198. }
  9199. extern __inline __m512i
  9200. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9201. _mm512_i32gather_epi64 (__m256i __index, void const *__addr, int __scale)
  9202. {
  9203. __m512i __v1_old = _mm512_undefined_epi32 ();
  9204. __mmask8 __mask = 0xFF;
  9205. return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
  9206. __addr,
  9207. (__v8si) __index, __mask,
  9208. __scale);
  9209. }
  9210. extern __inline __m512i
  9211. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9212. _mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
  9213. __m256i __index, void const *__addr,
  9214. int __scale)
  9215. {
  9216. return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
  9217. __addr,
  9218. (__v8si) __index,
  9219. __mask, __scale);
  9220. }
  9221. extern __inline __m256i
  9222. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9223. _mm512_i64gather_epi32 (__m512i __index, void const *__addr, int __scale)
  9224. {
  9225. __m256i __v1_old = _mm256_undefined_si256 ();
  9226. __mmask8 __mask = 0xFF;
  9227. return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
  9228. __addr,
  9229. (__v8di) __index,
  9230. __mask, __scale);
  9231. }
  9232. extern __inline __m256i
  9233. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9234. _mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
  9235. __m512i __index, void const *__addr, int __scale)
  9236. {
  9237. return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
  9238. __addr,
  9239. (__v8di) __index,
  9240. __mask, __scale);
  9241. }
  9242. extern __inline __m512i
  9243. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9244. _mm512_i64gather_epi64 (__m512i __index, void const *__addr, int __scale)
  9245. {
  9246. __m512i __v1_old = _mm512_undefined_epi32 ();
  9247. __mmask8 __mask = 0xFF;
  9248. return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
  9249. __addr,
  9250. (__v8di) __index, __mask,
  9251. __scale);
  9252. }
  9253. extern __inline __m512i
  9254. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9255. _mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
  9256. __m512i __index, void const *__addr,
  9257. int __scale)
  9258. {
  9259. return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
  9260. __addr,
  9261. (__v8di) __index,
  9262. __mask, __scale);
  9263. }
  9264. extern __inline void
  9265. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9266. _mm512_i32scatter_ps (void *__addr, __m512i __index, __m512 __v1, int __scale)
  9267. {
  9268. __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF,
  9269. (__v16si) __index, (__v16sf) __v1, __scale);
  9270. }
  9271. extern __inline void
  9272. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9273. _mm512_mask_i32scatter_ps (void *__addr, __mmask16 __mask,
  9274. __m512i __index, __m512 __v1, int __scale)
  9275. {
  9276. __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index,
  9277. (__v16sf) __v1, __scale);
  9278. }
  9279. extern __inline void
  9280. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9281. _mm512_i32scatter_pd (void *__addr, __m256i __index, __m512d __v1,
  9282. int __scale)
  9283. {
  9284. __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
  9285. (__v8si) __index, (__v8df) __v1, __scale);
  9286. }
  9287. extern __inline void
  9288. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9289. _mm512_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
  9290. __m256i __index, __m512d __v1, int __scale)
  9291. {
  9292. __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
  9293. (__v8df) __v1, __scale);
  9294. }
  9295. extern __inline void
  9296. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9297. _mm512_i64scatter_ps (void *__addr, __m512i __index, __m256 __v1, int __scale)
  9298. {
  9299. __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF,
  9300. (__v8di) __index, (__v8sf) __v1, __scale);
  9301. }
  9302. extern __inline void
  9303. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9304. _mm512_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
  9305. __m512i __index, __m256 __v1, int __scale)
  9306. {
  9307. __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index,
  9308. (__v8sf) __v1, __scale);
  9309. }
  9310. extern __inline void
  9311. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9312. _mm512_i64scatter_pd (void *__addr, __m512i __index, __m512d __v1,
  9313. int __scale)
  9314. {
  9315. __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
  9316. (__v8di) __index, (__v8df) __v1, __scale);
  9317. }
  9318. extern __inline void
  9319. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9320. _mm512_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
  9321. __m512i __index, __m512d __v1, int __scale)
  9322. {
  9323. __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
  9324. (__v8df) __v1, __scale);
  9325. }
  9326. extern __inline void
  9327. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9328. _mm512_i32scatter_epi32 (void *__addr, __m512i __index,
  9329. __m512i __v1, int __scale)
  9330. {
  9331. __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF,
  9332. (__v16si) __index, (__v16si) __v1, __scale);
  9333. }
  9334. extern __inline void
  9335. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9336. _mm512_mask_i32scatter_epi32 (void *__addr, __mmask16 __mask,
  9337. __m512i __index, __m512i __v1, int __scale)
  9338. {
  9339. __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index,
  9340. (__v16si) __v1, __scale);
  9341. }
  9342. extern __inline void
  9343. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9344. _mm512_i32scatter_epi64 (void *__addr, __m256i __index,
  9345. __m512i __v1, int __scale)
  9346. {
  9347. __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF,
  9348. (__v8si) __index, (__v8di) __v1, __scale);
  9349. }
  9350. extern __inline void
  9351. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9352. _mm512_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
  9353. __m256i __index, __m512i __v1, int __scale)
  9354. {
  9355. __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index,
  9356. (__v8di) __v1, __scale);
  9357. }
  9358. extern __inline void
  9359. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9360. _mm512_i64scatter_epi32 (void *__addr, __m512i __index,
  9361. __m256i __v1, int __scale)
  9362. {
  9363. __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF,
  9364. (__v8di) __index, (__v8si) __v1, __scale);
  9365. }
  9366. extern __inline void
  9367. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9368. _mm512_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
  9369. __m512i __index, __m256i __v1, int __scale)
  9370. {
  9371. __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index,
  9372. (__v8si) __v1, __scale);
  9373. }
  9374. extern __inline void
  9375. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9376. _mm512_i64scatter_epi64 (void *__addr, __m512i __index,
  9377. __m512i __v1, int __scale)
  9378. {
  9379. __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF,
  9380. (__v8di) __index, (__v8di) __v1, __scale);
  9381. }
  9382. extern __inline void
  9383. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9384. _mm512_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
  9385. __m512i __index, __m512i __v1, int __scale)
  9386. {
  9387. __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index,
  9388. (__v8di) __v1, __scale);
  9389. }
  9390. #else
  9391. #define _mm512_i32gather_ps(INDEX, ADDR, SCALE) \
  9392. (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_undefined_ps(),\
  9393. (void const *) (ADDR), \
  9394. (__v16si)(__m512i) (INDEX), \
  9395. (__mmask16)0xFFFF, \
  9396. (int) (SCALE))
  9397. #define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  9398. (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512) (V1OLD), \
  9399. (void const *) (ADDR), \
  9400. (__v16si)(__m512i) (INDEX), \
  9401. (__mmask16) (MASK), \
  9402. (int) (SCALE))
  9403. #define _mm512_i32gather_pd(INDEX, ADDR, SCALE) \
  9404. (__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_undefined_pd(), \
  9405. (void const *) (ADDR), \
  9406. (__v8si)(__m256i) (INDEX), \
  9407. (__mmask8)0xFF, (int) (SCALE))
  9408. #define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  9409. (__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d) (V1OLD), \
  9410. (void const *) (ADDR), \
  9411. (__v8si)(__m256i) (INDEX), \
  9412. (__mmask8) (MASK), \
  9413. (int) (SCALE))
  9414. #define _mm512_i64gather_ps(INDEX, ADDR, SCALE) \
  9415. (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_undefined_ps(), \
  9416. (void const *) (ADDR), \
  9417. (__v8di)(__m512i) (INDEX), \
  9418. (__mmask8)0xFF, (int) (SCALE))
  9419. #define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  9420. (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256) (V1OLD), \
  9421. (void const *) (ADDR), \
  9422. (__v8di)(__m512i) (INDEX), \
  9423. (__mmask8) (MASK), \
  9424. (int) (SCALE))
  9425. #define _mm512_i64gather_pd(INDEX, ADDR, SCALE) \
  9426. (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_undefined_pd(), \
  9427. (void const *) (ADDR), \
  9428. (__v8di)(__m512i) (INDEX), \
  9429. (__mmask8)0xFF, (int) (SCALE))
  9430. #define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  9431. (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d) (V1OLD), \
  9432. (void const *) (ADDR), \
  9433. (__v8di)(__m512i) (INDEX), \
  9434. (__mmask8) (MASK), \
  9435. (int) (SCALE))
  9436. #define _mm512_i32gather_epi32(INDEX, ADDR, SCALE) \
  9437. (__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_undefined_epi32 (),\
  9438. (void const *) (ADDR), \
  9439. (__v16si)(__m512i) (INDEX), \
  9440. (__mmask16)0xFFFF, \
  9441. (int) (SCALE))
  9442. #define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  9443. (__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i) (V1OLD), \
  9444. (void const *) (ADDR), \
  9445. (__v16si)(__m512i) (INDEX), \
  9446. (__mmask16) (MASK), \
  9447. (int) (SCALE))
  9448. #define _mm512_i32gather_epi64(INDEX, ADDR, SCALE) \
  9449. (__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_undefined_epi32 (),\
  9450. (void const *) (ADDR), \
  9451. (__v8si)(__m256i) (INDEX), \
  9452. (__mmask8)0xFF, (int) (SCALE))
  9453. #define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  9454. (__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i) (V1OLD), \
  9455. (void const *) (ADDR), \
  9456. (__v8si)(__m256i) (INDEX), \
  9457. (__mmask8) (MASK), \
  9458. (int) (SCALE))
  9459. #define _mm512_i64gather_epi32(INDEX, ADDR, SCALE) \
  9460. (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_undefined_si256(),\
  9461. (void const *) (ADDR), \
  9462. (__v8di)(__m512i) (INDEX), \
  9463. (__mmask8)0xFF, (int) (SCALE))
  9464. #define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  9465. (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i) (V1OLD), \
  9466. (void const *) (ADDR), \
  9467. (__v8di)(__m512i) (INDEX), \
  9468. (__mmask8) (MASK), \
  9469. (int) (SCALE))
  9470. #define _mm512_i64gather_epi64(INDEX, ADDR, SCALE) \
  9471. (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_undefined_epi32 (),\
  9472. (void const *) (ADDR), \
  9473. (__v8di)(__m512i) (INDEX), \
  9474. (__mmask8)0xFF, (int) (SCALE))
  9475. #define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  9476. (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i) (V1OLD), \
  9477. (void const *) (ADDR), \
  9478. (__v8di)(__m512i) (INDEX), \
  9479. (__mmask8) (MASK), \
  9480. (int) (SCALE))
  9481. #define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
  9482. __builtin_ia32_scattersiv16sf ((void *) (ADDR), (__mmask16)0xFFFF, \
  9483. (__v16si)(__m512i) (INDEX), \
  9484. (__v16sf)(__m512) (V1), (int) (SCALE))
  9485. #define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  9486. __builtin_ia32_scattersiv16sf ((void *) (ADDR), (__mmask16) (MASK), \
  9487. (__v16si)(__m512i) (INDEX), \
  9488. (__v16sf)(__m512) (V1), (int) (SCALE))
  9489. #define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
  9490. __builtin_ia32_scattersiv8df ((void *) (ADDR), (__mmask8)0xFF, \
  9491. (__v8si)(__m256i) (INDEX), \
  9492. (__v8df)(__m512d) (V1), (int) (SCALE))
  9493. #define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  9494. __builtin_ia32_scattersiv8df ((void *) (ADDR), (__mmask8) (MASK), \
  9495. (__v8si)(__m256i) (INDEX), \
  9496. (__v8df)(__m512d) (V1), (int) (SCALE))
  9497. #define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
  9498. __builtin_ia32_scatterdiv16sf ((void *) (ADDR), (__mmask8)0xFF, \
  9499. (__v8di)(__m512i) (INDEX), \
  9500. (__v8sf)(__m256) (V1), (int) (SCALE))
  9501. #define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
9502. __builtin_ia32_scatterdiv16sf ((void *) (ADDR), (__mmask8) (MASK), \
  9503. (__v8di)(__m512i) (INDEX), \
  9504. (__v8sf)(__m256) (V1), (int) (SCALE))
  9505. #define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
  9506. __builtin_ia32_scatterdiv8df ((void *) (ADDR), (__mmask8)0xFF, \
  9507. (__v8di)(__m512i) (INDEX), \
  9508. (__v8df)(__m512d) (V1), (int) (SCALE))
  9509. #define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  9510. __builtin_ia32_scatterdiv8df ((void *) (ADDR), (__mmask8) (MASK), \
  9511. (__v8di)(__m512i) (INDEX), \
  9512. (__v8df)(__m512d) (V1), (int) (SCALE))
  9513. #define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
  9514. __builtin_ia32_scattersiv16si ((void *) (ADDR), (__mmask16)0xFFFF, \
  9515. (__v16si)(__m512i) (INDEX), \
  9516. (__v16si)(__m512i) (V1), (int) (SCALE))
  9517. #define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  9518. __builtin_ia32_scattersiv16si ((void *) (ADDR), (__mmask16) (MASK), \
  9519. (__v16si)(__m512i) (INDEX), \
  9520. (__v16si)(__m512i) (V1), (int) (SCALE))
  9521. #define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
  9522. __builtin_ia32_scattersiv8di ((void *) (ADDR), (__mmask8)0xFF, \
  9523. (__v8si)(__m256i) (INDEX), \
  9524. (__v8di)(__m512i) (V1), (int) (SCALE))
  9525. #define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  9526. __builtin_ia32_scattersiv8di ((void *) (ADDR), (__mmask8) (MASK), \
  9527. (__v8si)(__m256i) (INDEX), \
  9528. (__v8di)(__m512i) (V1), (int) (SCALE))
  9529. #define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
  9530. __builtin_ia32_scatterdiv16si ((void *) (ADDR), (__mmask8)0xFF, \
  9531. (__v8di)(__m512i) (INDEX), \
  9532. (__v8si)(__m256i) (V1), (int) (SCALE))
  9533. #define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  9534. __builtin_ia32_scatterdiv16si ((void *) (ADDR), (__mmask8) (MASK), \
  9535. (__v8di)(__m512i) (INDEX), \
  9536. (__v8si)(__m256i) (V1), (int) (SCALE))
  9537. #define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
  9538. __builtin_ia32_scatterdiv8di ((void *) (ADDR), (__mmask8)0xFF, \
  9539. (__v8di)(__m512i) (INDEX), \
  9540. (__v8di)(__m512i) (V1), (int) (SCALE))
  9541. #define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  9542. __builtin_ia32_scatterdiv8di ((void *) (ADDR), (__mmask8) (MASK), \
  9543. (__v8di)(__m512i) (INDEX), \
  9544. (__v8di)(__m512i) (V1), (int) (SCALE))
  9545. #endif
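/* Usage sketch (added comment, not part of the original header): a masked
   32-bit-index gather followed by the matching masked scatter.  The caller
   names (src, dst, idx) are hypothetical; compile with -mavx512f.

     #include <immintrin.h>

     void gather_scatter_demo (const float *src, float *dst, const int *idx)
     {
       __m512i vindex = _mm512_loadu_si512 (idx);   // 16 dword indices
       __mmask16 m = 0x00FF;                        // operate on the low 8 lanes
       __m512 v = _mm512_mask_i32gather_ps (_mm512_setzero_ps (), m,
                                            vindex, src, 4);
       _mm512_mask_i32scatter_ps (dst, m, vindex, v, 4);
     }

   SCALE must be a compile-time constant of 1, 2, 4 or 8; 4 matches
   sizeof (float) here.  */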
  9546. extern __inline __m512d
  9547. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9548. _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
  9549. {
  9550. return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
  9551. (__v8df) __W,
  9552. (__mmask8) __U);
  9553. }
  9554. extern __inline __m512d
  9555. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9556. _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
  9557. {
  9558. return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
  9559. (__v8df)
  9560. _mm512_setzero_pd (),
  9561. (__mmask8) __U);
  9562. }
  9563. extern __inline void
  9564. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9565. _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
  9566. {
  9567. __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
  9568. (__mmask8) __U);
  9569. }
  9570. extern __inline __m512
  9571. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9572. _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
  9573. {
  9574. return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
  9575. (__v16sf) __W,
  9576. (__mmask16) __U);
  9577. }
  9578. extern __inline __m512
  9579. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9580. _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
  9581. {
  9582. return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
  9583. (__v16sf)
  9584. _mm512_setzero_ps (),
  9585. (__mmask16) __U);
  9586. }
  9587. extern __inline void
  9588. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9589. _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
  9590. {
  9591. __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
  9592. (__mmask16) __U);
  9593. }
  9594. extern __inline __m512i
  9595. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9596. _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
  9597. {
  9598. return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
  9599. (__v8di) __W,
  9600. (__mmask8) __U);
  9601. }
  9602. extern __inline __m512i
  9603. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9604. _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
  9605. {
  9606. return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
  9607. (__v8di)
  9608. _mm512_setzero_si512 (),
  9609. (__mmask8) __U);
  9610. }
  9611. extern __inline void
  9612. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9613. _mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
  9614. {
  9615. __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
  9616. (__mmask8) __U);
  9617. }
  9618. extern __inline __m512i
  9619. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9620. _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
  9621. {
  9622. return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
  9623. (__v16si) __W,
  9624. (__mmask16) __U);
  9625. }
  9626. extern __inline __m512i
  9627. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9628. _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
  9629. {
  9630. return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
  9631. (__v16si)
  9632. _mm512_setzero_si512 (),
  9633. (__mmask16) __U);
  9634. }
  9635. extern __inline void
  9636. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9637. _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
  9638. {
  9639. __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
  9640. (__mmask16) __U);
  9641. }
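/* Usage sketch (added comment, not part of the original header): left-pack
   the selected lanes of a vector and store only those lanes contiguously.
   The names buf, v and keep are hypothetical caller-side variables.

     #include <immintrin.h>

     void compact_demo (double *buf, __m512d v, __mmask8 keep)
     {
       __m512d packed = _mm512_maskz_compress_pd (keep, v);  // unselected lanes zeroed
       _mm512_mask_compressstoreu_pd (buf, keep, v);
       (void) packed;   // kept only to show the register form alongside the store
     }

   compressstoreu writes exactly __builtin_popcount (keep) contiguous
   elements starting at buf and leaves the rest of the buffer untouched.  */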
  9642. extern __inline __m512d
  9643. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9644. _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
  9645. {
  9646. return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
  9647. (__v8df) __W,
  9648. (__mmask8) __U);
  9649. }
  9650. extern __inline __m512d
  9651. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9652. _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
  9653. {
  9654. return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A,
  9655. (__v8df)
  9656. _mm512_setzero_pd (),
  9657. (__mmask8) __U);
  9658. }
  9659. extern __inline __m512d
  9660. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9661. _mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P)
  9662. {
  9663. return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P,
  9664. (__v8df) __W,
  9665. (__mmask8) __U);
  9666. }
  9667. extern __inline __m512d
  9668. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9669. _mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
  9670. {
  9671. return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P,
  9672. (__v8df)
  9673. _mm512_setzero_pd (),
  9674. (__mmask8) __U);
  9675. }
  9676. extern __inline __m512
  9677. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9678. _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
  9679. {
  9680. return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
  9681. (__v16sf) __W,
  9682. (__mmask16) __U);
  9683. }
  9684. extern __inline __m512
  9685. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9686. _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
  9687. {
  9688. return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A,
  9689. (__v16sf)
  9690. _mm512_setzero_ps (),
  9691. (__mmask16) __U);
  9692. }
  9693. extern __inline __m512
  9694. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9695. _mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P)
  9696. {
  9697. return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P,
  9698. (__v16sf) __W,
  9699. (__mmask16) __U);
  9700. }
  9701. extern __inline __m512
  9702. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9703. _mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P)
  9704. {
  9705. return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P,
  9706. (__v16sf)
  9707. _mm512_setzero_ps (),
  9708. (__mmask16) __U);
  9709. }
  9710. extern __inline __m512i
  9711. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9712. _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
  9713. {
  9714. return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
  9715. (__v8di) __W,
  9716. (__mmask8) __U);
  9717. }
  9718. extern __inline __m512i
  9719. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9720. _mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
  9721. {
  9722. return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A,
  9723. (__v8di)
  9724. _mm512_setzero_si512 (),
  9725. (__mmask8) __U);
  9726. }
  9727. extern __inline __m512i
  9728. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9729. _mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
  9730. {
  9731. return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P,
  9732. (__v8di) __W,
  9733. (__mmask8) __U);
  9734. }
  9735. extern __inline __m512i
  9736. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9737. _mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
  9738. {
  9739. return (__m512i)
  9740. __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P,
  9741. (__v8di)
  9742. _mm512_setzero_si512 (),
  9743. (__mmask8) __U);
  9744. }
  9745. extern __inline __m512i
  9746. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9747. _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
  9748. {
  9749. return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
  9750. (__v16si) __W,
  9751. (__mmask16) __U);
  9752. }
  9753. extern __inline __m512i
  9754. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9755. _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
  9756. {
  9757. return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A,
  9758. (__v16si)
  9759. _mm512_setzero_si512 (),
  9760. (__mmask16) __U);
  9761. }
  9762. extern __inline __m512i
  9763. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9764. _mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
  9765. {
  9766. return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P,
  9767. (__v16si) __W,
  9768. (__mmask16) __U);
  9769. }
  9770. extern __inline __m512i
  9771. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9772. _mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
  9773. {
  9774. return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P,
  9775. (__v16si)
  9776. _mm512_setzero_si512
  9777. (), (__mmask16) __U);
  9778. }
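/* Usage sketch (added comment, not part of the original header): expand is
   the inverse of compress: read popcount(mask) contiguous values from memory
   and spread them into the lanes selected by the mask, zeroing the rest.
   The name src is hypothetical.

     #include <immintrin.h>

     __m512 expand_demo (const float *src, __mmask16 lanes)
     {
       return _mm512_maskz_expandloadu_ps (lanes, src);
     }
  */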
  9779. /* Mask arithmetic operations */
  9780. #define _kand_mask16 _mm512_kand
  9781. #define _kandn_mask16 _mm512_kandn
  9782. #define _knot_mask16 _mm512_knot
  9783. #define _kor_mask16 _mm512_kor
  9784. #define _kxnor_mask16 _mm512_kxnor
  9785. #define _kxor_mask16 _mm512_kxor
  9786. extern __inline unsigned char
  9787. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9788. _kortest_mask16_u8 (__mmask16 __A, __mmask16 __B, unsigned char *__CF)
  9789. {
  9790. *__CF = (unsigned char) __builtin_ia32_kortestchi (__A, __B);
  9791. return (unsigned char) __builtin_ia32_kortestzhi (__A, __B);
  9792. }
  9793. extern __inline unsigned char
  9794. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9795. _kortestz_mask16_u8 (__mmask16 __A, __mmask16 __B)
  9796. {
  9797. return (unsigned char) __builtin_ia32_kortestzhi ((__mmask16) __A,
  9798. (__mmask16) __B);
  9799. }
  9800. extern __inline unsigned char
  9801. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9802. _kortestc_mask16_u8 (__mmask16 __A, __mmask16 __B)
  9803. {
  9804. return (unsigned char) __builtin_ia32_kortestchi ((__mmask16) __A,
  9805. (__mmask16) __B);
  9806. }
  9807. extern __inline unsigned int
  9808. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9809. _cvtmask16_u32 (__mmask16 __A)
  9810. {
  9811. return (unsigned int) __builtin_ia32_kmovw ((__mmask16 ) __A);
  9812. }
  9813. extern __inline __mmask16
  9814. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9815. _cvtu32_mask16 (unsigned int __A)
  9816. {
  9817. return (__mmask16) __builtin_ia32_kmovw ((__mmask16 ) __A);
  9818. }
  9819. extern __inline __mmask16
  9820. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9821. _load_mask16 (__mmask16 *__A)
  9822. {
  9823. return (__mmask16) __builtin_ia32_kmovw (*(__mmask16 *) __A);
  9824. }
  9825. extern __inline void
  9826. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9827. _store_mask16 (__mmask16 *__A, __mmask16 __B)
  9828. {
  9829. *(__mmask16 *) __A = __builtin_ia32_kmovw (__B);
  9830. }
  9831. extern __inline __mmask16
  9832. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9833. _mm512_kand (__mmask16 __A, __mmask16 __B)
  9834. {
  9835. return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
  9836. }
  9837. extern __inline __mmask16
  9838. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9839. _mm512_kandn (__mmask16 __A, __mmask16 __B)
  9840. {
  9841. return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A,
  9842. (__mmask16) __B);
  9843. }
  9844. extern __inline __mmask16
  9845. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9846. _mm512_kor (__mmask16 __A, __mmask16 __B)
  9847. {
  9848. return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
  9849. }
  9850. extern __inline int
  9851. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9852. _mm512_kortestz (__mmask16 __A, __mmask16 __B)
  9853. {
  9854. return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
  9855. (__mmask16) __B);
  9856. }
  9857. extern __inline int
  9858. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9859. _mm512_kortestc (__mmask16 __A, __mmask16 __B)
  9860. {
  9861. return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
  9862. (__mmask16) __B);
  9863. }
  9864. extern __inline __mmask16
  9865. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9866. _mm512_kxnor (__mmask16 __A, __mmask16 __B)
  9867. {
  9868. return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
  9869. }
  9870. extern __inline __mmask16
  9871. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9872. _mm512_kxor (__mmask16 __A, __mmask16 __B)
  9873. {
  9874. return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
  9875. }
  9876. extern __inline __mmask16
  9877. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9878. _mm512_knot (__mmask16 __A)
  9879. {
  9880. return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
  9881. }
  9882. extern __inline __mmask16
  9883. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9884. _mm512_kunpackb (__mmask16 __A, __mmask16 __B)
  9885. {
  9886. return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
  9887. }
  9888. extern __inline __mmask16
  9889. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9890. _kunpackb_mask16 (__mmask8 __A, __mmask8 __B)
  9891. {
  9892. return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
  9893. }
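/* Usage sketch (added comment, not part of the original header): combining
   two comparison masks with the k-register operations above.  The range
   test and its operand names are made up for illustration.

     #include <immintrin.h>

     int any_in_range (__m512 v, __m512 lo, __m512 hi)
     {
       __mmask16 ge = _mm512_cmp_ps_mask (v, lo, _CMP_GE_OQ);
       __mmask16 le = _mm512_cmp_ps_mask (v, hi, _CMP_LE_OQ);
       __mmask16 in = _kand_mask16 (ge, le);
       return !_kortestz_mask16_u8 (in, in);   // 1 if any lane is inside [lo, hi]
     }
  */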
  9894. #ifdef __OPTIMIZE__
  9895. extern __inline __m512i
  9896. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9897. _mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
  9898. const int __imm)
  9899. {
  9900. return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
  9901. (__v4si) __D,
  9902. __imm,
  9903. (__v16si)
  9904. _mm512_setzero_si512 (),
  9905. __B);
  9906. }
  9907. extern __inline __m512
  9908. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9909. _mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D,
  9910. const int __imm)
  9911. {
  9912. return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
  9913. (__v4sf) __D,
  9914. __imm,
  9915. (__v16sf)
  9916. _mm512_setzero_ps (), __B);
  9917. }
  9918. extern __inline __m512i
  9919. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9920. _mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C,
  9921. __m128i __D, const int __imm)
  9922. {
  9923. return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
  9924. (__v4si) __D,
  9925. __imm,
  9926. (__v16si) __A,
  9927. __B);
  9928. }
  9929. extern __inline __m512
  9930. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9931. _mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
  9932. __m128 __D, const int __imm)
  9933. {
  9934. return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
  9935. (__v4sf) __D,
  9936. __imm,
  9937. (__v16sf) __A, __B);
  9938. }
  9939. #else
  9940. #define _mm512_maskz_insertf32x4(A, X, Y, C) \
  9941. ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
  9942. (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(), \
  9943. (__mmask16)(A)))
  9944. #define _mm512_maskz_inserti32x4(A, X, Y, C) \
  9945. ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
  9946. (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (), \
  9947. (__mmask16)(A)))
  9948. #define _mm512_mask_insertf32x4(A, B, X, Y, C) \
  9949. ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
  9950. (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A), \
  9951. (__mmask16)(B)))
  9952. #define _mm512_mask_inserti32x4(A, B, X, Y, C) \
  9953. ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
  9954. (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A), \
  9955. (__mmask16)(B)))
  9956. #endif
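/* Usage sketch (added comment, not part of the original header): replace the
   third 128-bit slot of a 512-bit register under a write-mask.  The
   immediate selects the slot (0..3) and must be a compile-time constant.

     #include <immintrin.h>

     __m512 patch_slot2 (__m512 acc, __m128 chunk, __mmask16 m)
     {
       return _mm512_mask_insertf32x4 (acc, m, acc, chunk, 2);
     }
  */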
  9957. extern __inline __m512i
  9958. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9959. _mm512_max_epi64 (__m512i __A, __m512i __B)
  9960. {
  9961. return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
  9962. (__v8di) __B,
  9963. (__v8di)
  9964. _mm512_undefined_epi32 (),
  9965. (__mmask8) -1);
  9966. }
  9967. extern __inline __m512i
  9968. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9969. _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
  9970. {
  9971. return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
  9972. (__v8di) __B,
  9973. (__v8di)
  9974. _mm512_setzero_si512 (),
  9975. __M);
  9976. }
  9977. extern __inline __m512i
  9978. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9979. _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
  9980. {
  9981. return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
  9982. (__v8di) __B,
  9983. (__v8di) __W, __M);
  9984. }
  9985. extern __inline __m512i
  9986. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9987. _mm512_min_epi64 (__m512i __A, __m512i __B)
  9988. {
  9989. return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
  9990. (__v8di) __B,
  9991. (__v8di)
  9992. _mm512_undefined_epi32 (),
  9993. (__mmask8) -1);
  9994. }
  9995. extern __inline __m512i
  9996. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  9997. _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
  9998. {
  9999. return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
  10000. (__v8di) __B,
  10001. (__v8di) __W, __M);
  10002. }
  10003. extern __inline __m512i
  10004. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10005. _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
  10006. {
  10007. return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
  10008. (__v8di) __B,
  10009. (__v8di)
  10010. _mm512_setzero_si512 (),
  10011. __M);
  10012. }
  10013. extern __inline __m512i
  10014. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10015. _mm512_max_epu64 (__m512i __A, __m512i __B)
  10016. {
  10017. return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
  10018. (__v8di) __B,
  10019. (__v8di)
  10020. _mm512_undefined_epi32 (),
  10021. (__mmask8) -1);
  10022. }
  10023. extern __inline __m512i
  10024. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10025. _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
  10026. {
  10027. return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
  10028. (__v8di) __B,
  10029. (__v8di)
  10030. _mm512_setzero_si512 (),
  10031. __M);
  10032. }
  10033. extern __inline __m512i
  10034. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10035. _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
  10036. {
  10037. return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
  10038. (__v8di) __B,
  10039. (__v8di) __W, __M);
  10040. }
  10041. extern __inline __m512i
  10042. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10043. _mm512_min_epu64 (__m512i __A, __m512i __B)
  10044. {
  10045. return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
  10046. (__v8di) __B,
  10047. (__v8di)
  10048. _mm512_undefined_epi32 (),
  10049. (__mmask8) -1);
  10050. }
  10051. extern __inline __m512i
  10052. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10053. _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
  10054. {
  10055. return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
  10056. (__v8di) __B,
  10057. (__v8di) __W, __M);
  10058. }
  10059. extern __inline __m512i
  10060. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10061. _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
  10062. {
  10063. return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
  10064. (__v8di) __B,
  10065. (__v8di)
  10066. _mm512_setzero_si512 (),
  10067. __M);
  10068. }
  10069. extern __inline __m512i
  10070. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10071. _mm512_max_epi32 (__m512i __A, __m512i __B)
  10072. {
  10073. return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
  10074. (__v16si) __B,
  10075. (__v16si)
  10076. _mm512_undefined_epi32 (),
  10077. (__mmask16) -1);
  10078. }
  10079. extern __inline __m512i
  10080. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10081. _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
  10082. {
  10083. return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
  10084. (__v16si) __B,
  10085. (__v16si)
  10086. _mm512_setzero_si512 (),
  10087. __M);
  10088. }
  10089. extern __inline __m512i
  10090. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10091. _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
  10092. {
  10093. return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
  10094. (__v16si) __B,
  10095. (__v16si) __W, __M);
  10096. }
  10097. extern __inline __m512i
  10098. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10099. _mm512_min_epi32 (__m512i __A, __m512i __B)
  10100. {
  10101. return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
  10102. (__v16si) __B,
  10103. (__v16si)
  10104. _mm512_undefined_epi32 (),
  10105. (__mmask16) -1);
  10106. }
  10107. extern __inline __m512i
  10108. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10109. _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
  10110. {
  10111. return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
  10112. (__v16si) __B,
  10113. (__v16si)
  10114. _mm512_setzero_si512 (),
  10115. __M);
  10116. }
  10117. extern __inline __m512i
  10118. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10119. _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
  10120. {
  10121. return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
  10122. (__v16si) __B,
  10123. (__v16si) __W, __M);
  10124. }
  10125. extern __inline __m512i
  10126. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10127. _mm512_max_epu32 (__m512i __A, __m512i __B)
  10128. {
  10129. return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
  10130. (__v16si) __B,
  10131. (__v16si)
  10132. _mm512_undefined_epi32 (),
  10133. (__mmask16) -1);
  10134. }
  10135. extern __inline __m512i
  10136. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10137. _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
  10138. {
  10139. return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
  10140. (__v16si) __B,
  10141. (__v16si)
  10142. _mm512_setzero_si512 (),
  10143. __M);
  10144. }
  10145. extern __inline __m512i
  10146. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10147. _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
  10148. {
  10149. return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
  10150. (__v16si) __B,
  10151. (__v16si) __W, __M);
  10152. }
  10153. extern __inline __m512i
  10154. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10155. _mm512_min_epu32 (__m512i __A, __m512i __B)
  10156. {
  10157. return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
  10158. (__v16si) __B,
  10159. (__v16si)
  10160. _mm512_undefined_epi32 (),
  10161. (__mmask16) -1);
  10162. }
  10163. extern __inline __m512i
  10164. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10165. _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
  10166. {
  10167. return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
  10168. (__v16si) __B,
  10169. (__v16si)
  10170. _mm512_setzero_si512 (),
  10171. __M);
  10172. }
  10173. extern __inline __m512i
  10174. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10175. _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
  10176. {
  10177. return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
  10178. (__v16si) __B,
  10179. (__v16si) __W, __M);
  10180. }
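/* Usage sketch (added comment, not part of the original header): a running
   per-lane maximum that only updates the lanes selected by the mask; the
   other lanes keep the old accumulator value.

     #include <immintrin.h>

     __m512i running_max (__m512i acc, __m512i x, __mmask16 active)
     {
       return _mm512_mask_max_epi32 (acc, active, acc, x);
     }
  */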
  10181. extern __inline __m512
  10182. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10183. _mm512_unpacklo_ps (__m512 __A, __m512 __B)
  10184. {
  10185. return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
  10186. (__v16sf) __B,
  10187. (__v16sf)
  10188. _mm512_undefined_ps (),
  10189. (__mmask16) -1);
  10190. }
  10191. extern __inline __m512
  10192. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10193. _mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
  10194. {
  10195. return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
  10196. (__v16sf) __B,
  10197. (__v16sf) __W,
  10198. (__mmask16) __U);
  10199. }
  10200. extern __inline __m512
  10201. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10202. _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
  10203. {
  10204. return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
  10205. (__v16sf) __B,
  10206. (__v16sf)
  10207. _mm512_setzero_ps (),
  10208. (__mmask16) __U);
  10209. }
  10210. #ifdef __OPTIMIZE__
  10211. extern __inline __m128d
  10212. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10213. _mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
  10214. {
  10215. return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
  10216. (__v2df) __B,
  10217. __R);
  10218. }
  10219. extern __inline __m128d
  10220. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10221. _mm_mask_max_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
  10222. __m128d __B, const int __R)
  10223. {
  10224. return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
  10225. (__v2df) __B,
  10226. (__v2df) __W,
  10227. (__mmask8) __U, __R);
  10228. }
  10229. extern __inline __m128d
  10230. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10231. _mm_maskz_max_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
  10232. const int __R)
  10233. {
  10234. return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
  10235. (__v2df) __B,
  10236. (__v2df)
  10237. _mm_setzero_pd (),
  10238. (__mmask8) __U, __R);
  10239. }
  10240. extern __inline __m128
  10241. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10242. _mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
  10243. {
  10244. return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
  10245. (__v4sf) __B,
  10246. __R);
  10247. }
  10248. extern __inline __m128
  10249. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10250. _mm_mask_max_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
  10251. __m128 __B, const int __R)
  10252. {
  10253. return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
  10254. (__v4sf) __B,
  10255. (__v4sf) __W,
  10256. (__mmask8) __U, __R);
  10257. }
  10258. extern __inline __m128
  10259. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10260. _mm_maskz_max_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
  10261. const int __R)
  10262. {
  10263. return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
  10264. (__v4sf) __B,
  10265. (__v4sf)
  10266. _mm_setzero_ps (),
  10267. (__mmask8) __U, __R);
  10268. }
  10269. extern __inline __m128d
  10270. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10271. _mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
  10272. {
  10273. return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
  10274. (__v2df) __B,
  10275. __R);
  10276. }
  10277. extern __inline __m128d
  10278. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10279. _mm_mask_min_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
  10280. __m128d __B, const int __R)
  10281. {
  10282. return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
  10283. (__v2df) __B,
  10284. (__v2df) __W,
  10285. (__mmask8) __U, __R);
  10286. }
  10287. extern __inline __m128d
  10288. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10289. _mm_maskz_min_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
  10290. const int __R)
  10291. {
  10292. return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
  10293. (__v2df) __B,
  10294. (__v2df)
  10295. _mm_setzero_pd (),
  10296. (__mmask8) __U, __R);
  10297. }
  10298. extern __inline __m128
  10299. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10300. _mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
  10301. {
  10302. return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
  10303. (__v4sf) __B,
  10304. __R);
  10305. }
  10306. extern __inline __m128
  10307. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10308. _mm_mask_min_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
  10309. __m128 __B, const int __R)
  10310. {
  10311. return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
  10312. (__v4sf) __B,
  10313. (__v4sf) __W,
  10314. (__mmask8) __U, __R);
  10315. }
  10316. extern __inline __m128
  10317. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10318. _mm_maskz_min_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
  10319. const int __R)
  10320. {
  10321. return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
  10322. (__v4sf) __B,
  10323. (__v4sf)
  10324. _mm_setzero_ps (),
  10325. (__mmask8) __U, __R);
  10326. }
  10327. #else
  10328. #define _mm_max_round_sd(A, B, C) \
  10329. (__m128d)__builtin_ia32_maxsd_round(A, B, C)
  10330. #define _mm_mask_max_round_sd(W, U, A, B, C) \
  10331. (__m128d)__builtin_ia32_maxsd_mask_round(A, B, W, U, C)
  10332. #define _mm_maskz_max_round_sd(U, A, B, C) \
  10333. (__m128d)__builtin_ia32_maxsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
  10334. #define _mm_max_round_ss(A, B, C) \
  10335. (__m128)__builtin_ia32_maxss_round(A, B, C)
  10336. #define _mm_mask_max_round_ss(W, U, A, B, C) \
  10337. (__m128)__builtin_ia32_maxss_mask_round(A, B, W, U, C)
  10338. #define _mm_maskz_max_round_ss(U, A, B, C) \
  10339. (__m128)__builtin_ia32_maxss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
  10340. #define _mm_min_round_sd(A, B, C) \
  10341. (__m128d)__builtin_ia32_minsd_round(A, B, C)
  10342. #define _mm_mask_min_round_sd(W, U, A, B, C) \
  10343. (__m128d)__builtin_ia32_minsd_mask_round(A, B, W, U, C)
  10344. #define _mm_maskz_min_round_sd(U, A, B, C) \
  10345. (__m128d)__builtin_ia32_minsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
  10346. #define _mm_min_round_ss(A, B, C) \
  10347. (__m128)__builtin_ia32_minss_round(A, B, C)
  10348. #define _mm_mask_min_round_ss(W, U, A, B, C) \
  10349. (__m128)__builtin_ia32_minss_mask_round(A, B, W, U, C)
  10350. #define _mm_maskz_min_round_ss(U, A, B, C) \
  10351. (__m128)__builtin_ia32_minss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
  10352. #endif
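/* Usage sketch (added comment, not part of the original header): the scalar
   *_round min/max forms take a suppress-all-exceptions hint rather than a
   true rounding mode, so _MM_FROUND_NO_EXC (or _MM_FROUND_CUR_DIRECTION) is
   the value normally passed.

     #include <immintrin.h>

     __m128d quiet_max (__m128d a, __m128d b)
     {
       return _mm_max_round_sd (a, b, _MM_FROUND_NO_EXC);
     }
  */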
  10353. extern __inline __m512d
  10354. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10355. _mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
  10356. {
  10357. return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
  10358. (__v8df) __W,
  10359. (__mmask8) __U);
  10360. }
  10361. extern __inline __m512
  10362. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10363. _mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
  10364. {
  10365. return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
  10366. (__v16sf) __W,
  10367. (__mmask16) __U);
  10368. }
  10369. extern __inline __m512i
  10370. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10371. _mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
  10372. {
  10373. return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
  10374. (__v8di) __W,
  10375. (__mmask8) __U);
  10376. }
  10377. extern __inline __m512i
  10378. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10379. _mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
  10380. {
  10381. return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
  10382. (__v16si) __W,
  10383. (__mmask16) __U);
  10384. }
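/* Usage sketch (added comment, not part of the original header): blend keeps
   the first operand where the mask bit is 0 and takes the second operand
   where it is 1.

     #include <immintrin.h>

     __m512 select_demo (__mmask16 m, __m512 a, __m512 b)
     {
       return _mm512_mask_blend_ps (m, a, b);   // lane i = (m >> i) & 1 ? b[i] : a[i]
     }
  */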
  10385. #ifdef __OPTIMIZE__
  10386. extern __inline __m128d
  10387. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10388. _mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
  10389. {
  10390. return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
  10391. (__v2df) __A,
  10392. (__v2df) __B,
  10393. __R);
  10394. }
  10395. extern __inline __m128
  10396. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10397. _mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
  10398. {
  10399. return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
  10400. (__v4sf) __A,
  10401. (__v4sf) __B,
  10402. __R);
  10403. }
  10404. extern __inline __m128d
  10405. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10406. _mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
  10407. {
  10408. return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
  10409. (__v2df) __A,
  10410. -(__v2df) __B,
  10411. __R);
  10412. }
  10413. extern __inline __m128
  10414. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10415. _mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
  10416. {
  10417. return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
  10418. (__v4sf) __A,
  10419. -(__v4sf) __B,
  10420. __R);
  10421. }
  10422. extern __inline __m128d
  10423. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10424. _mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
  10425. {
  10426. return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
  10427. -(__v2df) __A,
  10428. (__v2df) __B,
  10429. __R);
  10430. }
  10431. extern __inline __m128
  10432. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10433. _mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
  10434. {
  10435. return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
  10436. -(__v4sf) __A,
  10437. (__v4sf) __B,
  10438. __R);
  10439. }
  10440. extern __inline __m128d
  10441. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10442. _mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
  10443. {
  10444. return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
  10445. -(__v2df) __A,
  10446. -(__v2df) __B,
  10447. __R);
  10448. }
  10449. extern __inline __m128
  10450. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10451. _mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
  10452. {
  10453. return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
  10454. -(__v4sf) __A,
  10455. -(__v4sf) __B,
  10456. __R);
  10457. }
  10458. #else
  10459. #define _mm_fmadd_round_sd(A, B, C, R) \
  10460. (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, C, R)
  10461. #define _mm_fmadd_round_ss(A, B, C, R) \
  10462. (__m128)__builtin_ia32_vfmaddss3_round(A, B, C, R)
  10463. #define _mm_fmsub_round_sd(A, B, C, R) \
  10464. (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, -(C), R)
  10465. #define _mm_fmsub_round_ss(A, B, C, R) \
  10466. (__m128)__builtin_ia32_vfmaddss3_round(A, B, -(C), R)
  10467. #define _mm_fnmadd_round_sd(A, B, C, R) \
  10468. (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), C, R)
  10469. #define _mm_fnmadd_round_ss(A, B, C, R) \
  10470. (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), C, R)
  10471. #define _mm_fnmsub_round_sd(A, B, C, R) \
  10472. (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), -(C), R)
  10473. #define _mm_fnmsub_round_ss(A, B, C, R) \
  10474. (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), -(C), R)
  10475. #endif
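/* Usage sketch (added comment, not part of the original header): scalar FMA
   with an explicit static rounding mode; a rounding-direction constant is
   normally combined with _MM_FROUND_NO_EXC, since embedded rounding also
   suppresses exceptions.

     #include <immintrin.h>

     __m128d fma_toward_zero (__m128d a, __m128d b, __m128d c)
     {
       return _mm_fmadd_round_sd (a, b, c,
                                  _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
     }
  */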
  10476. extern __inline __m128d
  10477. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10478. _mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
  10479. {
  10480. return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
  10481. (__v2df) __A,
  10482. (__v2df) __B,
  10483. (__mmask8) __U,
  10484. _MM_FROUND_CUR_DIRECTION);
  10485. }
  10486. extern __inline __m128
  10487. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10488. _mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  10489. {
  10490. return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
  10491. (__v4sf) __A,
  10492. (__v4sf) __B,
  10493. (__mmask8) __U,
  10494. _MM_FROUND_CUR_DIRECTION);
  10495. }
  10496. extern __inline __m128d
  10497. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10498. _mm_mask3_fmadd_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
  10499. {
  10500. return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
  10501. (__v2df) __A,
  10502. (__v2df) __B,
  10503. (__mmask8) __U,
  10504. _MM_FROUND_CUR_DIRECTION);
  10505. }
  10506. extern __inline __m128
  10507. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10508. _mm_mask3_fmadd_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
  10509. {
  10510. return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
  10511. (__v4sf) __A,
  10512. (__v4sf) __B,
  10513. (__mmask8) __U,
  10514. _MM_FROUND_CUR_DIRECTION);
  10515. }
  10516. extern __inline __m128d
  10517. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10518. _mm_maskz_fmadd_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
  10519. {
  10520. return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
  10521. (__v2df) __A,
  10522. (__v2df) __B,
  10523. (__mmask8) __U,
  10524. _MM_FROUND_CUR_DIRECTION);
  10525. }
  10526. extern __inline __m128
  10527. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10528. _mm_maskz_fmadd_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
  10529. {
  10530. return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
  10531. (__v4sf) __A,
  10532. (__v4sf) __B,
  10533. (__mmask8) __U,
  10534. _MM_FROUND_CUR_DIRECTION);
  10535. }
  10536. extern __inline __m128d
  10537. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10538. _mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
  10539. {
  10540. return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
  10541. (__v2df) __A,
  10542. -(__v2df) __B,
  10543. (__mmask8) __U,
  10544. _MM_FROUND_CUR_DIRECTION);
  10545. }
  10546. extern __inline __m128
  10547. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10548. _mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  10549. {
  10550. return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
  10551. (__v4sf) __A,
  10552. -(__v4sf) __B,
  10553. (__mmask8) __U,
  10554. _MM_FROUND_CUR_DIRECTION);
  10555. }
  10556. extern __inline __m128d
  10557. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10558. _mm_mask3_fmsub_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
  10559. {
  10560. return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
  10561. (__v2df) __A,
  10562. (__v2df) __B,
  10563. (__mmask8) __U,
  10564. _MM_FROUND_CUR_DIRECTION);
  10565. }
  10566. extern __inline __m128
  10567. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10568. _mm_mask3_fmsub_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
  10569. {
  10570. return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
  10571. (__v4sf) __A,
  10572. (__v4sf) __B,
  10573. (__mmask8) __U,
  10574. _MM_FROUND_CUR_DIRECTION);
  10575. }
  10576. extern __inline __m128d
  10577. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10578. _mm_maskz_fmsub_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
  10579. {
  10580. return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
  10581. (__v2df) __A,
  10582. -(__v2df) __B,
  10583. (__mmask8) __U,
  10584. _MM_FROUND_CUR_DIRECTION);
  10585. }
  10586. extern __inline __m128
  10587. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10588. _mm_maskz_fmsub_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
  10589. {
  10590. return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
  10591. (__v4sf) __A,
  10592. -(__v4sf) __B,
  10593. (__mmask8) __U,
  10594. _MM_FROUND_CUR_DIRECTION);
  10595. }
  10596. extern __inline __m128d
  10597. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10598. _mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
  10599. {
  10600. return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
  10601. -(__v2df) __A,
  10602. (__v2df) __B,
  10603. (__mmask8) __U,
  10604. _MM_FROUND_CUR_DIRECTION);
  10605. }
  10606. extern __inline __m128
  10607. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10608. _mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  10609. {
  10610. return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
  10611. -(__v4sf) __A,
  10612. (__v4sf) __B,
  10613. (__mmask8) __U,
  10614. _MM_FROUND_CUR_DIRECTION);
  10615. }
  10616. extern __inline __m128d
  10617. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10618. _mm_mask3_fnmadd_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
  10619. {
  10620. return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
  10621. -(__v2df) __A,
  10622. (__v2df) __B,
  10623. (__mmask8) __U,
  10624. _MM_FROUND_CUR_DIRECTION);
  10625. }
  10626. extern __inline __m128
  10627. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10628. _mm_mask3_fnmadd_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
  10629. {
  10630. return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
  10631. -(__v4sf) __A,
  10632. (__v4sf) __B,
  10633. (__mmask8) __U,
  10634. _MM_FROUND_CUR_DIRECTION);
  10635. }
  10636. extern __inline __m128d
  10637. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10638. _mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
  10639. {
  10640. return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
  10641. -(__v2df) __A,
  10642. (__v2df) __B,
  10643. (__mmask8) __U,
  10644. _MM_FROUND_CUR_DIRECTION);
  10645. }
  10646. extern __inline __m128
  10647. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10648. _mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
  10649. {
  10650. return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
  10651. -(__v4sf) __A,
  10652. (__v4sf) __B,
  10653. (__mmask8) __U,
  10654. _MM_FROUND_CUR_DIRECTION);
  10655. }
  10656. extern __inline __m128d
  10657. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10658. _mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
  10659. {
  10660. return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
  10661. -(__v2df) __A,
  10662. -(__v2df) __B,
  10663. (__mmask8) __U,
  10664. _MM_FROUND_CUR_DIRECTION);
  10665. }
  10666. extern __inline __m128
  10667. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10668. _mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  10669. {
  10670. return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
  10671. -(__v4sf) __A,
  10672. -(__v4sf) __B,
  10673. (__mmask8) __U,
  10674. _MM_FROUND_CUR_DIRECTION);
  10675. }
  10676. extern __inline __m128d
  10677. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10678. _mm_mask3_fnmsub_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
  10679. {
  10680. return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
  10681. -(__v2df) __A,
  10682. (__v2df) __B,
  10683. (__mmask8) __U,
  10684. _MM_FROUND_CUR_DIRECTION);
  10685. }
  10686. extern __inline __m128
  10687. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10688. _mm_mask3_fnmsub_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
  10689. {
  10690. return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
  10691. -(__v4sf) __A,
  10692. (__v4sf) __B,
  10693. (__mmask8) __U,
  10694. _MM_FROUND_CUR_DIRECTION);
  10695. }
  10696. extern __inline __m128d
  10697. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10698. _mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
  10699. {
  10700. return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
  10701. -(__v2df) __A,
  10702. -(__v2df) __B,
  10703. (__mmask8) __U,
  10704. _MM_FROUND_CUR_DIRECTION);
  10705. }
  10706. extern __inline __m128
  10707. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10708. _mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
  10709. {
  10710. return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
  10711. -(__v4sf) __A,
  10712. -(__v4sf) __B,
  10713. (__mmask8) __U,
  10714. _MM_FROUND_CUR_DIRECTION);
  10715. }
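/* Usage sketch (added comment, not part of the original header): masked
   scalar fused multiply-add.  When bit 0 of the mask is clear the low
   element of the first operand passes through unchanged; the upper element
   always comes from the first operand.

     #include <immintrin.h>

     __m128d fma_if (__m128d w, __mmask8 k, __m128d a, __m128d b)
     {
       return _mm_mask_fmadd_sd (w, k, a, b);   // low lane: (k & 1) ? w*a+b : w
     }
  */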
  10716. #ifdef __OPTIMIZE__
  10717. extern __inline __m128d
  10718. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10719. _mm_mask_fmadd_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
  10720. const int __R)
  10721. {
  10722. return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
  10723. (__v2df) __A,
  10724. (__v2df) __B,
  10725. (__mmask8) __U, __R);
  10726. }
  10727. extern __inline __m128
  10728. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10729. _mm_mask_fmadd_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
  10730. const int __R)
  10731. {
  10732. return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
  10733. (__v4sf) __A,
  10734. (__v4sf) __B,
  10735. (__mmask8) __U, __R);
  10736. }
  10737. extern __inline __m128d
  10738. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10739. _mm_mask3_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
  10740. const int __R)
  10741. {
  10742. return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
  10743. (__v2df) __A,
  10744. (__v2df) __B,
  10745. (__mmask8) __U, __R);
  10746. }
  10747. extern __inline __m128
  10748. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10749. _mm_mask3_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
  10750. const int __R)
  10751. {
  10752. return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
  10753. (__v4sf) __A,
  10754. (__v4sf) __B,
  10755. (__mmask8) __U, __R);
  10756. }
  10757. extern __inline __m128d
  10758. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10759. _mm_maskz_fmadd_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
  10760. const int __R)
  10761. {
  10762. return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
  10763. (__v2df) __A,
  10764. (__v2df) __B,
  10765. (__mmask8) __U, __R);
  10766. }
  10767. extern __inline __m128
  10768. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10769. _mm_maskz_fmadd_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
  10770. const int __R)
  10771. {
  10772. return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
  10773. (__v4sf) __A,
  10774. (__v4sf) __B,
  10775. (__mmask8) __U, __R);
  10776. }
  10777. extern __inline __m128d
  10778. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10779. _mm_mask_fmsub_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
  10780. const int __R)
  10781. {
  10782. return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
  10783. (__v2df) __A,
  10784. -(__v2df) __B,
  10785. (__mmask8) __U, __R);
  10786. }
  10787. extern __inline __m128
  10788. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  10789. _mm_mask_fmsub_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
  10790. const int __R)
  10791. {
  10792. return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
  10793. (__v4sf) __A,
  10794. -(__v4sf) __B,
  10795. (__mmask8) __U, __R);
  10796. }
  10797. extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
      const int __R)
{
  return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
      (__v2df) __A,
      (__v2df) __B,
      (__mmask8) __U, __R);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
      const int __R)
{
  return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
      (__v4sf) __A,
      (__v4sf) __B,
      (__mmask8) __U, __R);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmsub_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
      const int __R)
{
  return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
      (__v2df) __A,
      -(__v2df) __B,
      (__mmask8) __U, __R);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmsub_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
      const int __R)
{
  return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
      (__v4sf) __A,
      -(__v4sf) __B,
      (__mmask8) __U, __R);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fnmadd_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
      const int __R)
{
  return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
      -(__v2df) __A,
      (__v2df) __B,
      (__mmask8) __U, __R);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fnmadd_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
      const int __R)
{
  return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
      -(__v4sf) __A,
      (__v4sf) __B,
      (__mmask8) __U, __R);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
      const int __R)
{
  return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
      -(__v2df) __A,
      (__v2df) __B,
      (__mmask8) __U, __R);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
      const int __R)
{
  return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
      -(__v4sf) __A,
      (__v4sf) __B,
      (__mmask8) __U, __R);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fnmadd_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
      const int __R)
{
  return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
      -(__v2df) __A,
      (__v2df) __B,
      (__mmask8) __U, __R);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fnmadd_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
      const int __R)
{
  return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
      -(__v4sf) __A,
      (__v4sf) __B,
      (__mmask8) __U, __R);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fnmsub_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
      const int __R)
{
  return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
      -(__v2df) __A,
      -(__v2df) __B,
      (__mmask8) __U, __R);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fnmsub_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
      const int __R)
{
  return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
      -(__v4sf) __A,
      -(__v4sf) __B,
      (__mmask8) __U, __R);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
      const int __R)
{
  return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
      -(__v2df) __A,
      (__v2df) __B,
      (__mmask8) __U, __R);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
      const int __R)
{
  return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
      -(__v4sf) __A,
      (__v4sf) __B,
      (__mmask8) __U, __R);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fnmsub_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
      const int __R)
{
  return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
      -(__v2df) __A,
      -(__v2df) __B,
      (__mmask8) __U, __R);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fnmsub_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
      const int __R)
{
  return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
      -(__v4sf) __A,
      -(__v4sf) __B,
      (__mmask8) __U, __R);
}
#else
#define _mm_mask_fmadd_round_sd(A, U, B, C, R) \
  (__m128d) __builtin_ia32_vfmaddsd3_mask (A, B, C, U, R)
#define _mm_mask_fmadd_round_ss(A, U, B, C, R) \
  (__m128) __builtin_ia32_vfmaddss3_mask (A, B, C, U, R)
#define _mm_mask3_fmadd_round_sd(A, B, C, U, R) \
  (__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, B, C, U, R)
#define _mm_mask3_fmadd_round_ss(A, B, C, U, R) \
  (__m128) __builtin_ia32_vfmaddss3_mask3 (A, B, C, U, R)
#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
  (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, B, C, U, R)
#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
  (__m128) __builtin_ia32_vfmaddss3_maskz (A, B, C, U, R)
#define _mm_mask_fmsub_round_sd(A, U, B, C, R) \
  (__m128d) __builtin_ia32_vfmaddsd3_mask (A, B, -(C), U, R)
#define _mm_mask_fmsub_round_ss(A, U, B, C, R) \
  (__m128) __builtin_ia32_vfmaddss3_mask (A, B, -(C), U, R)
#define _mm_mask3_fmsub_round_sd(A, B, C, U, R) \
  (__m128d) __builtin_ia32_vfmsubsd3_mask3 (A, B, C, U, R)
#define _mm_mask3_fmsub_round_ss(A, B, C, U, R) \
  (__m128) __builtin_ia32_vfmsubss3_mask3 (A, B, C, U, R)
#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
  (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, B, -(C), U, R)
#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
  (__m128) __builtin_ia32_vfmaddss3_maskz (A, B, -(C), U, R)
#define _mm_mask_fnmadd_round_sd(A, U, B, C, R) \
  (__m128d) __builtin_ia32_vfmaddsd3_mask (A, -(B), C, U, R)
#define _mm_mask_fnmadd_round_ss(A, U, B, C, R) \
  (__m128) __builtin_ia32_vfmaddss3_mask (A, -(B), C, U, R)
#define _mm_mask3_fnmadd_round_sd(A, B, C, U, R) \
  (__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, -(B), C, U, R)
#define _mm_mask3_fnmadd_round_ss(A, B, C, U, R) \
  (__m128) __builtin_ia32_vfmaddss3_mask3 (A, -(B), C, U, R)
#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
  (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, -(B), C, U, R)
#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
  (__m128) __builtin_ia32_vfmaddss3_maskz (A, -(B), C, U, R)
#define _mm_mask_fnmsub_round_sd(A, U, B, C, R) \
  (__m128d) __builtin_ia32_vfmaddsd3_mask (A, -(B), -(C), U, R)
#define _mm_mask_fnmsub_round_ss(A, U, B, C, R) \
  (__m128) __builtin_ia32_vfmaddss3_mask (A, -(B), -(C), U, R)
#define _mm_mask3_fnmsub_round_sd(A, B, C, U, R) \
  (__m128d) __builtin_ia32_vfmsubsd3_mask3 (A, -(B), C, U, R)
#define _mm_mask3_fnmsub_round_ss(A, B, C, U, R) \
  (__m128) __builtin_ia32_vfmsubss3_mask3 (A, -(B), C, U, R)
#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
  (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, -(B), -(C), U, R)
#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
  (__m128) __builtin_ia32_vfmaddss3_maskz (A, -(B), -(C), U, R)
#endif
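/* Scalar compare intrinsics that return an int: the comparison predicate
   and the rounding/SAE control are passed as immediate arguments.  */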
#ifdef __OPTIMIZE__
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
{
  return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
}
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
{
  return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
}
#else
#define _mm_comi_round_ss(A, B, C, D)\
  __builtin_ia32_vcomiss(A, B, C, D)
#define _mm_comi_round_sd(A, B, C, D)\
  __builtin_ia32_vcomisd(A, B, C, D)
#endif
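/* Packed double/single-precision square root: plain, merge-masked (_mask)
   and zero-masked (_maskz) forms.  */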
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sqrt_pd (__m512d __A)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
      (__v8df)
      _mm512_undefined_pd (),
      (__mmask8) -1,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
      (__v8df) __W,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
      (__v8df)
      _mm512_setzero_pd (),
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sqrt_ps (__m512 __A)
{
  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
      (__v16sf)
      _mm512_undefined_ps (),
      (__mmask16) -1,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
      (__v16sf) __W,
      (__mmask16) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
      (__v16sf)
      _mm512_setzero_ps (),
      (__mmask16) __U,
      _MM_FROUND_CUR_DIRECTION);
}
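/* Packed and scalar addition with merge (_mask) and zeroing (_maskz)
   masking.  */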
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_pd (__m512d __A, __m512d __B)
{
  return (__m512d) ((__v8df)__A + (__v8df)__B);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
      (__v8df) __B,
      (__v8df) __W,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
      (__v8df) __B,
      (__v8df)
      _mm512_setzero_pd (),
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_ps (__m512 __A, __m512 __B)
{
  return (__m512) ((__v16sf)__A + (__v16sf)__B);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf) __W,
      (__mmask16) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf)
      _mm512_setzero_ps (),
      (__mmask16) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
      (__v2df) __B,
      (__v2df) __W,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
      (__v2df) __B,
      (__v2df)
      _mm_setzero_pd (),
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
      (__v4sf) __B,
      (__v4sf) __W,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
      (__v4sf) __B,
      (__v4sf)
      _mm_setzero_ps (),
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
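/* Packed and scalar subtraction with merge and zeroing masking.  */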
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_pd (__m512d __A, __m512d __B)
{
  return (__m512d) ((__v8df)__A - (__v8df)__B);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
      (__v8df) __B,
      (__v8df) __W,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
      (__v8df) __B,
      (__v8df)
      _mm512_setzero_pd (),
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_ps (__m512 __A, __m512 __B)
{
  return (__m512) ((__v16sf)__A - (__v16sf)__B);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf) __W,
      (__mmask16) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf)
      _mm512_setzero_ps (),
      (__mmask16) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
      (__v2df) __B,
      (__v2df) __W,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
      (__v2df) __B,
      (__v2df)
      _mm_setzero_pd (),
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
      (__v4sf) __B,
      (__v4sf) __W,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
      (__v4sf) __B,
      (__v4sf)
      _mm_setzero_ps (),
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
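/* Packed and scalar multiplication with merge and zeroing masking.  */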
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_pd (__m512d __A, __m512d __B)
{
  return (__m512d) ((__v8df)__A * (__v8df)__B);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
      (__v8df) __B,
      (__v8df) __W,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
      (__v8df) __B,
      (__v8df)
      _mm512_setzero_pd (),
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_ps (__m512 __A, __m512 __B)
{
  return (__m512) ((__v16sf)__A * (__v16sf)__B);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf) __W,
      (__mmask16) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf)
      _mm512_setzero_ps (),
      (__mmask16) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_mul_sd (__m128d __W, __mmask8 __U, __m128d __A,
      __m128d __B)
{
  return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
      (__v2df) __B,
      (__v2df) __W,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_mul_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
      (__v2df) __B,
      (__v2df)
      _mm_setzero_pd (),
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_mul_ss (__m128 __W, __mmask8 __U, __m128 __A,
      __m128 __B)
{
  return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
      (__v4sf) __B,
      (__v4sf) __W,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_mul_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
      (__v4sf) __B,
      (__v4sf)
      _mm_setzero_ps (),
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
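/* Packed and scalar division with merge and zeroing masking.  */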
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_div_pd (__m512d __M, __m512d __V)
{
  return (__m512d) ((__v8df)__M / (__v8df)__V);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V)
{
  return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
      (__v8df) __V,
      (__v8df) __W,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V)
{
  return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
      (__v8df) __V,
      (__v8df)
      _mm512_setzero_pd (),
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_div_ps (__m512 __A, __m512 __B)
{
  return (__m512) ((__v16sf)__A / (__v16sf)__B);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf) __W,
      (__mmask16) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf)
      _mm512_setzero_ps (),
      (__mmask16) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_div_sd (__m128d __W, __mmask8 __U, __m128d __A,
      __m128d __B)
{
  return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
      (__v2df) __B,
      (__v2df) __W,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_div_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
      (__v2df) __B,
      (__v2df)
      _mm_setzero_pd (),
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_div_ss (__m128 __W, __mmask8 __U, __m128 __A,
      __m128 __B)
{
  return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
      (__v4sf) __B,
      (__v4sf) __W,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_div_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
      (__v4sf) __B,
      (__v4sf)
      _mm_setzero_ps (),
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
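/* Packed and scalar maximum/minimum with merge and zeroing masking.  */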
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_max_pd (__m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
      (__v8df) __B,
      (__v8df)
      _mm512_undefined_pd (),
      (__mmask8) -1,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
      (__v8df) __B,
      (__v8df) __W,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
      (__v8df) __B,
      (__v8df)
      _mm512_setzero_pd (),
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_max_ps (__m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf)
      _mm512_undefined_ps (),
      (__mmask16) -1,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf) __W,
      (__mmask16) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf)
      _mm512_setzero_ps (),
      (__mmask16) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_max_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
      (__v2df) __B,
      (__v2df) __W,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_max_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
      (__v2df) __B,
      (__v2df)
      _mm_setzero_pd (),
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_max_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
      (__v4sf) __B,
      (__v4sf) __W,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_max_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
      (__v4sf) __B,
      (__v4sf)
      _mm_setzero_ps (),
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_min_pd (__m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
      (__v8df) __B,
      (__v8df)
      _mm512_undefined_pd (),
      (__mmask8) -1,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
      (__v8df) __B,
      (__v8df) __W,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
      (__v8df) __B,
      (__v8df)
      _mm512_setzero_pd (),
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_min_ps (__m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf)
      _mm512_undefined_ps (),
      (__mmask16) -1,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf) __W,
      (__mmask16) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf)
      _mm512_setzero_ps (),
      (__mmask16) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_min_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
      (__v2df) __B,
      (__v2df) __W,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_min_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
      (__v2df) __B,
      (__v2df)
      _mm_setzero_pd (),
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_min_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
      (__v4sf) __B,
      (__v4sf) __W,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_min_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
      (__v4sf) __B,
      (__v4sf)
      _mm_setzero_ps (),
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
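/* VSCALEF: scale the elements of the first operand by powers of two
   derived from the second operand.  */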
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_scalef_pd (__m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
      (__v8df) __B,
      (__v8df)
      _mm512_undefined_pd (),
      (__mmask8) -1,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
      (__v8df) __B,
      (__v8df) __W,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
      (__v8df) __B,
      (__v8df)
      _mm512_setzero_pd (),
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_scalef_ps (__m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf)
      _mm512_undefined_ps (),
      (__mmask16) -1,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf) __W,
      (__mmask16) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf)
      _mm512_setzero_ps (),
      (__mmask16) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_scalef_sd (__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
      (__v2df) __B,
      (__v2df)
      _mm_setzero_pd (),
      (__mmask8) -1,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_scalef_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
      (__v4sf) __B,
      (__v4sf)
      _mm_setzero_ps (),
      (__mmask8) -1,
      _MM_FROUND_CUR_DIRECTION);
}
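/* 512-bit fused multiply-add family (fmadd, fmsub, fmaddsub, fmsubadd,
   fnmadd, fnmsub) with merge (_mask), mask3 and zeroing (_maskz) forms.  */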
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
      (__v8df) __B,
      (__v8df) __C,
      (__mmask8) -1,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
      (__v8df) __B,
      (__v8df) __C,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
      (__v8df) __B,
      (__v8df) __C,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
      (__v8df) __B,
      (__v8df) __C,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf) __C,
      (__mmask16) -1,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf) __C,
      (__mmask16) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf) __C,
      (__mmask16) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf) __C,
      (__mmask16) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
      (__v8df) __B,
      (__v8df) __C,
      (__mmask8) -1,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
      (__v8df) __B,
      (__v8df) __C,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
      (__v8df) __B,
      (__v8df) __C,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmsubpd512_maskz ((__v8df) __A,
      (__v8df) __B,
      (__v8df) __C,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf) __C,
      (__mmask16) -1,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf) __C,
      (__mmask16) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf) __C,
      (__mmask16) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmsubps512_maskz ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf) __C,
      (__mmask16) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
      (__v8df) __B,
      (__v8df) __C,
      (__mmask8) -1,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
      (__v8df) __B,
      (__v8df) __C,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
      (__v8df) __B,
      (__v8df) __C,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
      (__v8df) __B,
      (__v8df) __C,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf) __C,
      (__mmask16) -1,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf) __C,
      (__mmask16) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf) __C,
      (__mmask16) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf) __C,
      (__mmask16) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
      (__v8df) __B,
      -(__v8df) __C,
      (__mmask8) -1,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
      (__v8df) __B,
      -(__v8df) __C,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
      (__v8df) __B,
      (__v8df) __C,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
      (__v8df) __B,
      -(__v8df) __C,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
      (__v16sf) __B,
      -(__v16sf) __C,
      (__mmask16) -1,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
      (__v16sf) __B,
      -(__v16sf) __C,
      (__mmask16) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf) __C,
      (__mmask16) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
      (__v16sf) __B,
      -(__v16sf) __C,
      (__mmask16) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
      (__v8df) __B,
      (__v8df) __C,
      (__mmask8) -1,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
      (__v8df) __B,
      (__v8df) __C,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfnmaddpd512_mask3 ((__v8df) __A,
      (__v8df) __B,
      (__v8df) __C,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfnmaddpd512_maskz ((__v8df) __A,
      (__v8df) __B,
      (__v8df) __C,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf) __C,
      (__mmask16) -1,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf) __C,
      (__mmask16) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfnmaddps512_mask3 ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf) __C,
      (__mmask16) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfnmaddps512_maskz ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf) __C,
      (__mmask16) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
      (__v8df) __B,
      (__v8df) __C,
      (__mmask8) -1,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
      (__v8df) __B,
      (__v8df) __C,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
      (__v8df) __B,
      (__v8df) __C,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfnmsubpd512_maskz ((__v8df) __A,
      (__v8df) __B,
      (__v8df) __C,
      (__mmask8) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf) __C,
      (__mmask16) -1,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf) __C,
      (__mmask16) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf) __C,
      (__mmask16) __U,
      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfnmsubps512_maskz ((__v16sf) __A,
      (__v16sf) __B,
      (__v16sf) __C,
      (__mmask16) __U,
      _MM_FROUND_CUR_DIRECTION);
}
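/* Conversions from packed double/single precision to 32-bit (un)signed
   integers: the cvtt forms truncate, the cvt forms use the current
   rounding mode.  */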
  12250. extern __inline __m256i
  12251. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12252. _mm512_cvttpd_epi32 (__m512d __A)
  12253. {
  12254. return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
  12255. (__v8si)
  12256. _mm256_undefined_si256 (),
  12257. (__mmask8) -1,
  12258. _MM_FROUND_CUR_DIRECTION);
  12259. }
  12260. extern __inline __m256i
  12261. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12262. _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
  12263. {
  12264. return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
  12265. (__v8si) __W,
  12266. (__mmask8) __U,
  12267. _MM_FROUND_CUR_DIRECTION);
  12268. }
  12269. extern __inline __m256i
  12270. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12271. _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
  12272. {
  12273. return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
  12274. (__v8si)
  12275. _mm256_setzero_si256 (),
  12276. (__mmask8) __U,
  12277. _MM_FROUND_CUR_DIRECTION);
  12278. }
  12279. extern __inline __m256i
  12280. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12281. _mm512_cvttpd_epu32 (__m512d __A)
  12282. {
  12283. return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
  12284. (__v8si)
  12285. _mm256_undefined_si256 (),
  12286. (__mmask8) -1,
  12287. _MM_FROUND_CUR_DIRECTION);
  12288. }
  12289. extern __inline __m256i
  12290. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12291. _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
  12292. {
  12293. return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
  12294. (__v8si) __W,
  12295. (__mmask8) __U,
  12296. _MM_FROUND_CUR_DIRECTION);
  12297. }
  12298. extern __inline __m256i
  12299. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12300. _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
  12301. {
  12302. return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
  12303. (__v8si)
  12304. _mm256_setzero_si256 (),
  12305. (__mmask8) __U,
  12306. _MM_FROUND_CUR_DIRECTION);
  12307. }
  12308. extern __inline __m256i
  12309. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12310. _mm512_cvtpd_epi32 (__m512d __A)
  12311. {
  12312. return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
  12313. (__v8si)
  12314. _mm256_undefined_si256 (),
  12315. (__mmask8) -1,
  12316. _MM_FROUND_CUR_DIRECTION);
  12317. }
  12318. extern __inline __m256i
  12319. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12320. _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
  12321. {
  12322. return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
  12323. (__v8si) __W,
  12324. (__mmask8) __U,
  12325. _MM_FROUND_CUR_DIRECTION);
  12326. }
  12327. extern __inline __m256i
  12328. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12329. _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
  12330. {
  12331. return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
  12332. (__v8si)
  12333. _mm256_setzero_si256 (),
  12334. (__mmask8) __U,
  12335. _MM_FROUND_CUR_DIRECTION);
  12336. }
  12337. extern __inline __m256i
  12338. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12339. _mm512_cvtpd_epu32 (__m512d __A)
  12340. {
  12341. return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
  12342. (__v8si)
  12343. _mm256_undefined_si256 (),
  12344. (__mmask8) -1,
  12345. _MM_FROUND_CUR_DIRECTION);
  12346. }
  12347. extern __inline __m256i
  12348. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12349. _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
  12350. {
  12351. return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
  12352. (__v8si) __W,
  12353. (__mmask8) __U,
  12354. _MM_FROUND_CUR_DIRECTION);
  12355. }
  12356. extern __inline __m256i
  12357. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12358. _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
  12359. {
  12360. return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
  12361. (__v8si)
  12362. _mm256_setzero_si256 (),
  12363. (__mmask8) __U,
  12364. _MM_FROUND_CUR_DIRECTION);
  12365. }
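/* Illustrative usage sketch, not part of the original header: the cvtt*
   forms truncate toward zero while the cvt* forms round according to the
   current MXCSR rounding mode.  The helper name is hypothetical; assumes
   <immintrin.h> with -mavx512f.  */
static inline void
example_pd_to_epi32 (const double *src, int *truncated, int *rounded)
{
  __m512d v = _mm512_loadu_pd (src);		/* eight doubles	*/
  __m256i t = _mm512_cvttpd_epi32 (v);		/* truncate toward zero	*/
  __m256i r = _mm512_cvtpd_epi32 (v);		/* round per MXCSR	*/
  _mm256_storeu_si256 ((__m256i *) truncated, t);
  _mm256_storeu_si256 ((__m256i *) rounded, r);
}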
  12366. extern __inline __m512i
  12367. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12368. _mm512_cvttps_epi32 (__m512 __A)
  12369. {
  12370. return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
  12371. (__v16si)
  12372. _mm512_undefined_epi32 (),
  12373. (__mmask16) -1,
  12374. _MM_FROUND_CUR_DIRECTION);
  12375. }
  12376. extern __inline __m512i
  12377. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12378. _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
  12379. {
  12380. return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
  12381. (__v16si) __W,
  12382. (__mmask16) __U,
  12383. _MM_FROUND_CUR_DIRECTION);
  12384. }
  12385. extern __inline __m512i
  12386. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12387. _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
  12388. {
  12389. return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
  12390. (__v16si)
  12391. _mm512_setzero_si512 (),
  12392. (__mmask16) __U,
  12393. _MM_FROUND_CUR_DIRECTION);
  12394. }
  12395. extern __inline __m512i
  12396. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12397. _mm512_cvttps_epu32 (__m512 __A)
  12398. {
  12399. return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
  12400. (__v16si)
  12401. _mm512_undefined_epi32 (),
  12402. (__mmask16) -1,
  12403. _MM_FROUND_CUR_DIRECTION);
  12404. }
  12405. extern __inline __m512i
  12406. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12407. _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
  12408. {
  12409. return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
  12410. (__v16si) __W,
  12411. (__mmask16) __U,
  12412. _MM_FROUND_CUR_DIRECTION);
  12413. }
  12414. extern __inline __m512i
  12415. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12416. _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
  12417. {
  12418. return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
  12419. (__v16si)
  12420. _mm512_setzero_si512 (),
  12421. (__mmask16) __U,
  12422. _MM_FROUND_CUR_DIRECTION);
  12423. }
  12424. extern __inline __m512i
  12425. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12426. _mm512_cvtps_epi32 (__m512 __A)
  12427. {
  12428. return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
  12429. (__v16si)
  12430. _mm512_undefined_epi32 (),
  12431. (__mmask16) -1,
  12432. _MM_FROUND_CUR_DIRECTION);
  12433. }
  12434. extern __inline __m512i
  12435. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12436. _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
  12437. {
  12438. return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
  12439. (__v16si) __W,
  12440. (__mmask16) __U,
  12441. _MM_FROUND_CUR_DIRECTION);
  12442. }
  12443. extern __inline __m512i
  12444. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12445. _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
  12446. {
  12447. return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
  12448. (__v16si)
  12449. _mm512_setzero_si512 (),
  12450. (__mmask16) __U,
  12451. _MM_FROUND_CUR_DIRECTION);
  12452. }
  12453. extern __inline __m512i
  12454. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12455. _mm512_cvtps_epu32 (__m512 __A)
  12456. {
  12457. return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
  12458. (__v16si)
  12459. _mm512_undefined_epi32 (),
  12460. (__mmask16) -1,
  12461. _MM_FROUND_CUR_DIRECTION);
  12462. }
  12463. extern __inline __m512i
  12464. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12465. _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
  12466. {
  12467. return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
  12468. (__v16si) __W,
  12469. (__mmask16) __U,
  12470. _MM_FROUND_CUR_DIRECTION);
  12471. }
  12472. extern __inline __m512i
  12473. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12474. _mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
  12475. {
  12476. return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
  12477. (__v16si)
  12478. _mm512_setzero_si512 (),
  12479. (__mmask16) __U,
  12480. _MM_FROUND_CUR_DIRECTION);
  12481. }
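/* Illustrative sketch, not part of the original header: the maskz form
   zeroes lanes whose mask bit is clear instead of keeping a previous
   value.  Here only the even lanes are converted; assumes -mavx512f.  */
static inline __m512i
example_cvtps_epu32_even_lanes (__m512 v)
{
  return _mm512_maskz_cvtps_epu32 ((__mmask16) 0x5555, v);
}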
  12482. extern __inline double
  12483. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12484. _mm512_cvtsd_f64 (__m512d __A)
  12485. {
  12486. return __A[0];
  12487. }
  12488. extern __inline float
  12489. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12490. _mm512_cvtss_f32 (__m512 __A)
  12491. {
  12492. return __A[0];
  12493. }
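/* Illustrative sketch, not part of the original header: _mm512_cvtsd_f64
   and _mm512_cvtss_f32 simply return element 0, e.g. as the epilogue of a
   manual horizontal reduction.  Assumes -mavx512f.  */
static inline double
example_first_lane (__m512d acc)
{
  return _mm512_cvtsd_f64 (acc);	/* same value as acc[0] */
}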
  12494. #ifdef __x86_64__
  12495. extern __inline __m128
  12496. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12497. _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
  12498. {
  12499. return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
  12500. _MM_FROUND_CUR_DIRECTION);
  12501. }
  12502. extern __inline __m128d
  12503. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12504. _mm_cvtu64_sd (__m128d __A, unsigned long long __B)
  12505. {
  12506. return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
  12507. _MM_FROUND_CUR_DIRECTION);
  12508. }
  12509. #endif
  12510. extern __inline __m128
  12511. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12512. _mm_cvtu32_ss (__m128 __A, unsigned __B)
  12513. {
  12514. return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
  12515. _MM_FROUND_CUR_DIRECTION);
  12516. }
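/* Illustrative sketch, not part of the original header: _mm_cvtu32_ss
   converts the full unsigned 32-bit range correctly, whereas the SSE
   _mm_cvtsi32_ss intrinsic interprets its argument as signed.
   Assumes -mavx512f.  */
static inline float
example_u32_to_float (unsigned x)
{
  return _mm_cvtss_f32 (_mm_cvtu32_ss (_mm_setzero_ps (), x));
}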
  12517. extern __inline __m512
  12518. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12519. _mm512_cvtepi32_ps (__m512i __A)
  12520. {
  12521. return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
  12522. (__v16sf)
  12523. _mm512_undefined_ps (),
  12524. (__mmask16) -1,
  12525. _MM_FROUND_CUR_DIRECTION);
  12526. }
  12527. extern __inline __m512
  12528. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12529. _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
  12530. {
  12531. return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
  12532. (__v16sf) __W,
  12533. (__mmask16) __U,
  12534. _MM_FROUND_CUR_DIRECTION);
  12535. }
  12536. extern __inline __m512
  12537. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12538. _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
  12539. {
  12540. return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
  12541. (__v16sf)
  12542. _mm512_setzero_ps (),
  12543. (__mmask16) __U,
  12544. _MM_FROUND_CUR_DIRECTION);
  12545. }
  12546. extern __inline __m512
  12547. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12548. _mm512_cvtepu32_ps (__m512i __A)
  12549. {
  12550. return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
  12551. (__v16sf)
  12552. _mm512_undefined_ps (),
  12553. (__mmask16) -1,
  12554. _MM_FROUND_CUR_DIRECTION);
  12555. }
  12556. extern __inline __m512
  12557. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12558. _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
  12559. {
  12560. return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
  12561. (__v16sf) __W,
  12562. (__mmask16) __U,
  12563. _MM_FROUND_CUR_DIRECTION);
  12564. }
  12565. extern __inline __m512
  12566. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12567. _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
  12568. {
  12569. return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
  12570. (__v16sf)
  12571. _mm512_setzero_ps (),
  12572. (__mmask16) __U,
  12573. _MM_FROUND_CUR_DIRECTION);
  12574. }
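/* Illustrative sketch, not part of the original header: convert unsigned
   32-bit counters to single precision and normalise them, something the
   signed cvtepi32 path would get wrong for counts above INT_MAX.
   Assumes -mavx512f.  */
static inline __m512
example_normalize_counts (__m512i counts, unsigned total)
{
  __m512 c = _mm512_cvtepu32_ps (counts);
  return _mm512_div_ps (c, _mm512_set1_ps ((float) total));
}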
  12575. #ifdef __OPTIMIZE__
  12576. extern __inline __m512d
  12577. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12578. _mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm)
  12579. {
  12580. return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
  12581. (__v8df) __B,
  12582. (__v8di) __C,
  12583. __imm,
  12584. (__mmask8) -1,
  12585. _MM_FROUND_CUR_DIRECTION);
  12586. }
  12587. extern __inline __m512d
  12588. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12589. _mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
  12590. __m512i __C, const int __imm)
  12591. {
  12592. return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
  12593. (__v8df) __B,
  12594. (__v8di) __C,
  12595. __imm,
  12596. (__mmask8) __U,
  12597. _MM_FROUND_CUR_DIRECTION);
  12598. }
  12599. extern __inline __m512d
  12600. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12601. _mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
  12602. __m512i __C, const int __imm)
  12603. {
  12604. return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
  12605. (__v8df) __B,
  12606. (__v8di) __C,
  12607. __imm,
  12608. (__mmask8) __U,
  12609. _MM_FROUND_CUR_DIRECTION);
  12610. }
  12611. extern __inline __m512
  12612. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12613. _mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm)
  12614. {
  12615. return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
  12616. (__v16sf) __B,
  12617. (__v16si) __C,
  12618. __imm,
  12619. (__mmask16) -1,
  12620. _MM_FROUND_CUR_DIRECTION);
  12621. }
  12622. extern __inline __m512
  12623. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12624. _mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
  12625. __m512i __C, const int __imm)
  12626. {
  12627. return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
  12628. (__v16sf) __B,
  12629. (__v16si) __C,
  12630. __imm,
  12631. (__mmask16) __U,
  12632. _MM_FROUND_CUR_DIRECTION);
  12633. }
  12634. extern __inline __m512
  12635. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12636. _mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
  12637. __m512i __C, const int __imm)
  12638. {
  12639. return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
  12640. (__v16sf) __B,
  12641. (__v16si) __C,
  12642. __imm,
  12643. (__mmask16) __U,
  12644. _MM_FROUND_CUR_DIRECTION);
  12645. }
  12646. extern __inline __m128d
  12647. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12648. _mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
  12649. {
  12650. return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
  12651. (__v2df) __B,
  12652. (__v2di) __C, __imm,
  12653. (__mmask8) -1,
  12654. _MM_FROUND_CUR_DIRECTION);
  12655. }
  12656. extern __inline __m128d
  12657. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12658. _mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
  12659. __m128i __C, const int __imm)
  12660. {
  12661. return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
  12662. (__v2df) __B,
  12663. (__v2di) __C, __imm,
  12664. (__mmask8) __U,
  12665. _MM_FROUND_CUR_DIRECTION);
  12666. }
  12667. extern __inline __m128d
  12668. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12669. _mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
  12670. __m128i __C, const int __imm)
  12671. {
  12672. return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
  12673. (__v2df) __B,
  12674. (__v2di) __C,
  12675. __imm,
  12676. (__mmask8) __U,
  12677. _MM_FROUND_CUR_DIRECTION);
  12678. }
  12679. extern __inline __m128
  12680. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12681. _mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
  12682. {
  12683. return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
  12684. (__v4sf) __B,
  12685. (__v4si) __C, __imm,
  12686. (__mmask8) -1,
  12687. _MM_FROUND_CUR_DIRECTION);
  12688. }
  12689. extern __inline __m128
  12690. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12691. _mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
  12692. __m128i __C, const int __imm)
  12693. {
  12694. return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
  12695. (__v4sf) __B,
  12696. (__v4si) __C, __imm,
  12697. (__mmask8) __U,
  12698. _MM_FROUND_CUR_DIRECTION);
  12699. }
  12700. extern __inline __m128
  12701. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12702. _mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
  12703. __m128i __C, const int __imm)
  12704. {
  12705. return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
  12706. (__v4sf) __B,
  12707. (__v4si) __C, __imm,
  12708. (__mmask8) __U,
  12709. _MM_FROUND_CUR_DIRECTION);
  12710. }
  12711. #else
  12712. #define _mm512_fixupimm_pd(X, Y, Z, C) \
  12713. ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
  12714. (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
  12715. (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
  12716. #define _mm512_mask_fixupimm_pd(X, U, Y, Z, C) \
  12717. ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
  12718. (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
  12719. (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
  12720. #define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C) \
  12721. ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
  12722. (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
  12723. (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
  12724. #define _mm512_fixupimm_ps(X, Y, Z, C) \
  12725. ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
  12726. (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
  12727. (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
  12728. #define _mm512_mask_fixupimm_ps(X, U, Y, Z, C) \
  12729. ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
  12730. (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
  12731. (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
  12732. #define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C) \
  12733. ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
  12734. (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
  12735. (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
  12736. #define _mm_fixupimm_sd(X, Y, Z, C) \
  12737. ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
  12738. (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
  12739. (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
  12740. #define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \
  12741. ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
  12742. (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
  12743. (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
  12744. #define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \
  12745. ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
  12746. (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
  12747. (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
  12748. #define _mm_fixupimm_ss(X, Y, Z, C) \
  12749. ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
  12750. (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
  12751. (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
  12752. #define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \
  12753. ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
  12754. (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
  12755. (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
  12756. #define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \
  12757. ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
  12758. (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
  12759. (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
  12760. #endif
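/* Illustrative sketch, not part of the original header: a typical
   fixupimm pattern is "replace NaNs, keep everything else".  Each 4-bit
   field of the per-element table in the third operand selects a response
   for one input class; the constants below assume the VFIXUPIMMPD
   encoding from the Intel SDM (class 0 = QNaN, class 1 = SNaN, token 8 =
   +0.0, token 0 = keep the first operand) and should be verified against
   it.  Assumes -mavx512f.  */
static inline __m512d
example_replace_nan_with_zero (__m512d x)
{
  const __m512i table = _mm512_set1_epi64 (0x88);  /* QNaN, SNaN -> +0.0 */
  return _mm512_fixupimm_pd (x, x, table, 0);	   /* imm 0: no faults   */
}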
  12761. #ifdef __x86_64__
  12762. extern __inline unsigned long long
  12763. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12764. _mm_cvtss_u64 (__m128 __A)
  12765. {
  12766. return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
  12767. __A,
  12768. _MM_FROUND_CUR_DIRECTION);
  12769. }
  12770. extern __inline unsigned long long
  12771. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12772. _mm_cvttss_u64 (__m128 __A)
  12773. {
  12774. return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
  12775. __A,
  12776. _MM_FROUND_CUR_DIRECTION);
  12777. }
  12778. extern __inline long long
  12779. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12780. _mm_cvttss_i64 (__m128 __A)
  12781. {
  12782. return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
  12783. _MM_FROUND_CUR_DIRECTION);
  12784. }
  12785. #endif /* __x86_64__ */
  12786. extern __inline int
  12787. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12788. _mm512_cvtsi512_si32 (__m512i __A)
  12789. {
  12790. __v16si __B = (__v16si) __A;
  12791. return __B[0];
  12792. }
  12793. extern __inline unsigned
  12794. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12795. _mm_cvtss_u32 (__m128 __A)
  12796. {
  12797. return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
  12798. _MM_FROUND_CUR_DIRECTION);
  12799. }
  12800. extern __inline unsigned
  12801. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12802. _mm_cvttss_u32 (__m128 __A)
  12803. {
  12804. return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
  12805. _MM_FROUND_CUR_DIRECTION);
  12806. }
  12807. extern __inline int
  12808. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12809. _mm_cvttss_i32 (__m128 __A)
  12810. {
  12811. return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
  12812. _MM_FROUND_CUR_DIRECTION);
  12813. }
  12814. extern __inline int
  12815. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12816. _mm_cvtsd_i32 (__m128d __A)
  12817. {
  12818. return (int) __builtin_ia32_cvtsd2si ((__v2df) __A);
  12819. }
  12820. extern __inline int
  12821. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12822. _mm_cvtss_i32 (__m128 __A)
  12823. {
  12824. return (int) __builtin_ia32_cvtss2si ((__v4sf) __A);
  12825. }
  12826. extern __inline __m128d
  12827. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12828. _mm_cvti32_sd (__m128d __A, int __B)
  12829. {
  12830. return (__m128d) __builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
  12831. }
  12832. extern __inline __m128
  12833. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12834. _mm_cvti32_ss (__m128 __A, int __B)
  12835. {
  12836. return (__m128) __builtin_ia32_cvtsi2ss ((__v4sf) __A, __B);
  12837. }
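/* Illustrative sketch, not part of the original header: the cvt* scalar
   conversions round according to MXCSR (round-to-nearest-even by default)
   while the cvtt* forms always truncate toward zero, so e.g. 2.7 becomes
   3 and 2 respectively.  Assumes -mavx512f.  */
static inline void
example_scalar_float_to_int (float x, int *rounded, int *truncated)
{
  __m128 v = _mm_set_ss (x);
  *rounded = _mm_cvtss_i32 (v);
  *truncated = _mm_cvttss_i32 (v);
}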
  12838. #ifdef __x86_64__
  12839. extern __inline unsigned long long
  12840. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12841. _mm_cvtsd_u64 (__m128d __A)
  12842. {
  12843. return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
  12844. __A,
  12845. _MM_FROUND_CUR_DIRECTION);
  12846. }
  12847. extern __inline unsigned long long
  12848. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12849. _mm_cvttsd_u64 (__m128d __A)
  12850. {
  12851. return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
  12852. __A,
  12853. _MM_FROUND_CUR_DIRECTION);
  12854. }
  12855. extern __inline long long
  12856. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12857. _mm_cvttsd_i64 (__m128d __A)
  12858. {
  12859. return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
  12860. _MM_FROUND_CUR_DIRECTION);
  12861. }
  12862. extern __inline long long
  12863. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12864. _mm_cvtsd_i64 (__m128d __A)
  12865. {
  12866. return (long long) __builtin_ia32_cvtsd2si64 ((__v2df) __A);
  12867. }
  12868. extern __inline long long
  12869. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12870. _mm_cvtss_i64 (__m128 __A)
  12871. {
  12872. return (long long) __builtin_ia32_cvtss2si64 ((__v4sf) __A);
  12873. }
  12874. extern __inline __m128d
  12875. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12876. _mm_cvti64_sd (__m128d __A, long long __B)
  12877. {
  12878. return (__m128d) __builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
  12879. }
  12880. extern __inline __m128
  12881. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12882. _mm_cvti64_ss (__m128 __A, long long __B)
  12883. {
  12884. return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B);
  12885. }
  12886. #endif /* __x86_64__ */
  12887. extern __inline unsigned
  12888. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12889. _mm_cvtsd_u32 (__m128d __A)
  12890. {
  12891. return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
  12892. _MM_FROUND_CUR_DIRECTION);
  12893. }
  12894. extern __inline unsigned
  12895. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12896. _mm_cvttsd_u32 (__m128d __A)
  12897. {
  12898. return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
  12899. _MM_FROUND_CUR_DIRECTION);
  12900. }
  12901. extern __inline int
  12902. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12903. _mm_cvttsd_i32 (__m128d __A)
  12904. {
  12905. return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
  12906. _MM_FROUND_CUR_DIRECTION);
  12907. }
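/* Illustrative sketch, not part of the original header: the unsigned
   scalar conversions cover the whole unsigned range, e.g. 3e9 converts
   cleanly even though it exceeds INT_MAX.  Assumes -mavx512f.  */
static inline unsigned
example_double_to_u32 (double x)
{
  return _mm_cvttsd_u32 (_mm_set_sd (x));	/* truncating conversion */
}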
  12908. extern __inline __m512d
  12909. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12910. _mm512_cvtps_pd (__m256 __A)
  12911. {
  12912. return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
  12913. (__v8df)
  12914. _mm512_undefined_pd (),
  12915. (__mmask8) -1,
  12916. _MM_FROUND_CUR_DIRECTION);
  12917. }
  12918. extern __inline __m512d
  12919. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12920. _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
  12921. {
  12922. return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
  12923. (__v8df) __W,
  12924. (__mmask8) __U,
  12925. _MM_FROUND_CUR_DIRECTION);
  12926. }
  12927. extern __inline __m512d
  12928. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12929. _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
  12930. {
  12931. return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
  12932. (__v8df)
  12933. _mm512_setzero_pd (),
  12934. (__mmask8) __U,
  12935. _MM_FROUND_CUR_DIRECTION);
  12936. }
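/* Illustrative sketch, not part of the original header: widen eight floats
   to doubles before an accumulation that needs the extra precision.
   Assumes -mavx512f.  */
static inline __m512d
example_widen_ps_to_pd (const float *src)
{
  return _mm512_cvtps_pd (_mm256_loadu_ps (src));
}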
  12937. extern __inline __m512
  12938. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12939. _mm512_cvtph_ps (__m256i __A)
  12940. {
  12941. return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
  12942. (__v16sf)
  12943. _mm512_undefined_ps (),
  12944. (__mmask16) -1,
  12945. _MM_FROUND_CUR_DIRECTION);
  12946. }
  12947. extern __inline __m512
  12948. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12949. _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
  12950. {
  12951. return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
  12952. (__v16sf) __W,
  12953. (__mmask16) __U,
  12954. _MM_FROUND_CUR_DIRECTION);
  12955. }
  12956. extern __inline __m512
  12957. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12958. _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
  12959. {
  12960. return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
  12961. (__v16sf)
  12962. _mm512_setzero_ps (),
  12963. (__mmask16) __U,
  12964. _MM_FROUND_CUR_DIRECTION);
  12965. }
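/* Illustrative sketch, not part of the original header: expand sixteen
   IEEE half-precision values, stored as 16-bit integers, to single
   precision.  Assumes -mavx512f.  */
static inline __m512
example_load_fp16 (const void *src)
{
  return _mm512_cvtph_ps (_mm256_loadu_si256 ((const __m256i *) src));
}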
  12966. extern __inline __m256
  12967. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12968. _mm512_cvtpd_ps (__m512d __A)
  12969. {
  12970. return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
  12971. (__v8sf)
  12972. _mm256_undefined_ps (),
  12973. (__mmask8) -1,
  12974. _MM_FROUND_CUR_DIRECTION);
  12975. }
  12976. extern __inline __m256
  12977. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12978. _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
  12979. {
  12980. return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
  12981. (__v8sf) __W,
  12982. (__mmask8) __U,
  12983. _MM_FROUND_CUR_DIRECTION);
  12984. }
  12985. extern __inline __m256
  12986. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12987. _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
  12988. {
  12989. return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
  12990. (__v8sf)
  12991. _mm256_setzero_ps (),
  12992. (__mmask8) __U,
  12993. _MM_FROUND_CUR_DIRECTION);
  12994. }
  12995. #ifdef __OPTIMIZE__
  12996. extern __inline __m512
  12997. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  12998. _mm512_getexp_ps (__m512 __A)
  12999. {
  13000. return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
  13001. (__v16sf)
  13002. _mm512_undefined_ps (),
  13003. (__mmask16) -1,
  13004. _MM_FROUND_CUR_DIRECTION);
  13005. }
  13006. extern __inline __m512
  13007. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13008. _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
  13009. {
  13010. return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
  13011. (__v16sf) __W,
  13012. (__mmask16) __U,
  13013. _MM_FROUND_CUR_DIRECTION);
  13014. }
  13015. extern __inline __m512
  13016. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13017. _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
  13018. {
  13019. return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
  13020. (__v16sf)
  13021. _mm512_setzero_ps (),
  13022. (__mmask16) __U,
  13023. _MM_FROUND_CUR_DIRECTION);
  13024. }
  13025. extern __inline __m512d
  13026. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13027. _mm512_getexp_pd (__m512d __A)
  13028. {
  13029. return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
  13030. (__v8df)
  13031. _mm512_undefined_pd (),
  13032. (__mmask8) -1,
  13033. _MM_FROUND_CUR_DIRECTION);
  13034. }
  13035. extern __inline __m512d
  13036. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13037. _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
  13038. {
  13039. return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
  13040. (__v8df) __W,
  13041. (__mmask8) __U,
  13042. _MM_FROUND_CUR_DIRECTION);
  13043. }
  13044. extern __inline __m512d
  13045. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13046. _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
  13047. {
  13048. return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
  13049. (__v8df)
  13050. _mm512_setzero_pd (),
  13051. (__mmask8) __U,
  13052. _MM_FROUND_CUR_DIRECTION);
  13053. }
  13054. extern __inline __m128
  13055. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13056. _mm_getexp_ss (__m128 __A, __m128 __B)
  13057. {
  13058. return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
  13059. (__v4sf) __B,
  13060. _MM_FROUND_CUR_DIRECTION);
  13061. }
  13062. extern __inline __m128
  13063. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13064. _mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  13065. {
  13066. return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
  13067. (__v4sf) __B,
  13068. (__v4sf) __W,
  13069. (__mmask8) __U,
  13070. _MM_FROUND_CUR_DIRECTION);
  13071. }
  13072. extern __inline __m128
  13073. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13074. _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
  13075. {
  13076. return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
  13077. (__v4sf) __B,
  13078. (__v4sf)
  13079. _mm_setzero_ps (),
  13080. (__mmask8) __U,
  13081. _MM_FROUND_CUR_DIRECTION);
  13082. }
  13083. extern __inline __m128d
  13084. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13085. _mm_getexp_sd (__m128d __A, __m128d __B)
  13086. {
  13087. return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
  13088. (__v2df) __B,
  13089. _MM_FROUND_CUR_DIRECTION);
  13090. }
  13091. extern __inline __m128d
  13092. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13093. _mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
  13094. {
  13095. return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
  13096. (__v2df) __B,
  13097. (__v2df) __W,
  13098. (__mmask8) __U,
  13099. _MM_FROUND_CUR_DIRECTION);
  13100. }
  13101. extern __inline __m128d
  13102. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13103. _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
  13104. {
  13105. return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
  13106. (__v2df) __B,
  13107. (__v2df)
  13108. _mm_setzero_pd (),
  13109. (__mmask8) __U,
  13110. _MM_FROUND_CUR_DIRECTION);
  13111. }
  13112. extern __inline __m512d
  13113. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13114. _mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
  13115. _MM_MANTISSA_SIGN_ENUM __C)
  13116. {
  13117. return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
  13118. (__C << 2) | __B,
  13119. _mm512_undefined_pd (),
  13120. (__mmask8) -1,
  13121. _MM_FROUND_CUR_DIRECTION);
  13122. }
  13123. extern __inline __m512d
  13124. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13125. _mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
  13126. _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
  13127. {
  13128. return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
  13129. (__C << 2) | __B,
  13130. (__v8df) __W, __U,
  13131. _MM_FROUND_CUR_DIRECTION);
  13132. }
  13133. extern __inline __m512d
  13134. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13135. _mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
  13136. _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
  13137. {
  13138. return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
  13139. (__C << 2) | __B,
  13140. (__v8df)
  13141. _mm512_setzero_pd (),
  13142. __U,
  13143. _MM_FROUND_CUR_DIRECTION);
  13144. }
  13145. extern __inline __m512
  13146. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13147. _mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
  13148. _MM_MANTISSA_SIGN_ENUM __C)
  13149. {
  13150. return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
  13151. (__C << 2) | __B,
  13152. _mm512_undefined_ps (),
  13153. (__mmask16) -1,
  13154. _MM_FROUND_CUR_DIRECTION);
  13155. }
  13156. extern __inline __m512
  13157. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13158. _mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
  13159. _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
  13160. {
  13161. return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
  13162. (__C << 2) | __B,
  13163. (__v16sf) __W, __U,
  13164. _MM_FROUND_CUR_DIRECTION);
  13165. }
  13166. extern __inline __m512
  13167. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13168. _mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
  13169. _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
  13170. {
  13171. return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
  13172. (__C << 2) | __B,
  13173. (__v16sf)
  13174. _mm512_setzero_ps (),
  13175. __U,
  13176. _MM_FROUND_CUR_DIRECTION);
  13177. }
  13178. extern __inline __m128d
  13179. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13180. _mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
  13181. _MM_MANTISSA_SIGN_ENUM __D)
  13182. {
  13183. return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
  13184. (__v2df) __B,
  13185. (__D << 2) | __C,
  13186. _MM_FROUND_CUR_DIRECTION);
  13187. }
  13188. extern __inline __m128d
  13189. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13190. _mm_mask_getmant_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
  13191. _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
  13192. {
  13193. return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
  13194. (__v2df) __B,
  13195. (__D << 2) | __C,
  13196. (__v2df) __W,
  13197. __U,
  13198. _MM_FROUND_CUR_DIRECTION);
  13199. }
  13200. extern __inline __m128d
  13201. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13202. _mm_maskz_getmant_sd (__mmask8 __U, __m128d __A, __m128d __B,
  13203. _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
  13204. {
  13205. return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
  13206. (__v2df) __B,
  13207. (__D << 2) | __C,
  13208. (__v2df)
  13209. _mm_setzero_pd(),
  13210. __U,
  13211. _MM_FROUND_CUR_DIRECTION);
  13212. }
  13213. extern __inline __m128
  13214. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13215. _mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
  13216. _MM_MANTISSA_SIGN_ENUM __D)
  13217. {
  13218. return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
  13219. (__v4sf) __B,
  13220. (__D << 2) | __C,
  13221. _MM_FROUND_CUR_DIRECTION);
  13222. }
  13223. extern __inline __m128
  13224. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13225. _mm_mask_getmant_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
  13226. _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
  13227. {
  13228. return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
  13229. (__v4sf) __B,
  13230. (__D << 2) | __C,
  13231. (__v4sf) __W,
  13232. __U,
  13233. _MM_FROUND_CUR_DIRECTION);
  13234. }
  13235. extern __inline __m128
  13236. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13237. _mm_maskz_getmant_ss (__mmask8 __U, __m128 __A, __m128 __B,
  13238. _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
  13239. {
  13240. return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
  13241. (__v4sf) __B,
  13242. (__D << 2) | __C,
  13243. (__v4sf)
  13244. _mm_setzero_ps(),
  13245. __U,
  13246. _MM_FROUND_CUR_DIRECTION);
  13247. }
  13248. #else
  13249. #define _mm512_getmant_pd(X, B, C) \
  13250. ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
  13251. (int)(((C)<<2) | (B)), \
  13252. (__v8df)_mm512_undefined_pd(), \
  13253. (__mmask8)-1,\
  13254. _MM_FROUND_CUR_DIRECTION))
  13255. #define _mm512_mask_getmant_pd(W, U, X, B, C) \
  13256. ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
  13257. (int)(((C)<<2) | (B)), \
  13258. (__v8df)(__m512d)(W), \
  13259. (__mmask8)(U),\
  13260. _MM_FROUND_CUR_DIRECTION))
  13261. #define _mm512_maskz_getmant_pd(U, X, B, C) \
  13262. ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
  13263. (int)(((C)<<2) | (B)), \
  13264. (__v8df)_mm512_setzero_pd(), \
  13265. (__mmask8)(U),\
  13266. _MM_FROUND_CUR_DIRECTION))
  13267. #define _mm512_getmant_ps(X, B, C) \
  13268. ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
  13269. (int)(((C)<<2) | (B)), \
  13270. (__v16sf)_mm512_undefined_ps(), \
  13271. (__mmask16)-1,\
  13272. _MM_FROUND_CUR_DIRECTION))
  13273. #define _mm512_mask_getmant_ps(W, U, X, B, C) \
  13274. ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
  13275. (int)(((C)<<2) | (B)), \
  13276. (__v16sf)(__m512)(W), \
  13277. (__mmask16)(U),\
  13278. _MM_FROUND_CUR_DIRECTION))
  13279. #define _mm512_maskz_getmant_ps(U, X, B, C) \
  13280. ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
  13281. (int)(((C)<<2) | (B)), \
  13282. (__v16sf)_mm512_setzero_ps(), \
  13283. (__mmask16)(U),\
  13284. _MM_FROUND_CUR_DIRECTION))
  13285. #define _mm_getmant_sd(X, Y, C, D) \
  13286. ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \
  13287. (__v2df)(__m128d)(Y), \
  13288. (int)(((D)<<2) | (C)), \
  13289. _MM_FROUND_CUR_DIRECTION))
  13290. #define _mm_mask_getmant_sd(W, U, X, Y, C, D) \
  13291. ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \
  13292. (__v2df)(__m128d)(Y), \
  13293. (int)(((D)<<2) | (C)), \
  13294. (__v2df)(__m128d)(W), \
  13295. (__mmask8)(U),\
  13296. _MM_FROUND_CUR_DIRECTION))
  13297. #define _mm_maskz_getmant_sd(U, X, Y, C, D) \
  13298. ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \
  13299. (__v2df)(__m128d)(Y), \
  13300. (int)(((D)<<2) | (C)), \
  13301. (__v2df)_mm_setzero_pd(), \
  13302. (__mmask8)(U),\
  13303. _MM_FROUND_CUR_DIRECTION))
  13304. #define _mm_getmant_ss(X, Y, C, D) \
  13305. ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \
  13306. (__v4sf)(__m128)(Y), \
  13307. (int)(((D)<<2) | (C)), \
  13308. _MM_FROUND_CUR_DIRECTION))
  13309. #define _mm_mask_getmant_ss(W, U, X, Y, C, D) \
  13310. ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \
  13311. (__v4sf)(__m128)(Y), \
  13312. (int)(((D)<<2) | (C)), \
  13313. (__v4sf)(__m128)(W), \
  13314. (__mmask8)(U),\
  13315. _MM_FROUND_CUR_DIRECTION))
  13316. #define _mm_maskz_getmant_ss(U, X, Y, C, D) \
  13317. ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \
  13318. (__v4sf)(__m128)(Y), \
  13319. (int)(((D)<<2) | (C)), \
  13320. (__v4sf)_mm_setzero_ps(), \
  13321. (__mmask8)(U),\
  13322. _MM_FROUND_CUR_DIRECTION))
  13323. #define _mm_getexp_ss(A, B) \
  13324. ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
  13325. _MM_FROUND_CUR_DIRECTION))
  13326. #define _mm_mask_getexp_ss(W, U, A, B) \
  13327. (__m128)__builtin_ia32_getexpss_mask_round(A, B, W, U,\
  13328. _MM_FROUND_CUR_DIRECTION)
  13329. #define _mm_maskz_getexp_ss(U, A, B) \
  13330. (__m128)__builtin_ia32_getexpss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U,\
  13331. _MM_FROUND_CUR_DIRECTION)
  13332. #define _mm_getexp_sd(A, B) \
  13333. ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B),\
  13334. _MM_FROUND_CUR_DIRECTION))
  13335. #define _mm_mask_getexp_sd(W, U, A, B) \
  13336. (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, W, U,\
  13337. _MM_FROUND_CUR_DIRECTION)
  13338. #define _mm_maskz_getexp_sd(U, A, B) \
  13339. (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U,\
  13340. _MM_FROUND_CUR_DIRECTION)
  13341. #define _mm512_getexp_ps(A) \
  13342. ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
  13343. (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))
  13344. #define _mm512_mask_getexp_ps(W, U, A) \
  13345. ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
  13346. (__v16sf)(__m512)(W), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
  13347. #define _mm512_maskz_getexp_ps(U, A) \
  13348. ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
  13349. (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
  13350. #define _mm512_getexp_pd(A) \
  13351. ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
  13352. (__v8df)_mm512_undefined_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
  13353. #define _mm512_mask_getexp_pd(W, U, A) \
  13354. ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
  13355. (__v8df)(__m512d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
  13356. #define _mm512_maskz_getexp_pd(U, A) \
  13357. ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
  13358. (__v8df)_mm512_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
  13359. #endif
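/* Illustrative sketch, not part of the original header: getexp/getmant
   form a vectorised frexp-style decomposition.  With the mantissa
   normalised to [1, 2) and keeping the sign of the source, a finite
   non-zero x satisfies x == getmant(x) * 2^getexp(x).  Assumes -mavx512f.  */
static inline void
example_frexp_pd (__m512d x, __m512d *mant, __m512d *exponent)
{
  *mant = _mm512_getmant_pd (x, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
  *exponent = _mm512_getexp_pd (x);	/* floor(log2(|x|)), as doubles */
}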
  13360. #ifdef __OPTIMIZE__
  13361. extern __inline __m512
  13362. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13363. _mm512_roundscale_ps (__m512 __A, const int __imm)
  13364. {
  13365. return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
  13366. (__v16sf)
  13367. _mm512_undefined_ps (),
  13368. -1,
  13369. _MM_FROUND_CUR_DIRECTION);
  13370. }
  13371. extern __inline __m512
  13372. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13373. _mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C,
  13374. const int __imm)
  13375. {
  13376. return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
  13377. (__v16sf) __A,
  13378. (__mmask16) __B,
  13379. _MM_FROUND_CUR_DIRECTION);
  13380. }
  13381. extern __inline __m512
  13382. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13383. _mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
  13384. {
  13385. return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
  13386. __imm,
  13387. (__v16sf)
  13388. _mm512_setzero_ps (),
  13389. (__mmask16) __A,
  13390. _MM_FROUND_CUR_DIRECTION);
  13391. }
  13392. extern __inline __m512d
  13393. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13394. _mm512_roundscale_pd (__m512d __A, const int __imm)
  13395. {
  13396. return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
  13397. (__v8df)
  13398. _mm512_undefined_pd (),
  13399. -1,
  13400. _MM_FROUND_CUR_DIRECTION);
  13401. }
  13402. extern __inline __m512d
  13403. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13404. _mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
  13405. const int __imm)
  13406. {
  13407. return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
  13408. (__v8df) __A,
  13409. (__mmask8) __B,
  13410. _MM_FROUND_CUR_DIRECTION);
  13411. }
  13412. extern __inline __m512d
  13413. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13414. _mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
  13415. {
  13416. return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
  13417. __imm,
  13418. (__v8df)
  13419. _mm512_setzero_pd (),
  13420. (__mmask8) __A,
  13421. _MM_FROUND_CUR_DIRECTION);
  13422. }
  13423. extern __inline __m128
  13424. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13425. _mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
  13426. {
  13427. return (__m128)
  13428. __builtin_ia32_rndscaless_mask_round ((__v4sf) __A,
  13429. (__v4sf) __B, __imm,
  13430. (__v4sf)
  13431. _mm_setzero_ps (),
  13432. (__mmask8) -1,
  13433. _MM_FROUND_CUR_DIRECTION);
  13434. }
  13435. extern __inline __m128
  13436. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13437. _mm_mask_roundscale_ss (__m128 __A, __mmask8 __B, __m128 __C, __m128 __D,
  13438. const int __imm)
  13439. {
  13440. return (__m128)
  13441. __builtin_ia32_rndscaless_mask_round ((__v4sf) __C,
  13442. (__v4sf) __D, __imm,
  13443. (__v4sf) __A,
  13444. (__mmask8) __B,
  13445. _MM_FROUND_CUR_DIRECTION);
  13446. }
  13447. extern __inline __m128
  13448. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13449. _mm_maskz_roundscale_ss (__mmask8 __A, __m128 __B, __m128 __C,
  13450. const int __imm)
  13451. {
  13452. return (__m128)
  13453. __builtin_ia32_rndscaless_mask_round ((__v4sf) __B,
  13454. (__v4sf) __C, __imm,
  13455. (__v4sf)
  13456. _mm_setzero_ps (),
  13457. (__mmask8) __A,
  13458. _MM_FROUND_CUR_DIRECTION);
  13459. }
  13460. extern __inline __m128d
  13461. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13462. _mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
  13463. {
  13464. return (__m128d)
  13465. __builtin_ia32_rndscalesd_mask_round ((__v2df) __A,
  13466. (__v2df) __B, __imm,
  13467. (__v2df)
  13468. _mm_setzero_pd (),
  13469. (__mmask8) -1,
  13470. _MM_FROUND_CUR_DIRECTION);
  13471. }
  13472. extern __inline __m128d
  13473. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13474. _mm_mask_roundscale_sd (__m128d __A, __mmask8 __B, __m128d __C, __m128d __D,
  13475. const int __imm)
  13476. {
  13477. return (__m128d)
  13478. __builtin_ia32_rndscalesd_mask_round ((__v2df) __C,
  13479. (__v2df) __D, __imm,
  13480. (__v2df) __A,
  13481. (__mmask8) __B,
  13482. _MM_FROUND_CUR_DIRECTION);
  13483. }
  13484. extern __inline __m128d
  13485. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13486. _mm_maskz_roundscale_sd (__mmask8 __A, __m128d __B, __m128d __C,
  13487. const int __imm)
  13488. {
  13489. return (__m128d)
  13490. __builtin_ia32_rndscalesd_mask_round ((__v2df) __B,
  13491. (__v2df) __C, __imm,
  13492. (__v2df)
  13493. _mm_setzero_pd (),
  13494. (__mmask8) __A,
  13495. _MM_FROUND_CUR_DIRECTION);
  13496. }
  13497. #else
  13498. #define _mm512_roundscale_ps(A, B) \
  13499. ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
  13500. (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
  13501. #define _mm512_mask_roundscale_ps(A, B, C, D) \
  13502. ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \
  13503. (int)(D), \
  13504. (__v16sf)(__m512)(A), \
  13505. (__mmask16)(B), _MM_FROUND_CUR_DIRECTION))
  13506. #define _mm512_maskz_roundscale_ps(A, B, C) \
  13507. ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \
  13508. (int)(C), \
  13509. (__v16sf)_mm512_setzero_ps(),\
  13510. (__mmask16)(A), _MM_FROUND_CUR_DIRECTION))
  13511. #define _mm512_roundscale_pd(A, B) \
  13512. ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
  13513. (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
  13514. #define _mm512_mask_roundscale_pd(A, B, C, D) \
  13515. ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \
  13516. (int)(D), \
  13517. (__v8df)(__m512d)(A), \
  13518. (__mmask8)(B), _MM_FROUND_CUR_DIRECTION))
  13519. #define _mm512_maskz_roundscale_pd(A, B, C) \
  13520. ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \
  13521. (int)(C), \
  13522. (__v8df)_mm512_setzero_pd(),\
  13523. (__mmask8)(A), _MM_FROUND_CUR_DIRECTION))
  13524. #define _mm_roundscale_ss(A, B, I) \
  13525. ((__m128) \
  13526. __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A), \
  13527. (__v4sf) (__m128) (B), \
  13528. (int) (I), \
  13529. (__v4sf) _mm_setzero_ps (), \
  13530. (__mmask8) (-1), \
  13531. _MM_FROUND_CUR_DIRECTION))
  13532. #define _mm_mask_roundscale_ss(A, U, B, C, I) \
  13533. ((__m128) \
  13534. __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (B), \
  13535. (__v4sf) (__m128) (C), \
  13536. (int) (I), \
  13537. (__v4sf) (__m128) (A), \
  13538. (__mmask8) (U), \
  13539. _MM_FROUND_CUR_DIRECTION))
  13540. #define _mm_maskz_roundscale_ss(U, A, B, I) \
  13541. ((__m128) \
  13542. __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A), \
  13543. (__v4sf) (__m128) (B), \
  13544. (int) (I), \
  13545. (__v4sf) _mm_setzero_ps (), \
  13546. (__mmask8) (U), \
  13547. _MM_FROUND_CUR_DIRECTION))
  13548. #define _mm_roundscale_sd(A, B, I) \
  13549. ((__m128d) \
  13550. __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A), \
  13551. (__v2df) (__m128d) (B), \
  13552. (int) (I), \
  13553. (__v2df) _mm_setzero_pd (), \
  13554. (__mmask8) (-1), \
  13555. _MM_FROUND_CUR_DIRECTION))
  13556. #define _mm_mask_roundscale_sd(A, U, B, C, I) \
  13557. ((__m128d) \
  13558. __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (B), \
  13559. (__v2df) (__m128d) (C), \
  13560. (int) (I), \
  13561. (__v2df) (__m128d) (A), \
  13562. (__mmask8) (U), \
  13563. _MM_FROUND_CUR_DIRECTION))
  13564. #define _mm_maskz_roundscale_sd(U, A, B, I) \
  13565. ((__m128d) \
  13566. __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A), \
  13567. (__v2df) (__m128d) (B), \
  13568. (int) (I), \
  13569. (__v2df) _mm_setzero_pd (), \
  13570. (__mmask8) (U), \
  13571. _MM_FROUND_CUR_DIRECTION))
  13572. #endif
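/* Illustrative sketch, not part of the original header: roundscale rounds
   to a fixed number of binary fraction bits.  Per the SDM description of
   VRNDSCALEPD, imm8[7:4] is that bit count and imm8[3:0] selects the
   rounding mode (0 nearest-even, 1 down, 2 up, 3 truncate); treat the
   exact layout as an assumption to verify.  Assumes -mavx512f.  */
static inline __m512d
example_floor_pd (__m512d x)
{
  return _mm512_roundscale_pd (x, 0x01);		/* floor to integers */
}
static inline __m512d
example_truncate_to_quarters_pd (__m512d x)
{
  return _mm512_roundscale_pd (x, (2 << 4) | 0x03);	/* 1/4-step quantise */
}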
  13573. #ifdef __OPTIMIZE__
  13574. extern __inline __mmask8
  13575. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13576. _mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
  13577. {
  13578. return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
  13579. (__v8df) __Y, __P,
  13580. (__mmask8) -1,
  13581. _MM_FROUND_CUR_DIRECTION);
  13582. }
  13583. extern __inline __mmask16
  13584. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13585. _mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P)
  13586. {
  13587. return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
  13588. (__v16sf) __Y, __P,
  13589. (__mmask16) -1,
  13590. _MM_FROUND_CUR_DIRECTION);
  13591. }
  13592. extern __inline __mmask16
  13593. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13594. _mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P)
  13595. {
  13596. return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
  13597. (__v16sf) __Y, __P,
  13598. (__mmask16) __U,
  13599. _MM_FROUND_CUR_DIRECTION);
  13600. }
  13601. extern __inline __mmask8
  13602. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13603. _mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
  13604. {
  13605. return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
  13606. (__v8df) __Y, __P,
  13607. (__mmask8) __U,
  13608. _MM_FROUND_CUR_DIRECTION);
  13609. }
  13610. extern __inline __mmask8
  13611. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13612. _mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
  13613. {
  13614. return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
  13615. (__v2df) __Y, __P,
  13616. (__mmask8) -1,
  13617. _MM_FROUND_CUR_DIRECTION);
  13618. }
  13619. extern __inline __mmask8
  13620. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13621. _mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
  13622. {
  13623. return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
  13624. (__v2df) __Y, __P,
  13625. (__mmask8) __M,
  13626. _MM_FROUND_CUR_DIRECTION);
  13627. }
  13628. extern __inline __mmask8
  13629. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13630. _mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
  13631. {
  13632. return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
  13633. (__v4sf) __Y, __P,
  13634. (__mmask8) -1,
  13635. _MM_FROUND_CUR_DIRECTION);
  13636. }
  13637. extern __inline __mmask8
  13638. __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
  13639. _mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
  13640. {
  13641. return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
  13642. (__v4sf) __Y, __P,
  13643. (__mmask8) __M,
  13644. _MM_FROUND_CUR_DIRECTION);
  13645. }
  13646. #else
  13647. #define _mm512_cmp_pd_mask(X, Y, P) \
  13648. ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
  13649. (__v8df)(__m512d)(Y), (int)(P),\
  13650. (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
  13651. #define _mm512_cmp_ps_mask(X, Y, P) \
  13652. ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
  13653. (__v16sf)(__m512)(Y), (int)(P),\
  13654. (__mmask16)-1,_MM_FROUND_CUR_DIRECTION))
  13655. #define _mm512_mask_cmp_pd_mask(M, X, Y, P) \
  13656. ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
  13657. (__v8df)(__m512d)(Y), (int)(P),\
  13658. (__mmask8)(M), _MM_FROUND_CUR_DIRECTION))
  13659. #define _mm512_mask_cmp_ps_mask(M, X, Y, P) \
  13660. ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
  13661. (__v16sf)(__m512)(Y), (int)(P),\
  13662. (__mmask16)(M),_MM_FROUND_CUR_DIRECTION))
  13663. #define _mm_cmp_sd_mask(X, Y, P) \
  13664. ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
  13665. (__v2df)(__m128d)(Y), (int)(P),\
  13666. (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
  13667. #define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  13668. ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
  13669. (__v2df)(__m128d)(Y), (int)(P),\
  13670. M,_MM_FROUND_CUR_DIRECTION))
  13671. #define _mm_cmp_ss_mask(X, Y, P) \
  13672. ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
  13673. (__v4sf)(__m128)(Y), (int)(P), \
  13674. (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
  13675. #define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  13676. ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
  13677. (__v4sf)(__m128)(Y), (int)(P), \
  13678. M,_MM_FROUND_CUR_DIRECTION))
  13679. #endif
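/* Illustrative sketch, not part of the original header: the cmp intrinsics
   return a bitmask that feeds directly into masked operations.  Here lanes
   of x strictly inside (lo, hi) are doubled and all other lanes are passed
   through unchanged.  Assumes -mavx512f.  */
static inline __m512d
example_double_in_range (__m512d x, double lo, double hi)
{
  __mmask8 m = _mm512_cmp_pd_mask (x, _mm512_set1_pd (lo), _CMP_GT_OS)
	       & _mm512_cmp_pd_mask (x, _mm512_set1_pd (hi), _CMP_LT_OS);
  return _mm512_mask_add_pd (x, m, x, x);	/* x + x where the mask is set */
}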
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpeq_pd_mask (__m512d __X, __m512d __Y)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
          (__v8df) __Y, _CMP_EQ_OQ,
          (__mmask8) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpeq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
          (__v8df) __Y, _CMP_EQ_OQ,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmplt_pd_mask (__m512d __X, __m512d __Y)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
          (__v8df) __Y, _CMP_LT_OS,
          (__mmask8) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmplt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
          (__v8df) __Y, _CMP_LT_OS,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmple_pd_mask (__m512d __X, __m512d __Y)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
          (__v8df) __Y, _CMP_LE_OS,
          (__mmask8) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmple_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
          (__v8df) __Y, _CMP_LE_OS,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpunord_pd_mask (__m512d __X, __m512d __Y)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
          (__v8df) __Y, _CMP_UNORD_Q,
          (__mmask8) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpunord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
          (__v8df) __Y, _CMP_UNORD_Q,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpneq_pd_mask (__m512d __X, __m512d __Y)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
          (__v8df) __Y, _CMP_NEQ_UQ,
          (__mmask8) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpneq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
          (__v8df) __Y, _CMP_NEQ_UQ,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpnlt_pd_mask (__m512d __X, __m512d __Y)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
          (__v8df) __Y, _CMP_NLT_US,
          (__mmask8) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpnlt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
          (__v8df) __Y, _CMP_NLT_US,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpnle_pd_mask (__m512d __X, __m512d __Y)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
          (__v8df) __Y, _CMP_NLE_US,
          (__mmask8) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpnle_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
          (__v8df) __Y, _CMP_NLE_US,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpord_pd_mask (__m512d __X, __m512d __Y)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
          (__v8df) __Y, _CMP_ORD_Q,
          (__mmask8) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
          (__v8df) __Y, _CMP_ORD_Q,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
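/* The same compare-to-mask wrappers for the sixteen single-precision
   lanes; the result is a 16-bit mask and the masked forms take a
   __mmask16 write mask.  */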
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpeq_ps_mask (__m512 __X, __m512 __Y)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
          (__v16sf) __Y, _CMP_EQ_OQ,
          (__mmask16) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpeq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
          (__v16sf) __Y, _CMP_EQ_OQ,
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmplt_ps_mask (__m512 __X, __m512 __Y)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
          (__v16sf) __Y, _CMP_LT_OS,
          (__mmask16) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmplt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
          (__v16sf) __Y, _CMP_LT_OS,
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmple_ps_mask (__m512 __X, __m512 __Y)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
          (__v16sf) __Y, _CMP_LE_OS,
          (__mmask16) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmple_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
          (__v16sf) __Y, _CMP_LE_OS,
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpunord_ps_mask (__m512 __X, __m512 __Y)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
          (__v16sf) __Y, _CMP_UNORD_Q,
          (__mmask16) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpunord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
          (__v16sf) __Y, _CMP_UNORD_Q,
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpneq_ps_mask (__m512 __X, __m512 __Y)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
          (__v16sf) __Y, _CMP_NEQ_UQ,
          (__mmask16) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpneq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
          (__v16sf) __Y, _CMP_NEQ_UQ,
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpnlt_ps_mask (__m512 __X, __m512 __Y)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
          (__v16sf) __Y, _CMP_NLT_US,
          (__mmask16) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpnlt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
          (__v16sf) __Y, _CMP_NLT_US,
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpnle_ps_mask (__m512 __X, __m512 __Y)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
          (__v16sf) __Y, _CMP_NLE_US,
          (__mmask16) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpnle_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
          (__v16sf) __Y, _CMP_NLE_US,
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpord_ps_mask (__m512 __X, __m512 __Y)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
          (__v16sf) __Y, _CMP_ORD_Q,
          (__mmask16) -1,
          _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
          (__v16sf) __Y, _CMP_ORD_Q,
          (__mmask16) __U,
          _MM_FROUND_CUR_DIRECTION);
}
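/* A minimal usage sketch (illustrative only, not part of the original
   header), assuming AVX-512F is enabled: clamp negative lanes to zero by
   combining a compare mask with a masked move.

     static inline __m512d
     clamp_nonnegative (__m512d __v)
     {
       __mmask8 __neg = _mm512_cmplt_pd_mask (__v, _mm512_setzero_pd ());
       return _mm512_mask_mov_pd (__v, __neg, _mm512_setzero_pd ());
     }

   _mm512_setzero_pd and _mm512_mask_mov_pd are defined earlier in this
   header.  */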
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_kmov (__mmask16 __A)
{
  return __builtin_ia32_kmovw (__A);
}
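/* Cast intrinsics.  These reinterpret the bit pattern of their operand
   between the __m512/__m512d/__m512i views or move between vector widths
   and generate no instructions by themselves.  The 512-to-128/256 forms
   return the low lanes; the 128/256-to-512 forms leave the upper bits of
   the result undefined (see the _mm512_zext* intrinsics below when
   zeroed upper bits are required).  */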
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castpd_ps (__m512d __A)
{
  return (__m512) (__A);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castpd_si512 (__m512d __A)
{
  return (__m512i) (__A);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castps_pd (__m512 __A)
{
  return (__m512d) (__A);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castps_si512 (__m512 __A)
{
  return (__m512i) (__A);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castsi512_ps (__m512i __A)
{
  return (__m512) (__A);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castsi512_pd (__m512i __A)
{
  return (__m512d) (__A);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castpd512_pd128 (__m512d __A)
{
  return (__m128d)_mm512_extractf32x4_ps((__m512)__A, 0);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castps512_ps128 (__m512 __A)
{
  return _mm512_extractf32x4_ps(__A, 0);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castsi512_si128 (__m512i __A)
{
  return (__m128i)_mm512_extracti32x4_epi32((__m512i)__A, 0);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castpd512_pd256 (__m512d __A)
{
  return _mm512_extractf64x4_pd(__A, 0);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castps512_ps256 (__m512 __A)
{
  return (__m256)_mm512_extractf64x4_pd((__m512d)__A, 0);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castsi512_si256 (__m512i __A)
{
  return (__m256i)_mm512_extractf64x4_pd((__m512d)__A, 0);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castpd128_pd512 (__m128d __A)
{
  return (__m512d) __builtin_ia32_pd512_pd((__m128d)__A);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castps128_ps512 (__m128 __A)
{
  return (__m512) __builtin_ia32_ps512_ps((__m128)__A);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castsi128_si512 (__m128i __A)
{
  return (__m512i) __builtin_ia32_si512_si((__v4si)__A);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castpd256_pd512 (__m256d __A)
{
  return __builtin_ia32_pd512_256pd (__A);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castps256_ps512 (__m256 __A)
{
  return __builtin_ia32_ps512_256ps (__A);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castsi256_si512 (__m256i __A)
{
  return (__m512i)__builtin_ia32_si512_256si ((__v8si)__A);
}
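/* Zero-extending casts.  Unlike the widening casts above, these
   guarantee that the lanes above the source operand are zero, by
   inserting the source into a zeroed 512-bit vector.  */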
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_zextpd128_pd512 (__m128d __A)
{
  return (__m512d) _mm512_insertf32x4 (_mm512_setzero_ps (), (__m128) __A, 0);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_zextps128_ps512 (__m128 __A)
{
  return _mm512_insertf32x4 (_mm512_setzero_ps (), __A, 0);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_zextsi128_si512 (__m128i __A)
{
  return _mm512_inserti32x4 (_mm512_setzero_si512 (), __A, 0);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_zextpd256_pd512 (__m256d __A)
{
  return _mm512_insertf64x4 (_mm512_setzero_pd (), __A, 0);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_zextps256_ps512 (__m256 __A)
{
  return (__m512) _mm512_insertf64x4 (_mm512_setzero_pd (), (__m256d) __A, 0);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_zextsi256_si512 (__m256i __A)
{
  return _mm512_inserti64x4 (_mm512_setzero_si512 (), __A, 0);
}
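/* Unsigned integer compares.  The ucmp builtins take the predicate as an
   immediate: 0 selects equal and 6 selects not-less-or-equal, i.e.
   greater-than.  */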
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpeq_epu32_mask (__m512i __A, __m512i __B)
{
  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
          (__v16si) __B, 0,
          (__mmask16) -1);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpeq_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
          (__v16si) __B, 0, __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpeq_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
          (__v8di) __B, 0, __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpeq_epu64_mask (__m512i __A, __m512i __B)
{
  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
          (__v8di) __B, 0,
          (__mmask8) -1);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpgt_epu32_mask (__m512i __A, __m512i __B)
{
  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
          (__v16si) __B, 6,
          (__mmask16) -1);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpgt_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
          (__v16si) __B, 6, __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpgt_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
          (__v8di) __B, 6, __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpgt_epu64_mask (__m512i __A, __m512i __B)
{
  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
          (__v8di) __B, 6,
          (__mmask8) -1);
}
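/* Horizontal reductions.  __MM512_REDUCE_OP is redefined before each
   group below; every expansion repeatedly halves the vector
   (512 -> 256 -> 128 bits, then across the remaining lanes) and combines
   the halves with the given operator or min/max intrinsic until a single
   scalar remains.  The masked variants first replace masked-off lanes
   with the identity of the operation (0 for add/or, 1 for mul, all-ones
   for and) or a neutral sentinel for min/max.  */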
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __v8si __T1 = (__v8si) _mm512_extracti64x4_epi64 (__A, 1); \
  __v8si __T2 = (__v8si) _mm512_extracti64x4_epi64 (__A, 0); \
  __m256i __T3 = (__m256i) (__T1 op __T2); \
  __v4si __T4 = (__v4si) _mm256_extracti128_si256 (__T3, 1); \
  __v4si __T5 = (__v4si) _mm256_extracti128_si256 (__T3, 0); \
  __v4si __T6 = __T4 op __T5; \
  __v4si __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 }); \
  __v4si __T8 = __T6 op __T7; \
  return __T8[0] op __T8[1]

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_add_epi32 (__m512i __A)
{
  __MM512_REDUCE_OP (+);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_mul_epi32 (__m512i __A)
{
  __MM512_REDUCE_OP (*);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_and_epi32 (__m512i __A)
{
  __MM512_REDUCE_OP (&);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_or_epi32 (__m512i __A)
{
  __MM512_REDUCE_OP (|);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_add_epi32 (__mmask16 __U, __m512i __A)
{
  __A = _mm512_maskz_mov_epi32 (__U, __A);
  __MM512_REDUCE_OP (+);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_mul_epi32 (__mmask16 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (1), __U, __A);
  __MM512_REDUCE_OP (*);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_and_epi32 (__mmask16 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (~0), __U, __A);
  __MM512_REDUCE_OP (&);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_or_epi32 (__mmask16 __U, __m512i __A)
{
  __A = _mm512_maskz_mov_epi32 (__U, __A);
  __MM512_REDUCE_OP (|);
}
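/* A minimal usage sketch (illustrative only, not part of the original
   header): sum only the lanes of __v that exceed a threshold, using the
   unsigned compare and masked reduction defined in this file.

     static inline int
     sum_above (__m512i __v, __m512i __threshold)
     {
       __mmask16 __m = _mm512_cmpgt_epu32_mask (__v, __threshold);
       return _mm512_mask_reduce_add_epi32 (__m, __v);
     }
*/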
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __m256i __T1 = (__m256i) _mm512_extracti64x4_epi64 (__A, 1); \
  __m256i __T2 = (__m256i) _mm512_extracti64x4_epi64 (__A, 0); \
  __m256i __T3 = _mm256_##op (__T1, __T2); \
  __m128i __T4 = (__m128i) _mm256_extracti128_si256 (__T3, 1); \
  __m128i __T5 = (__m128i) _mm256_extracti128_si256 (__T3, 0); \
  __m128i __T6 = _mm_##op (__T4, __T5); \
  __m128i __T7 = (__m128i) __builtin_shuffle ((__v4si) __T6, \
          (__v4si) { 2, 3, 0, 1 }); \
  __m128i __T8 = _mm_##op (__T6, __T7); \
  __m128i __T9 = (__m128i) __builtin_shuffle ((__v4si) __T8, \
          (__v4si) { 1, 0, 1, 0 }); \
  __v4si __T10 = (__v4si) _mm_##op (__T8, __T9); \
  return __T10[0]

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_min_epi32 (__m512i __A)
{
  __MM512_REDUCE_OP (min_epi32);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_max_epi32 (__m512i __A)
{
  __MM512_REDUCE_OP (max_epi32);
}

extern __inline unsigned int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_min_epu32 (__m512i __A)
{
  __MM512_REDUCE_OP (min_epu32);
}

extern __inline unsigned int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_max_epu32 (__m512i __A)
{
  __MM512_REDUCE_OP (max_epu32);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_min_epi32 (__mmask16 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (__INT_MAX__), __U, __A);
  __MM512_REDUCE_OP (min_epi32);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_max_epi32 (__mmask16 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (-__INT_MAX__ - 1), __U, __A);
  __MM512_REDUCE_OP (max_epi32);
}

extern __inline unsigned int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_min_epu32 (__mmask16 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (~0), __U, __A);
  __MM512_REDUCE_OP (min_epu32);
}

extern __inline unsigned int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_max_epu32 (__mmask16 __U, __m512i __A)
{
  __A = _mm512_maskz_mov_epi32 (__U, __A);
  __MM512_REDUCE_OP (max_epu32);
}
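/* Single-precision reductions.  The 256- and 128-bit halves are obtained
   by reinterpreting the vector as packed doubles (AVX-512F has no
   512-bit 32x8 extract), and the final steps use GCC's __builtin_shuffle
   with an integer index vector.  */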
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __m256 __T1 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
  __m256 __T2 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
  __m256 __T3 = __T1 op __T2; \
  __m128 __T4 = _mm256_extractf128_ps (__T3, 1); \
  __m128 __T5 = _mm256_extractf128_ps (__T3, 0); \
  __m128 __T6 = __T4 op __T5; \
  __m128 __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 }); \
  __m128 __T8 = __T6 op __T7; \
  return __T8[0] op __T8[1]

extern __inline float
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_add_ps (__m512 __A)
{
  __MM512_REDUCE_OP (+);
}

extern __inline float
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_mul_ps (__m512 __A)
{
  __MM512_REDUCE_OP (*);
}

extern __inline float
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_add_ps (__mmask16 __U, __m512 __A)
{
  __A = _mm512_maskz_mov_ps (__U, __A);
  __MM512_REDUCE_OP (+);
}

extern __inline float
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_mul_ps (__mmask16 __U, __m512 __A)
{
  __A = _mm512_mask_mov_ps (_mm512_set1_ps (1.0f), __U, __A);
  __MM512_REDUCE_OP (*);
}

#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __m256 __T1 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
  __m256 __T2 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
  __m256 __T3 = _mm256_##op (__T1, __T2); \
  __m128 __T4 = _mm256_extractf128_ps (__T3, 1); \
  __m128 __T5 = _mm256_extractf128_ps (__T3, 0); \
  __m128 __T6 = _mm_##op (__T4, __T5); \
  __m128 __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 }); \
  __m128 __T8 = _mm_##op (__T6, __T7); \
  __m128 __T9 = __builtin_shuffle (__T8, (__v4si) { 1, 0, 1, 0 }); \
  __m128 __T10 = _mm_##op (__T8, __T9); \
  return __T10[0]

extern __inline float
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_min_ps (__m512 __A)
{
  __MM512_REDUCE_OP (min_ps);
}

extern __inline float
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_max_ps (__m512 __A)
{
  __MM512_REDUCE_OP (max_ps);
}

extern __inline float
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_min_ps (__mmask16 __U, __m512 __A)
{
  __A = _mm512_mask_mov_ps (_mm512_set1_ps (__builtin_inff ()), __U, __A);
  __MM512_REDUCE_OP (min_ps);
}

extern __inline float
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_max_ps (__mmask16 __U, __m512 __A)
{
  __A = _mm512_mask_mov_ps (_mm512_set1_ps (-__builtin_inff ()), __U, __A);
  __MM512_REDUCE_OP (max_ps);
}
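/* 64-bit integer add/mul/and/or reductions.  With only eight lanes, two
   extraction steps (512 -> 256 -> 128 bits) suffice before the last two
   elements are combined directly.  */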
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __v4di __T1 = (__v4di) _mm512_extracti64x4_epi64 (__A, 1); \
  __v4di __T2 = (__v4di) _mm512_extracti64x4_epi64 (__A, 0); \
  __m256i __T3 = (__m256i) (__T1 op __T2); \
  __v2di __T4 = (__v2di) _mm256_extracti128_si256 (__T3, 1); \
  __v2di __T5 = (__v2di) _mm256_extracti128_si256 (__T3, 0); \
  __v2di __T6 = __T4 op __T5; \
  return __T6[0] op __T6[1]

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_add_epi64 (__m512i __A)
{
  __MM512_REDUCE_OP (+);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_mul_epi64 (__m512i __A)
{
  __MM512_REDUCE_OP (*);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_and_epi64 (__m512i __A)
{
  __MM512_REDUCE_OP (&);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_or_epi64 (__m512i __A)
{
  __MM512_REDUCE_OP (|);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_add_epi64 (__mmask8 __U, __m512i __A)
{
  __A = _mm512_maskz_mov_epi64 (__U, __A);
  __MM512_REDUCE_OP (+);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_mul_epi64 (__mmask8 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (1LL), __U, __A);
  __MM512_REDUCE_OP (*);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_and_epi64 (__mmask8 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (~0LL), __U, __A);
  __MM512_REDUCE_OP (&);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_or_epi64 (__mmask8 __U, __m512i __A)
{
  __A = _mm512_maskz_mov_epi64 (__U, __A);
  __MM512_REDUCE_OP (|);
}
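/* The 64-bit min/max reductions stay in 512-bit registers throughout:
   _mm512_shuffle_i64x2 with 0x4e swaps the 256-bit halves, and the
   subsequent __builtin_shuffle steps swap 128-bit and then 64-bit
   neighbours, so element 0 of the final vector holds the reduced
   value.  */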
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __m512i __T1 = _mm512_shuffle_i64x2 (__A, __A, 0x4e); \
  __m512i __T2 = _mm512_##op (__A, __T1); \
  __m512i __T3 \
    = (__m512i) __builtin_shuffle ((__v8di) __T2, \
          (__v8di) { 2, 3, 0, 1, 6, 7, 4, 5 }); \
  __m512i __T4 = _mm512_##op (__T2, __T3); \
  __m512i __T5 \
    = (__m512i) __builtin_shuffle ((__v8di) __T4, \
          (__v8di) { 1, 0, 3, 2, 5, 4, 7, 6 }); \
  __v8di __T6 = (__v8di) _mm512_##op (__T4, __T5); \
  return __T6[0]

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_min_epi64 (__m512i __A)
{
  __MM512_REDUCE_OP (min_epi64);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_max_epi64 (__m512i __A)
{
  __MM512_REDUCE_OP (max_epi64);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_min_epi64 (__mmask8 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (__LONG_LONG_MAX__),
          __U, __A);
  __MM512_REDUCE_OP (min_epi64);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_max_epi64 (__mmask8 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (-__LONG_LONG_MAX__ - 1),
          __U, __A);
  __MM512_REDUCE_OP (max_epi64);
}

extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_min_epu64 (__m512i __A)
{
  __MM512_REDUCE_OP (min_epu64);
}

extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_max_epu64 (__m512i __A)
{
  __MM512_REDUCE_OP (max_epu64);
}

extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_min_epu64 (__mmask8 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (~0LL), __U, __A);
  __MM512_REDUCE_OP (min_epu64);
}

extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_max_epu64 (__mmask8 __U, __m512i __A)
{
  __A = _mm512_maskz_mov_epi64 (__U, __A);
  __MM512_REDUCE_OP (max_epu64);
}
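/* Double-precision reductions, following the same halving scheme as the
   integer and single-precision reductions above.  */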
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __m256d __T1 = (__m256d) _mm512_extractf64x4_pd (__A, 1); \
  __m256d __T2 = (__m256d) _mm512_extractf64x4_pd (__A, 0); \
  __m256d __T3 = __T1 op __T2; \
  __m128d __T4 = _mm256_extractf128_pd (__T3, 1); \
  __m128d __T5 = _mm256_extractf128_pd (__T3, 0); \
  __m128d __T6 = __T4 op __T5; \
  return __T6[0] op __T6[1]

extern __inline double
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_add_pd (__m512d __A)
{
  __MM512_REDUCE_OP (+);
}

extern __inline double
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_mul_pd (__m512d __A)
{
  __MM512_REDUCE_OP (*);
}

extern __inline double
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_add_pd (__mmask8 __U, __m512d __A)
{
  __A = _mm512_maskz_mov_pd (__U, __A);
  __MM512_REDUCE_OP (+);
}

extern __inline double
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_mul_pd (__mmask8 __U, __m512d __A)
{
  __A = _mm512_mask_mov_pd (_mm512_set1_pd (1.0), __U, __A);
  __MM512_REDUCE_OP (*);
}

#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __m256d __T1 = (__m256d) _mm512_extractf64x4_pd (__A, 1); \
  __m256d __T2 = (__m256d) _mm512_extractf64x4_pd (__A, 0); \
  __m256d __T3 = _mm256_##op (__T1, __T2); \
  __m128d __T4 = _mm256_extractf128_pd (__T3, 1); \
  __m128d __T5 = _mm256_extractf128_pd (__T3, 0); \
  __m128d __T6 = _mm_##op (__T4, __T5); \
  __m128d __T7 = (__m128d) __builtin_shuffle (__T6, (__v2di) { 1, 0 }); \
  __m128d __T8 = _mm_##op (__T6, __T7); \
  return __T8[0]

extern __inline double
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_min_pd (__m512d __A)
{
  __MM512_REDUCE_OP (min_pd);
}

extern __inline double
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_max_pd (__m512d __A)
{
  __MM512_REDUCE_OP (max_pd);
}

extern __inline double
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_min_pd (__mmask8 __U, __m512d __A)
{
  __A = _mm512_mask_mov_pd (_mm512_set1_pd (__builtin_inf ()), __U, __A);
  __MM512_REDUCE_OP (min_pd);
}

extern __inline double
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_max_pd (__mmask8 __U, __m512d __A)
{
  __A = _mm512_mask_mov_pd (_mm512_set1_pd (-__builtin_inf ()), __U, __A);
  __MM512_REDUCE_OP (max_pd);
}
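/* A minimal usage sketch (illustrative only, not part of the original
   header): sum only the non-NaN lanes of a vector by masking the
   reduction with an ordered self-compare.

     static inline double
     sum_ordered (__m512d __v)
     {
       __mmask8 __ord = _mm512_cmpord_pd_mask (__v, __v);
       return _mm512_mask_reduce_add_pd (__ord, __v);
     }
*/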
#undef __MM512_REDUCE_OP

#ifdef __DISABLE_AVX512F__
#undef __DISABLE_AVX512F__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512F__ */

#endif /* _AVX512FINTRIN_H_INCLUDED */