12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908 |
- /* Copyright (C) 2011-2019 Free Software Foundation, Inc.
- This file is part of GCC.
- GCC is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
- GCC is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
/* Refuse direct inclusion: unless _IMMINTRIN_H_INCLUDED is already defined,
   compilation stops with the error below.  */
- #ifndef _IMMINTRIN_H_INCLUDED
- # error "Never use <avx2intrin.h> directly; include <immintrin.h> instead."
- #endif
/* Include guard for this header.  */
- #ifndef _AVX2INTRIN_H_INCLUDED
- #define _AVX2INTRIN_H_INCLUDED
/* When __AVX2__ is not defined, save the current target options, switch the
   target to "avx2" for the definitions that follow, and define
   __DISABLE_AVX2__ as a marker.  NOTE(review): the marker is presumably
   tested near the end of the file to pop the options again; that part is
   not visible here -- confirm.  */
- #ifndef __AVX2__
- #pragma GCC push_options
- #pragma GCC target("avx2")
- #define __DISABLE_AVX2__
- #endif /* __AVX2__ */
- /* Sum the absolute 8-bit integer differences of adjacent groups of 4
- byte integers in the first 2 operands. The starting offsets within the
- operands are determined by the 3rd (mask) operand. */
/* _mm256_mpsadbw_epu8: forward __X and __Y (viewed as __v32qi) together with
   the selector __M to __builtin_ia32_mpsadbw256 and return the result as
   __m256i.
   With __OPTIMIZE__ defined this is an always-inline function; otherwise it
   is a macro that applies the same casts to its arguments.
   NOTE(review): the macro fallback presumably exists because the builtin
   needs M as a compile-time constant, which a non-inlined function
   parameter would not provide -- confirm.  */
- #ifdef __OPTIMIZE__
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_mpsadbw_epu8 (__m256i __X, __m256i __Y, const int __M)
- {
- return (__m256i) __builtin_ia32_mpsadbw256 ((__v32qi)__X,
- (__v32qi)__Y, __M);
- }
- #else
- #define _mm256_mpsadbw_epu8(X, Y, M) \
- ((__m256i) __builtin_ia32_mpsadbw256 ((__v32qi)(__m256i)(X), \
- (__v32qi)(__m256i)(Y), (int)(M)))
- #endif
/* Pass __A, viewed as __v32qi, to __builtin_ia32_pabsb256 and return the
   result as __m256i.  Presumably a per-byte absolute value -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_abs_epi8 (__m256i __A)
- {
- return (__m256i)__builtin_ia32_pabsb256 ((__v32qi)__A);
- }
/* Pass __A, viewed as __v16hi, to __builtin_ia32_pabsw256 and return the
   result as __m256i.  Presumably a per-16-bit-lane absolute value --
   confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_abs_epi16 (__m256i __A)
- {
- return (__m256i)__builtin_ia32_pabsw256 ((__v16hi)__A);
- }
/* Pass __A, viewed as __v8si, to __builtin_ia32_pabsd256 and return the
   result as __m256i.  Presumably a per-32-bit-lane absolute value --
   confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_abs_epi32 (__m256i __A)
- {
- return (__m256i)__builtin_ia32_pabsd256 ((__v8si)__A);
- }
/* Pass __A and __B, both viewed as __v8si, to __builtin_ia32_packssdw256
   and return the result as __m256i.  Presumably a narrowing pack with
   signed saturation -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_packs_epi32 (__m256i __A, __m256i __B)
- {
- return (__m256i)__builtin_ia32_packssdw256 ((__v8si)__A, (__v8si)__B);
- }
/* Pass __A and __B, both viewed as __v16hi, to __builtin_ia32_packsswb256
   and return the result as __m256i.  Presumably a narrowing pack with
   signed saturation -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_packs_epi16 (__m256i __A, __m256i __B)
- {
- return (__m256i)__builtin_ia32_packsswb256 ((__v16hi)__A, (__v16hi)__B);
- }
/* Pass __A and __B, both viewed as __v8si, to __builtin_ia32_packusdw256
   and return the result as __m256i.  Presumably a narrowing pack with
   unsigned saturation -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_packus_epi32 (__m256i __A, __m256i __B)
- {
- return (__m256i)__builtin_ia32_packusdw256 ((__v8si)__A, (__v8si)__B);
- }
/* Pass __A and __B, both viewed as __v16hi, to __builtin_ia32_packuswb256
   and return the result as __m256i.  Presumably a narrowing pack with
   unsigned saturation -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_packus_epi16 (__m256i __A, __m256i __B)
- {
- return (__m256i)__builtin_ia32_packuswb256 ((__v16hi)__A, (__v16hi)__B);
- }
/* Lane-wise sum of __A and __B using GCC vector arithmetic on the operands
   viewed as __v32qu; no builtin is involved.  NOTE(review): __v32qu is
   declared outside this chunk; the unsigned lane type is presumably chosen
   so that overflow wraps instead of being signed-overflow UB -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_add_epi8 (__m256i __A, __m256i __B)
- {
- return (__m256i) ((__v32qu)__A + (__v32qu)__B);
- }
/* Lane-wise sum of __A and __B using GCC vector arithmetic on the operands
   viewed as __v16hu (type declared outside this chunk; presumably unsigned
   16-bit lanes so that overflow wraps -- confirm).  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_add_epi16 (__m256i __A, __m256i __B)
- {
- return (__m256i) ((__v16hu)__A + (__v16hu)__B);
- }
/* Lane-wise sum of __A and __B using GCC vector arithmetic on the operands
   viewed as __v8su (type declared outside this chunk; presumably unsigned
   32-bit lanes so that overflow wraps -- confirm).  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_add_epi32 (__m256i __A, __m256i __B)
- {
- return (__m256i) ((__v8su)__A + (__v8su)__B);
- }
/* Lane-wise sum of __A and __B using GCC vector arithmetic on the operands
   viewed as __v4du (type declared outside this chunk; presumably unsigned
   64-bit lanes so that overflow wraps -- confirm).  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_add_epi64 (__m256i __A, __m256i __B)
- {
- return (__m256i) ((__v4du)__A + (__v4du)__B);
- }
/* Pass __A and __B, both viewed as __v32qi, to __builtin_ia32_paddsb256 and
   return the result as __m256i.  Presumably a signed saturating add --
   confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_adds_epi8 (__m256i __A, __m256i __B)
- {
- return (__m256i)__builtin_ia32_paddsb256 ((__v32qi)__A, (__v32qi)__B);
- }
/* Pass __A and __B, both viewed as __v16hi, to __builtin_ia32_paddsw256 and
   return the result as __m256i.  Presumably a signed saturating add --
   confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_adds_epi16 (__m256i __A, __m256i __B)
- {
- return (__m256i)__builtin_ia32_paddsw256 ((__v16hi)__A, (__v16hi)__B);
- }
/* Pass __A and __B to __builtin_ia32_paddusb256 and return the result as
   __m256i.  The operands are cast to __v32qi (the same cast the signed
   variants use); how lanes are interpreted is decided by the builtin,
   presumably as an unsigned saturating add -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_adds_epu8 (__m256i __A, __m256i __B)
- {
- return (__m256i)__builtin_ia32_paddusb256 ((__v32qi)__A, (__v32qi)__B);
- }
/* Pass __A and __B, both cast to __v16hi, to __builtin_ia32_paddusw256 and
   return the result as __m256i.  Presumably an unsigned saturating add --
   confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_adds_epu16 (__m256i __A, __m256i __B)
- {
- return (__m256i)__builtin_ia32_paddusw256 ((__v16hi)__A, (__v16hi)__B);
- }
/* _mm256_alignr_epi8: forward __A and __B (viewed as __v4di) to
   __builtin_ia32_palignr256 with the count __N multiplied by 8, and return
   the result as __m256i.  NOTE(review): the multiplication presumably
   converts a byte count into a bit count -- confirm.
   With __OPTIMIZE__ defined this is an always-inline function; otherwise a
   macro performing the same casts and scaling is used, for the reason given
   in the comment inside the #else branch.  */
- #ifdef __OPTIMIZE__
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_alignr_epi8 (__m256i __A, __m256i __B, const int __N)
- {
- return (__m256i) __builtin_ia32_palignr256 ((__v4di)__A,
- (__v4di)__B,
- __N * 8);
- }
- #else
- /* In that case (__N*8) will be in vreg, and insn will not be matched. */
- /* Use define instead */
- #define _mm256_alignr_epi8(A, B, N) \
- ((__m256i) __builtin_ia32_palignr256 ((__v4di)(__m256i)(A), \
- (__v4di)(__m256i)(B), \
- (int)(N) * 8))
- #endif
/* Bitwise AND of __A and __B, written with the C `&` operator on the
   operands viewed as __v4du; no builtin is involved.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_and_si256 (__m256i __A, __m256i __B)
- {
- return (__m256i) ((__v4du)__A & (__v4du)__B);
- }
/* Pass __A and __B, both viewed as __v4di, to __builtin_ia32_andnotsi256
   and return the result as __m256i.  NOTE(review): presumably computes
   (~__A) & __B; which operand is complemented is decided by the builtin --
   confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_andnot_si256 (__m256i __A, __m256i __B)
- {
- return (__m256i) __builtin_ia32_andnotsi256 ((__v4di)__A, (__v4di)__B);
- }
/* Pass __A and __B, both cast to __v32qi, to __builtin_ia32_pavgb256 and
   return the result as __m256i.  Presumably a per-byte unsigned average;
   the rounding rule is defined by the builtin -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_avg_epu8 (__m256i __A, __m256i __B)
- {
- return (__m256i)__builtin_ia32_pavgb256 ((__v32qi)__A, (__v32qi)__B);
- }
/* Pass __A and __B, both cast to __v16hi, to __builtin_ia32_pavgw256 and
   return the result as __m256i.  Presumably a per-16-bit-lane unsigned
   average; the rounding rule is defined by the builtin -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_avg_epu16 (__m256i __A, __m256i __B)
- {
- return (__m256i)__builtin_ia32_pavgw256 ((__v16hi)__A, (__v16hi)__B);
- }
/* Pass __X, __Y and the vector mask __M, all viewed as __v32qi, to
   __builtin_ia32_pblendvb256 and return the result as __m256i.  Unlike
   _mm256_blend_epi16, the mask here is a run-time vector, so no macro
   fallback is needed.  NOTE(review): presumably each mask byte selects
   between the corresponding bytes of __X and __Y -- confirm the polarity.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_blendv_epi8 (__m256i __X, __m256i __Y, __m256i __M)
- {
- return (__m256i) __builtin_ia32_pblendvb256 ((__v32qi)__X,
- (__v32qi)__Y,
- (__v32qi)__M);
- }
/* _mm256_blend_epi16: forward __X and __Y (viewed as __v16hi) and the integer
   selector __M to __builtin_ia32_pblendw256, returning the result as
   __m256i.
   With __OPTIMIZE__ defined this is an always-inline function; otherwise it
   is a macro applying the same casts.  NOTE(review): the macro presumably
   exists so that M reaches the builtin as a constant expression --
   confirm.  */
- #ifdef __OPTIMIZE__
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_blend_epi16 (__m256i __X, __m256i __Y, const int __M)
- {
- return (__m256i) __builtin_ia32_pblendw256 ((__v16hi)__X,
- (__v16hi)__Y,
- __M);
- }
- #else
- #define _mm256_blend_epi16(X, Y, M) \
- ((__m256i) __builtin_ia32_pblendw256 ((__v16hi)(__m256i)(X), \
- (__v16hi)(__m256i)(Y), (int)(M)))
- #endif
/* Lane-wise equality test written with the C `==` operator on the operands
   viewed as __v32qi.  Under GCC's vector extension a comparison yields -1
   (all bits set) in lanes where it holds and 0 elsewhere.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_cmpeq_epi8 (__m256i __A, __m256i __B)
- {
- return (__m256i) ((__v32qi)__A == (__v32qi)__B);
- }
/* Lane-wise equality test written with the C `==` operator on the operands
   viewed as __v16hi; GCC vector comparison gives -1 in matching lanes and 0
   elsewhere.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_cmpeq_epi16 (__m256i __A, __m256i __B)
- {
- return (__m256i) ((__v16hi)__A == (__v16hi)__B);
- }
/* Lane-wise equality test written with the C `==` operator on the operands
   viewed as __v8si; GCC vector comparison gives -1 in matching lanes and 0
   elsewhere.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_cmpeq_epi32 (__m256i __A, __m256i __B)
- {
- return (__m256i) ((__v8si)__A == (__v8si)__B);
- }
/* Lane-wise equality test written with the C `==` operator on the operands
   viewed as __v4di; GCC vector comparison gives -1 in matching lanes and 0
   elsewhere.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_cmpeq_epi64 (__m256i __A, __m256i __B)
- {
- return (__m256i) ((__v4di)__A == (__v4di)__B);
- }
/* Lane-wise `__A > __B` written with the C `>` operator; lanes where it
   holds become -1, others 0 (GCC vector comparison).  Note the cast is to
   __v32qs rather than the __v32qi used by _mm256_cmpeq_epi8.
   NOTE(review): __v32qs is declared outside this chunk; presumably an
   explicitly signed char vector so the ordering does not depend on whether
   plain char is signed -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_cmpgt_epi8 (__m256i __A, __m256i __B)
- {
- return (__m256i) ((__v32qs)__A > (__v32qs)__B);
- }
/* Lane-wise `__A > __B` written with the C `>` operator on the operands
   viewed as __v16hi; lanes where it holds become -1, others 0.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_cmpgt_epi16 (__m256i __A, __m256i __B)
- {
- return (__m256i) ((__v16hi)__A > (__v16hi)__B);
- }
/* Lane-wise `__A > __B` written with the C `>` operator on the operands
   viewed as __v8si; lanes where it holds become -1, others 0.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_cmpgt_epi32 (__m256i __A, __m256i __B)
- {
- return (__m256i) ((__v8si)__A > (__v8si)__B);
- }
/* Lane-wise `__A > __B` written with the C `>` operator on the operands
   viewed as __v4di; lanes where it holds become -1, others 0.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_cmpgt_epi64 (__m256i __A, __m256i __B)
- {
- return (__m256i) ((__v4di)__A > (__v4di)__B);
- }
/* Pass __X and __Y, both viewed as __v16hi, to __builtin_ia32_phaddw256 and
   return the result as __m256i.  Presumably a horizontal (adjacent-pair)
   add -- confirm the lane layout of the result.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_hadd_epi16 (__m256i __X, __m256i __Y)
- {
- return (__m256i) __builtin_ia32_phaddw256 ((__v16hi)__X,
- (__v16hi)__Y);
- }
/* Pass __X and __Y, both viewed as __v8si, to __builtin_ia32_phaddd256 and
   return the result as __m256i.  Presumably a horizontal (adjacent-pair)
   add -- confirm the lane layout of the result.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_hadd_epi32 (__m256i __X, __m256i __Y)
- {
- return (__m256i) __builtin_ia32_phaddd256 ((__v8si)__X, (__v8si)__Y);
- }
/* Pass __X and __Y, both viewed as __v16hi, to __builtin_ia32_phaddsw256
   and return the result as __m256i.  Presumably a horizontal add with
   signed saturation -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_hadds_epi16 (__m256i __X, __m256i __Y)
- {
- return (__m256i) __builtin_ia32_phaddsw256 ((__v16hi)__X,
- (__v16hi)__Y);
- }
/* Pass __X and __Y, both viewed as __v16hi, to __builtin_ia32_phsubw256 and
   return the result as __m256i.  Presumably a horizontal (adjacent-pair)
   subtract -- confirm operand order within each pair.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_hsub_epi16 (__m256i __X, __m256i __Y)
- {
- return (__m256i) __builtin_ia32_phsubw256 ((__v16hi)__X,
- (__v16hi)__Y);
- }
/* Pass __X and __Y, both viewed as __v8si, to __builtin_ia32_phsubd256 and
   return the result as __m256i.  Presumably a horizontal (adjacent-pair)
   subtract -- confirm operand order within each pair.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_hsub_epi32 (__m256i __X, __m256i __Y)
- {
- return (__m256i) __builtin_ia32_phsubd256 ((__v8si)__X, (__v8si)__Y);
- }
/* Pass __X and __Y, both viewed as __v16hi, to __builtin_ia32_phsubsw256
   and return the result as __m256i.  Presumably a horizontal subtract with
   signed saturation -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_hsubs_epi16 (__m256i __X, __m256i __Y)
- {
- return (__m256i) __builtin_ia32_phsubsw256 ((__v16hi)__X,
- (__v16hi)__Y);
- }
/* Pass __X and __Y, both viewed as __v32qi, to __builtin_ia32_pmaddubsw256
   and return the result as __m256i.  NOTE(review): presumably multiplies
   unsigned bytes of one operand by signed bytes of the other and adds
   adjacent products with saturation; which operand is treated as unsigned
   is decided by the builtin -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_maddubs_epi16 (__m256i __X, __m256i __Y)
- {
- return (__m256i) __builtin_ia32_pmaddubsw256 ((__v32qi)__X,
- (__v32qi)__Y);
- }
/* Pass __A and __B, both viewed as __v16hi, to __builtin_ia32_pmaddwd256
   and return the result as __m256i.  Presumably multiplies 16-bit lanes and
   adds adjacent products into 32-bit lanes -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_madd_epi16 (__m256i __A, __m256i __B)
- {
- return (__m256i)__builtin_ia32_pmaddwd256 ((__v16hi)__A,
- (__v16hi)__B);
- }
/* Pass __A and __B, both viewed as __v32qi, to __builtin_ia32_pmaxsb256 and
   return the result as __m256i.  Presumably a per-byte signed maximum --
   confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_max_epi8 (__m256i __A, __m256i __B)
- {
- return (__m256i)__builtin_ia32_pmaxsb256 ((__v32qi)__A, (__v32qi)__B);
- }
/* Pass __A and __B, both viewed as __v16hi, to __builtin_ia32_pmaxsw256 and
   return the result as __m256i.  Presumably a per-lane signed 16-bit
   maximum -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_max_epi16 (__m256i __A, __m256i __B)
- {
- return (__m256i)__builtin_ia32_pmaxsw256 ((__v16hi)__A, (__v16hi)__B);
- }
/* Pass __A and __B, both viewed as __v8si, to __builtin_ia32_pmaxsd256 and
   return the result as __m256i.  Presumably a per-lane signed 32-bit
   maximum -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_max_epi32 (__m256i __A, __m256i __B)
- {
- return (__m256i)__builtin_ia32_pmaxsd256 ((__v8si)__A, (__v8si)__B);
- }
/* Pass __A and __B, both cast to __v32qi, to __builtin_ia32_pmaxub256 and
   return the result as __m256i.  The cast type is the same as in the signed
   variant; lane interpretation is up to the builtin (presumably an unsigned
   per-byte maximum -- confirm).  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_max_epu8 (__m256i __A, __m256i __B)
- {
- return (__m256i)__builtin_ia32_pmaxub256 ((__v32qi)__A, (__v32qi)__B);
- }
/* Pass __A and __B, both cast to __v16hi, to __builtin_ia32_pmaxuw256 and
   return the result as __m256i.  Presumably an unsigned per-lane 16-bit
   maximum -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_max_epu16 (__m256i __A, __m256i __B)
- {
- return (__m256i)__builtin_ia32_pmaxuw256 ((__v16hi)__A, (__v16hi)__B);
- }
/* Pass __A and __B, both cast to __v8si, to __builtin_ia32_pmaxud256 and
   return the result as __m256i.  Presumably an unsigned per-lane 32-bit
   maximum -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_max_epu32 (__m256i __A, __m256i __B)
- {
- return (__m256i)__builtin_ia32_pmaxud256 ((__v8si)__A, (__v8si)__B);
- }
/* Pass __A and __B, both viewed as __v32qi, to __builtin_ia32_pminsb256 and
   return the result as __m256i.  Presumably a per-byte signed minimum --
   confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_min_epi8 (__m256i __A, __m256i __B)
- {
- return (__m256i)__builtin_ia32_pminsb256 ((__v32qi)__A, (__v32qi)__B);
- }
/* Pass __A and __B, both viewed as __v16hi, to __builtin_ia32_pminsw256 and
   return the result as __m256i.  Presumably a per-lane signed 16-bit
   minimum -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_min_epi16 (__m256i __A, __m256i __B)
- {
- return (__m256i)__builtin_ia32_pminsw256 ((__v16hi)__A, (__v16hi)__B);
- }
/* Pass __A and __B, both viewed as __v8si, to __builtin_ia32_pminsd256 and
   return the result as __m256i.  Presumably a per-lane signed 32-bit
   minimum -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_min_epi32 (__m256i __A, __m256i __B)
- {
- return (__m256i)__builtin_ia32_pminsd256 ((__v8si)__A, (__v8si)__B);
- }
/* Pass __A and __B, both cast to __v32qi, to __builtin_ia32_pminub256 and
   return the result as __m256i.  Presumably an unsigned per-byte minimum --
   confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_min_epu8 (__m256i __A, __m256i __B)
- {
- return (__m256i)__builtin_ia32_pminub256 ((__v32qi)__A, (__v32qi)__B);
- }
/* Pass __A and __B, both cast to __v16hi, to __builtin_ia32_pminuw256 and
   return the result as __m256i.  Presumably an unsigned per-lane 16-bit
   minimum -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_min_epu16 (__m256i __A, __m256i __B)
- {
- return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__A, (__v16hi)__B);
- }
/* Pass __A and __B, both cast to __v8si, to __builtin_ia32_pminud256 and
   return the result as __m256i.  Presumably an unsigned per-lane 32-bit
   minimum -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_min_epu32 (__m256i __A, __m256i __B)
- {
- return (__m256i)__builtin_ia32_pminud256 ((__v8si)__A, (__v8si)__B);
- }
/* Return, as a plain int, whatever __builtin_ia32_pmovmskb256 produces for
   __A viewed as __v32qi.  This is the only wrapper in this part of the
   header that returns a scalar.  Presumably one bit per byte lane, taken
   from each byte's top bit -- confirm.  */
- extern __inline int
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_movemask_epi8 (__m256i __A)
- {
- return __builtin_ia32_pmovmskb256 ((__v32qi)__A);
- }
/* Widening conversion: takes a 128-bit __X, passes it (viewed as __v16qi) to
   __builtin_ia32_pmovsxbw256 and returns a 256-bit __m256i.  Presumably
   sign-extends 8-bit lanes to 16 bits -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_cvtepi8_epi16 (__m128i __X)
- {
- return (__m256i) __builtin_ia32_pmovsxbw256 ((__v16qi)__X);
- }
/* Widening conversion: takes a 128-bit __X, passes it (viewed as __v16qi) to
   __builtin_ia32_pmovsxbd256 and returns a 256-bit __m256i.  Presumably
   sign-extends 8-bit lanes to 32 bits, using only the low part of __X --
   confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_cvtepi8_epi32 (__m128i __X)
- {
- return (__m256i) __builtin_ia32_pmovsxbd256 ((__v16qi)__X);
- }
/* Widening conversion: takes a 128-bit __X, passes it (viewed as __v16qi) to
   __builtin_ia32_pmovsxbq256 and returns a 256-bit __m256i.  Presumably
   sign-extends 8-bit lanes to 64 bits, using only the low part of __X --
   confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_cvtepi8_epi64 (__m128i __X)
- {
- return (__m256i) __builtin_ia32_pmovsxbq256 ((__v16qi)__X);
- }
/* Widening conversion: takes a 128-bit __X, passes it (viewed as __v8hi) to
   __builtin_ia32_pmovsxwd256 and returns a 256-bit __m256i.  Presumably
   sign-extends 16-bit lanes to 32 bits -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_cvtepi16_epi32 (__m128i __X)
- {
- return (__m256i) __builtin_ia32_pmovsxwd256 ((__v8hi)__X);
- }
/* Widening conversion: takes a 128-bit __X, passes it (viewed as __v8hi) to
   __builtin_ia32_pmovsxwq256 and returns a 256-bit __m256i.  Presumably
   sign-extends 16-bit lanes to 64 bits, using only the low part of __X --
   confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_cvtepi16_epi64 (__m128i __X)
- {
- return (__m256i) __builtin_ia32_pmovsxwq256 ((__v8hi)__X);
- }
/* Widening conversion: takes a 128-bit __X, passes it (viewed as __v4si) to
   __builtin_ia32_pmovsxdq256 and returns a 256-bit __m256i.  Presumably
   sign-extends 32-bit lanes to 64 bits -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_cvtepi32_epi64 (__m128i __X)
- {
- return (__m256i) __builtin_ia32_pmovsxdq256 ((__v4si)__X);
- }
/* Widening conversion: takes a 128-bit __X, passes it (cast to __v16qi) to
   __builtin_ia32_pmovzxbw256 and returns a 256-bit __m256i.  Presumably
   zero-extends 8-bit lanes to 16 bits -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_cvtepu8_epi16 (__m128i __X)
- {
- return (__m256i) __builtin_ia32_pmovzxbw256 ((__v16qi)__X);
- }
/* Widening conversion: takes a 128-bit __X, passes it (cast to __v16qi) to
   __builtin_ia32_pmovzxbd256 and returns a 256-bit __m256i.  Presumably
   zero-extends 8-bit lanes to 32 bits, using only the low part of __X --
   confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_cvtepu8_epi32 (__m128i __X)
- {
- return (__m256i) __builtin_ia32_pmovzxbd256 ((__v16qi)__X);
- }
/* Widening conversion: takes a 128-bit __X, passes it (cast to __v16qi) to
   __builtin_ia32_pmovzxbq256 and returns a 256-bit __m256i.  Presumably
   zero-extends 8-bit lanes to 64 bits, using only the low part of __X --
   confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_cvtepu8_epi64 (__m128i __X)
- {
- return (__m256i) __builtin_ia32_pmovzxbq256 ((__v16qi)__X);
- }
/* Widening conversion: takes a 128-bit __X, passes it (cast to __v8hi) to
   __builtin_ia32_pmovzxwd256 and returns a 256-bit __m256i.  Presumably
   zero-extends 16-bit lanes to 32 bits -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_cvtepu16_epi32 (__m128i __X)
- {
- return (__m256i) __builtin_ia32_pmovzxwd256 ((__v8hi)__X);
- }
/* Widening conversion: takes a 128-bit __X, passes it (cast to __v8hi) to
   __builtin_ia32_pmovzxwq256 and returns a 256-bit __m256i.  Presumably
   zero-extends 16-bit lanes to 64 bits, using only the low part of __X --
   confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_cvtepu16_epi64 (__m128i __X)
- {
- return (__m256i) __builtin_ia32_pmovzxwq256 ((__v8hi)__X);
- }
/* Widening conversion: takes a 128-bit __X, passes it (cast to __v4si) to
   __builtin_ia32_pmovzxdq256 and returns a 256-bit __m256i.  Presumably
   zero-extends 32-bit lanes to 64 bits -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_cvtepu32_epi64 (__m128i __X)
- {
- return (__m256i) __builtin_ia32_pmovzxdq256 ((__v4si)__X);
- }
/* Pass __X and __Y, both viewed as __v8si, to __builtin_ia32_pmuldq256 and
   return the result as __m256i.  NOTE(review): presumably a signed
   32x32->64-bit multiply of alternate lanes -- confirm which lanes are
   used.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_mul_epi32 (__m256i __X, __m256i __Y)
- {
- return (__m256i) __builtin_ia32_pmuldq256 ((__v8si)__X, (__v8si)__Y);
- }
/* Pass __X and __Y, both viewed as __v16hi, to __builtin_ia32_pmulhrsw256
   and return the result as __m256i.  NOTE(review): presumably a 16-bit
   multiply returning the rounded, scaled high part -- confirm the exact
   rounding against the instruction reference.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_mulhrs_epi16 (__m256i __X, __m256i __Y)
- {
- return (__m256i) __builtin_ia32_pmulhrsw256 ((__v16hi)__X,
- (__v16hi)__Y);
- }
/* Pass __A and __B, both cast to __v16hi, to __builtin_ia32_pmulhuw256 and
   return the result as __m256i.  Presumably the high 16 bits of an unsigned
   16x16 multiply per lane -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_mulhi_epu16 (__m256i __A, __m256i __B)
- {
- return (__m256i)__builtin_ia32_pmulhuw256 ((__v16hi)__A, (__v16hi)__B);
- }
/* Pass __A and __B, both viewed as __v16hi, to __builtin_ia32_pmulhw256 and
   return the result as __m256i.  Presumably the high 16 bits of a signed
   16x16 multiply per lane -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_mulhi_epi16 (__m256i __A, __m256i __B)
- {
- return (__m256i)__builtin_ia32_pmulhw256 ((__v16hi)__A, (__v16hi)__B);
- }
/* Lane-wise product of __A and __B using the C `*` operator on the operands
   viewed as __v16hu; each result lane keeps the lane's own width, i.e. the
   low half of the full product.  NOTE(review): __v16hu is declared outside
   this chunk; unsigned lanes are presumably used so the truncating multiply
   wraps rather than overflowing a signed type -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_mullo_epi16 (__m256i __A, __m256i __B)
- {
- return (__m256i) ((__v16hu)__A * (__v16hu)__B);
- }
/* Lane-wise product of __A and __B using the C `*` operator on the operands
   viewed as __v8su; each result lane keeps the lane's own width, i.e. the
   low half of the full product.  (__v8su is declared outside this chunk;
   presumably unsigned 32-bit lanes so the multiply wraps -- confirm.)  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_mullo_epi32 (__m256i __A, __m256i __B)
- {
- return (__m256i) ((__v8su)__A * (__v8su)__B);
- }
/* Pass __A and __B, both cast to __v8si, to __builtin_ia32_pmuludq256 and
   return the result as __m256i.  NOTE(review): presumably an unsigned
   32x32->64-bit multiply of alternate lanes -- confirm which lanes are
   used.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_mul_epu32 (__m256i __A, __m256i __B)
- {
- return (__m256i)__builtin_ia32_pmuludq256 ((__v8si)__A, (__v8si)__B);
- }
/* Bitwise OR of __A and __B, written with the C `|` operator on the operands
   viewed as __v4du; no builtin is involved.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_or_si256 (__m256i __A, __m256i __B)
- {
- return (__m256i) ((__v4du)__A | (__v4du)__B);
- }
/* Pass __A and __B, both cast to __v32qi, to __builtin_ia32_psadbw256 and
   return the result as __m256i.  NOTE(review): presumably sums absolute
   differences of unsigned bytes over groups of lanes -- confirm the group
   size and result layout.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_sad_epu8 (__m256i __A, __m256i __B)
- {
- return (__m256i)__builtin_ia32_psadbw256 ((__v32qi)__A, (__v32qi)__B);
- }
/* Pass __X and the run-time control vector __Y, both viewed as __v32qi, to
   __builtin_ia32_pshufb256 and return the result as __m256i.
   NOTE(review): presumably __Y supplies per-byte source indices into __X --
   confirm, including whether indices can cross 128-bit halves.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_shuffle_epi8 (__m256i __X, __m256i __Y)
- {
- return (__m256i) __builtin_ia32_pshufb256 ((__v32qi)__X,
- (__v32qi)__Y);
- }
/* Shuffles controlled by an integer selector:
     _mm256_shuffle_epi32    -> __builtin_ia32_pshufd256  (__A as __v8si)
     _mm256_shufflehi_epi16  -> __builtin_ia32_pshufhw256 (__A as __v16hi)
     _mm256_shufflelo_epi16  -> __builtin_ia32_pshuflw256 (__A as __v16hi)
   Each passes the selector through unchanged and returns the builtin's
   result as __m256i.
   With __OPTIMIZE__ defined these are always-inline functions; otherwise
   they are macros applying the same casts.  NOTE(review): the macro forms
   presumably exist so the selector reaches the builtin as a constant
   expression -- confirm.  */
- #ifdef __OPTIMIZE__
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_shuffle_epi32 (__m256i __A, const int __mask)
- {
- return (__m256i)__builtin_ia32_pshufd256 ((__v8si)__A, __mask);
- }
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_shufflehi_epi16 (__m256i __A, const int __mask)
- {
- return (__m256i)__builtin_ia32_pshufhw256 ((__v16hi)__A, __mask);
- }
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_shufflelo_epi16 (__m256i __A, const int __mask)
- {
- return (__m256i)__builtin_ia32_pshuflw256 ((__v16hi)__A, __mask);
- }
- #else
- #define _mm256_shuffle_epi32(A, N) \
- ((__m256i)__builtin_ia32_pshufd256 ((__v8si)(__m256i)(A), (int)(N)))
- #define _mm256_shufflehi_epi16(A, N) \
- ((__m256i)__builtin_ia32_pshufhw256 ((__v16hi)(__m256i)(A), (int)(N)))
- #define _mm256_shufflelo_epi16(A, N) \
- ((__m256i)__builtin_ia32_pshuflw256 ((__v16hi)(__m256i)(A), (int)(N)))
- #endif
/* Pass __X and __Y, both viewed as __v32qi, to __builtin_ia32_psignb256 and
   return the result as __m256i.  NOTE(review): presumably negates, keeps or
   zeroes each byte of __X according to the sign of the matching byte of
   __Y -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_sign_epi8 (__m256i __X, __m256i __Y)
- {
- return (__m256i) __builtin_ia32_psignb256 ((__v32qi)__X, (__v32qi)__Y);
- }
/* Pass __X and __Y, both viewed as __v16hi, to __builtin_ia32_psignw256 and
   return the result as __m256i.  NOTE(review): presumably applies the sign
   of each 16-bit lane of __Y to the matching lane of __X -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_sign_epi16 (__m256i __X, __m256i __Y)
- {
- return (__m256i) __builtin_ia32_psignw256 ((__v16hi)__X, (__v16hi)__Y);
- }
/* Pass __X and __Y, both viewed as __v8si, to __builtin_ia32_psignd256 and
   return the result as __m256i.  NOTE(review): presumably applies the sign
   of each 32-bit lane of __Y to the matching lane of __X -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_sign_epi32 (__m256i __X, __m256i __Y)
- {
- return (__m256i) __builtin_ia32_psignd256 ((__v8si)__X, (__v8si)__Y);
- }
/* _mm256_bslli_epi128 and _mm256_slli_si256 have identical bodies: both pass
   __A (without any vector cast) and __N * 8 to __builtin_ia32_pslldqi256
   and return the result as __m256i, so the two names are aliases for the
   same operation.
   NOTE(review): the multiplication by 8 presumably converts a byte count
   into the bit count the builtin expects -- confirm.
   With __OPTIMIZE__ defined these are always-inline functions; otherwise
   macros with the same scaling are used, presumably so the count reaches
   the builtin as a constant expression -- confirm.  */
- #ifdef __OPTIMIZE__
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_bslli_epi128 (__m256i __A, const int __N)
- {
- return (__m256i)__builtin_ia32_pslldqi256 (__A, __N * 8);
- }
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_slli_si256 (__m256i __A, const int __N)
- {
- return (__m256i)__builtin_ia32_pslldqi256 (__A, __N * 8);
- }
- #else
- #define _mm256_bslli_epi128(A, N) \
- ((__m256i)__builtin_ia32_pslldqi256 ((__m256i)(A), (int)(N) * 8))
- #define _mm256_slli_si256(A, N) \
- ((__m256i)__builtin_ia32_pslldqi256 ((__m256i)(A), (int)(N) * 8))
- #endif
/* Pass __A (viewed as __v16hi) and the scalar count __B, unchanged, to
   __builtin_ia32_psllwi256 and return the result as __m256i.  Presumably a
   left shift of every 16-bit lane by __B bits -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_slli_epi16 (__m256i __A, int __B)
- {
- return (__m256i)__builtin_ia32_psllwi256 ((__v16hi)__A, __B);
- }
/* Variant taking the count in a 128-bit vector: passes __A (as __v16hi) and
   __B (as __v8hi) to __builtin_ia32_psllw256 and returns the result as
   __m256i.  Presumably a left shift of every 16-bit lane by one shared
   count read from __B -- confirm which bits of __B are used.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_sll_epi16 (__m256i __A, __m128i __B)
- {
- return (__m256i)__builtin_ia32_psllw256((__v16hi)__A, (__v8hi)__B);
- }
/* Pass __A (viewed as __v8si) and the scalar count __B, unchanged, to
   __builtin_ia32_pslldi256 and return the result as __m256i.  Presumably a
   left shift of every 32-bit lane by __B bits -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_slli_epi32 (__m256i __A, int __B)
- {
- return (__m256i)__builtin_ia32_pslldi256 ((__v8si)__A, __B);
- }
/* Variant taking the count in a 128-bit vector: passes __A (as __v8si) and
   __B (as __v4si) to __builtin_ia32_pslld256 and returns the result as
   __m256i.  Presumably a left shift of every 32-bit lane by one shared
   count read from __B -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_sll_epi32 (__m256i __A, __m128i __B)
- {
- return (__m256i)__builtin_ia32_pslld256((__v8si)__A, (__v4si)__B);
- }
/* Pass __A (viewed as __v4di) and the scalar count __B, unchanged, to
   __builtin_ia32_psllqi256 and return the result as __m256i.  Presumably a
   left shift of every 64-bit lane by __B bits -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_slli_epi64 (__m256i __A, int __B)
- {
- return (__m256i)__builtin_ia32_psllqi256 ((__v4di)__A, __B);
- }
/* Variant taking the count in a 128-bit vector: passes __A (as __v4di) and
   __B (as __v2di) to __builtin_ia32_psllq256 and returns the result as
   __m256i.  Presumably a left shift of every 64-bit lane by one shared
   count read from __B -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_sll_epi64 (__m256i __A, __m128i __B)
- {
- return (__m256i)__builtin_ia32_psllq256((__v4di)__A, (__v2di)__B);
- }
/* Pass __A (viewed as __v16hi) and the scalar count __B, unchanged, to
   __builtin_ia32_psrawi256 and return the result as __m256i.  Presumably an
   arithmetic (sign-filling) right shift of every 16-bit lane -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_srai_epi16 (__m256i __A, int __B)
- {
- return (__m256i)__builtin_ia32_psrawi256 ((__v16hi)__A, __B);
- }
/* Variant taking the count in a 128-bit vector: passes __A (as __v16hi) and
   __B (as __v8hi) to __builtin_ia32_psraw256 and returns the result as
   __m256i.  Presumably an arithmetic right shift of every 16-bit lane by
   one shared count read from __B -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_sra_epi16 (__m256i __A, __m128i __B)
- {
- return (__m256i)__builtin_ia32_psraw256 ((__v16hi)__A, (__v8hi)__B);
- }
/* Pass __A (viewed as __v8si) and the scalar count __B, unchanged, to
   __builtin_ia32_psradi256 and return the result as __m256i.  Presumably an
   arithmetic (sign-filling) right shift of every 32-bit lane -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_srai_epi32 (__m256i __A, int __B)
- {
- return (__m256i)__builtin_ia32_psradi256 ((__v8si)__A, __B);
- }
/* Variant taking the count in a 128-bit vector: passes __A (as __v8si) and
   __B (as __v4si) to __builtin_ia32_psrad256 and returns the result as
   __m256i.  Presumably an arithmetic right shift of every 32-bit lane by
   one shared count read from __B -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_sra_epi32 (__m256i __A, __m128i __B)
- {
- return (__m256i)__builtin_ia32_psrad256 ((__v8si)__A, (__v4si)__B);
- }
/* _mm256_bsrli_epi128 and _mm256_srli_si256 have identical bodies: both pass
   __A (without any vector cast) and __N * 8 to __builtin_ia32_psrldqi256
   and return the result as __m256i, so the two names are aliases for the
   same operation.
   NOTE(review): the multiplication by 8 presumably converts a byte count
   into the bit count the builtin expects -- confirm.
   With __OPTIMIZE__ defined these are always-inline functions; otherwise
   macros with the same scaling are used, presumably so the count reaches
   the builtin as a constant expression -- confirm.  */
- #ifdef __OPTIMIZE__
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_bsrli_epi128 (__m256i __A, const int __N)
- {
- return (__m256i)__builtin_ia32_psrldqi256 (__A, __N * 8);
- }
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_srli_si256 (__m256i __A, const int __N)
- {
- return (__m256i)__builtin_ia32_psrldqi256 (__A, __N * 8);
- }
- #else
- #define _mm256_bsrli_epi128(A, N) \
- ((__m256i)__builtin_ia32_psrldqi256 ((__m256i)(A), (int)(N) * 8))
- #define _mm256_srli_si256(A, N) \
- ((__m256i)__builtin_ia32_psrldqi256 ((__m256i)(A), (int)(N) * 8))
- #endif
/* Pass __A (viewed as __v16hi) and the scalar count __B, unchanged, to
   __builtin_ia32_psrlwi256 and return the result as __m256i.  Presumably a
   logical (zero-filling) right shift of every 16-bit lane -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_srli_epi16 (__m256i __A, int __B)
- {
- return (__m256i)__builtin_ia32_psrlwi256 ((__v16hi)__A, __B);
- }
/* Variant taking the count in a 128-bit vector: passes __A (as __v16hi) and
   __B (as __v8hi) to __builtin_ia32_psrlw256 and returns the result as
   __m256i.  Presumably a logical right shift of every 16-bit lane by one
   shared count read from __B -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_srl_epi16 (__m256i __A, __m128i __B)
- {
- return (__m256i)__builtin_ia32_psrlw256((__v16hi)__A, (__v8hi)__B);
- }
/* Pass __A (viewed as __v8si) and the scalar count __B, unchanged, to
   __builtin_ia32_psrldi256 and return the result as __m256i.  Presumably a
   logical (zero-filling) right shift of every 32-bit lane -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_srli_epi32 (__m256i __A, int __B)
- {
- return (__m256i)__builtin_ia32_psrldi256 ((__v8si)__A, __B);
- }
/* Variant taking the count in a 128-bit vector: passes __A (as __v8si) and
   __B (as __v4si) to __builtin_ia32_psrld256 and returns the result as
   __m256i.  Presumably a logical right shift of every 32-bit lane by one
   shared count read from __B -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_srl_epi32 (__m256i __A, __m128i __B)
- {
- return (__m256i)__builtin_ia32_psrld256((__v8si)__A, (__v4si)__B);
- }
/* Pass __A (viewed as __v4di) and the scalar count __B, unchanged, to
   __builtin_ia32_psrlqi256 and return the result as __m256i.  Presumably a
   logical (zero-filling) right shift of every 64-bit lane -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_srli_epi64 (__m256i __A, int __B)
- {
- return (__m256i)__builtin_ia32_psrlqi256 ((__v4di)__A, __B);
- }
/* Variant taking the count in a 128-bit vector: passes __A (as __v4di) and
   __B (as __v2di) to __builtin_ia32_psrlq256 and returns the result as
   __m256i.  Presumably a logical right shift of every 64-bit lane by one
   shared count read from __B -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_srl_epi64 (__m256i __A, __m128i __B)
- {
- return (__m256i)__builtin_ia32_psrlq256((__v4di)__A, (__v2di)__B);
- }
/* Lane-wise difference __A - __B using GCC vector arithmetic on the operands
   viewed as __v32qu; no builtin is involved.  NOTE(review): __v32qu is
   declared outside this chunk; the unsigned lane type is presumably chosen
   so that underflow wraps instead of being signed-overflow UB -- confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_sub_epi8 (__m256i __A, __m256i __B)
- {
- return (__m256i) ((__v32qu)__A - (__v32qu)__B);
- }
/* Lane-wise difference __A - __B using GCC vector arithmetic on the operands
   viewed as __v16hu (type declared outside this chunk; presumably unsigned
   16-bit lanes so that underflow wraps -- confirm).  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_sub_epi16 (__m256i __A, __m256i __B)
- {
- return (__m256i) ((__v16hu)__A - (__v16hu)__B);
- }
/* Lane-wise difference __A - __B using GCC vector arithmetic on the operands
   viewed as __v8su (type declared outside this chunk; presumably unsigned
   32-bit lanes so that underflow wraps -- confirm).  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_sub_epi32 (__m256i __A, __m256i __B)
- {
- return (__m256i) ((__v8su)__A - (__v8su)__B);
- }
/* Lane-wise difference __A - __B using GCC vector arithmetic on the operands
   viewed as __v4du (type declared outside this chunk; presumably unsigned
   64-bit lanes so that underflow wraps -- confirm).  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_sub_epi64 (__m256i __A, __m256i __B)
- {
- return (__m256i) ((__v4du)__A - (__v4du)__B);
- }
/* Pass __A and __B, both viewed as __v32qi, to __builtin_ia32_psubsb256 and
   return the result as __m256i.  Presumably a signed saturating subtract --
   confirm.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_subs_epi8 (__m256i __A, __m256i __B)
- {
- return (__m256i)__builtin_ia32_psubsb256 ((__v32qi)__A, (__v32qi)__B);
- }
- /* Forwards to the psubsw256 builtin with both operands viewed as __v16hi.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_subs_epi16 (__m256i __A, __m256i __B)
- {
-   const __v16hi __lhs = (__v16hi) __A;
-   const __v16hi __rhs = (__v16hi) __B;
-   return (__m256i) __builtin_ia32_psubsw256 (__lhs, __rhs);
- }
- /* Forwards to the psubusb256 builtin; the operands are handed over through
-    the __v32qi view, as the original call does.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_subs_epu8 (__m256i __A, __m256i __B)
- {
-   const __v32qi __lhs = (__v32qi) __A;
-   const __v32qi __rhs = (__v32qi) __B;
-   return (__m256i) __builtin_ia32_psubusb256 (__lhs, __rhs);
- }
- /* Forwards to the psubusw256 builtin; the operands are handed over through
-    the __v16hi view, as the original call does.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_subs_epu16 (__m256i __A, __m256i __B)
- {
-   const __v16hi __lhs = (__v16hi) __A;
-   const __v16hi __rhs = (__v16hi) __B;
-   return (__m256i) __builtin_ia32_psubusw256 (__lhs, __rhs);
- }
- /* Forwards to the punpckhbw256 builtin with both operands viewed as __v32qi.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_unpackhi_epi8 (__m256i __A, __m256i __B)
- {
-   const __v32qi __first = (__v32qi) __A;
-   const __v32qi __second = (__v32qi) __B;
-   return (__m256i) __builtin_ia32_punpckhbw256 (__first, __second);
- }
- /* Forwards to the punpckhwd256 builtin with both operands viewed as __v16hi.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_unpackhi_epi16 (__m256i __A, __m256i __B)
- {
-   const __v16hi __first = (__v16hi) __A;
-   const __v16hi __second = (__v16hi) __B;
-   return (__m256i) __builtin_ia32_punpckhwd256 (__first, __second);
- }
- /* Forwards to the punpckhdq256 builtin with both operands viewed as __v8si.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_unpackhi_epi32 (__m256i __A, __m256i __B)
- {
-   const __v8si __first = (__v8si) __A;
-   const __v8si __second = (__v8si) __B;
-   return (__m256i) __builtin_ia32_punpckhdq256 (__first, __second);
- }
- /* Forwards to the punpckhqdq256 builtin with both operands viewed as __v4di.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_unpackhi_epi64 (__m256i __A, __m256i __B)
- {
-   const __v4di __first = (__v4di) __A;
-   const __v4di __second = (__v4di) __B;
-   return (__m256i) __builtin_ia32_punpckhqdq256 (__first, __second);
- }
- /* Forwards to the punpcklbw256 builtin with both operands viewed as __v32qi.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_unpacklo_epi8 (__m256i __A, __m256i __B)
- {
-   const __v32qi __first = (__v32qi) __A;
-   const __v32qi __second = (__v32qi) __B;
-   return (__m256i) __builtin_ia32_punpcklbw256 (__first, __second);
- }
- /* Forwards to the punpcklwd256 builtin with both operands viewed as __v16hi.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_unpacklo_epi16 (__m256i __A, __m256i __B)
- {
-   const __v16hi __first = (__v16hi) __A;
-   const __v16hi __second = (__v16hi) __B;
-   return (__m256i) __builtin_ia32_punpcklwd256 (__first, __second);
- }
- /* Forwards to the punpckldq256 builtin with both operands viewed as __v8si.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_unpacklo_epi32 (__m256i __A, __m256i __B)
- {
-   const __v8si __first = (__v8si) __A;
-   const __v8si __second = (__v8si) __B;
-   return (__m256i) __builtin_ia32_punpckldq256 (__first, __second);
- }
- /* Forwards to the punpcklqdq256 builtin with both operands viewed as __v4di.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_unpacklo_epi64 (__m256i __A, __m256i __B)
- {
-   const __v4di __first = (__v4di) __A;
-   const __v4di __second = (__v4di) __B;
-   return (__m256i) __builtin_ia32_punpcklqdq256 (__first, __second);
- }
- /* Bitwise exclusive-or of __A and __B, evaluated on the __v4du view of
-    both operands.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_xor_si256 (__m256i __A, __m256i __B)
- {
-   const __v4du __lhs = (__v4du) __A;
-   const __v4du __rhs = (__v4du) __B;
-   return (__m256i) (__lhs ^ __rhs);
- }
- /* Forwards to the movntdqa256 builtin.  The pointer is recast to
-    __v4di *, which drops the const qualifier exactly as the original
-    cast does.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_stream_load_si256 (__m256i const *__X)
- {
-   __v4di *__addr = (__v4di *) __X;
-   return (__m256i) __builtin_ia32_movntdqa256 (__addr);
- }
- /* Forwards to the vbroadcastss_ps builtin with __X viewed as __v4sf.  */
- extern __inline __m128
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_broadcastss_ps (__m128 __X)
- {
-   const __v4sf __in = (__v4sf) __X;
-   return (__m128) __builtin_ia32_vbroadcastss_ps (__in);
- }
- /* Forwards to the vbroadcastss_ps256 builtin with the 128-bit __X viewed
-    as __v4sf; the result is returned as __m256.  */
- extern __inline __m256
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_broadcastss_ps (__m128 __X)
- {
-   const __v4sf __in = (__v4sf) __X;
-   return (__m256) __builtin_ia32_vbroadcastss_ps256 (__in);
- }
- /* Forwards to the vbroadcastsd_pd256 builtin with the 128-bit __X viewed
-    as __v2df; the result is returned as __m256d.  */
- extern __inline __m256d
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_broadcastsd_pd (__m128d __X)
- {
-   const __v2df __in = (__v2df) __X;
-   return (__m256d) __builtin_ia32_vbroadcastsd_pd256 (__in);
- }
- /* Forwards to the vbroadcastsi256 builtin with the 128-bit __X viewed
-    as __v2di; the result is returned as __m256i.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_broadcastsi128_si256 (__m128i __X)
- {
-   const __v2di __in = (__v2di) __X;
-   return (__m256i) __builtin_ia32_vbroadcastsi256 (__in);
- }
- /* When __OPTIMIZE__ is defined this is an always-inline function that hands
-    __M directly to the pblendd128 builtin; otherwise it is a macro that
-    expands to the same builtin call.  */
- #ifdef __OPTIMIZE__
- extern __inline __m128i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_blend_epi32 (__m128i __X, __m128i __Y, const int __M)
- {
-   const __v4si __first = (__v4si) __X;
-   const __v4si __second = (__v4si) __Y;
-   return (__m128i) __builtin_ia32_pblendd128 (__first, __second, __M);
- }
- #else
- #define _mm_blend_epi32(X, Y, M) \
-   ((__m128i) __builtin_ia32_pblendd128 ((__v4si)(__m128i)(X), \
-                                         (__v4si)(__m128i)(Y), \
-                                         (int)(M)))
- #endif
- /* When __OPTIMIZE__ is defined this is an always-inline function that hands
-    __M directly to the pblendd256 builtin; otherwise it is a macro that
-    expands to the same builtin call.  */
- #ifdef __OPTIMIZE__
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_blend_epi32 (__m256i __X, __m256i __Y, const int __M)
- {
-   const __v8si __first = (__v8si) __X;
-   const __v8si __second = (__v8si) __Y;
-   return (__m256i) __builtin_ia32_pblendd256 (__first, __second, __M);
- }
- #else
- #define _mm256_blend_epi32(X, Y, M) \
-   ((__m256i) __builtin_ia32_pblendd256 ((__v8si)(__m256i)(X), \
-                                         (__v8si)(__m256i)(Y), \
-                                         (int)(M)))
- #endif
- /* Forwards to the pbroadcastb256 builtin with __X viewed as __v16qi.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_broadcastb_epi8 (__m128i __X)
- {
-   const __v16qi __in = (__v16qi) __X;
-   return (__m256i) __builtin_ia32_pbroadcastb256 (__in);
- }
- /* Forwards to the pbroadcastw256 builtin with __X viewed as __v8hi.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_broadcastw_epi16 (__m128i __X)
- {
-   const __v8hi __in = (__v8hi) __X;
-   return (__m256i) __builtin_ia32_pbroadcastw256 (__in);
- }
- /* Forwards to the pbroadcastd256 builtin with __X viewed as __v4si.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_broadcastd_epi32 (__m128i __X)
- {
-   const __v4si __in = (__v4si) __X;
-   return (__m256i) __builtin_ia32_pbroadcastd256 (__in);
- }
- /* Forwards to the pbroadcastq256 builtin with __X viewed as __v2di.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_broadcastq_epi64 (__m128i __X)
- {
-   const __v2di __in = (__v2di) __X;
-   return (__m256i) __builtin_ia32_pbroadcastq256 (__in);
- }
- /* Forwards to the pbroadcastb128 builtin with __X viewed as __v16qi.  */
- extern __inline __m128i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_broadcastb_epi8 (__m128i __X)
- {
-   const __v16qi __in = (__v16qi) __X;
-   return (__m128i) __builtin_ia32_pbroadcastb128 (__in);
- }
- /* Forwards to the pbroadcastw128 builtin with __X viewed as __v8hi.  */
- extern __inline __m128i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_broadcastw_epi16 (__m128i __X)
- {
-   const __v8hi __in = (__v8hi) __X;
-   return (__m128i) __builtin_ia32_pbroadcastw128 (__in);
- }
- /* Forwards to the pbroadcastd128 builtin with __X viewed as __v4si.  */
- extern __inline __m128i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_broadcastd_epi32 (__m128i __X)
- {
-   const __v4si __in = (__v4si) __X;
-   return (__m128i) __builtin_ia32_pbroadcastd128 (__in);
- }
- /* Forwards to the pbroadcastq128 builtin with __X viewed as __v2di.  */
- extern __inline __m128i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_broadcastq_epi64 (__m128i __X)
- {
-   const __v2di __in = (__v2di) __X;
-   return (__m128i) __builtin_ia32_pbroadcastq128 (__in);
- }
- /* Forwards to the permvarsi256 builtin with both __X and __Y viewed
-    as __v8si.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_permutevar8x32_epi32 (__m256i __X, __m256i __Y)
- {
-   const __v8si __data = (__v8si) __X;
-   const __v8si __sel = (__v8si) __Y;
-   return (__m256i) __builtin_ia32_permvarsi256 (__data, __sel);
- }
- /* When __OPTIMIZE__ is defined this is an always-inline function that hands
-    __M directly to the permdf256 builtin; otherwise it is a macro that
-    expands to the same builtin call.  */
- #ifdef __OPTIMIZE__
- extern __inline __m256d
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_permute4x64_pd (__m256d __X, const int __M)
- {
-   const __v4df __data = (__v4df) __X;
-   return (__m256d) __builtin_ia32_permdf256 (__data, __M);
- }
- #else
- #define _mm256_permute4x64_pd(X, M) \
-   ((__m256d) __builtin_ia32_permdf256 ((__v4df)(__m256d)(X), \
-                                        (int)(M)))
- #endif
- /* Forwards to the permvarsf256 builtin with __X viewed as __v8sf and
-    __Y viewed as __v8si.  */
- extern __inline __m256
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_permutevar8x32_ps (__m256 __X, __m256i __Y)
- {
-   const __v8sf __data = (__v8sf) __X;
-   const __v8si __sel = (__v8si) __Y;
-   return (__m256) __builtin_ia32_permvarsf256 (__data, __sel);
- }
- /* When __OPTIMIZE__ is defined this is an always-inline function that hands
-    __M directly to the permdi256 builtin; otherwise it is a macro that
-    expands to the same builtin call.  */
- #ifdef __OPTIMIZE__
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_permute4x64_epi64 (__m256i __X, const int __M)
- {
-   const __v4di __data = (__v4di) __X;
-   return (__m256i) __builtin_ia32_permdi256 (__data, __M);
- }
- #else
- #define _mm256_permute4x64_epi64(X, M) \
-   ((__m256i) __builtin_ia32_permdi256 ((__v4di)(__m256i)(X), \
-                                        (int)(M)))
- #endif
- /* When __OPTIMIZE__ is defined this is an always-inline function that hands
-    __M directly to the permti256 builtin; otherwise it is a macro that
-    expands to the same builtin call.  */
- #ifdef __OPTIMIZE__
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_permute2x128_si256 (__m256i __X, __m256i __Y, const int __M)
- {
-   const __v4di __first = (__v4di) __X;
-   const __v4di __second = (__v4di) __Y;
-   return (__m256i) __builtin_ia32_permti256 (__first, __second, __M);
- }
- #else
- #define _mm256_permute2x128_si256(X, Y, M) \
-   ((__m256i) __builtin_ia32_permti256 ((__v4di)(__m256i)(X), \
-                                        (__v4di)(__m256i)(Y), \
-                                        (int)(M)))
- #endif
- /* When __OPTIMIZE__ is defined this is an always-inline function that hands
-    __M directly to the extract128i256 builtin; otherwise it is a macro that
-    expands to the same builtin call.  The result is a 128-bit __m128i.  */
- #ifdef __OPTIMIZE__
- extern __inline __m128i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_extracti128_si256 (__m256i __X, const int __M)
- {
-   const __v4di __wide = (__v4di) __X;
-   return (__m128i) __builtin_ia32_extract128i256 (__wide, __M);
- }
- #else
- #define _mm256_extracti128_si256(X, M) \
-   ((__m128i) __builtin_ia32_extract128i256 ((__v4di)(__m256i)(X), \
-                                             (int)(M)))
- #endif
- /* When __OPTIMIZE__ is defined this is an always-inline function that hands
-    __M directly to the insert128i256 builtin; otherwise it is a macro that
-    expands to the same builtin call.  __X is the 256-bit operand and __Y
-    the 128-bit one.  */
- #ifdef __OPTIMIZE__
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_inserti128_si256 (__m256i __X, __m128i __Y, const int __M)
- {
-   const __v4di __wide = (__v4di) __X;
-   const __v2di __narrow = (__v2di) __Y;
-   return (__m256i) __builtin_ia32_insert128i256 (__wide, __narrow, __M);
- }
- #else
- #define _mm256_inserti128_si256(X, Y, M) \
-   ((__m256i) __builtin_ia32_insert128i256 ((__v4di)(__m256i)(X), \
-                                            (__v2di)(__m128i)(Y), \
-                                            (int)(M)))
- #endif
- /* Forwards to the maskloadd256 builtin: __X is recast to const __v8si *
-    and the mask __M is viewed as __v8si.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_maskload_epi32 (int const *__X, __m256i __M)
- {
-   const __v8si *__addr = (const __v8si *) __X;
-   const __v8si __msk = (__v8si) __M;
-   return (__m256i) __builtin_ia32_maskloadd256 (__addr, __msk);
- }
- /* Forwards to the maskloadq256 builtin: __X is recast to const __v4di *
-    and the mask __M is viewed as __v4di.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_maskload_epi64 (long long const *__X, __m256i __M)
- {
-   const __v4di *__addr = (const __v4di *) __X;
-   const __v4di __msk = (__v4di) __M;
-   return (__m256i) __builtin_ia32_maskloadq256 (__addr, __msk);
- }
- /* Forwards to the maskloadd builtin: __X is recast to const __v4si *
-    and the mask __M is viewed as __v4si.  */
- extern __inline __m128i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskload_epi32 (int const *__X, __m128i __M)
- {
-   const __v4si *__addr = (const __v4si *) __X;
-   const __v4si __msk = (__v4si) __M;
-   return (__m128i) __builtin_ia32_maskloadd (__addr, __msk);
- }
- /* Forwards to the maskloadq builtin: __X is recast to const __v2di *
-    and the mask __M is viewed as __v2di.  */
- extern __inline __m128i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskload_epi64 (long long const *__X, __m128i __M)
- {
-   const __v2di *__addr = (const __v2di *) __X;
-   const __v2di __msk = (__v2di) __M;
-   return (__m128i) __builtin_ia32_maskloadq (__addr, __msk);
- }
- /* Forwards to the maskstored256 builtin: destination __X is recast to
-    __v8si *, mask __M and data __Y are viewed as __v8si.  */
- extern __inline void
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_maskstore_epi32 (int *__X, __m256i __M, __m256i __Y)
- {
-   __v8si *__dst = (__v8si *) __X;
-   const __v8si __msk = (__v8si) __M;
-   const __v8si __val = (__v8si) __Y;
-   __builtin_ia32_maskstored256 (__dst, __msk, __val);
- }
- /* Forwards to the maskstoreq256 builtin: destination __X is recast to
-    __v4di *, mask __M and data __Y are viewed as __v4di.  */
- extern __inline void
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_maskstore_epi64 (long long *__X, __m256i __M, __m256i __Y)
- {
-   __v4di *__dst = (__v4di *) __X;
-   const __v4di __msk = (__v4di) __M;
-   const __v4di __val = (__v4di) __Y;
-   __builtin_ia32_maskstoreq256 (__dst, __msk, __val);
- }
- /* Forwards to the maskstored builtin: destination __X is recast to
-    __v4si *, mask __M and data __Y are viewed as __v4si.  */
- extern __inline void
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskstore_epi32 (int *__X, __m128i __M, __m128i __Y)
- {
-   __v4si *__dst = (__v4si *) __X;
-   const __v4si __msk = (__v4si) __M;
-   const __v4si __val = (__v4si) __Y;
-   __builtin_ia32_maskstored (__dst, __msk, __val);
- }
- /* Forwards to the maskstoreq builtin: destination __X is recast to
-    __v2di *, mask __M and data __Y are viewed as __v2di.  */
- extern __inline void
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_maskstore_epi64 (long long *__X, __m128i __M, __m128i __Y)
- {
-   __v2di *__dst = (__v2di *) __X;
-   const __v2di __msk = (__v2di) __M;
-   const __v2di __val = (__v2di) __Y;
-   __builtin_ia32_maskstoreq (__dst, __msk, __val);
- }
- /* Forwards to the psllv8si builtin with the data __X and the per-element
-    operand __Y both viewed as __v8si.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_sllv_epi32 (__m256i __X, __m256i __Y)
- {
-   const __v8si __val = (__v8si) __X;
-   const __v8si __cnt = (__v8si) __Y;
-   return (__m256i) __builtin_ia32_psllv8si (__val, __cnt);
- }
- /* Forwards to the psllv4si builtin with __X and __Y both viewed as __v4si.  */
- extern __inline __m128i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_sllv_epi32 (__m128i __X, __m128i __Y)
- {
-   const __v4si __val = (__v4si) __X;
-   const __v4si __cnt = (__v4si) __Y;
-   return (__m128i) __builtin_ia32_psllv4si (__val, __cnt);
- }
- /* Forwards to the psllv4di builtin with __X and __Y both viewed as __v4di.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_sllv_epi64 (__m256i __X, __m256i __Y)
- {
-   const __v4di __val = (__v4di) __X;
-   const __v4di __cnt = (__v4di) __Y;
-   return (__m256i) __builtin_ia32_psllv4di (__val, __cnt);
- }
- /* Forwards to the psllv2di builtin with __X and __Y both viewed as __v2di.  */
- extern __inline __m128i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_sllv_epi64 (__m128i __X, __m128i __Y)
- {
-   const __v2di __val = (__v2di) __X;
-   const __v2di __cnt = (__v2di) __Y;
-   return (__m128i) __builtin_ia32_psllv2di (__val, __cnt);
- }
- /* Forwards to the psrav8si builtin with __X and __Y both viewed as __v8si.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_srav_epi32 (__m256i __X, __m256i __Y)
- {
-   const __v8si __val = (__v8si) __X;
-   const __v8si __cnt = (__v8si) __Y;
-   return (__m256i) __builtin_ia32_psrav8si (__val, __cnt);
- }
- /* Forwards to the psrav4si builtin with __X and __Y both viewed as __v4si.  */
- extern __inline __m128i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_srav_epi32 (__m128i __X, __m128i __Y)
- {
-   const __v4si __val = (__v4si) __X;
-   const __v4si __cnt = (__v4si) __Y;
-   return (__m128i) __builtin_ia32_psrav4si (__val, __cnt);
- }
- /* Forwards to the psrlv8si builtin with __X and __Y both viewed as __v8si.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_srlv_epi32 (__m256i __X, __m256i __Y)
- {
-   const __v8si __val = (__v8si) __X;
-   const __v8si __cnt = (__v8si) __Y;
-   return (__m256i) __builtin_ia32_psrlv8si (__val, __cnt);
- }
- /* Forwards to the psrlv4si builtin with __X and __Y both viewed as __v4si.  */
- extern __inline __m128i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_srlv_epi32 (__m128i __X, __m128i __Y)
- {
-   const __v4si __val = (__v4si) __X;
-   const __v4si __cnt = (__v4si) __Y;
-   return (__m128i) __builtin_ia32_psrlv4si (__val, __cnt);
- }
- /* Forwards to the psrlv4di builtin with __X and __Y both viewed as __v4di.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_srlv_epi64 (__m256i __X, __m256i __Y)
- {
-   const __v4di __val = (__v4di) __X;
-   const __v4di __cnt = (__v4di) __Y;
-   return (__m256i) __builtin_ia32_psrlv4di (__val, __cnt);
- }
- /* Forwards to the psrlv2di builtin with __X and __Y both viewed as __v2di.  */
- extern __inline __m128i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_srlv_epi64 (__m128i __X, __m128i __Y)
- {
-   const __v2di __val = (__v2di) __X;
-   const __v2di __cnt = (__v2di) __Y;
-   return (__m128i) __builtin_ia32_psrlv2di (__val, __cnt);
- }
- #ifdef __OPTIMIZE__
- /* Unmasked gather through the gathersiv2df builtin.  The mask is a zero
-    vector compared equal with itself, the source operand is
-    _mm_undefined_pd (), and __scale is passed through unchanged.  */
- extern __inline __m128d
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_i32gather_pd (double const *__base, __m128i __index, const int __scale)
- {
-   __v2df __z = _mm_setzero_pd ();
-   __v2df __all = _mm_cmpeq_pd (__z, __z);
-   __v4si __idx = (__v4si) __index;
-   return (__m128d) __builtin_ia32_gathersiv2df (_mm_undefined_pd (), __base,
-                                                 __idx, __all, __scale);
- }
- /* Masked gather through the gathersiv2df builtin: caller-supplied __src
-    and __mask are viewed as __v2df, __index as __v4si.  */
- extern __inline __m128d
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_i32gather_pd (__m128d __src, double const *__base, __m128i __index,
-                        __m128d __mask, const int __scale)
- {
-   __v2df __s = (__v2df) __src;
-   __v4si __idx = (__v4si) __index;
-   __v2df __m = (__v2df) __mask;
-   return (__m128d) __builtin_ia32_gathersiv2df (__s, __base, __idx, __m,
-                                                 __scale);
- }
- /* Unmasked gather through the gathersiv4df builtin.  The mask is a zero
-    vector compared with itself using _CMP_EQ_OQ, the source operand is
-    _mm256_undefined_pd (), and the index stays a 128-bit __v4si.  */
- extern __inline __m256d
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_i32gather_pd (double const *__base, __m128i __index, const int __scale)
- {
-   __v4df __z = _mm256_setzero_pd ();
-   __v4df __all = _mm256_cmp_pd (__z, __z, _CMP_EQ_OQ);
-   __v4si __idx = (__v4si) __index;
-   return (__m256d) __builtin_ia32_gathersiv4df (_mm256_undefined_pd (), __base,
-                                                 __idx, __all, __scale);
- }
- /* Masked gather through the gathersiv4df builtin: __src and __mask are
-    viewed as __v4df, the 128-bit __index as __v4si.  */
- extern __inline __m256d
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_mask_i32gather_pd (__m256d __src, double const *__base,
-                           __m128i __index, __m256d __mask, const int __scale)
- {
-   __v4df __s = (__v4df) __src;
-   __v4si __idx = (__v4si) __index;
-   __v4df __m = (__v4df) __mask;
-   return (__m256d) __builtin_ia32_gathersiv4df (__s, __base, __idx, __m,
-                                                 __scale);
- }
- /* Unmasked gather through the gatherdiv2df builtin.  A zero vector serves
-    as the source operand and, compared equal with itself, as the mask.  */
- extern __inline __m128d
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_i64gather_pd (double const *__base, __m128i __index, const int __scale)
- {
-   __v2df __z = _mm_setzero_pd ();
-   __v2df __all = _mm_cmpeq_pd (__z, __z);
-   __v2di __idx = (__v2di) __index;
-   return (__m128d) __builtin_ia32_gatherdiv2df (__z, __base, __idx, __all,
-                                                 __scale);
- }
- /* Masked gather through the gatherdiv2df builtin: __src and __mask are
-    viewed as __v2df, __index as __v2di.  */
- extern __inline __m128d
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_i64gather_pd (__m128d __src, double const *__base, __m128i __index,
-                        __m128d __mask, const int __scale)
- {
-   __v2df __s = (__v2df) __src;
-   __v2di __idx = (__v2di) __index;
-   __v2df __m = (__v2df) __mask;
-   return (__m128d) __builtin_ia32_gatherdiv2df (__s, __base, __idx, __m,
-                                                 __scale);
- }
- /* Unmasked gather through the gatherdiv4df builtin.  A zero vector serves
-    as the source operand and, compared with itself using _CMP_EQ_OQ, as
-    the mask.  */
- extern __inline __m256d
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_i64gather_pd (double const *__base, __m256i __index, const int __scale)
- {
-   __v4df __z = _mm256_setzero_pd ();
-   __v4df __all = _mm256_cmp_pd (__z, __z, _CMP_EQ_OQ);
-   __v4di __idx = (__v4di) __index;
-   return (__m256d) __builtin_ia32_gatherdiv4df (__z, __base, __idx, __all,
-                                                 __scale);
- }
- /* Masked gather through the gatherdiv4df builtin: __src and __mask are
-    viewed as __v4df, __index as __v4di.  */
- extern __inline __m256d
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_mask_i64gather_pd (__m256d __src, double const *__base,
-                           __m256i __index, __m256d __mask, const int __scale)
- {
-   __v4df __s = (__v4df) __src;
-   __v4di __idx = (__v4di) __index;
-   __v4df __m = (__v4df) __mask;
-   return (__m256d) __builtin_ia32_gatherdiv4df (__s, __base, __idx, __m,
-                                                 __scale);
- }
- /* Unmasked gather through the gathersiv4sf builtin.  A zero vector serves
-    as the source operand and, compared equal with itself, as the mask.  */
- extern __inline __m128
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_i32gather_ps (float const *__base, __m128i __index, const int __scale)
- {
-   __v4sf __z = _mm_setzero_ps ();
-   __v4sf __all = _mm_cmpeq_ps (__z, __z);
-   __v4si __idx = (__v4si) __index;
-   return (__m128) __builtin_ia32_gathersiv4sf (__z, __base, __idx, __all,
-                                                __scale);
- }
- /* Masked gather through the gathersiv4sf builtin: __src and __mask are
-    viewed as __v4sf, __index as __v4si.  */
- extern __inline __m128
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_i32gather_ps (__m128 __src, float const *__base, __m128i __index,
-                        __m128 __mask, const int __scale)
- {
-   __v4sf __s = (__v4sf) __src;
-   __v4si __idx = (__v4si) __index;
-   __v4sf __m = (__v4sf) __mask;
-   return (__m128) __builtin_ia32_gathersiv4sf (__s, __base, __idx, __m,
-                                                __scale);
- }
- /* Unmasked gather through the gathersiv8sf builtin.  A zero vector serves
-    as the source operand and, compared with itself using _CMP_EQ_OQ, as
-    the mask.  */
- extern __inline __m256
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_i32gather_ps (float const *__base, __m256i __index, const int __scale)
- {
-   __v8sf __z = _mm256_setzero_ps ();
-   __v8sf __all = _mm256_cmp_ps (__z, __z, _CMP_EQ_OQ);
-   __v8si __idx = (__v8si) __index;
-   return (__m256) __builtin_ia32_gathersiv8sf (__z, __base, __idx, __all,
-                                                __scale);
- }
- /* Masked gather through the gathersiv8sf builtin: __src and __mask are
-    viewed as __v8sf, __index as __v8si.  */
- extern __inline __m256
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_mask_i32gather_ps (__m256 __src, float const *__base,
-                           __m256i __index, __m256 __mask, const int __scale)
- {
-   __v8sf __s = (__v8sf) __src;
-   __v8si __idx = (__v8si) __index;
-   __v8sf __m = (__v8sf) __mask;
-   return (__m256) __builtin_ia32_gathersiv8sf (__s, __base, __idx, __m,
-                                                __scale);
- }
- /* Unmasked gather through the gatherdiv4sf builtin.  A zero vector serves
-    as the source operand and, compared equal with itself, as the mask;
-    __index is viewed as __v2di.  */
- extern __inline __m128
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_i64gather_ps (float const *__base, __m128i __index, const int __scale)
- {
-   __v4sf __z = _mm_setzero_ps ();
-   __v4sf __all = _mm_cmpeq_ps (__z, __z);
-   __v2di __idx = (__v2di) __index;
-   return (__m128) __builtin_ia32_gatherdiv4sf (__z, __base, __idx, __all,
-                                                __scale);
- }
- /* Masked gather through the gatherdiv4sf builtin: __src and __mask are
-    viewed as __v4sf, __index as __v2di.  */
- extern __inline __m128
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_i64gather_ps (__m128 __src, float const *__base, __m128i __index,
-                        __m128 __mask, const int __scale)
- {
-   __v4sf __s = (__v4sf) __src;
-   __v2di __idx = (__v2di) __index;
-   __v4sf __m = (__v4sf) __mask;
-   return (__m128) __builtin_ia32_gatherdiv4sf (__s, __base, __idx, __m,
-                                                __scale);
- }
- /* Unmasked gather through the gatherdiv4sf256 builtin.  The index is a
-    256-bit __v4di while source, mask and result are 128-bit __v4sf; the
-    mask is a zero vector compared equal with itself.  */
- extern __inline __m128
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_i64gather_ps (float const *__base, __m256i __index, const int __scale)
- {
-   __v4sf __z = _mm_setzero_ps ();
-   __v4sf __all = _mm_cmpeq_ps (__z, __z);
-   __v4di __idx = (__v4di) __index;
-   return (__m128) __builtin_ia32_gatherdiv4sf256 (__z, __base, __idx, __all,
-                                                   __scale);
- }
- /* Masked gather through the gatherdiv4sf256 builtin: 128-bit __src and
-    __mask are viewed as __v4sf, the 256-bit __index as __v4di.  */
- extern __inline __m128
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_mask_i64gather_ps (__m128 __src, float const *__base,
-                           __m256i __index, __m128 __mask, const int __scale)
- {
-   __v4sf __s = (__v4sf) __src;
-   __v4di __idx = (__v4di) __index;
-   __v4sf __m = (__v4sf) __mask;
-   return (__m128) __builtin_ia32_gatherdiv4sf256 (__s, __base, __idx, __m,
-                                                   __scale);
- }
- /* Unmasked gather through the gathersiv2di builtin, using an all-zero
-    source vector and a mask whose elements are all ~0.  */
- extern __inline __m128i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_i32gather_epi64 (long long int const *__base,
-                      __m128i __index, const int __scale)
- {
-   __v2di __zeros = __extension__ (__v2di){ 0, 0 };
-   __v2di __ones = __extension__ (__v2di){ ~0, ~0 };
-   __v4si __idx = (__v4si) __index;
-   return (__m128i) __builtin_ia32_gathersiv2di (__zeros, __base, __idx,
-                                                 __ones, __scale);
- }
- /* Masked gather through the gathersiv2di builtin: __src and __mask are
-    viewed as __v2di, __index as __v4si.  */
- extern __inline __m128i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_i32gather_epi64 (__m128i __src, long long int const *__base,
-                           __m128i __index, __m128i __mask, const int __scale)
- {
-   __v2di __s = (__v2di) __src;
-   __v4si __idx = (__v4si) __index;
-   __v2di __m = (__v2di) __mask;
-   return (__m128i) __builtin_ia32_gathersiv2di (__s, __base, __idx, __m,
-                                                 __scale);
- }
- /* Unmasked gather through the gathersiv4di builtin, using an all-zero
-    source vector and a mask whose elements are all ~0; the index stays a
-    128-bit __v4si.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_i32gather_epi64 (long long int const *__base,
-                         __m128i __index, const int __scale)
- {
-   __v4di __zeros = __extension__ (__v4di){ 0, 0, 0, 0 };
-   __v4di __ones = __extension__ (__v4di){ ~0, ~0, ~0, ~0 };
-   __v4si __idx = (__v4si) __index;
-   return (__m256i) __builtin_ia32_gathersiv4di (__zeros, __base, __idx,
-                                                 __ones, __scale);
- }
- /* Masked gather through the gathersiv4di builtin: __src and __mask are
-    viewed as __v4di, the 128-bit __index as __v4si.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_mask_i32gather_epi64 (__m256i __src, long long int const *__base,
-                              __m128i __index, __m256i __mask,
-                              const int __scale)
- {
-   __v4di __s = (__v4di) __src;
-   __v4si __idx = (__v4si) __index;
-   __v4di __m = (__v4di) __mask;
-   return (__m256i) __builtin_ia32_gathersiv4di (__s, __base, __idx, __m,
-                                                 __scale);
- }
- /* Unmasked gather through the gatherdiv2di builtin, using an all-zero
-    source vector and a mask whose elements are all ~0.  */
- extern __inline __m128i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_i64gather_epi64 (long long int const *__base,
-                      __m128i __index, const int __scale)
- {
-   __v2di __zeros = __extension__ (__v2di){ 0, 0 };
-   __v2di __ones = __extension__ (__v2di){ ~0, ~0 };
-   __v2di __idx = (__v2di) __index;
-   return (__m128i) __builtin_ia32_gatherdiv2di (__zeros, __base, __idx,
-                                                 __ones, __scale);
- }
- /* Masked gather through the gatherdiv2di builtin: __src, __index and
-    __mask are all viewed as __v2di.  */
- extern __inline __m128i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_i64gather_epi64 (__m128i __src, long long int const *__base,
-                           __m128i __index, __m128i __mask, const int __scale)
- {
-   __v2di __s = (__v2di) __src;
-   __v2di __idx = (__v2di) __index;
-   __v2di __m = (__v2di) __mask;
-   return (__m128i) __builtin_ia32_gatherdiv2di (__s, __base, __idx, __m,
-                                                 __scale);
- }
- /* Unmasked gather through the gatherdiv4di builtin, using an all-zero
-    source vector and a mask whose elements are all ~0.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_i64gather_epi64 (long long int const *__base,
-                         __m256i __index, const int __scale)
- {
-   __v4di __zeros = __extension__ (__v4di){ 0, 0, 0, 0 };
-   __v4di __ones = __extension__ (__v4di){ ~0, ~0, ~0, ~0 };
-   __v4di __idx = (__v4di) __index;
-   return (__m256i) __builtin_ia32_gatherdiv4di (__zeros, __base, __idx,
-                                                 __ones, __scale);
- }
- /* Masked gather through the gatherdiv4di builtin: __src, __index and
-    __mask are all viewed as __v4di.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_mask_i64gather_epi64 (__m256i __src, long long int const *__base,
-                              __m256i __index, __m256i __mask,
-                              const int __scale)
- {
-   __v4di __s = (__v4di) __src;
-   __v4di __idx = (__v4di) __index;
-   __v4di __m = (__v4di) __mask;
-   return (__m256i) __builtin_ia32_gatherdiv4di (__s, __base, __idx, __m,
-                                                 __scale);
- }
- /* Unmasked gather through the gathersiv4si builtin, using an all-zero
-    source vector and a mask whose elements are all ~0.  */
- extern __inline __m128i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_i32gather_epi32 (int const *__base, __m128i __index, const int __scale)
- {
-   __v4si __zeros = __extension__ (__v4si){ 0, 0, 0, 0 };
-   __v4si __ones = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };
-   __v4si __idx = (__v4si) __index;
-   return (__m128i) __builtin_ia32_gathersiv4si (__zeros, __base, __idx,
-                                                 __ones, __scale);
- }
- /* Masked gather through the gathersiv4si builtin: __src, __index and
-    __mask are all viewed as __v4si.  */
- extern __inline __m128i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_i32gather_epi32 (__m128i __src, int const *__base, __m128i __index,
-                           __m128i __mask, const int __scale)
- {
-   __v4si __s = (__v4si) __src;
-   __v4si __idx = (__v4si) __index;
-   __v4si __m = (__v4si) __mask;
-   return (__m128i) __builtin_ia32_gathersiv4si (__s, __base, __idx, __m,
-                                                 __scale);
- }
- /* Unmasked gather through the gathersiv8si builtin, using an all-zero
-    source vector and a mask whose elements are all ~0.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_i32gather_epi32 (int const *__base, __m256i __index, const int __scale)
- {
-   __v8si __zeros = __extension__ (__v8si){ 0, 0, 0, 0, 0, 0, 0, 0 };
-   __v8si __ones = __extension__ (__v8si){ ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
-   __v8si __idx = (__v8si) __index;
-   return (__m256i) __builtin_ia32_gathersiv8si (__zeros, __base, __idx,
-                                                 __ones, __scale);
- }
- /* Masked gather through the gathersiv8si builtin: __src, __index and
-    __mask are all viewed as __v8si.  */
- extern __inline __m256i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_mask_i32gather_epi32 (__m256i __src, int const *__base,
-                              __m256i __index, __m256i __mask,
-                              const int __scale)
- {
-   __v8si __s = (__v8si) __src;
-   __v8si __idx = (__v8si) __index;
-   __v8si __m = (__v8si) __mask;
-   return (__m256i) __builtin_ia32_gathersiv8si (__s, __base, __idx, __m,
-                                                 __scale);
- }
- /* Unmasked gather through the gatherdiv4si builtin, using an all-zero
-    __v4si source and an all-~0 __v4si mask; __index is viewed as __v2di.  */
- extern __inline __m128i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_i64gather_epi32 (int const *__base, __m128i __index, const int __scale)
- {
-   __v4si __zeros = __extension__ (__v4si){ 0, 0, 0, 0 };
-   __v4si __ones = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };
-   __v2di __idx = (__v2di) __index;
-   return (__m128i) __builtin_ia32_gatherdiv4si (__zeros, __base, __idx,
-                                                 __ones, __scale);
- }
- /* Masked gather through the gatherdiv4si builtin: __src and __mask are
-    viewed as __v4si, __index as __v2di.  */
- extern __inline __m128i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mask_i64gather_epi32 (__m128i __src, int const *__base, __m128i __index,
-                           __m128i __mask, const int __scale)
- {
-   __v4si __s = (__v4si) __src;
-   __v2di __idx = (__v2di) __index;
-   __v4si __m = (__v4si) __mask;
-   return (__m128i) __builtin_ia32_gatherdiv4si (__s, __base, __idx, __m,
-                                                 __scale);
- }
- /* Unmasked gather through the gatherdiv4si256 builtin.  The index is a
-    256-bit __v4di while source, mask and result are 128-bit __v4si.  */
- extern __inline __m128i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_i64gather_epi32 (int const *__base, __m256i __index, const int __scale)
- {
-   __v4si __zeros = __extension__ (__v4si){ 0, 0, 0, 0 };
-   __v4si __ones = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };
-   __v4di __idx = (__v4di) __index;
-   return (__m128i) __builtin_ia32_gatherdiv4si256 (__zeros, __base, __idx,
-                                                    __ones, __scale);
- }
- /* Masked gather through the gatherdiv4si256 builtin: 128-bit __src and
-    __mask are viewed as __v4si, the 256-bit __index as __v4di.  */
- extern __inline __m128i
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
- _mm256_mask_i64gather_epi32 (__m128i __src, int const *__base,
-                              __m256i __index, __m128i __mask,
-                              const int __scale)
- {
-   __v4si __s = (__v4si) __src;
-   __v4di __idx = (__v4di) __index;
-   __v4si __m = (__v4si) __mask;
-   return (__m128i) __builtin_ia32_gatherdiv4si256 (__s, __base, __idx, __m,
-                                                    __scale);
- }
- #else /* __OPTIMIZE__ */
- /* Macro form used when __OPTIMIZE__ is not defined.  Each parameter is
-    parenthesized before it is cast and the whole expansion is wrapped, so
-    expression arguments (e.g. "p + 1") are cast as a unit.  The source
-    operand is a zero vector; the mask is -1 converted to double in every
-    element.  */
- #define _mm_i32gather_pd(BASE, INDEX, SCALE) \
-   ((__m128d) __builtin_ia32_gathersiv2df ((__v2df) _mm_setzero_pd (), \
-     (double const *)(BASE), \
-     (__v4si)(__m128i)(INDEX), \
-     (__v2df) _mm_set1_pd ((double)(long long int) -1), \
-     (int)(SCALE)))
- /* Macro form used when __OPTIMIZE__ is not defined.  Each parameter is
-    parenthesized before it is cast and the whole expansion is wrapped, so
-    expression arguments are cast as a unit.  */
- #define _mm_mask_i32gather_pd(SRC, BASE, INDEX, MASK, SCALE) \
-   ((__m128d) __builtin_ia32_gathersiv2df ((__v2df)(__m128d)(SRC), \
-     (double const *)(BASE), \
-     (__v4si)(__m128i)(INDEX), \
-     (__v2df)(__m128d)(MASK), \
-     (int)(SCALE)))
- /* Macro form used when __OPTIMIZE__ is not defined.  Each parameter is
-    parenthesized before it is cast and the whole expansion is wrapped, so
-    expression arguments are cast as a unit.  The source operand is a zero
-    vector; the mask is -1 converted to double in every element.  */
- #define _mm256_i32gather_pd(BASE, INDEX, SCALE) \
-   ((__m256d) __builtin_ia32_gathersiv4df ((__v4df) _mm256_setzero_pd (), \
-     (double const *)(BASE), \
-     (__v4si)(__m128i)(INDEX), \
-     (__v4df) _mm256_set1_pd ((double)(long long int) -1), \
-     (int)(SCALE)))
- /* Macro form used when __OPTIMIZE__ is not defined.  Each parameter is
-    parenthesized before it is cast and the whole expansion is wrapped, so
-    expression arguments are cast as a unit.  */
- #define _mm256_mask_i32gather_pd(SRC, BASE, INDEX, MASK, SCALE) \
-   ((__m256d) __builtin_ia32_gathersiv4df ((__v4df)(__m256d)(SRC), \
-     (double const *)(BASE), \
-     (__v4si)(__m128i)(INDEX), \
-     (__v4df)(__m256d)(MASK), \
-     (int)(SCALE)))
- /* Macro form used when __OPTIMIZE__ is not defined.  Each parameter is
-    parenthesized before it is cast and the whole expansion is wrapped, so
-    expression arguments are cast as a unit.  The source operand is a zero
-    vector; the mask is -1 converted to double in every element.  */
- #define _mm_i64gather_pd(BASE, INDEX, SCALE) \
-   ((__m128d) __builtin_ia32_gatherdiv2df ((__v2df) _mm_setzero_pd (), \
-     (double const *)(BASE), \
-     (__v2di)(__m128i)(INDEX), \
-     (__v2df) _mm_set1_pd ((double)(long long int) -1), \
-     (int)(SCALE)))
- /* Macro form used when __OPTIMIZE__ is not defined.  Each parameter is
-    parenthesized before it is cast and the whole expansion is wrapped, so
-    expression arguments are cast as a unit.  */
- #define _mm_mask_i64gather_pd(SRC, BASE, INDEX, MASK, SCALE) \
-   ((__m128d) __builtin_ia32_gatherdiv2df ((__v2df)(__m128d)(SRC), \
-     (double const *)(BASE), \
-     (__v2di)(__m128i)(INDEX), \
-     (__v2df)(__m128d)(MASK), \
-     (int)(SCALE)))
- /* Macro form used when __OPTIMIZE__ is not defined.  Each parameter is
-    parenthesized before it is cast and the whole expansion is wrapped, so
-    expression arguments are cast as a unit.  The source operand is a zero
-    vector; the mask is -1 converted to double in every element.  */
- #define _mm256_i64gather_pd(BASE, INDEX, SCALE) \
-   ((__m256d) __builtin_ia32_gatherdiv4df ((__v4df) _mm256_setzero_pd (), \
-     (double const *)(BASE), \
-     (__v4di)(__m256i)(INDEX), \
-     (__v4df) _mm256_set1_pd ((double)(long long int) -1), \
-     (int)(SCALE)))
- /* Macro form used when __OPTIMIZE__ is not defined.  Each parameter is
-    parenthesized before it is cast and the whole expansion is wrapped, so
-    expression arguments are cast as a unit.  */
- #define _mm256_mask_i64gather_pd(SRC, BASE, INDEX, MASK, SCALE) \
-   ((__m256d) __builtin_ia32_gatherdiv4df ((__v4df)(__m256d)(SRC), \
-     (double const *)(BASE), \
-     (__v4di)(__m256i)(INDEX), \
-     (__v4df)(__m256d)(MASK), \
-     (int)(SCALE)))
- /* Macro form used when __OPTIMIZE__ is not defined.  Each parameter is
-    parenthesized before it is cast and the whole expansion is wrapped, so
-    expression arguments are cast as a unit.  The mask (-1 converted to
-    float in every element) now carries the explicit __v4sf cast used by
-    the other float gather macros.  */
- #define _mm_i32gather_ps(BASE, INDEX, SCALE) \
-   ((__m128) __builtin_ia32_gathersiv4sf ((__v4sf) _mm_setzero_ps (), \
-     (float const *)(BASE), \
-     (__v4si)(__m128i)(INDEX), \
-     (__v4sf) _mm_set1_ps ((float)(int) -1), \
-     (int)(SCALE)))
- /* Macro form used when __OPTIMIZE__ is not defined.  Each parameter is
-    parenthesized before it is cast and the whole expansion is wrapped, so
-    expression arguments are cast as a unit.  */
- #define _mm_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE) \
-   ((__m128) __builtin_ia32_gathersiv4sf ((__v4sf)(__m128)(SRC), \
-     (float const *)(BASE), \
-     (__v4si)(__m128i)(INDEX), \
-     (__v4sf)(__m128)(MASK), \
-     (int)(SCALE)))
- /* Macro form used when __OPTIMIZE__ is not defined.  Each parameter is
-    parenthesized before it is cast and the whole expansion is wrapped, so
-    expression arguments are cast as a unit.  The source operand is a zero
-    vector; the mask is -1 converted to float in every element.  */
- #define _mm256_i32gather_ps(BASE, INDEX, SCALE) \
-   ((__m256) __builtin_ia32_gathersiv8sf ((__v8sf) _mm256_setzero_ps (), \
-     (float const *)(BASE), \
-     (__v8si)(__m256i)(INDEX), \
-     (__v8sf) _mm256_set1_ps ((float)(int) -1), \
-     (int)(SCALE)))
- /* Macro form used when __OPTIMIZE__ is not defined.  Each parameter is
-    parenthesized before it is cast and the whole expansion is wrapped, so
-    expression arguments are cast as a unit.  */
- #define _mm256_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE) \
-   ((__m256) __builtin_ia32_gathersiv8sf ((__v8sf)(__m256)(SRC), \
-     (float const *)(BASE), \
-     (__v8si)(__m256i)(INDEX), \
-     (__v8sf)(__m256)(MASK), \
-     (int)(SCALE)))
- /* Macro form used when __OPTIMIZE__ is not defined.  The zero source
-    vector is built with _mm_setzero_ps (), matching the float element type
-    of this gather (it was previously built with _mm_setzero_pd () and
-    recast).  Each parameter is parenthesized before it is cast and the
-    whole expansion is wrapped, so expression arguments are cast as a
-    unit.  */
- #define _mm_i64gather_ps(BASE, INDEX, SCALE) \
-   ((__m128) __builtin_ia32_gatherdiv4sf ((__v4sf) _mm_setzero_ps (), \
-     (float const *)(BASE), \
-     (__v2di)(__m128i)(INDEX), \
-     (__v4sf) _mm_set1_ps ((float)(int) -1), \
-     (int)(SCALE)))
- /* Macro form used when __OPTIMIZE__ is not defined.  Each parameter is
-    parenthesized before it is cast and the whole expansion is wrapped, so
-    expression arguments are cast as a unit.  */
- #define _mm_mask_i64gather_ps(SRC, BASE, INDEX, MASK, SCALE) \
-   ((__m128) __builtin_ia32_gatherdiv4sf ((__v4sf)(__m128)(SRC), \
-     (float const *)(BASE), \
-     (__v2di)(__m128i)(INDEX), \
-     (__v4sf)(__m128)(MASK), \
-     (int)(SCALE)))
- /* Macro form used when __OPTIMIZE__ is not defined.  Each parameter is
-    parenthesized before it is cast and the whole expansion is wrapped, so
-    expression arguments are cast as a unit.  The index is 256-bit while
-    the zero source, the mask and the result are 128-bit.  */
- #define _mm256_i64gather_ps(BASE, INDEX, SCALE) \
-   ((__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf) _mm_setzero_ps (), \
-     (float const *)(BASE), \
-     (__v4di)(__m256i)(INDEX), \
-     (__v4sf) _mm_set1_ps ((float)(int) -1), \
-     (int)(SCALE)))
- /* Macro form used when __OPTIMIZE__ is not defined.  Each parameter is
-    parenthesized before it is cast and the whole expansion is wrapped, so
-    expression arguments are cast as a unit.  */
- #define _mm256_mask_i64gather_ps(SRC, BASE, INDEX, MASK, SCALE) \
-   ((__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf)(__m128)(SRC), \
-     (float const *)(BASE), \
-     (__v4di)(__m256i)(INDEX), \
-     (__v4sf)(__m128)(MASK), \
-     (int)(SCALE)))
- /* Macro form used when __OPTIMIZE__ is not defined.  Each parameter is
-    parenthesized before it is cast and the whole expansion is wrapped, so
-    expression arguments are cast as a unit.  The source operand is a zero
-    vector and the mask is -1 in every 64-bit element.  */
- #define _mm_i32gather_epi64(BASE, INDEX, SCALE) \
-   ((__m128i) __builtin_ia32_gathersiv2di ((__v2di) _mm_setzero_si128 (), \
-     (long long const *)(BASE), \
-     (__v4si)(__m128i)(INDEX), \
-     (__v2di) _mm_set1_epi64x (-1), \
-     (int)(SCALE)))
- /* Macro form used when __OPTIMIZE__ is not defined.  Each parameter is
-    parenthesized before it is cast and the whole expansion is wrapped, so
-    expression arguments are cast as a unit.  */
- #define _mm_mask_i32gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \
-   ((__m128i) __builtin_ia32_gathersiv2di ((__v2di)(__m128i)(SRC), \
-     (long long const *)(BASE), \
-     (__v4si)(__m128i)(INDEX), \
-     (__v2di)(__m128i)(MASK), \
-     (int)(SCALE)))
- /* Macro form used when __OPTIMIZE__ is not defined.  Each parameter is
-    parenthesized before it is cast and the whole expansion is wrapped, so
-    expression arguments are cast as a unit.  The source operand is a zero
-    vector and the mask is -1 in every 64-bit element.  */
- #define _mm256_i32gather_epi64(BASE, INDEX, SCALE) \
-   ((__m256i) __builtin_ia32_gathersiv4di ((__v4di) _mm256_setzero_si256 (), \
-     (long long const *)(BASE), \
-     (__v4si)(__m128i)(INDEX), \
-     (__v4di) _mm256_set1_epi64x (-1), \
-     (int)(SCALE)))
- /* Macro form used when __OPTIMIZE__ is not defined.  Each parameter is
-    parenthesized before it is cast and the whole expansion is wrapped, so
-    expression arguments are cast as a unit.  */
- #define _mm256_mask_i32gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \
-   ((__m256i) __builtin_ia32_gathersiv4di ((__v4di)(__m256i)(SRC), \
-     (long long const *)(BASE), \
-     (__v4si)(__m128i)(INDEX), \
-     (__v4di)(__m256i)(MASK), \
-     (int)(SCALE)))
- /* Macro form used when __OPTIMIZE__ is not defined.  Each parameter is
-    parenthesized before it is cast and the whole expansion is wrapped, so
-    expression arguments are cast as a unit.  The source operand is a zero
-    vector and the mask is -1 in every 64-bit element.  */
- #define _mm_i64gather_epi64(BASE, INDEX, SCALE) \
-   ((__m128i) __builtin_ia32_gatherdiv2di ((__v2di) _mm_setzero_si128 (), \
-     (long long const *)(BASE), \
-     (__v2di)(__m128i)(INDEX), \
-     (__v2di) _mm_set1_epi64x (-1), \
-     (int)(SCALE)))
- /* Macro form used when __OPTIMIZE__ is not defined.  Each parameter is
-    parenthesized before it is cast and the whole expansion is wrapped, so
-    expression arguments are cast as a unit.  */
- #define _mm_mask_i64gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \
-   ((__m128i) __builtin_ia32_gatherdiv2di ((__v2di)(__m128i)(SRC), \
-     (long long const *)(BASE), \
-     (__v2di)(__m128i)(INDEX), \
-     (__v2di)(__m128i)(MASK), \
-     (int)(SCALE)))
- /* Macro form used when __OPTIMIZE__ is not defined.  Each parameter is
-    parenthesized before it is cast and the whole expansion is wrapped, so
-    expression arguments are cast as a unit.  The source operand is a zero
-    vector and the mask is -1 in every 64-bit element.  */
- #define _mm256_i64gather_epi64(BASE, INDEX, SCALE) \
-   ((__m256i) __builtin_ia32_gatherdiv4di ((__v4di) _mm256_setzero_si256 (), \
-     (long long const *)(BASE), \
-     (__v4di)(__m256i)(INDEX), \
-     (__v4di) _mm256_set1_epi64x (-1), \
-     (int)(SCALE)))
- /* Macro form used when __OPTIMIZE__ is not defined.  Each parameter is
-    parenthesized before it is cast and the whole expansion is wrapped, so
-    expression arguments are cast as a unit.  */
- #define _mm256_mask_i64gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \
-   ((__m256i) __builtin_ia32_gatherdiv4di ((__v4di)(__m256i)(SRC), \
-     (long long const *)(BASE), \
-     (__v4di)(__m256i)(INDEX), \
-     (__v4di)(__m256i)(MASK), \
-     (int)(SCALE)))
- /* Macro form used when __OPTIMIZE__ is not defined.  Each parameter is
-    parenthesized before it is cast and the whole expansion is wrapped, so
-    expression arguments are cast as a unit.  The source operand is a zero
-    vector and the mask is -1 in every 32-bit element.  */
- #define _mm_i32gather_epi32(BASE, INDEX, SCALE) \
-   ((__m128i) __builtin_ia32_gathersiv4si ((__v4si) _mm_setzero_si128 (), \
-     (int const *)(BASE), \
-     (__v4si)(__m128i)(INDEX), \
-     (__v4si) _mm_set1_epi32 (-1), \
-     (int)(SCALE)))
- /* Macro form used when __OPTIMIZE__ is not defined.  Each parameter is
-    parenthesized before it is cast and the whole expansion is wrapped, so
-    expression arguments are cast as a unit.  */
- #define _mm_mask_i32gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
-   ((__m128i) __builtin_ia32_gathersiv4si ((__v4si)(__m128i)(SRC), \
-     (int const *)(BASE), \
-     (__v4si)(__m128i)(INDEX), \
-     (__v4si)(__m128i)(MASK), \
-     (int)(SCALE)))
- /* Macro form used when __OPTIMIZE__ is not defined.  Each parameter is
-    parenthesized before it is cast and the whole expansion is wrapped, so
-    expression arguments are cast as a unit.  The source operand is a zero
-    vector and the mask is -1 in every 32-bit element.  */
- #define _mm256_i32gather_epi32(BASE, INDEX, SCALE) \
-   ((__m256i) __builtin_ia32_gathersiv8si ((__v8si) _mm256_setzero_si256 (), \
-     (int const *)(BASE), \
-     (__v8si)(__m256i)(INDEX), \
-     (__v8si) _mm256_set1_epi32 (-1), \
-     (int)(SCALE)))
- /* Macro form used when __OPTIMIZE__ is not defined.  Each parameter is
-    parenthesized before it is cast and the whole expansion is wrapped, so
-    expression arguments are cast as a unit.  */
- #define _mm256_mask_i32gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
-   ((__m256i) __builtin_ia32_gathersiv8si ((__v8si)(__m256i)(SRC), \
-     (int const *)(BASE), \
-     (__v8si)(__m256i)(INDEX), \
-     (__v8si)(__m256i)(MASK), \
-     (int)(SCALE)))
- /* Macro form used when __OPTIMIZE__ is not defined.  Each parameter is
-    parenthesized before it is cast and the whole expansion is wrapped, so
-    expression arguments are cast as a unit.  The source operand is a zero
-    vector and the mask is -1 in every 32-bit element.  */
- #define _mm_i64gather_epi32(BASE, INDEX, SCALE) \
-   ((__m128i) __builtin_ia32_gatherdiv4si ((__v4si) _mm_setzero_si128 (), \
-     (int const *)(BASE), \
-     (__v2di)(__m128i)(INDEX), \
-     (__v4si) _mm_set1_epi32 (-1), \
-     (int)(SCALE)))
- /* Macro form used when __OPTIMIZE__ is not defined.  Each parameter is
-    parenthesized before it is cast and the whole expansion is wrapped, so
-    expression arguments are cast as a unit.  */
- #define _mm_mask_i64gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
-   ((__m128i) __builtin_ia32_gatherdiv4si ((__v4si)(__m128i)(SRC), \
-     (int const *)(BASE), \
-     (__v2di)(__m128i)(INDEX), \
-     (__v4si)(__m128i)(MASK), \
-     (int)(SCALE)))
- /* Macro form used when __OPTIMIZE__ is not defined.  Each parameter is
-    parenthesized before it is cast and the whole expansion is wrapped, so
-    expression arguments are cast as a unit.  The index is 256-bit while
-    the zero source, the all -1 mask and the result are 128-bit.  */
- #define _mm256_i64gather_epi32(BASE, INDEX, SCALE) \
-   ((__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si) _mm_setzero_si128 (), \
-     (int const *)(BASE), \
-     (__v4di)(__m256i)(INDEX), \
-     (__v4si) _mm_set1_epi32 (-1), \
-     (int)(SCALE)))
- /* Macro form used when __OPTIMIZE__ is not defined.  Each parameter is
-    parenthesized before it is cast and the whole expansion is wrapped, so
-    expression arguments are cast as a unit.  */
- #define _mm256_mask_i64gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
-   ((__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si)(__m128i)(SRC), \
-     (int const *)(BASE), \
-     (__v4di)(__m256i)(INDEX), \
-     (__v4si)(__m128i)(MASK), \
-     (int)(SCALE)))
- #endif /* __OPTIMIZE__ */
- #ifdef __DISABLE_AVX2__
- #undef __DISABLE_AVX2__
- #pragma GCC pop_options
- #endif /* __DISABLE_AVX2__ */
- #endif /* _AVX2INTRIN_H_INCLUDED */
|