12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949 |
- /* Copyright (C) 2002-2019 Free Software Foundation, Inc.
- This file is part of GCC.
- GCC is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
- GCC is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
- /* Implemented from the specification included in the Intel C++ Compiler
- User Guide and Reference, version 9.0. */
- #ifndef _MMINTRIN_H_INCLUDED
- #define _MMINTRIN_H_INCLUDED
- #if defined __x86_64__ && !defined __SSE__ || !defined __MMX__
- #pragma GCC push_options
- #ifdef __x86_64__
- #pragma GCC target("sse,mmx")
- #else
- #pragma GCC target("mmx")
- #endif
- #define __DISABLE_MMX__ /* Records that the options pushed above must be popped at the end of this header. */
- #endif /* !__MMX__, or x86-64 without __SSE__ */
- /* The Intel API is flexible enough that we must allow aliasing with other
- vector types, and their scalar components. Hence __may_alias__ on the
- public 8-byte vector type. */
- typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
- /* Unaligned version of the same type: identical except that the alignment
- requirement is lowered to 1 byte. */
- typedef int __m64_u __attribute__ ((__vector_size__ (8), __may_alias__, __aligned__ (1)));
- /* Internal data types for implementing the intrinsics. All are 8-byte
- vectors that differ only in element type; the intrinsics below cast their
- __m64 arguments to whichever of these the builtin expects. */
- typedef int __v2si __attribute__ ((__vector_size__ (8))); /* int elements */
- typedef short __v4hi __attribute__ ((__vector_size__ (8))); /* short elements */
- typedef char __v8qi __attribute__ ((__vector_size__ (8))); /* char elements */
- typedef long long __v1di __attribute__ ((__vector_size__ (8))); /* long long element */
- typedef float __v2sf __attribute__ ((__vector_size__ (8))); /* float elements */
- /* Empty the multimedia state by invoking __builtin_ia32_emms.
- _m_empty is an alternate name that forwards to _mm_empty. */
- extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_empty (void)
- {
- __builtin_ia32_emms ();
- }
- extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_empty (void)
- {
- _mm_empty ();
- }
- /* Convert I to a __m64 object. The integer is zero-extended to 64-bits:
- the vector is initialised with I as its first element and 0 as its second.
- _m_from_int is an alternate name that forwards to _mm_cvtsi32_si64. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvtsi32_si64 (int __i)
- {
- return (__m64) __builtin_ia32_vec_init_v2si (__i, 0);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_from_int (int __i)
- {
- return _mm_cvtsi32_si64 (__i);
- }
- #ifdef __x86_64__
- /* Convert I to a __m64 object. The 64-bit integer is cast directly to
- __m64; no builtin is involved. */
- /* Intel intrinsic. _m_from_int64 and _mm_cvtsi64_m64 have identical
- bodies. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_from_int64 (long long __i)
- {
- return (__m64) __i;
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvtsi64_m64 (long long __i)
- {
- return (__m64) __i;
- }
- /* Microsoft intrinsic. Casts the 64-bit integer I directly to __m64.
- _mm_set_pi64x, defined right after it, has an identical body. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvtsi64x_si64 (long long __i)
- {
- return (__m64) __i;
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_set_pi64x (long long __i)
- {
- return (__m64) __i;
- }
- #endif
- /* Convert the lower 32 bits of the __m64 object into an integer, by
- extracting element 0 of the value viewed as a __v2si vector.
- _m_to_int is an alternate name that forwards to _mm_cvtsi64_si32. */
- extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvtsi64_si32 (__m64 __i)
- {
- return __builtin_ia32_vec_ext_v2si ((__v2si)__i, 0);
- }
- extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_to_int (__m64 __i)
- {
- return _mm_cvtsi64_si32 (__i);
- }
- #ifdef __x86_64__
- /* Convert the __m64 object to a 64bit integer. The value is cast directly
- to long long; no builtin is involved. */
- /* Intel intrinsic. _m_to_int64 and _mm_cvtm64_si64 have identical
- bodies. */
- extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_to_int64 (__m64 __i)
- {
- return (long long)__i;
- }
- extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvtm64_si64 (__m64 __i)
- {
- return (long long)__i;
- }
- /* Microsoft intrinsic. Casts the __m64 object directly to a 64-bit
- integer. */
- extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cvtsi64_si64x (__m64 __i)
- {
- return (long long)__i;
- }
- #endif
- /* Pack the four 16-bit values from M1 into the lower four 8-bit values of
- the result, and the four 16-bit values from M2 into the upper four 8-bit
- values of the result, all with signed saturation.
- _m_packsswb is an alternate name that forwards to _mm_packs_pi16. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_packs_pi16 (__m64 __m1, __m64 __m2)
- {
- return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_packsswb (__m64 __m1, __m64 __m2)
- {
- return _mm_packs_pi16 (__m1, __m2);
- }
- /* Pack the two 32-bit values from M1 into the lower two 16-bit values of
- the result, and the two 32-bit values from M2 into the upper two 16-bit
- values of the result, all with signed saturation.
- _m_packssdw is an alternate name that forwards to _mm_packs_pi32. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_packs_pi32 (__m64 __m1, __m64 __m2)
- {
- return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_packssdw (__m64 __m1, __m64 __m2)
- {
- return _mm_packs_pi32 (__m1, __m2);
- }
- /* Pack the four 16-bit values from M1 into the lower four 8-bit values of
- the result, and the four 16-bit values from M2 into the upper four 8-bit
- values of the result, all with unsigned saturation.
- _m_packuswb is an alternate name that forwards to _mm_packs_pu16. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_packs_pu16 (__m64 __m1, __m64 __m2)
- {
- return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_packuswb (__m64 __m1, __m64 __m2)
- {
- return _mm_packs_pu16 (__m1, __m2);
- }
- /* Interleave the four 8-bit values from the high half of M1 with the four
- 8-bit values from the high half of M2.
- _m_punpckhbw is an alternate name that forwards to _mm_unpackhi_pi8. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
- {
- return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_punpckhbw (__m64 __m1, __m64 __m2)
- {
- return _mm_unpackhi_pi8 (__m1, __m2);
- }
- /* Interleave the two 16-bit values from the high half of M1 with the two
- 16-bit values from the high half of M2.
- _m_punpckhwd is an alternate name that forwards to _mm_unpackhi_pi16. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
- {
- return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_punpckhwd (__m64 __m1, __m64 __m2)
- {
- return _mm_unpackhi_pi16 (__m1, __m2);
- }
- /* Interleave the 32-bit value from the high half of M1 with the 32-bit
- value from the high half of M2.
- _m_punpckhdq is an alternate name that forwards to _mm_unpackhi_pi32. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_unpackhi_pi32 (__m64 __m1, __m64 __m2)
- {
- return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_punpckhdq (__m64 __m1, __m64 __m2)
- {
- return _mm_unpackhi_pi32 (__m1, __m2);
- }
- /* Interleave the four 8-bit values from the low half of M1 with the four
- 8-bit values from the low half of M2.
- _m_punpcklbw is an alternate name that forwards to _mm_unpacklo_pi8. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
- {
- return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_punpcklbw (__m64 __m1, __m64 __m2)
- {
- return _mm_unpacklo_pi8 (__m1, __m2);
- }
- /* Interleave the two 16-bit values from the low half of M1 with the two
- 16-bit values from the low half of M2.
- _m_punpcklwd is an alternate name that forwards to _mm_unpacklo_pi16. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_unpacklo_pi16 (__m64 __m1, __m64 __m2)
- {
- return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_punpcklwd (__m64 __m1, __m64 __m2)
- {
- return _mm_unpacklo_pi16 (__m1, __m2);
- }
- /* Interleave the 32-bit value from the low half of M1 with the 32-bit
- value from the low half of M2.
- _m_punpckldq is an alternate name that forwards to _mm_unpacklo_pi32. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_unpacklo_pi32 (__m64 __m1, __m64 __m2)
- {
- return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_punpckldq (__m64 __m1, __m64 __m2)
- {
- return _mm_unpacklo_pi32 (__m1, __m2);
- }
- /* Add the 8-bit values in M1 to the 8-bit values in M2.
- _m_paddb is an alternate name that forwards to _mm_add_pi8. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_add_pi8 (__m64 __m1, __m64 __m2)
- {
- return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_paddb (__m64 __m1, __m64 __m2)
- {
- return _mm_add_pi8 (__m1, __m2);
- }
- /* Add the 16-bit values in M1 to the 16-bit values in M2.
- _m_paddw is an alternate name that forwards to _mm_add_pi16. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_add_pi16 (__m64 __m1, __m64 __m2)
- {
- return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_paddw (__m64 __m1, __m64 __m2)
- {
- return _mm_add_pi16 (__m1, __m2);
- }
- /* Add the 32-bit values in M1 to the 32-bit values in M2.
- _m_paddd is an alternate name that forwards to _mm_add_pi32. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_add_pi32 (__m64 __m1, __m64 __m2)
- {
- return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_paddd (__m64 __m1, __m64 __m2)
- {
- return _mm_add_pi32 (__m1, __m2);
- }
- /* Add the 64-bit values in M1 to the 64-bit values in M2.
- The definition is compiled with the "sse2,mmx" target options: they are
- pushed here when __SSE2__ is not already defined and popped again right
- after the function. */
- #ifndef __SSE2__
- #pragma GCC push_options
- #pragma GCC target("sse2,mmx")
- #define __DISABLE_SSE2__
- #endif /* __SSE2__ */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_add_si64 (__m64 __m1, __m64 __m2)
- {
- return (__m64) __builtin_ia32_paddq ((__v1di)__m1, (__v1di)__m2);
- }
- #ifdef __DISABLE_SSE2__
- #undef __DISABLE_SSE2__
- #pragma GCC pop_options
- #endif /* __DISABLE_SSE2__ */
- /* Add the 8-bit values in M1 to the 8-bit values in M2 using signed
- saturated arithmetic.
- _m_paddsb is an alternate name that forwards to _mm_adds_pi8. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_adds_pi8 (__m64 __m1, __m64 __m2)
- {
- return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_paddsb (__m64 __m1, __m64 __m2)
- {
- return _mm_adds_pi8 (__m1, __m2);
- }
- /* Add the 16-bit values in M1 to the 16-bit values in M2 using signed
- saturated arithmetic.
- _m_paddsw is an alternate name that forwards to _mm_adds_pi16. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_adds_pi16 (__m64 __m1, __m64 __m2)
- {
- return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_paddsw (__m64 __m1, __m64 __m2)
- {
- return _mm_adds_pi16 (__m1, __m2);
- }
- /* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned
- saturated arithmetic.
- _m_paddusb is an alternate name that forwards to _mm_adds_pu8. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_adds_pu8 (__m64 __m1, __m64 __m2)
- {
- return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_paddusb (__m64 __m1, __m64 __m2)
- {
- return _mm_adds_pu8 (__m1, __m2);
- }
- /* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned
- saturated arithmetic.
- _m_paddusw is an alternate name that forwards to _mm_adds_pu16. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_adds_pu16 (__m64 __m1, __m64 __m2)
- {
- return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_paddusw (__m64 __m1, __m64 __m2)
- {
- return _mm_adds_pu16 (__m1, __m2);
- }
- /* Subtract the 8-bit values in M2 from the 8-bit values in M1.
- _m_psubb is an alternate name that forwards to _mm_sub_pi8. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_sub_pi8 (__m64 __m1, __m64 __m2)
- {
- return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_psubb (__m64 __m1, __m64 __m2)
- {
- return _mm_sub_pi8 (__m1, __m2);
- }
- /* Subtract the 16-bit values in M2 from the 16-bit values in M1.
- _m_psubw is an alternate name that forwards to _mm_sub_pi16. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_sub_pi16 (__m64 __m1, __m64 __m2)
- {
- return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_psubw (__m64 __m1, __m64 __m2)
- {
- return _mm_sub_pi16 (__m1, __m2);
- }
- /* Subtract the 32-bit values in M2 from the 32-bit values in M1.
- _m_psubd is an alternate name that forwards to _mm_sub_pi32. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_sub_pi32 (__m64 __m1, __m64 __m2)
- {
- return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_psubd (__m64 __m1, __m64 __m2)
- {
- return _mm_sub_pi32 (__m1, __m2);
- }
- /* Subtract the 64-bit value in M2 from the 64-bit value in M1.
- The definition is compiled with the "sse2,mmx" target options: they are
- pushed here when __SSE2__ is not already defined and popped again right
- after the function. */
- #ifndef __SSE2__
- #pragma GCC push_options
- #pragma GCC target("sse2,mmx")
- #define __DISABLE_SSE2__
- #endif /* __SSE2__ */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_sub_si64 (__m64 __m1, __m64 __m2)
- {
- return (__m64) __builtin_ia32_psubq ((__v1di)__m1, (__v1di)__m2);
- }
- #ifdef __DISABLE_SSE2__
- #undef __DISABLE_SSE2__
- #pragma GCC pop_options
- #endif /* __DISABLE_SSE2__ */
- /* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed
- saturating arithmetic.
- _m_psubsb is an alternate name that forwards to _mm_subs_pi8. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_subs_pi8 (__m64 __m1, __m64 __m2)
- {
- return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_psubsb (__m64 __m1, __m64 __m2)
- {
- return _mm_subs_pi8 (__m1, __m2);
- }
- /* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
- signed saturating arithmetic.
- _m_psubsw is an alternate name that forwards to _mm_subs_pi16. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_subs_pi16 (__m64 __m1, __m64 __m2)
- {
- return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_psubsw (__m64 __m1, __m64 __m2)
- {
- return _mm_subs_pi16 (__m1, __m2);
- }
- /* Subtract the 8-bit values in M2 from the 8-bit values in M1 using
- unsigned saturating arithmetic.
- _m_psubusb is an alternate name that forwards to _mm_subs_pu8. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_subs_pu8 (__m64 __m1, __m64 __m2)
- {
- return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_psubusb (__m64 __m1, __m64 __m2)
- {
- return _mm_subs_pu8 (__m1, __m2);
- }
- /* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
- unsigned saturating arithmetic.
- _m_psubusw is an alternate name that forwards to _mm_subs_pu16. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_subs_pu16 (__m64 __m1, __m64 __m2)
- {
- return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_psubusw (__m64 __m1, __m64 __m2)
- {
- return _mm_subs_pu16 (__m1, __m2);
- }
- /* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing
- four 32-bit intermediate results, which are then summed by pairs to
- produce two 32-bit results.
- _m_pmaddwd is an alternate name that forwards to _mm_madd_pi16. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_madd_pi16 (__m64 __m1, __m64 __m2)
- {
- return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_pmaddwd (__m64 __m1, __m64 __m2)
- {
- return _mm_madd_pi16 (__m1, __m2);
- }
- /* Multiply four signed 16-bit values in M1 by four signed 16-bit values in
- M2 and produce the high 16 bits of the 32-bit results.
- _m_pmulhw is an alternate name that forwards to _mm_mulhi_pi16. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mulhi_pi16 (__m64 __m1, __m64 __m2)
- {
- return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_pmulhw (__m64 __m1, __m64 __m2)
- {
- return _mm_mulhi_pi16 (__m1, __m2);
- }
- /* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce
- the low 16 bits of the results.
- _m_pmullw is an alternate name that forwards to _mm_mullo_pi16. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_mullo_pi16 (__m64 __m1, __m64 __m2)
- {
- return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_pmullw (__m64 __m1, __m64 __m2)
- {
- return _mm_mullo_pi16 (__m1, __m2);
- }
- /* Shift four 16-bit values in M left by COUNT.
- _mm_sll_pi16 takes COUNT as an __m64; _mm_slli_pi16 takes it as an int.
- _m_psllw and _m_psllwi are alternate names that forward to them. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_sll_pi16 (__m64 __m, __m64 __count)
- {
- return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (__v4hi)__count);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_psllw (__m64 __m, __m64 __count)
- {
- return _mm_sll_pi16 (__m, __count);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_slli_pi16 (__m64 __m, int __count)
- {
- return (__m64) __builtin_ia32_psllwi ((__v4hi)__m, __count);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_psllwi (__m64 __m, int __count)
- {
- return _mm_slli_pi16 (__m, __count);
- }
- /* Shift two 32-bit values in M left by COUNT.
- _mm_sll_pi32 takes COUNT as an __m64; _mm_slli_pi32 takes it as an int.
- _m_pslld and _m_pslldi are alternate names that forward to them. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_sll_pi32 (__m64 __m, __m64 __count)
- {
- return (__m64) __builtin_ia32_pslld ((__v2si)__m, (__v2si)__count);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_pslld (__m64 __m, __m64 __count)
- {
- return _mm_sll_pi32 (__m, __count);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_slli_pi32 (__m64 __m, int __count)
- {
- return (__m64) __builtin_ia32_pslldi ((__v2si)__m, __count);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_pslldi (__m64 __m, int __count)
- {
- return _mm_slli_pi32 (__m, __count);
- }
- /* Shift the 64-bit value in M left by COUNT.
- _mm_sll_si64 takes COUNT as an __m64; _mm_slli_si64 takes it as an int.
- _m_psllq and _m_psllqi are alternate names that forward to them. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_sll_si64 (__m64 __m, __m64 __count)
- {
- return (__m64) __builtin_ia32_psllq ((__v1di)__m, (__v1di)__count);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_psllq (__m64 __m, __m64 __count)
- {
- return _mm_sll_si64 (__m, __count);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_slli_si64 (__m64 __m, int __count)
- {
- return (__m64) __builtin_ia32_psllqi ((__v1di)__m, __count);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_psllqi (__m64 __m, int __count)
- {
- return _mm_slli_si64 (__m, __count);
- }
- /* Shift four 16-bit values in M right by COUNT; shift in the sign bit.
- _mm_sra_pi16 takes COUNT as an __m64; _mm_srai_pi16 takes it as an int.
- _m_psraw and _m_psrawi are alternate names that forward to them. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_sra_pi16 (__m64 __m, __m64 __count)
- {
- return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (__v4hi)__count);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_psraw (__m64 __m, __m64 __count)
- {
- return _mm_sra_pi16 (__m, __count);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_srai_pi16 (__m64 __m, int __count)
- {
- return (__m64) __builtin_ia32_psrawi ((__v4hi)__m, __count);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_psrawi (__m64 __m, int __count)
- {
- return _mm_srai_pi16 (__m, __count);
- }
- /* Shift two 32-bit values in M right by COUNT; shift in the sign bit.
- _mm_sra_pi32 takes COUNT as an __m64; _mm_srai_pi32 takes it as an int.
- _m_psrad and _m_psradi are alternate names that forward to them. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_sra_pi32 (__m64 __m, __m64 __count)
- {
- return (__m64) __builtin_ia32_psrad ((__v2si)__m, (__v2si)__count);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_psrad (__m64 __m, __m64 __count)
- {
- return _mm_sra_pi32 (__m, __count);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_srai_pi32 (__m64 __m, int __count)
- {
- return (__m64) __builtin_ia32_psradi ((__v2si)__m, __count);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_psradi (__m64 __m, int __count)
- {
- return _mm_srai_pi32 (__m, __count);
- }
- /* Shift four 16-bit values in M right by COUNT; shift in zeros.
- _mm_srl_pi16 takes COUNT as an __m64; _mm_srli_pi16 takes it as an int.
- _m_psrlw and _m_psrlwi are alternate names that forward to them. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_srl_pi16 (__m64 __m, __m64 __count)
- {
- return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (__v4hi)__count);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_psrlw (__m64 __m, __m64 __count)
- {
- return _mm_srl_pi16 (__m, __count);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_srli_pi16 (__m64 __m, int __count)
- {
- return (__m64) __builtin_ia32_psrlwi ((__v4hi)__m, __count);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_psrlwi (__m64 __m, int __count)
- {
- return _mm_srli_pi16 (__m, __count);
- }
- /* Shift two 32-bit values in M right by COUNT; shift in zeros.
- _mm_srl_pi32 takes COUNT as an __m64; _mm_srli_pi32 takes it as an int.
- _m_psrld and _m_psrldi are alternate names that forward to them. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_srl_pi32 (__m64 __m, __m64 __count)
- {
- return (__m64) __builtin_ia32_psrld ((__v2si)__m, (__v2si)__count);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_psrld (__m64 __m, __m64 __count)
- {
- return _mm_srl_pi32 (__m, __count);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_srli_pi32 (__m64 __m, int __count)
- {
- return (__m64) __builtin_ia32_psrldi ((__v2si)__m, __count);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_psrldi (__m64 __m, int __count)
- {
- return _mm_srli_pi32 (__m, __count);
- }
- /* Shift the 64-bit value in M right by COUNT; shift in zeros.
- _mm_srl_si64 takes COUNT as an __m64; _mm_srli_si64 takes it as an int.
- _m_psrlq and _m_psrlqi are alternate names that forward to them. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_srl_si64 (__m64 __m, __m64 __count)
- {
- return (__m64) __builtin_ia32_psrlq ((__v1di)__m, (__v1di)__count);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_psrlq (__m64 __m, __m64 __count)
- {
- return _mm_srl_si64 (__m, __count);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_srli_si64 (__m64 __m, int __count)
- {
- return (__m64) __builtin_ia32_psrlqi ((__v1di)__m, __count);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_psrlqi (__m64 __m, int __count)
- {
- return _mm_srli_si64 (__m, __count);
- }
- /* Bit-wise AND the 64-bit values in M1 and M2. The __m64 operands are
- handed to the builtin as they are, without a cast.
- _m_pand is an alternate name that forwards to _mm_and_si64. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_and_si64 (__m64 __m1, __m64 __m2)
- {
- return __builtin_ia32_pand (__m1, __m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_pand (__m64 __m1, __m64 __m2)
- {
- return _mm_and_si64 (__m1, __m2);
- }
- /* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the
- 64-bit value in M2. The __m64 operands are handed to the builtin as they
- are, without a cast.
- _m_pandn is an alternate name that forwards to _mm_andnot_si64. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_andnot_si64 (__m64 __m1, __m64 __m2)
- {
- return __builtin_ia32_pandn (__m1, __m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_pandn (__m64 __m1, __m64 __m2)
- {
- return _mm_andnot_si64 (__m1, __m2);
- }
- /* Bit-wise inclusive OR the 64-bit values in M1 and M2. The __m64
- operands are handed to the builtin as they are, without a cast.
- _m_por is an alternate name that forwards to _mm_or_si64. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_or_si64 (__m64 __m1, __m64 __m2)
- {
- return __builtin_ia32_por (__m1, __m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_por (__m64 __m1, __m64 __m2)
- {
- return _mm_or_si64 (__m1, __m2);
- }
- /* Bit-wise exclusive OR the 64-bit values in M1 and M2. The __m64
- operands are handed to the builtin as they are, without a cast.
- _m_pxor is an alternate name that forwards to _mm_xor_si64. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_xor_si64 (__m64 __m1, __m64 __m2)
- {
- return __builtin_ia32_pxor (__m1, __m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_pxor (__m64 __m1, __m64 __m2)
- {
- return _mm_xor_si64 (__m1, __m2);
- }
- /* Compare eight 8-bit values. The result of the comparison is 0xFF if the
- test is true and zero if false.
- _mm_cmpeq_pi8 uses the pcmpeqb builtin and _mm_cmpgt_pi8 the pcmpgtb
- builtin; _m_pcmpeqb and _m_pcmpgtb are alternate names that forward to
- them. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cmpeq_pi8 (__m64 __m1, __m64 __m2)
- {
- return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_pcmpeqb (__m64 __m1, __m64 __m2)
- {
- return _mm_cmpeq_pi8 (__m1, __m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cmpgt_pi8 (__m64 __m1, __m64 __m2)
- {
- return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_pcmpgtb (__m64 __m1, __m64 __m2)
- {
- return _mm_cmpgt_pi8 (__m1, __m2);
- }
- /* Compare four 16-bit values. The result of the comparison is 0xFFFF if
- the test is true and zero if false.
- _mm_cmpeq_pi16 uses the pcmpeqw builtin and _mm_cmpgt_pi16 the pcmpgtw
- builtin; _m_pcmpeqw and _m_pcmpgtw are alternate names that forward to
- them. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cmpeq_pi16 (__m64 __m1, __m64 __m2)
- {
- return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_pcmpeqw (__m64 __m1, __m64 __m2)
- {
- return _mm_cmpeq_pi16 (__m1, __m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cmpgt_pi16 (__m64 __m1, __m64 __m2)
- {
- return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_pcmpgtw (__m64 __m1, __m64 __m2)
- {
- return _mm_cmpgt_pi16 (__m1, __m2);
- }
- /* Compare two 32-bit values. The result of the comparison is 0xFFFFFFFF if
- the test is true and zero if false.
- _mm_cmpeq_pi32 uses the pcmpeqd builtin and _mm_cmpgt_pi32 the pcmpgtd
- builtin; _m_pcmpeqd and _m_pcmpgtd are alternate names that forward to
- them. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
- {
- return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_pcmpeqd (__m64 __m1, __m64 __m2)
- {
- return _mm_cmpeq_pi32 (__m1, __m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_cmpgt_pi32 (__m64 __m1, __m64 __m2)
- {
- return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _m_pcmpgtd (__m64 __m1, __m64 __m2)
- {
- return _mm_cmpgt_pi32 (__m1, __m2);
- }
- /* Creates a 64-bit zero by casting the constant 0LL to __m64. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_setzero_si64 (void)
- {
- return (__m64)0LL;
- }
- /* Creates a vector of two 32-bit values; I0 is least significant.
- The parameters are listed most-significant first, so they are handed to
- the vec_init builtin in the opposite order (I0, then I1). */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_set_pi32 (int __i1, int __i0)
- {
- return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
- }
- /* Creates a vector of four 16-bit values; W0 is least significant.
- The parameters are listed most-significant first, so they are handed to
- the vec_init builtin in the opposite order (W0 through W3). */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_set_pi16 (short __w3, short __w2, short __w1, short __w0)
- {
- return (__m64) __builtin_ia32_vec_init_v4hi (__w0, __w1, __w2, __w3);
- }
- /* Creates a vector of eight 8-bit values; B0 is least significant.
- The parameters are listed most-significant first, so they are handed to
- the vec_init builtin in the opposite order (B0 through B7). */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_set_pi8 (char __b7, char __b6, char __b5, char __b4,
- char __b3, char __b2, char __b1, char __b0)
- {
- return (__m64) __builtin_ia32_vec_init_v8qi (__b0, __b1, __b2, __b3,
- __b4, __b5, __b6, __b7);
- }
- /* Similar to the _mm_set_pi32, _mm_set_pi16 and _mm_set_pi8 functions, but
- with the arguments in reverse order (element 0 first). Each one forwards
- to the matching _mm_set_* function with its argument list flipped. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_setr_pi32 (int __i0, int __i1)
- {
- return _mm_set_pi32 (__i1, __i0);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_setr_pi16 (short __w0, short __w1, short __w2, short __w3)
- {
- return _mm_set_pi16 (__w3, __w2, __w1, __w0);
- }
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3,
- char __b4, char __b5, char __b6, char __b7)
- {
- return _mm_set_pi8 (__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
- }
- /* Creates a vector of two 32-bit values, both elements containing I.
- Implemented by passing I to _mm_set_pi32 for every element. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_set1_pi32 (int __i)
- {
- return _mm_set_pi32 (__i, __i);
- }
- /* Creates a vector of four 16-bit values, all elements containing W.
- Implemented by passing W to _mm_set_pi16 for every element. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_set1_pi16 (short __w)
- {
- return _mm_set_pi16 (__w, __w, __w, __w);
- }
- /* Creates a vector of eight 8-bit values, all elements containing B.
- Implemented by passing B to _mm_set_pi8 for every element. */
- extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_set1_pi8 (char __b)
- {
- return _mm_set_pi8 (__b, __b, __b, __b, __b, __b, __b, __b);
- }
- #ifdef __DISABLE_MMX__
- #undef __DISABLE_MMX__
- #pragma GCC pop_options
- #endif /* __DISABLE_MMX__ */
- #endif /* _MMINTRIN_H_INCLUDED */
|