/* Copyright (C) 2017-2019 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
  18. #ifndef _IMMINTRIN_H_INCLUDED
  19. #error "Never use <gfniintrin.h> directly; include <immintrin.h> instead."
  20. #endif
  21. #ifndef _GFNIINTRIN_H_INCLUDED
  22. #define _GFNIINTRIN_H_INCLUDED
  23. #if !defined(__GFNI__) || !defined(__SSE2__)
  24. #pragma GCC push_options
  25. #pragma GCC target("gfni,sse2")
  26. #define __DISABLE_GFNI__
  27. #endif /* __GFNI__ */
/* Multiply the packed bytes of __A and __B as elements of the Galois
   field GF(2^8) (GFNI vgf2p8mulb).  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_gf2p8mul_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi((__v16qi) __A,
                                                   (__v16qi) __B);
}
  35. #ifdef __OPTIMIZE__
/* Per-byte GF(2) affine transform of the GF(2^8) multiplicative inverse
   of each byte of __A, using the 8x8 bit matrix packed in __B and the
   constant byte __C (GFNI vgf2p8affineinvqb) -- operand roles per
   Intel's intrinsics guide.  __C must be a compile-time constant
   (hence the __OPTIMIZE__ guard; see the macro form below).  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_gf2p8affineinv_epi64_epi8 (__m128i __A, __m128i __B, const int __C)
{
  return (__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi ((__v16qi) __A,
                                                           (__v16qi) __B,
                                                           __C);
}
/* Per-byte GF(2) affine transform of __A using the 8x8 bit matrix
   packed in __B plus the constant byte __C (GFNI vgf2p8affineqb).
   __C must be a compile-time constant.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_gf2p8affine_epi64_epi8 (__m128i __A, __m128i __B, const int __C)
{
  return (__m128i) __builtin_ia32_vgf2p8affineqb_v16qi ((__v16qi) __A,
                                                        (__v16qi) __B, __C);
}
  51. #else
/* Without __OPTIMIZE__ the inline functions above cannot guarantee the
   immediate stays a constant expression, so provide macro forms that
   pass C straight through to the builtin.  */
#define _mm_gf2p8affineinv_epi64_epi8(A, B, C) \
  ((__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \
                                                    (__v16qi)(__m128i)(B), (int)(C)))
#define _mm_gf2p8affine_epi64_epi8(A, B, C) \
  ((__m128i) __builtin_ia32_vgf2p8affineqb_v16qi ((__v16qi)(__m128i)(A), \
                                                  (__v16qi)(__m128i)(B), (int)(C)))
  58. #endif
  59. #ifdef __DISABLE_GFNI__
  60. #undef __DISABLE_GFNI__
  61. #pragma GCC pop_options
  62. #endif /* __DISABLE_GFNI__ */
  63. #if !defined(__GFNI__) || !defined(__AVX__)
  64. #pragma GCC push_options
  65. #pragma GCC target("gfni,avx")
  66. #define __DISABLE_GFNIAVX__
  67. #endif /* __GFNIAVX__ */
/* 256-bit form: multiply the packed bytes of __A and __B in GF(2^8)
   (GFNI + AVX vgf2p8mulb).  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_gf2p8mul_epi8 (__m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi ((__v32qi) __A,
                                                    (__v32qi) __B);
}
  75. #ifdef __OPTIMIZE__
/* 256-bit form: GF(2) affine transform of the GF(2^8) inverse of each
   byte of __A, matrix in __B, constant byte __C (vgf2p8affineinvqb).
   __C must be a compile-time constant.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_gf2p8affineinv_epi64_epi8 (__m256i __A, __m256i __B, const int __C)
{
  return (__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi ((__v32qi) __A,
                                                           (__v32qi) __B,
                                                           __C);
}
/* 256-bit form: GF(2) affine transform of each byte of __A, matrix in
   __B, constant byte __C (vgf2p8affineqb).  __C must be a compile-time
   constant.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_gf2p8affine_epi64_epi8 (__m256i __A, __m256i __B, const int __C)
{
  return (__m256i) __builtin_ia32_vgf2p8affineqb_v32qi ((__v32qi) __A,
                                                        (__v32qi) __B, __C);
}
  91. #else
/* Macro forms of the 256-bit affine intrinsics for the !__OPTIMIZE__
   case, keeping the immediate C a constant expression.  */
#define _mm256_gf2p8affineinv_epi64_epi8(A, B, C) \
  ((__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A), \
                                                    (__v32qi)(__m256i)(B), \
                                                    (int)(C)))
#define _mm256_gf2p8affine_epi64_epi8(A, B, C) \
  ((__m256i) __builtin_ia32_vgf2p8affineqb_v32qi ((__v32qi)(__m256i)(A), \
                                                  (__v32qi)(__m256i)(B), (int)(C)))
  99. #endif
  100. #ifdef __DISABLE_GFNIAVX__
  101. #undef __DISABLE_GFNIAVX__
  102. #pragma GCC pop_options
  103. #endif /* __GFNIAVX__ */
  104. #if !defined(__GFNI__) || !defined(__AVX512VL__)
  105. #pragma GCC push_options
  106. #pragma GCC target("gfni,avx512vl")
  107. #define __DISABLE_GFNIAVX512VL__
  108. #endif /* __GFNIAVX512VL__ */
/* Masked GF(2^8) multiply of __C and __D.  Merge-masking: the builtin's
   trailing (merge-source, mask) pair is (__A, __B), so bytes whose bit
   in __B is clear are taken from __A.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_gf2p8mul_epi8 (__m128i __A, __mmask16 __B, __m128i __C, __m128i __D)
{
  return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi_mask ((__v16qi) __C,
                                                         (__v16qi) __D,
                                                         (__v16qi)__A, __B);
}
/* Masked GF(2^8) multiply of __B and __C.  Zero-masking: the merge
   source is _mm_setzero_si128 (), so bytes whose bit in __A is clear
   become zero.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_gf2p8mul_epi8 (__mmask16 __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi_mask ((__v16qi) __B,
      (__v16qi) __C, (__v16qi) _mm_setzero_si128 (), __A);
}
  124. #ifdef __OPTIMIZE__
/* Merge-masked affine-inverse transform of __C (matrix __D, constant
   __E).  Bytes whose bit in mask __B is clear are taken from __A.
   __E must be a compile-time constant.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_gf2p8affineinv_epi64_epi8 (__m128i __A, __mmask16 __B, __m128i __C,
                                    __m128i __D, const int __E)
{
  return (__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask ((__v16qi) __C,
                                                                (__v16qi) __D,
                                                                __E,
                                                                (__v16qi)__A,
                                                                __B);
}
/* Zero-masked affine-inverse transform of __B (matrix __C, constant
   __D).  Bytes whose bit in mask __A is clear are zeroed.  __D must be
   a compile-time constant.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_gf2p8affineinv_epi64_epi8 (__mmask16 __A, __m128i __B, __m128i __C,
                                     const int __D)
{
  return (__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask ((__v16qi) __B,
                                        (__v16qi) __C, __D,
                                        (__v16qi) _mm_setzero_si128 (),
                                        __A);
}
/* Merge-masked affine transform of __C (matrix __D, constant __E).
   Bytes whose bit in mask __B is clear are taken from __A.  __E must
   be a compile-time constant.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_gf2p8affine_epi64_epi8 (__m128i __A, __mmask16 __B, __m128i __C,
                                 __m128i __D, const int __E)
{
  return (__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask ((__v16qi) __C,
      (__v16qi) __D, __E, (__v16qi)__A, __B);
}
/* Zero-masked affine transform of __B (matrix __C, constant __D).
   Bytes whose bit in mask __A is clear are zeroed.  __D must be a
   compile-time constant.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_gf2p8affine_epi64_epi8 (__mmask16 __A, __m128i __B, __m128i __C,
                                  const int __D)
{
  return (__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask ((__v16qi) __B,
      (__v16qi) __C, __D, (__v16qi) _mm_setzero_si128 (), __A);
}
  162. #else
/* Macro forms of the masked 128-bit affine intrinsics for the
   !__OPTIMIZE__ case; argument order to the builtins matches the inline
   functions above: (src1, src2, imm, merge-source, mask).  */
#define _mm_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E) \
  ((__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask( \
      (__v16qi)(__m128i)(C), (__v16qi)(__m128i)(D), \
      (int)(E), (__v16qi)(__m128i)(A), (__mmask16)(B)))
#define _mm_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D) \
  ((__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask( \
      (__v16qi)(__m128i)(B), (__v16qi)(__m128i)(C), \
      (int)(D), (__v16qi)(__m128i) _mm_setzero_si128 (), \
      (__mmask16)(A)))
#define _mm_mask_gf2p8affine_epi64_epi8(A, B, C, D, E) \
  ((__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask((__v16qi)(__m128i)(C),\
      (__v16qi)(__m128i)(D), (int)(E), (__v16qi)(__m128i)(A), (__mmask16)(B)))
#define _mm_maskz_gf2p8affine_epi64_epi8(A, B, C, D) \
  ((__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask((__v16qi)(__m128i)(B),\
      (__v16qi)(__m128i)(C), (int)(D), \
      (__v16qi)(__m128i) _mm_setzero_si128 (), (__mmask16)(A)))
  179. #endif
  180. #ifdef __DISABLE_GFNIAVX512VL__
  181. #undef __DISABLE_GFNIAVX512VL__
  182. #pragma GCC pop_options
  183. #endif /* __GFNIAVX512VL__ */
  184. #if !defined(__GFNI__) || !defined(__AVX512VL__) || !defined(__AVX512BW__)
  185. #pragma GCC push_options
  186. #pragma GCC target("gfni,avx512vl,avx512bw")
  187. #define __DISABLE_GFNIAVX512VLBW__
  188. #endif /* __GFNIAVX512VLBW__ */
/* 256-bit merge-masked GF(2^8) multiply of __C and __D; bytes whose
   bit in mask __B is clear are taken from __A.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_gf2p8mul_epi8 (__m256i __A, __mmask32 __B, __m256i __C,
                           __m256i __D)
{
  return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi_mask ((__v32qi) __C,
                                                         (__v32qi) __D,
                                                         (__v32qi)__A, __B);
}
/* 256-bit zero-masked GF(2^8) multiply of __B and __C; bytes whose bit
   in mask __A is clear are zeroed.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_gf2p8mul_epi8 (__mmask32 __A, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi_mask ((__v32qi) __B,
      (__v32qi) __C, (__v32qi) _mm256_setzero_si256 (), __A);
}
  205. #ifdef __OPTIMIZE__
/* 256-bit merge-masked affine-inverse transform of __C (matrix __D,
   constant __E); bytes whose bit in mask __B is clear come from __A.
   __E must be a compile-time constant.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_gf2p8affineinv_epi64_epi8 (__m256i __A, __mmask32 __B,
                                       __m256i __C, __m256i __D, const int __E)
{
  return (__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask ((__v32qi) __C,
                                                                (__v32qi) __D,
                                                                __E,
                                                                (__v32qi)__A,
                                                                __B);
}
/* 256-bit zero-masked affine-inverse transform of __B (matrix __C,
   constant __D); bytes whose bit in mask __A is clear are zeroed.
   __D must be a compile-time constant.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_gf2p8affineinv_epi64_epi8 (__mmask32 __A, __m256i __B,
                                        __m256i __C, const int __D)
{
  return (__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask ((__v32qi) __B,
      (__v32qi) __C, __D,
      (__v32qi) _mm256_setzero_si256 (), __A);
}
/* 256-bit merge-masked affine transform of __C (matrix __D, constant
   __E); bytes whose bit in mask __B is clear come from __A.  __E must
   be a compile-time constant.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_gf2p8affine_epi64_epi8 (__m256i __A, __mmask32 __B, __m256i __C,
                                    __m256i __D, const int __E)
{
  return (__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask ((__v32qi) __C,
                                                             (__v32qi) __D,
                                                             __E,
                                                             (__v32qi)__A,
                                                             __B);
}
/* 256-bit zero-masked affine transform of __B (matrix __C, constant
   __D); bytes whose bit in mask __A is clear are zeroed.  __D must be
   a compile-time constant.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_gf2p8affine_epi64_epi8 (__mmask32 __A, __m256i __B,
                                     __m256i __C, const int __D)
{
  return (__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask ((__v32qi) __B,
      (__v32qi) __C, __D, (__v32qi)_mm256_setzero_si256 (), __A);
}
  245. #else
/* Macro forms of the masked 256-bit affine intrinsics for the
   !__OPTIMIZE__ case; builtin argument order is
   (src1, src2, imm, merge-source, mask).  */
#define _mm256_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E) \
  ((__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask( \
      (__v32qi)(__m256i)(C), (__v32qi)(__m256i)(D), (int)(E), \
      (__v32qi)(__m256i)(A), (__mmask32)(B)))
#define _mm256_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D) \
  ((__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask( \
      (__v32qi)(__m256i)(B), (__v32qi)(__m256i)(C), (int)(D), \
      (__v32qi)(__m256i) _mm256_setzero_si256 (), (__mmask32)(A)))
#define _mm256_mask_gf2p8affine_epi64_epi8(A, B, C, D, E) \
  ((__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask((__v32qi)(__m256i)(C),\
      (__v32qi)(__m256i)(D), (int)(E), (__v32qi)(__m256i)(A), (__mmask32)(B)))
#define _mm256_maskz_gf2p8affine_epi64_epi8(A, B, C, D) \
  ((__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask((__v32qi)(__m256i)(B),\
      (__v32qi)(__m256i)(C), (int)(D), \
      (__v32qi)(__m256i) _mm256_setzero_si256 (), (__mmask32)(A)))
  261. #endif
  262. #ifdef __DISABLE_GFNIAVX512VLBW__
  263. #undef __DISABLE_GFNIAVX512VLBW__
  264. #pragma GCC pop_options
  265. #endif /* __GFNIAVX512VLBW__ */
  266. #if !defined(__GFNI__) || !defined(__AVX512F__) || !defined(__AVX512BW__)
  267. #pragma GCC push_options
  268. #pragma GCC target("gfni,avx512f,avx512bw")
  269. #define __DISABLE_GFNIAVX512FBW__
  270. #endif /* __GFNIAVX512FBW__ */
/* 512-bit merge-masked GF(2^8) multiply of __C and __D; bytes whose
   bit in mask __B is clear are taken from __A.  */
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_gf2p8mul_epi8 (__m512i __A, __mmask64 __B, __m512i __C,
                           __m512i __D)
{
  return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi_mask ((__v64qi) __C,
      (__v64qi) __D, (__v64qi)__A, __B);
}
/* 512-bit zero-masked GF(2^8) multiply of __B and __C; bytes whose bit
   in mask __A is clear are zeroed.  */
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_gf2p8mul_epi8 (__mmask64 __A, __m512i __B, __m512i __C)
{
  return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi_mask ((__v64qi) __B,
      (__v64qi) __C, (__v64qi) _mm512_setzero_si512 (), __A);
}
/* 512-bit form: multiply the packed bytes of __A and __B in GF(2^8).  */
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_gf2p8mul_epi8 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi ((__v64qi) __A,
                                                    (__v64qi) __B);
}
  293. #ifdef __OPTIMIZE__
/* 512-bit merge-masked affine-inverse transform of __C (matrix __D,
   constant __E); bytes whose bit in mask __B is clear come from __A.
   __E must be a compile-time constant.  */
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_gf2p8affineinv_epi64_epi8 (__m512i __A, __mmask64 __B, __m512i __C,
                                       __m512i __D, const int __E)
{
  return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask ((__v64qi) __C,
                                                                (__v64qi) __D,
                                                                __E,
                                                                (__v64qi)__A,
                                                                __B);
}
/* 512-bit zero-masked affine-inverse transform of __B (matrix __C,
   constant __D); bytes whose bit in mask __A is clear are zeroed.
   __D must be a compile-time constant.  */
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_gf2p8affineinv_epi64_epi8 (__mmask64 __A, __m512i __B,
                                        __m512i __C, const int __D)
{
  return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask ((__v64qi) __B,
      (__v64qi) __C, __D,
      (__v64qi) _mm512_setzero_si512 (), __A);
}
/* 512-bit form: GF(2) affine transform of the GF(2^8) inverse of each
   byte of __A, matrix in __B, constant byte __C.  __C must be a
   compile-time constant.  */
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_gf2p8affineinv_epi64_epi8 (__m512i __A, __m512i __B, const int __C)
{
  return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi ((__v64qi) __A,
                                                           (__v64qi) __B, __C);
}
/* 512-bit merge-masked affine transform of __C (matrix __D, constant
   __E); bytes whose bit in mask __B is clear come from __A.  __E must
   be a compile-time constant.  */
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_gf2p8affine_epi64_epi8 (__m512i __A, __mmask64 __B, __m512i __C,
                                    __m512i __D, const int __E)
{
  return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask ((__v64qi) __C,
      (__v64qi) __D, __E, (__v64qi)__A, __B);
}
/* 512-bit zero-masked affine transform of __B (matrix __C, constant
   __D); bytes whose bit in mask __A is clear are zeroed.  __D must be
   a compile-time constant.  */
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_gf2p8affine_epi64_epi8 (__mmask64 __A, __m512i __B, __m512i __C,
                                     const int __D)
{
  return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask ((__v64qi) __B,
      (__v64qi) __C, __D, (__v64qi) _mm512_setzero_si512 (), __A);
}
/* 512-bit form: GF(2) affine transform of each byte of __A, matrix in
   __B, constant byte __C.  __C must be a compile-time constant.  */
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_gf2p8affine_epi64_epi8 (__m512i __A, __m512i __B, const int __C)
{
  return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi ((__v64qi) __A,
                                                        (__v64qi) __B, __C);
}
  344. #else
/* Macro forms of the 512-bit affine intrinsics for the !__OPTIMIZE__
   case; builtin argument order for the masked variants is
   (src1, src2, imm, merge-source, mask).  */
#define _mm512_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E) \
  ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask( \
      (__v64qi)(__m512i)(C), (__v64qi)(__m512i)(D), (int)(E), \
      (__v64qi)(__m512i)(A), (__mmask64)(B)))
#define _mm512_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D) \
  ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask( \
      (__v64qi)(__m512i)(B), (__v64qi)(__m512i)(C), (int)(D), \
      (__v64qi)(__m512i) _mm512_setzero_si512 (), (__mmask64)(A)))
#define _mm512_gf2p8affineinv_epi64_epi8(A, B, C) \
  ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi ( \
      (__v64qi)(__m512i)(A), (__v64qi)(__m512i)(B), (int)(C)))
#define _mm512_mask_gf2p8affine_epi64_epi8(A, B, C, D, E) \
  ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask((__v64qi)(__m512i)(C),\
      (__v64qi)(__m512i)(D), (int)(E), (__v64qi)(__m512i)(A), (__mmask64)(B)))
#define _mm512_maskz_gf2p8affine_epi64_epi8(A, B, C, D) \
  ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask((__v64qi)(__m512i)(B),\
      (__v64qi)(__m512i)(C), (int)(D), \
      (__v64qi)(__m512i) _mm512_setzero_si512 (), (__mmask64)(A)))
#define _mm512_gf2p8affine_epi64_epi8(A, B, C) \
  ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi ((__v64qi)(__m512i)(A), \
      (__v64qi)(__m512i)(B), (int)(C)))
  366. #endif
  367. #ifdef __DISABLE_GFNIAVX512FBW__
  368. #undef __DISABLE_GFNIAVX512FBW__
  369. #pragma GCC pop_options
  370. #endif /* __GFNIAVX512FBW__ */
  371. #endif /* _GFNIINTRIN_H_INCLUDED */