diff --git a/lib/Headers/__wmmintrin_pclmul.h b/lib/Headers/__wmmintrin_pclmul.h index e94cc2a4df909cb15242fc2bdff94ffaf58f0f29..e9c6a9f6d4152b14c0a4068c7b085209930af42f 100644 --- a/lib/Headers/__wmmintrin_pclmul.h +++ b/lib/Headers/__wmmintrin_pclmul.h @@ -42,12 +42,11 @@ /// A 128-bit vector of [2 x i64] containing one of the source operands. /// \param __I /// An immediate value specifying which 64-bit values to select from the -/// operands. -/// Bit 0 is used to select a value from operand \a __X, and bit 4 is used -/// to select a value from operand \a __Y: -/// Bit[0]=0 indicates that bits[63:0] of operand \a __X are used. -/// Bit[0]=1 indicates that bits[127:64] of operand \a __X are used. -/// Bit[4]=0 indicates that bits[63:0] of operand \a __Y are used. +/// operands. Bit 0 is used to select a value from operand \a __X, and bit +/// 4 is used to select a value from operand \a __Y: \n +/// Bit[0]=0 indicates that bits[63:0] of operand \a __X are used. \n +/// Bit[0]=1 indicates that bits[127:64] of operand \a __X are used. \n +/// Bit[4]=0 indicates that bits[63:0] of operand \a __Y are used. \n /// Bit[4]=1 indicates that bits[127:64] of operand \a __Y are used. /// \returns The 128-bit integer vector containing the result of the carry-less /// multiplication of the selected 64-bit values. diff --git a/lib/Headers/bmiintrin.h b/lib/Headers/bmiintrin.h index 650ee04e016dea662667ddb5dd61b037d2d5f7a3..488eb2dbd3d4b833865134c3f2f7263c1b995f8c 100644 --- a/lib/Headers/bmiintrin.h +++ b/lib/Headers/bmiintrin.h @@ -295,8 +295,8 @@ __tzcnt_u32(unsigned int __X) /// /// \param __X /// An unsigned 32-bit integer whose trailing zeros are to be counted. -/// \returns An 32-bit integer containing the number of trailing zero -/// bits in the operand. +/// \returns A 32-bit integer containing the number of trailing zero bits in +/// the operand. 
static __inline__ int __RELAXED_FN_ATTRS _mm_tzcnt_32(unsigned int __X) { @@ -532,8 +532,8 @@ __tzcnt_u64(unsigned long long __X) /// /// \param __X /// An unsigned 64-bit integer whose trailing zeros are to be counted. -/// \returns An 64-bit integer containing the number of trailing zero -/// bits in the operand. +/// \returns A 64-bit integer containing the number of trailing zero bits in +/// the operand. static __inline__ long long __RELAXED_FN_ATTRS _mm_tzcnt_64(unsigned long long __X) { diff --git a/lib/Headers/emmintrin.h b/lib/Headers/emmintrin.h index 1830254109d27a710430582f06e0419dd1bdb391..1512f9f0b47bcf60a0819f8b4d06256b40b9824c 100644 --- a/lib/Headers/emmintrin.h +++ b/lib/Headers/emmintrin.h @@ -551,7 +551,8 @@ _mm_cmpord_pd(__m128d __a, __m128d __b) /// /// \headerfile <x86intrin.h> /// -/// This intrinsic corresponds to the <c> VCMPUNORDPD / CMPUNORDPD </c> instruction. +/// This intrinsic corresponds to the <c> VCMPUNORDPD / CMPUNORDPD </c> +/// instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. @@ -734,10 +735,10 @@ _mm_cmple_sd(__m128d __a, __m128d __b) return (__m128d)__builtin_ia32_cmplesd((__v2df)__a, (__v2df)__b); } -/// \brief Compares the lower double-precision floating-point values in each -/// of the two 128-bit floating-point vectors of [2 x double] to determine -/// if the value in the first parameter is greater than the corresponding -/// value in the second parameter. The comparison yields 0h for false, +/// \brief Compares the lower double-precision floating-point values in each of +/// the two 128-bit floating-point vectors of [2 x double] to determine if +/// the value in the first parameter is greater than the corresponding value +/// in the second parameter. The comparison yields 0h for false, /// FFFFFFFFFFFFFFFFh for true. 
/// /// \headerfile <x86intrin.h> @@ -784,9 +785,9 @@ _mm_cmpge_sd(__m128d __a, __m128d __b) return (__m128d) { __c[0], __a[1] }; } -/// \brief Compares the lower double-precision floating-point values in each -/// of the two 128-bit floating-point vectors of [2 x double] to determine -/// if the value in the first parameter is "ordered" with respect to the +/// \brief Compares the lower double-precision floating-point values in each of +/// the two 128-bit floating-point vectors of [2 x double] to determine if +/// the value in the first parameter is "ordered" with respect to the /// corresponding value in the second parameter. The comparison yields 0h for /// false, FFFFFFFFFFFFFFFFh for true. A pair of double-precision values are /// "ordered" with respect to each other if neither value is a NaN. @@ -809,16 +810,17 @@ _mm_cmpord_sd(__m128d __a, __m128d __b) return (__m128d)__builtin_ia32_cmpordsd((__v2df)__a, (__v2df)__b); } -/// \brief Compares the lower double-precision floating-point values in each -/// of the two 128-bit floating-point vectors of [2 x double] to determine -/// if the value in the first parameter is "unordered" with respect to the +/// \brief Compares the lower double-precision floating-point values in each of +/// the two 128-bit floating-point vectors of [2 x double] to determine if +/// the value in the first parameter is "unordered" with respect to the /// corresponding value in the second parameter. The comparison yields 0h /// for false, FFFFFFFFFFFFFFFFh for true. A pair of double-precision values /// are "unordered" with respect to each other if one or both values are NaN. /// /// \headerfile <x86intrin.h> /// -/// This intrinsic corresponds to the <c> VCMPUNORDSD / CMPUNORDSD </c> instruction. +/// This intrinsic corresponds to the <c> VCMPUNORDSD / CMPUNORDSD </c> +/// instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. 
The lower double-precision value is @@ -1115,8 +1117,8 @@ _mm_ucomieq_sd(__m128d __a, __m128d __b) /// \brief Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is less than the corresponding value in -/// the second parameter. The comparison yields 0 for false, 1 for true. -/// If either of the two lower double-precision values is NaN, 1 is returned. +/// the second parameter. The comparison yields 0 for false, 1 for true. If +/// either of the two lower double-precision values is NaN, 1 is returned. /// /// \headerfile <x86intrin.h> /// @@ -1414,7 +1416,8 @@ _mm_cvtss_sd(__m128d __a, __m128 __b) /// /// \headerfile <x86intrin.h> /// -/// This intrinsic corresponds to the <c> VCVTTPD2DQ / CVTTPD2DQ </c> instruction. +/// This intrinsic corresponds to the <c> VCVTTPD2DQ / CVTTPD2DQ </c> +/// instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. @@ -1431,7 +1434,8 @@ _mm_cvttpd_epi32(__m128d __a) /// /// \headerfile <x86intrin.h> /// -/// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c> instruction. +/// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c> +/// instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the @@ -1559,9 +1563,9 @@ _mm_load1_pd(double const *__dp) /// /// \headerfile <x86intrin.h> /// -/// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction + needed -/// shuffling instructions. In AVX mode, the shuffling may be combined with the -/// \c VMOVAPD, resulting in only a \c VPERMILPD instruction. +/// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction + +/// needed shuffling instructions. In AVX mode, the shuffling may be combined +/// with the \c VMOVAPD, resulting in only a \c VPERMILPD instruction. 
/// /// \param __dp /// A 16-byte aligned pointer to an array of double-precision values to be @@ -1624,7 +1628,7 @@ _mm_load_sd(double const *__dp) /// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction. /// /// \param __a -/// A 128-bit vector of [2 x double]. +/// A 128-bit vector of [2 x double]. \n /// Bits [63:0] are written to bits [63:0] of the result. /// \param __dp /// A pointer to a 64-bit memory location containing a double-precision @@ -1651,7 +1655,7 @@ _mm_loadh_pd(__m128d __a, double const *__dp) /// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction. /// /// \param __a -/// A 128-bit vector of [2 x double]. +/// A 128-bit vector of [2 x double]. \n /// Bits [127:64] are written to bits [127:64] of the result. /// \param __dp /// A pointer to a 64-bit memory location containing a double-precision @@ -2645,8 +2649,8 @@ _mm_xor_si128(__m128i __a, __m128i __b) /// \param a /// A 128-bit integer vector containing the source operand. /// \param imm -/// An immediate value specifying the number of bytes to left-shift -/// operand \a a. +/// An immediate value specifying the number of bytes to left-shift operand +/// \a a. /// \returns A 128-bit integer vector containing the left-shifted value. #define _mm_slli_si128(a, imm) __extension__ ({ \ (__m128i)__builtin_shufflevector( \ @@ -3247,7 +3251,8 @@ _mm_cvtsd_si64(__m128d __a) /// /// \headerfile <x86intrin.h> /// -/// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c> instruction. +/// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c> +/// instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the @@ -3296,7 +3301,8 @@ _mm_cvtps_epi32(__m128 __a) /// /// \headerfile <x86intrin.h> /// -/// This intrinsic corresponds to the <c> VCVTTPS2DQ / CVTTPS2DQ </c> instruction. +/// This intrinsic corresponds to the <c> VCVTTPS2DQ / CVTTPS2DQ </c> +/// instruction. 
/// /// \param __a /// A 128-bit vector of [4 x float]. @@ -3709,7 +3715,8 @@ _mm_set1_epi8(char __b) /// /// \headerfile <x86intrin.h> /// -/// This intrinsic corresponds to the <c> VPUNPCKLQDQ / PUNPCKLQDQ </c> instruction. +/// This intrinsic corresponds to the <c> VPUNPCKLQDQ / PUNPCKLQDQ </c> +/// instruction. /// /// \param __q0 /// A 64-bit integral value used to initialize the lower 64 bits of the @@ -3885,7 +3892,8 @@ _mm_storeu_si128(__m128i *__p, __m128i __b) /// /// \headerfile <x86intrin.h> /// -/// This intrinsic corresponds to the <c> VMASKMOVDQU / MASKMOVDQU </c> instruction. +/// This intrinsic corresponds to the <c> VMASKMOVDQU / MASKMOVDQU </c> +/// instruction. /// /// \param __d /// A 128-bit integer vector containing the values to be moved. @@ -4134,14 +4142,14 @@ _mm_packus_epi16(__m128i __a, __m128i __b) /// A 128-bit integer vector. /// \param __imm /// An immediate value. Bits [3:0] selects values from \a __a to be assigned -/// to bits[15:0] of the result. -/// 000: assign values from bits [15:0] of \a __a. -/// 001: assign values from bits [31:16] of \a __a. -/// 010: assign values from bits [47:32] of \a __a. -/// 011: assign values from bits [63:48] of \a __a. -/// 100: assign values from bits [79:64] of \a __a. -/// 101: assign values from bits [95:80] of \a __a. -/// 110: assign values from bits [111:96] of \a __a. +/// to bits[15:0] of the result. \n +/// 000: assign values from bits [15:0] of \a __a. \n +/// 001: assign values from bits [31:16] of \a __a. \n +/// 010: assign values from bits [47:32] of \a __a. \n +/// 011: assign values from bits [63:48] of \a __a. \n +/// 100: assign values from bits [79:64] of \a __a. \n +/// 101: assign values from bits [95:80] of \a __a. \n +/// 110: assign values from bits [111:96] of \a __a. \n /// 111: assign values from bits [127:112] of \a __a. /// \returns An integer, whose lower 16 bits are selected from the 128-bit /// integer vector parameter and the remaining bits are assigned zeros. 
@@ -4215,16 +4223,16 @@ _mm_movemask_epi8(__m128i __a) /// \param imm /// An immediate value containing an 8-bit value specifying which elements to /// copy from a. The destinations within the 128-bit destination are assigned -/// values as follows: -/// Bits [1:0] are used to assign values to bits [31:0] of the result. -/// Bits [3:2] are used to assign values to bits [63:32] of the result. -/// Bits [5:4] are used to assign values to bits [95:64] of the result. -/// Bits [7:6] are used to assign values to bits [127:96] of the result. -/// Bit value assignments: -/// 00: assign values from bits [31:0] of a. -/// 01: assign values from bits [63:32] of a. -/// 10: assign values from bits [95:64] of a. -/// 11: assign values from bits [127:96] of a. +/// values as follows: \n +/// Bits [1:0] are used to assign values to bits [31:0] of the result. \n +/// Bits [3:2] are used to assign values to bits [63:32] of the result. \n +/// Bits [5:4] are used to assign values to bits [95:64] of the result. \n +/// Bits [7:6] are used to assign values to bits [127:96] of the result. \n +/// Bit value assignments: \n +/// 00: assign values from bits [31:0] of \a a. \n +/// 01: assign values from bits [63:32] of \a a. \n +/// 10: assign values from bits [95:64] of \a a. \n +/// 11: assign values from bits [127:96] of \a a. /// \returns A 128-bit integer vector containing the shuffled values. #define _mm_shuffle_epi32(a, imm) __extension__ ({ \ (__m128i)__builtin_shufflevector((__v4si)(__m128i)(a), \ @@ -4248,16 +4256,16 @@ _mm_movemask_epi8(__m128i __a) /// A 128-bit integer vector of [8 x i16]. Bits [127:64] are copied to bits /// [127:64] of the result. /// \param imm -/// An 8-bit immediate value specifying which elements to copy from a. -/// Bits[1:0] are used to assign values to bits [15:0] of the result. -/// Bits[3:2] are used to assign values to bits [31:16] of the result. -/// Bits[5:4] are used to assign values to bits [47:32] of the result. 
-/// Bits[7:6] are used to assign values to bits [63:48] of the result. -/// Bit value assignments: -/// 00: assign values from bits [15:0] of a. -/// 01: assign values from bits [31:16] of a. -/// 10: assign values from bits [47:32] of a. -/// 11: assign values from bits [63:48] of a. +/// An 8-bit immediate value specifying which elements to copy from \a a. \n +/// Bits[1:0] are used to assign values to bits [15:0] of the result. \n +/// Bits[3:2] are used to assign values to bits [31:16] of the result. \n +/// Bits[5:4] are used to assign values to bits [47:32] of the result. \n +/// Bits[7:6] are used to assign values to bits [63:48] of the result. \n +/// Bit value assignments: \n +/// 00: assign values from bits [15:0] of \a a. \n +/// 01: assign values from bits [31:16] of \a a. \n +/// 10: assign values from bits [47:32] of \a a. \n +/// 11: assign values from bits [63:48] of \a a. \n /// \returns A 128-bit integer vector containing the shuffled values. #define _mm_shufflelo_epi16(a, imm) __extension__ ({ \ (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \ @@ -4282,16 +4290,16 @@ _mm_movemask_epi8(__m128i __a) /// A 128-bit integer vector of [8 x i16]. Bits [63:0] are copied to bits /// [63:0] of the result. /// \param imm -/// An 8-bit immediate value specifying which elements to copy from a. -/// Bits[1:0] are used to assign values to bits [79:64] of the result. -/// Bits[3:2] are used to assign values to bits [95:80] of the result. -/// Bits[5:4] are used to assign values to bits [111:96] of the result. -/// Bits[7:6] are used to assign values to bits [127:112] of the result. -/// Bit value assignments: -/// 00: assign values from bits [79:64] of a. -/// 01: assign values from bits [95:80] of a. -/// 10: assign values from bits [111:96] of a. -/// 11: assign values from bits [127:112] of a. +/// An 8-bit immediate value specifying which elements to copy from \a a. \n +/// Bits[1:0] are used to assign values to bits [79:64] of the result. 
\n +/// Bits[3:2] are used to assign values to bits [95:80] of the result. \n +/// Bits[5:4] are used to assign values to bits [111:96] of the result. \n +/// Bits[7:6] are used to assign values to bits [127:112] of the result. \n +/// Bit value assignments: \n +/// 00: assign values from bits [79:64] of \a a. \n +/// 01: assign values from bits [95:80] of \a a. \n +/// 10: assign values from bits [111:96] of \a a. \n +/// 11: assign values from bits [127:112] of \a a. \n /// \returns A 128-bit integer vector containing the shuffled values. #define _mm_shufflehi_epi16(a, imm) __extension__ ({ \ (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \ @@ -4307,28 +4315,29 @@ _mm_movemask_epi8(__m128i __a) /// /// \headerfile <x86intrin.h> /// -/// This intrinsic corresponds to the <c> VPUNPCKHBW / PUNPCKHBW </c> instruction. +/// This intrinsic corresponds to the <c> VPUNPCKHBW / PUNPCKHBW </c> +/// instruction. /// /// \param __a /// A 128-bit vector of [16 x i8]. -/// Bits [71:64] are written to bits [7:0] of the result -/// Bits [79:72] are written to bits [23:16] of the result. -/// Bits [87:80] are written to bits [39:32] of the result. -/// Bits [95:88] are written to bits [55:48] of the result. -/// Bits [103:96] are written to bits [71:64] of the result. -/// Bits [111:104] are written to bits [87:80] of the result. -/// Bits [119:112] are written to bits [103:96] of the result. +/// Bits [71:64] are written to bits [7:0] of the result. \n +/// Bits [79:72] are written to bits [23:16] of the result. \n +/// Bits [87:80] are written to bits [39:32] of the result. \n +/// Bits [95:88] are written to bits [55:48] of the result. \n +/// Bits [103:96] are written to bits [71:64] of the result. \n +/// Bits [111:104] are written to bits [87:80] of the result. \n +/// Bits [119:112] are written to bits [103:96] of the result. \n /// Bits [127:120] are written to bits [119:112] of the result. /// \param __b -/// A 128-bit vector of [16 x i8]. 
-/// Bits [71:64] are written to bits [15:8] of the result. -/// Bits [79:72] are written to bits [31:24] of the result. -/// Bits [87:80] are written to bits [47:40] of the result. -/// Bits [95:88] are written to bits [63:56] of the result. -/// Bits [103:96] are written to bits [79:72] of the result. -/// Bits [111:104] are written to bits [95:88] of the result. -/// Bits [119:112] are written to bits [111:104] of the result. -/// Bits [127:120] are written to bits [127:120] of the destination. +/// A 128-bit vector of [16 x i8]. \n +/// Bits [71:64] are written to bits [15:8] of the result. \n +/// Bits [79:72] are written to bits [31:24] of the result. \n +/// Bits [87:80] are written to bits [47:40] of the result. \n +/// Bits [95:88] are written to bits [63:56] of the result. \n +/// Bits [103:96] are written to bits [79:72] of the result. \n +/// Bits [111:104] are written to bits [95:88] of the result. \n +/// Bits [119:112] are written to bits [111:104] of the result. \n +/// Bits [127:120] are written to bits [127:120] of the result. /// \returns A 128-bit vector of [16 x i8] containing the interleaved values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi8(__m128i __a, __m128i __b) @@ -4341,19 +4350,20 @@ _mm_unpackhi_epi8(__m128i __a, __m128i __b) /// /// \headerfile <x86intrin.h> /// -/// This intrinsic corresponds to the <c> VPUNPCKHWD / PUNPCKHWD </c> instruction. +/// This intrinsic corresponds to the <c> VPUNPCKHWD / PUNPCKHWD </c> +/// instruction. /// /// \param __a /// A 128-bit vector of [8 x i16]. -/// Bits [79:64] are written to bits [15:0] of the result. -/// Bits [95:80] are written to bits [47:32] of the result. -/// Bits [111:96] are written to bits [79:64] of the result. +/// Bits [79:64] are written to bits [15:0] of the result. \n +/// Bits [95:80] are written to bits [47:32] of the result. \n +/// Bits [111:96] are written to bits [79:64] of the result. \n /// Bits [127:112] are written to bits [111:96] of the result. 
/// \param __b /// A 128-bit vector of [8 x i16]. -/// Bits [79:64] are written to bits [31:16] of the result. -/// Bits [95:80] are written to bits [63:48] of the result. -/// Bits [111:96] are written to bits [95:80] of the result. +/// Bits [79:64] are written to bits [31:16] of the result. \n +/// Bits [95:80] are written to bits [63:48] of the result. \n +/// Bits [111:96] are written to bits [95:80] of the result. \n /// Bits [127:112] are written to bits [127:112] of the result. /// \returns A 128-bit vector of [8 x i16] containing the interleaved values. static __inline__ __m128i __DEFAULT_FN_ATTRS @@ -4367,15 +4377,16 @@ _mm_unpackhi_epi16(__m128i __a, __m128i __b) /// /// \headerfile <x86intrin.h> /// -/// This intrinsic corresponds to the <c> VPUNPCKHDQ / PUNPCKHDQ </c> instruction. +/// This intrinsic corresponds to the <c> VPUNPCKHDQ / PUNPCKHDQ </c> +/// instruction. /// /// \param __a -/// A 128-bit vector of [4 x i32]. -/// Bits [95:64] are written to bits [31:0] of the destination. +/// A 128-bit vector of [4 x i32]. \n +/// Bits [95:64] are written to bits [31:0] of the destination. \n /// Bits [127:96] are written to bits [95:64] of the destination. /// \param __b -/// A 128-bit vector of [4 x i32]. -/// Bits [95:64] are written to bits [64:32] of the destination. +/// A 128-bit vector of [4 x i32]. \n +/// Bits [95:64] are written to bits [63:32] of the destination. \n /// Bits [127:96] are written to bits [127:96] of the destination. /// \returns A 128-bit vector of [4 x i32] containing the interleaved values. static __inline__ __m128i __DEFAULT_FN_ATTRS @@ -4389,13 +4400,14 @@ _mm_unpackhi_epi32(__m128i __a, __m128i __b) /// /// \headerfile <x86intrin.h> /// -/// This intrinsic corresponds to the <c> VPUNPCKHQDQ / PUNPCKHQDQ </c> instruction. +/// This intrinsic corresponds to the <c> VPUNPCKHQDQ / PUNPCKHQDQ </c> +/// instruction. /// /// \param __a -/// A 128-bit vector of [2 x i64]. +/// A 128-bit vector of [2 x i64]. 
\n /// Bits [127:64] are written to bits [63:0] of the destination. /// \param __b -/// A 128-bit vector of [2 x i64]. +/// A 128-bit vector of [2 x i64]. \n /// Bits [127:64] are written to bits [127:64] of the destination. /// \returns A 128-bit vector of [2 x i64] containing the interleaved values. static __inline__ __m128i __DEFAULT_FN_ATTRS @@ -4409,27 +4421,28 @@ _mm_unpackhi_epi64(__m128i __a, __m128i __b) /// /// \headerfile <x86intrin.h> /// -/// This intrinsic corresponds to the <c> VPUNPCKLBW / PUNPCKLBW </c> instruction. +/// This intrinsic corresponds to the <c> VPUNPCKLBW / PUNPCKLBW </c> +/// instruction. /// /// \param __a -/// A 128-bit vector of [16 x i8]. -/// Bits [7:0] are written to bits [7:0] of the result. -/// Bits [15:8] are written to bits [23:16] of the result. -/// Bits [23:16] are written to bits [39:32] of the result. -/// Bits [31:24] are written to bits [55:48] of the result. -/// Bits [39:32] are written to bits [71:64] of the result. -/// Bits [47:40] are written to bits [87:80] of the result. -/// Bits [55:48] are written to bits [103:96] of the result. -/// Bits [63:56] are written to bits [119:112] of the destination. +/// A 128-bit vector of [16 x i8]. \n +/// Bits [7:0] are written to bits [7:0] of the result. \n +/// Bits [15:8] are written to bits [23:16] of the result. \n +/// Bits [23:16] are written to bits [39:32] of the result. \n +/// Bits [31:24] are written to bits [55:48] of the result. \n +/// Bits [39:32] are written to bits [71:64] of the result. \n +/// Bits [47:40] are written to bits [87:80] of the result. \n +/// Bits [55:48] are written to bits [103:96] of the result. \n +/// Bits [63:56] are written to bits [119:112] of the result. /// \param __b /// A 128-bit vector of [16 x i8]. -/// Bits [7:0] are written to bits [15:8] of the result. -/// Bits [15:8] are written to bits [31:24] of the result. -/// Bits [23:16] are written to bits [47:40] of the result. 
-/// Bits [31:24] are written to bits [63:56] of the result. -/// Bits [39:32] are written to bits [79:72] of the result. -/// Bits [47:40] are written to bits [95:88] of the result. -/// Bits [55:48] are written to bits [111:104] of the result. +/// Bits [7:0] are written to bits [15:8] of the result. \n +/// Bits [15:8] are written to bits [31:24] of the result. \n +/// Bits [23:16] are written to bits [47:40] of the result. \n +/// Bits [31:24] are written to bits [63:56] of the result. \n +/// Bits [39:32] are written to bits [79:72] of the result. \n +/// Bits [47:40] are written to bits [95:88] of the result. \n +/// Bits [55:48] are written to bits [111:104] of the result. \n /// Bits [63:56] are written to bits [127:120] of the result. /// \returns A 128-bit vector of [16 x i8] containing the interleaved values. static __inline__ __m128i __DEFAULT_FN_ATTRS @@ -4444,19 +4457,20 @@ _mm_unpacklo_epi8(__m128i __a, __m128i __b) /// /// \headerfile <x86intrin.h> /// -/// This intrinsic corresponds to the <c> VPUNPCKLWD / PUNPCKLWD </c> instruction. +/// This intrinsic corresponds to the <c> VPUNPCKLWD / PUNPCKLWD </c> +/// instruction. /// /// \param __a /// A 128-bit vector of [8 x i16]. -/// Bits [15:0] are written to bits [15:0] of the result. -/// Bits [31:16] are written to bits [47:32] of the result. -/// Bits [47:32] are written to bits [79:64] of the result. +/// Bits [15:0] are written to bits [15:0] of the result. \n +/// Bits [31:16] are written to bits [47:32] of the result. \n +/// Bits [47:32] are written to bits [79:64] of the result. \n /// Bits [63:48] are written to bits [111:96] of the result. /// \param __b /// A 128-bit vector of [8 x i16]. -/// Bits [15:0] are written to bits [31:16] of the result. -/// Bits [31:16] are written to bits [63:48] of the result. -/// Bits [47:32] are written to bits [95:80] of the result. +/// Bits [15:0] are written to bits [31:16] of the result. \n +/// Bits [31:16] are written to bits [63:48] of the result. 
\n +/// Bits [47:32] are written to bits [95:80] of the result. \n /// Bits [63:48] are written to bits [127:112] of the result. /// \returns A 128-bit vector of [8 x i16] containing the interleaved values. static __inline__ __m128i __DEFAULT_FN_ATTRS @@ -4470,15 +4484,16 @@ _mm_unpacklo_epi16(__m128i __a, __m128i __b) /// /// \headerfile <x86intrin.h> /// -/// This intrinsic corresponds to the <c> VPUNPCKLDQ / PUNPCKLDQ </c> instruction. +/// This intrinsic corresponds to the <c> VPUNPCKLDQ / PUNPCKLDQ </c> +/// instruction. /// /// \param __a -/// A 128-bit vector of [4 x i32]. -/// Bits [31:0] are written to bits [31:0] of the destination. +/// A 128-bit vector of [4 x i32]. \n +/// Bits [31:0] are written to bits [31:0] of the destination. \n /// Bits [63:32] are written to bits [95:64] of the destination. /// \param __b -/// A 128-bit vector of [4 x i32]. -/// Bits [31:0] are written to bits [64:32] of the destination. +/// A 128-bit vector of [4 x i32]. \n +/// Bits [31:0] are written to bits [63:32] of the destination. \n /// Bits [63:32] are written to bits [127:96] of the destination. /// \returns A 128-bit vector of [4 x i32] containing the interleaved values. static __inline__ __m128i __DEFAULT_FN_ATTRS @@ -4492,14 +4507,15 @@ _mm_unpacklo_epi32(__m128i __a, __m128i __b) /// /// \headerfile <x86intrin.h> /// -/// This intrinsic corresponds to the <c> VPUNPCKLQDQ / PUNPCKLQDQ </c> instruction. +/// This intrinsic corresponds to the <c> VPUNPCKLQDQ / PUNPCKLQDQ </c> +/// instruction. /// /// \param __a -/// A 128-bit vector of [2 x i64]. -/// Bits [63:0] are written to bits [63:0] of the destination. +/// A 128-bit vector of [2 x i64]. \n +/// Bits [63:0] are written to bits [63:0] of the destination. \n /// \param __b -/// A 128-bit vector of [2 x i64]. -/// Bits [63:0] are written to bits [127:64] of the destination. +/// A 128-bit vector of [2 x i64]. \n +/// Bits [63:0] are written to bits [127:64] of the destination. 
\n /// \returns A 128-bit vector of [2 x i64] containing the interleaved values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi64(__m128i __a, __m128i __b) @@ -4568,10 +4584,10 @@ _mm_move_epi64(__m128i __a) /// This intrinsic corresponds to the <c> VUNPCKHPD / UNPCKHPD </c> instruction. /// /// \param __a -/// A 128-bit vector of [2 x double]. +/// A 128-bit vector of [2 x double]. \n /// Bits [127:64] are written to bits [63:0] of the destination. /// \param __b -/// A 128-bit vector of [2 x double]. +/// A 128-bit vector of [2 x double]. \n /// Bits [127:64] are written to bits [127:64] of the destination. /// \returns A 128-bit vector of [2 x double] containing the interleaved values. static __inline__ __m128d __DEFAULT_FN_ATTRS @@ -4589,10 +4605,10 @@ _mm_unpackhi_pd(__m128d __a, __m128d __b) /// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction. /// /// \param __a -/// A 128-bit vector of [2 x double]. +/// A 128-bit vector of [2 x double]. \n /// Bits [63:0] are written to bits [63:0] of the destination. /// \param __b -/// A 128-bit vector of [2 x double]. +/// A 128-bit vector of [2 x double]. \n /// Bits [63:0] are written to bits [127:64] of the destination. /// \returns A 128-bit vector of [2 x double] containing the interleaved values. static __inline__ __m128d __DEFAULT_FN_ATTRS @@ -4639,11 +4655,11 @@ _mm_movemask_pd(__m128d __a) /// A 128-bit vector of [2 x double]. /// \param i /// An 8-bit immediate value. The least significant two bits specify which -/// elements to copy from a and b: -/// Bit[0] = 0: lower element of a copied to lower element of result. -/// Bit[0] = 1: upper element of a copied to lower element of result. -/// Bit[1] = 0: lower element of \a b copied to upper element of result. -/// Bit[1] = 1: upper element of \a b copied to upper element of result. +/// elements to copy from a and b: \n +/// Bit[0] = 0: lower element of a copied to lower element of result. 
\n +/// Bit[0] = 1: upper element of a copied to lower element of result. \n +/// Bit[1] = 0: lower element of \a b copied to upper element of result. \n +/// Bit[1] = 1: upper element of \a b copied to upper element of result. \n /// \returns A 128-bit vector of [2 x double] containing the shuffled values. #define _mm_shuffle_pd(a, b, i) __extension__ ({ \ (__m128d)__builtin_shufflevector((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \ diff --git a/lib/Headers/f16cintrin.h b/lib/Headers/f16cintrin.h index fbfc1860f9f98148408766c3e45968e8b88bfe5b..180712ffc680c92b974a4619ecfc7679bace074a 100644 --- a/lib/Headers/f16cintrin.h +++ b/lib/Headers/f16cintrin.h @@ -65,11 +65,11 @@ _cvtsh_ss(unsigned short __a) /// A 32-bit single-precision float value to be converted to a 16-bit /// half-precision float value. /// \param imm -/// An immediate value controlling rounding using bits [2:0]: -/// 000: Nearest -/// 001: Down -/// 010: Up -/// 011: Truncate +/// An immediate value controlling rounding using bits [2:0]: \n +/// 000: Nearest \n +/// 001: Down \n +/// 010: Up \n +/// 011: Truncate \n /// 1XX: Use MXCSR.RC for rounding /// \returns The converted 16-bit half-precision float value. #define _cvtss_sh(a, imm) \ @@ -90,11 +90,11 @@ _cvtsh_ss(unsigned short __a) /// \param a /// A 128-bit vector containing 32-bit float values. /// \param imm -/// An immediate value controlling rounding using bits [2:0]: -/// 000: Nearest -/// 001: Down -/// 010: Up -/// 011: Truncate +/// An immediate value controlling rounding using bits [2:0]: \n +/// 000: Nearest \n +/// 001: Down \n +/// 010: Up \n +/// 011: Truncate \n /// 1XX: Use MXCSR.RC for rounding /// \returns A 128-bit vector containing converted 16-bit half-precision float /// values. 
The lower 64 bits are used to store the converted 16-bit diff --git a/lib/Headers/immintrin.h b/lib/Headers/immintrin.h index 0bdc921c788a8bad424bc24b0989d011708acf78..7f91d49fbcec0126cf9dd3a7862a62be33fd7977 100644 --- a/lib/Headers/immintrin.h +++ b/lib/Headers/immintrin.h @@ -84,11 +84,11 @@ /// A 256-bit vector containing 32-bit single-precision float values to be /// converted to 16-bit half-precision float values. /// \param imm -/// An immediate value controlling rounding using bits [2:0]: -/// 000: Nearest -/// 001: Down -/// 010: Up -/// 011: Truncate +/// An immediate value controlling rounding using bits [2:0]: \n +/// 000: Nearest \n +/// 001: Down \n +/// 010: Up \n +/// 011: Truncate \n /// 1XX: Use MXCSR.RC for rounding /// \returns A 128-bit vector containing the converted 16-bit half-precision /// float values. diff --git a/lib/Headers/mmintrin.h b/lib/Headers/mmintrin.h index 55db470de391c471d972a6fb084a6b960cbdf8d5..e0c277a65a33cfa923d73280a00dc07e4d624a67 100644 --- a/lib/Headers/mmintrin.h +++ b/lib/Headers/mmintrin.h @@ -211,16 +211,16 @@ _mm_packs_pu16(__m64 __m1, __m64 __m2) /// This intrinsic corresponds to the <c> PUNPCKHBW </c> instruction. /// /// \param __m1 -/// A 64-bit integer vector of [8 x i8]. -/// Bits [39:32] are written to bits [7:0] of the result. -/// Bits [47:40] are written to bits [23:16] of the result. -/// Bits [55:48] are written to bits [39:32] of the result. +/// A 64-bit integer vector of [8 x i8]. \n +/// Bits [39:32] are written to bits [7:0] of the result. \n +/// Bits [47:40] are written to bits [23:16] of the result. \n +/// Bits [55:48] are written to bits [39:32] of the result. \n /// Bits [63:56] are written to bits [55:48] of the result. /// \param __m2 /// A 64-bit integer vector of [8 x i8]. -/// Bits [39:32] are written to bits [15:8] of the result. -/// Bits [47:40] are written to bits [31:24] of the result. -/// Bits [55:48] are written to bits [47:40] of the result. 
+/// Bits [39:32] are written to bits [15:8] of the result. \n +/// Bits [47:40] are written to bits [31:24] of the result. \n +/// Bits [55:48] are written to bits [47:40] of the result. \n /// Bits [63:56] are written to bits [63:56] of the result. /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved /// values. @@ -239,11 +239,11 @@ _mm_unpackhi_pi8(__m64 __m1, __m64 __m2) /// /// \param __m1 /// A 64-bit integer vector of [4 x i16]. -/// Bits [47:32] are written to bits [15:0] of the result. +/// Bits [47:32] are written to bits [15:0] of the result. \n /// Bits [63:48] are written to bits [47:32] of the result. /// \param __m2 /// A 64-bit integer vector of [4 x i16]. -/// Bits [47:32] are written to bits [31:16] of the result. +/// Bits [47:32] are written to bits [31:16] of the result. \n /// Bits [63:48] are written to bits [63:48] of the result. /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved /// values. @@ -283,15 +283,15 @@ _mm_unpackhi_pi32(__m64 __m1, __m64 __m2) /// /// \param __m1 /// A 64-bit integer vector of [8 x i8]. -/// Bits [7:0] are written to bits [7:0] of the result. -/// Bits [15:8] are written to bits [23:16] of the result. -/// Bits [23:16] are written to bits [39:32] of the result. +/// Bits [7:0] are written to bits [7:0] of the result. \n +/// Bits [15:8] are written to bits [23:16] of the result. \n +/// Bits [23:16] are written to bits [39:32] of the result. \n /// Bits [31:24] are written to bits [55:48] of the result. /// \param __m2 /// A 64-bit integer vector of [8 x i8]. -/// Bits [7:0] are written to bits [15:8] of the result. -/// Bits [15:8] are written to bits [31:24] of the result. -/// Bits [23:16] are written to bits [47:40] of the result. +/// Bits [7:0] are written to bits [15:8] of the result. \n +/// Bits [15:8] are written to bits [31:24] of the result. \n +/// Bits [23:16] are written to bits [47:40] of the result. 
\n /// Bits [31:24] are written to bits [63:56] of the result. /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved /// values. @@ -310,11 +310,11 @@ _mm_unpacklo_pi8(__m64 __m1, __m64 __m2) /// /// \param __m1 /// A 64-bit integer vector of [4 x i16]. -/// Bits [15:0] are written to bits [15:0] of the result. +/// Bits [15:0] are written to bits [15:0] of the result. \n /// Bits [31:16] are written to bits [47:32] of the result. /// \param __m2 /// A 64-bit integer vector of [4 x i16]. -/// Bits [15:0] are written to bits [31:16] of the result. +/// Bits [15:0] are written to bits [31:16] of the result. \n /// Bits [31:16] are written to bits [63:48] of the result. /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved /// values. diff --git a/lib/Headers/pmmintrin.h b/lib/Headers/pmmintrin.h index 5f201fb53bd10e7a6a9d065079c598d4eb495aec..d4f6487af179cc1fd96a4e32ff8b4526c4b0f3ad 100644 --- a/lib/Headers/pmmintrin.h +++ b/lib/Headers/pmmintrin.h @@ -115,18 +115,18 @@ _mm_hsub_ps(__m128 __a, __m128 __b) /// \brief Moves and duplicates high-order (odd-indexed) values from a 128-bit /// vector of [4 x float] to float values stored in a 128-bit vector of -/// [4 x float]. -/// Bits [127:96] of the source are written to bits [127:96] and [95:64] of -/// the destination. -/// Bits [63:32] of the source are written to bits [63:32] and [31:0] of the -/// destination. +/// [4 x float]. /// /// \headerfile <x86intrin.h> /// /// This intrinsic corresponds to the <c> VMOVSHDUP </c> instruction. /// /// \param __a -/// A 128-bit vector of [4 x float]. +/// A 128-bit vector of [4 x float]. \n +/// Bits [127:96] of the source are written to bits [127:96] and [95:64] of +/// the destination. \n +/// Bits [63:32] of the source are written to bits [63:32] and [31:0] of the +/// destination. /// \returns A 128-bit vector of [4 x float] containing the moved and duplicated /// values. 
static __inline__ __m128 __DEFAULT_FN_ATTRS @@ -135,20 +135,19 @@ _mm_movehdup_ps(__m128 __a) return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 1, 1, 3, 3); } -/// \brief Duplicates low-order (even-indexed) values from a 128-bit -/// vector of [4 x float] to float values stored in a 128-bit vector of -/// [4 x float]. -/// Bits [95:64] of the source are written to bits [127:96] and [95:64] of -/// the destination. -/// Bits [31:0] of the source are written to bits [63:32] and [31:0] of the -/// destination. +/// \brief Duplicates low-order (even-indexed) values from a 128-bit vector of +/// [4 x float] to float values stored in a 128-bit vector of [4 x float]. /// /// \headerfile <x86intrin.h> /// /// This intrinsic corresponds to the <c> VMOVSLDUP </c> instruction. /// /// \param __a -/// A 128-bit vector of [4 x float]. +/// A 128-bit vector of [4 x float]. \n +/// Bits [95:64] of the source are written to bits [127:96] and [95:64] of +/// the destination. \n +/// Bits [31:0] of the source are written to bits [63:32] and [31:0] of the +/// destination. /// \returns A 128-bit vector of [4 x float] containing the moved and duplicated /// values. static __inline__ __m128 __DEFAULT_FN_ATTRS diff --git a/lib/Headers/tmmintrin.h b/lib/Headers/tmmintrin.h index 6a8e5c1bb4ec10b1576b3825202eac687a7934d4..80664043a06fb841dd3802b73327ff620e2385d3 100644 --- a/lib/Headers/tmmintrin.h +++ b/lib/Headers/tmmintrin.h @@ -580,11 +580,11 @@ _mm_mulhrs_pi16(__m64 __a, __m64 __b) /// \param __b /// A 128-bit integer vector containing control bytes corresponding to /// positions in the destination: -/// Bit 7: -/// 1: Clear the corresponding byte in the destination. +/// Bit 7: \n +/// 1: Clear the corresponding byte in the destination. \n /// 0: Copy the selected source byte to the corresponding byte in the -/// destination. -/// Bits [6:4] Reserved. +/// destination. \n +/// Bits [6:4] Reserved. \n /// Bits [3:0] select the source byte to be copied.
/// \returns A 128-bit integer vector containing the copied or cleared values. static __inline__ __m128i __DEFAULT_FN_ATTRS @@ -606,10 +606,10 @@ _mm_shuffle_epi8(__m128i __a, __m128i __b) /// \param __b /// A 64-bit integer vector containing control bytes corresponding to /// positions in the destination: -/// Bit 7: -/// 1: Clear the corresponding byte in the destination. +/// Bit 7: \n +/// 1: Clear the corresponding byte in the destination. \n /// 0: Copy the selected source byte to the corresponding byte in the -/// destination. +/// destination. \n /// Bits [3:0] select the source byte to be copied. /// \returns A 64-bit integer vector containing the copied or cleared values. static __inline__ __m64 __DEFAULT_FN_ATTRS