wasmer/lib/emscripten/emtests/test_simd3.c
2019-01-10 21:38:10 -08:00

497 lines
14 KiB
C

/*
* Copyright 2016 The Emscripten Authors. All rights reserved.
* Emscripten is available under two separate licenses, the MIT license and the
* University of Illinois/NCSA Open Source License. Both these licenses can be
* found in the LICENSE file.
*/
#include <assert.h>
#include <emmintrin.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <bitset>
#include <iostream>
using namespace std;
// _mm_set_ps: the arguments are given from the last stored element down to
// the first, so (1, 2, 3, 4) is expected to be stored as 4, 3, 2, 1.
void testSetPs() {
  float out[4] __attribute__((aligned(16)));
  _mm_store_ps(out, _mm_set_ps(1.0, 2.0, 3.0, 4.0));
  for (int lane = 0; lane < 4; ++lane) {
    assert(out[lane] == 4.0 - lane);
  }
}
// _mm_set1_ps: every stored element must equal the single value passed in.
void testSet1Ps() {
  float out[4] __attribute__((aligned(16)));
  _mm_store_ps(out, _mm_set1_ps(5.5));
  for (int lane = 0; lane < 4; ++lane) {
    assert(out[lane] == 5.5);
  }
}
// _mm_setzero_ps: every stored element must compare equal to zero.
void testSetZeroPs() {
  float out[4] __attribute__((aligned(16)));
  _mm_store_ps(out, _mm_setzero_ps());
  for (int lane = 0; lane < 4; ++lane) {
    assert(out[lane] == 0);
  }
}
// _mm_set_epi32: arguments are expected to be stored in reverse order.
// Checked once with small values and once with full-width bit patterns.
void testSetEpi32() {
  int32_t out[4] __attribute__((aligned(16)));
  __m128i *const dst = reinterpret_cast<__m128i *>(out);

  // Small positive values.
  _mm_store_si128(dst, _mm_set_epi32(5, 7, 126, 381));
  assert(out[0] == 381);
  assert(out[1] == 126);
  assert(out[2] == 7);
  assert(out[3] == 5);

  // Patterns that exercise the high bit of each element.
  _mm_store_si128(
      dst, _mm_set_epi32(0x55555555, 0xaaaaaaaa, 0xffffffff, 0x12345678));
  assert(out[0] == 0x12345678);
  assert(out[1] == 0xffffffff);
  assert(out[2] == 0xaaaaaaaa);
  assert(out[3] == 0x55555555);
}
// _mm_set1_epi32: every stored element must equal the single value passed in
// (a negative value is used here).
void testSet1Epi32() {
  int32_t out[4] __attribute__((aligned(16)));
  _mm_store_si128(reinterpret_cast<__m128i *>(out), _mm_set1_epi32(-5));
  for (int lane = 0; lane < 4; ++lane) {
    assert(out[lane] == -5);
  }
}
// _mm_setzero_si128: every stored 32-bit element must be zero.
void testSetZeroSi128() {
  int32_t out[4] __attribute__((aligned(16)));
  _mm_store_si128(reinterpret_cast<__m128i *>(out), _mm_setzero_si128());
  for (int lane = 0; lane < 4; ++lane) {
    assert(out[lane] == 0);
  }
}
// Checks that _mm_castsi128_ps and _mm_castps_si128 reinterpret the vector
// without altering its contents, in both directions and across a round trip.
void testBitCasts() {
  int32_t ar1[4] __attribute__((aligned(16)));
  float ar2[4] __attribute__((aligned(16)));

  // Integer patterns viewed as floats: these are the encodings of 1.0f..4.0f.
  __m128i v1 = _mm_set_epi32(0x3f800000, 0x40000000, 0x40400000, 0x40800000);
  __m128 v2 = _mm_castsi128_ps(v1);
  _mm_store_ps(ar2, v2);
  assert(ar2[0] == 4.0);
  assert(ar2[1] == 3.0);
  assert(ar2[2] == 2.0);
  assert(ar2[3] == 1.0);

  // Floats viewed as integers: 5.0f..8.0f must yield their encodings.
  v2 = _mm_set_ps(5.0, 6.0, 7.0, 8.0);
  v1 = _mm_castps_si128(v2);
  _mm_store_si128((__m128i *)ar1, v1);
  assert(ar1[0] == 0x41000000);
  assert(ar1[1] == 0x40e00000);
  assert(ar1[2] == 0x40c00000);
  assert(ar1[3] == 0x40a00000);

  // Round trip float -> integer view -> float.
  float w = 0;
  float z = -278.3;
  float y = 5.2;
  float x = -987654321;
  v1 = _mm_castps_si128(_mm_set_ps(w, z, y, x));
  _mm_store_ps(ar2, _mm_castsi128_ps(v1));
  assert(ar2[0] == x);
  assert(ar2[1] == y);
  assert(ar2[2] == z);
  assert(ar2[3] == w);

  // Value equality alone cannot distinguish +0.0f from -0.0f, so also require
  // the stored bytes to match the source floats exactly. memcmp is used
  // instead of reading the floats through an integer pointer, which would
  // violate aliasing rules.
  const float expected[4] = {x, y, z, w};
  assert(memcmp(ar2, expected, sizeof(expected)) == 0);

  // Arbitrary bit patterns pushed through the float view and back must be
  // unchanged.
  v2 = _mm_castsi128_ps(_mm_set_epi32(0xffffffff, 0, 0x5555cccc, 0xaaaaaaaa));
  _mm_store_si128((__m128i *)ar1, _mm_castps_si128(v2));
  assert(ar1[0] == 0xaaaaaaaa);
  assert(ar1[1] == 0x5555cccc);
  assert(ar1[2] == 0);
  assert(ar1[3] == 0xffffffff);
}
// Numeric (value-changing) conversions between 32-bit integers and floats.
void testConversions() {
  int32_t asInts[4] __attribute__((aligned(16)));
  float asFloats[4] __attribute__((aligned(16)));

  // Integer -> float via _mm_cvtepi32_ps.
  const __m128i intInput = _mm_set_epi32(0, -3, -517, 256);
  _mm_store_ps(asFloats, _mm_cvtepi32_ps(intInput));
  assert(asFloats[0] == 256.0);
  assert(asFloats[1] == -517.0);
  assert(asFloats[2] == -3.0);
  assert(asFloats[3] == 0);

  // Float -> integer via _mm_cvtps_epi32; 7.45 is expected to become 7.
  const __m128 floatInput = _mm_set_ps(5.0, 6.0, 7.45, -8.0);
  _mm_store_si128(reinterpret_cast<__m128i *>(asInts),
                  _mm_cvtps_epi32(floatInput));
  assert(asInts[0] == -8);
  assert(asInts[1] == 7);
  assert(asInts[2] == 6);
  assert(asInts[3] == 5);
}
// _mm_movemask_ps: elements 0, 2 and 3 are all-ones and element 1 is zero,
// so the expected mask is binary 1101 (13).
void testMoveMaskPs() {
  const __m128i pattern = _mm_set_epi32(0xffffffff, 0xffffffff, 0, 0xffffffff);
  const int signBits = _mm_movemask_ps(_mm_castsi128_ps(pattern));
  assert(signBits == 13);
}
// _mm_add_ps: element-wise float addition.
void testAddPs() {
  float sum[4] __attribute__((aligned(16)));
  const __m128 lhs = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
  const __m128 rhs = _mm_set_ps(10.0, 20.0, 30.0, 40.0);
  _mm_store_ps(sum, _mm_add_ps(lhs, rhs));
  assert(sum[0] == 41.0);
  assert(sum[1] == 32.0);
  assert(sum[2] == 23.0);
  assert(sum[3] == 14.0);
}
// _mm_sub_ps: element-wise float subtraction (first operand minus second).
void testSubPs() {
  float diff[4] __attribute__((aligned(16)));
  const __m128 lhs = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
  const __m128 rhs = _mm_set_ps(10.0, 20.0, 30.0, 40.0);
  _mm_store_ps(diff, _mm_sub_ps(lhs, rhs));
  assert(diff[0] == -39.0);
  assert(diff[1] == -28.0);
  assert(diff[2] == -17.0);
  assert(diff[3] == -6.0);
}
// _mm_mul_ps: element-wise float multiplication.
void testMulPs() {
  float product[4] __attribute__((aligned(16)));
  const __m128 lhs = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
  const __m128 rhs = _mm_set_ps(10.0, 20.0, 30.0, 40.0);
  _mm_store_ps(product, _mm_mul_ps(lhs, rhs));
  assert(product[0] == 40.0);
  assert(product[1] == 60.0);
  assert(product[2] == 60.0);
  assert(product[3] == 40.0);
}
// _mm_div_ps: element-wise float division (first operand over second).
void testDivPs() {
  float quotient[4] __attribute__((aligned(16)));
  const __m128 numer = _mm_set_ps(4.0, 9.0, 8.0, 1.0);
  const __m128 denom = _mm_set_ps(2.0, 3.0, 1.0, 0.5);
  _mm_store_ps(quotient, _mm_div_ps(numer, denom));
  assert(quotient[0] == 2.0);
  assert(quotient[1] == 8.0);
  assert(quotient[2] == 3.0);
  assert(quotient[3] == 2.0);
}
// _mm_min_ps: element-wise minimum of two float vectors.
void testMinPs() {
  float smallest[4] __attribute__((aligned(16)));
  const __m128 lhs = _mm_set_ps(-20.0, 10.0, 30.0, 0.5);
  const __m128 rhs = _mm_set_ps(2.0, 1.0, 50.0, 0.0);
  _mm_store_ps(smallest, _mm_min_ps(lhs, rhs));
  assert(smallest[0] == 0.0);
  assert(smallest[1] == 30.0);
  assert(smallest[2] == 1.0);
  assert(smallest[3] == -20.0);
}
// _mm_max_ps: element-wise maximum of two float vectors.
void testMaxPs() {
  float largest[4] __attribute__((aligned(16)));
  const __m128 lhs = _mm_set_ps(-20.0, 10.0, 30.0, 0.5);
  const __m128 rhs = _mm_set_ps(2.5, 5.0, 55.0, 1.0);
  _mm_store_ps(largest, _mm_max_ps(lhs, rhs));
  assert(largest[0] == 1.0);
  assert(largest[1] == 55.0);
  assert(largest[2] == 10.0);
  assert(largest[3] == 2.5);
}
// _mm_sqrt_ps: element-wise square root. The inputs are perfect squares, so
// the results are compared exactly.
void testSqrtPs() {
  float roots[4] __attribute__((aligned(16)));
  const __m128 squares = _mm_set_ps(16.0, 9.0, 4.0, 1.0);
  _mm_store_ps(roots, _mm_sqrt_ps(squares));
  for (int lane = 0; lane < 4; ++lane) {
    assert(roots[lane] == lane + 1.0);
  }
}
// _mm_cmplt_ps: a true comparison yields an all-ones element, a false one
// yields zero. The result is checked per element and through the move mask.
void testCmpLtPs() {
  int32_t flags[4] __attribute__((aligned(16)));
  const __m128 lhs = _mm_set_ps(1.0, 2.0, 0.1, 0.001);
  const __m128 rhs = _mm_set_ps(2.0, 2.0, 0.001, 0.1);
  const __m128 isLess = _mm_cmplt_ps(lhs, rhs);
  _mm_store_si128(reinterpret_cast<__m128i *>(flags),
                  _mm_castps_si128(isLess));
  assert(flags[0] == 0xffffffff);
  assert(flags[1] == 0);
  assert(flags[2] == 0);
  assert(flags[3] == 0xffffffff);
  assert(_mm_movemask_ps(isLess) == 9);
}
// _mm_cmple_ps: like the less-than test, but the element where both inputs
// are 2.0 must now also report true.
void testCmpLePs() {
  int32_t flags[4] __attribute__((aligned(16)));
  const __m128 lhs = _mm_set_ps(1.0, 2.0, 0.1, 0.001);
  const __m128 rhs = _mm_set_ps(2.0, 2.0, 0.001, 0.1);
  const __m128 isLessEq = _mm_cmple_ps(lhs, rhs);
  _mm_store_si128(reinterpret_cast<__m128i *>(flags),
                  _mm_castps_si128(isLessEq));
  assert(flags[0] == 0xffffffff);
  assert(flags[1] == 0);
  assert(flags[2] == 0xffffffff);
  assert(flags[3] == 0xffffffff);
  assert(_mm_movemask_ps(isLessEq) == 13);
}
// _mm_cmpeq_ps: only the element where both inputs are 2.0 may report true.
void testCmpEqPs() {
  int32_t flags[4] __attribute__((aligned(16)));
  const __m128 lhs = _mm_set_ps(1.0, 2.0, 0.1, 0.001);
  const __m128 rhs = _mm_set_ps(2.0, 2.0, 0.001, 0.1);
  const __m128 isEqual = _mm_cmpeq_ps(lhs, rhs);
  _mm_store_si128(reinterpret_cast<__m128i *>(flags),
                  _mm_castps_si128(isEqual));
  assert(flags[0] == 0);
  assert(flags[1] == 0);
  assert(flags[2] == 0xffffffff);
  assert(flags[3] == 0);
  assert(_mm_movemask_ps(isEqual) == 4);
}
// _mm_cmpge_ps: true where the first operand is greater than or equal to the
// second; checked per element and through the move mask.
void testCmpGePs() {
  int32_t flags[4] __attribute__((aligned(16)));
  const __m128 lhs = _mm_set_ps(1.0, 2.0, 0.1, 0.001);
  const __m128 rhs = _mm_set_ps(2.0, 2.0, 0.001, 0.1);
  const __m128 isGreaterEq = _mm_cmpge_ps(lhs, rhs);
  _mm_store_si128(reinterpret_cast<__m128i *>(flags),
                  _mm_castps_si128(isGreaterEq));
  assert(flags[0] == 0);
  assert(flags[1] == 0xffffffff);
  assert(flags[2] == 0xffffffff);
  assert(flags[3] == 0);
  assert(_mm_movemask_ps(isGreaterEq) == 6);
}
// _mm_cmpgt_ps: true only where the first operand is strictly greater.
void testCmpGtPs() {
  int32_t flags[4] __attribute__((aligned(16)));
  const __m128 lhs = _mm_set_ps(1.0, 2.0, 0.1, 0.001);
  const __m128 rhs = _mm_set_ps(2.0, 2.0, 0.001, 0.1);
  const __m128 isGreater = _mm_cmpgt_ps(lhs, rhs);
  _mm_store_si128(reinterpret_cast<__m128i *>(flags),
                  _mm_castps_si128(isGreater));
  assert(flags[0] == 0);
  assert(flags[1] == 0xffffffff);
  assert(flags[2] == 0);
  assert(flags[3] == 0);
  assert(_mm_movemask_ps(isGreater) == 2);
}
// _mm_and_ps: bitwise AND on float vectors, checked two ways.
void testAndPs() {
  float kept[4] __attribute__((aligned(16)));
  int32_t bits[4] __attribute__((aligned(16)));

  // An all-ones element keeps the float; an all-zero element clears it.
  __m128 lhs = _mm_set_ps(425, -501, -32, 68);
  __m128 rhs =
      _mm_castsi128_ps(_mm_set_epi32(0xffffffff, 0xffffffff, 0, 0xffffffff));
  _mm_store_ps(kept, _mm_and_ps(lhs, rhs));
  assert(kept[0] == 68);
  assert(kept[1] == 0);
  assert(kept[2] == -501);
  assert(kept[3] == 425);

  // Complementary bit patterns AND to zero. One element is written as the
  // signed decimal -1431655766 rather than as 0xaaaaaaaa.
  lhs = _mm_castsi128_ps(
      _mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, -1431655766, 0xaaaaaaaa));
  rhs = _mm_castsi128_ps(
      _mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555));
  const __m128 cleared = _mm_and_ps(lhs, rhs);
  _mm_store_si128(reinterpret_cast<__m128i *>(bits),
                  _mm_castps_si128(cleared));
  for (int lane = 0; lane < 4; ++lane) {
    assert(bits[lane] == 0);
  }
}
// _mm_andnot_ps: computes (NOT first) AND second on float vectors.
void testAndNotPs() {
  float kept[4] __attribute__((aligned(16)));
  int32_t bits[4] __attribute__((aligned(16)));

  // With the mask as the first operand, only the element whose mask is zero
  // survives.
  __m128 values = _mm_set_ps(425, -501, -32, 68);
  __m128 mask =
      _mm_castsi128_ps(_mm_set_epi32(0xffffffff, 0xffffffff, 0, 0xffffffff));
  _mm_store_ps(kept, _mm_andnot_ps(mask, values));
  assert(kept[0] == 0);
  assert(kept[1] == -32);
  assert(kept[2] == 0);
  assert(kept[3] == 0);

  // NOT 0xaaaaaaaa is 0x55555555, so the second operand passes through
  // unchanged.
  const __m128 inverted = _mm_castsi128_ps(
      _mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, -1431655766, 0xaaaaaaaa));
  const __m128 passed = _mm_castsi128_ps(
      _mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555));
  const __m128 result = _mm_andnot_ps(inverted, passed);
  _mm_store_si128(reinterpret_cast<__m128i *>(bits), _mm_castps_si128(result));
  for (int lane = 0; lane < 4; ++lane) {
    assert(bits[lane] == 0x55555555);
  }
}
// _mm_or_ps: bitwise OR on float vectors, verified on the raw bits.
void testOrPs() {
  int32_t bits[4] __attribute__((aligned(16)));
  const __m128 lhs =
      _mm_castsi128_ps(_mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, 0xffffffff, 0));
  const __m128 rhs = _mm_castsi128_ps(
      _mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555));
  const __m128 combined = _mm_or_ps(lhs, rhs);
  _mm_store_si128(reinterpret_cast<__m128i *>(bits),
                  _mm_castps_si128(combined));
  assert(bits[0] == 0x55555555);
  assert(bits[1] == 0xffffffff);
  assert(bits[2] == 0xffffffff);
  assert(bits[3] == 0xffffffff);
}
// _mm_xor_ps: bitwise XOR on float vectors, verified on the raw bits.
void testXorPs() {
  int32_t bits[4] __attribute__((aligned(16)));
  const __m128 lhs =
      _mm_castsi128_ps(_mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, 0xffffffff, 0));
  const __m128 rhs = _mm_castsi128_ps(
      _mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555));
  const __m128 toggled = _mm_xor_ps(lhs, rhs);
  _mm_store_si128(reinterpret_cast<__m128i *>(bits),
                  _mm_castps_si128(toggled));
  assert(bits[0] == 0x55555555);
  assert(bits[1] == 0xaaaaaaaa);
  assert(bits[2] == 0xffffffff);
  assert(bits[3] == 0xffffffff);
}
// _mm_and_si128: complementary bit patterns must AND to zero in every
// element.
void testAndSi128() {
  int32_t bits[4] __attribute__((aligned(16)));
  const __m128i lhs =
      _mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, -1431655766, 0xaaaaaaaa);
  const __m128i rhs =
      _mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555);
  _mm_store_si128(reinterpret_cast<__m128i *>(bits), _mm_and_si128(lhs, rhs));
  for (int lane = 0; lane < 4; ++lane) {
    assert(bits[lane] == 0);
  }
}
// _mm_andnot_si128: (NOT first) AND second. NOT 0xaaaaaaaa is 0x55555555, so
// the second operand is expected back unchanged.
void testAndNotSi128() {
  int32_t bits[4] __attribute__((aligned(16)));
  const __m128i inverted =
      _mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, -1431655766, 0xaaaaaaaa);
  const __m128i passed =
      _mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555);
  _mm_store_si128(reinterpret_cast<__m128i *>(bits),
                  _mm_andnot_si128(inverted, passed));
  for (int lane = 0; lane < 4; ++lane) {
    assert(bits[lane] == 0x55555555);
  }
}
// _mm_or_si128: bitwise OR of two integer vectors.
void testOrSi128() {
  int32_t bits[4] __attribute__((aligned(16)));
  const __m128i lhs = _mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, 0xffffffff, 0);
  const __m128i rhs =
      _mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555);
  _mm_store_si128(reinterpret_cast<__m128i *>(bits), _mm_or_si128(lhs, rhs));
  assert(bits[0] == 0x55555555);
  assert(bits[1] == 0xffffffff);
  assert(bits[2] == 0xffffffff);
  assert(bits[3] == 0xffffffff);
}
// _mm_xor_si128: bitwise XOR of two integer vectors.
void testXorSi128() {
  int32_t bits[4] __attribute__((aligned(16)));
  const __m128i lhs = _mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, 0xffffffff, 0);
  const __m128i rhs =
      _mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555);
  _mm_store_si128(reinterpret_cast<__m128i *>(bits), _mm_xor_si128(lhs, rhs));
  assert(bits[0] == 0x55555555);
  assert(bits[1] == 0xaaaaaaaa);
  assert(bits[2] == 0xffffffff);
  assert(bits[3] == 0xffffffff);
}
// _mm_add_epi32: element-wise 32-bit integer addition.
void testAddEpi32() {
  int32_t sum[4] __attribute__((aligned(16)));
  const __m128i lhs = _mm_set_epi32(4, 3, 2, 1);
  const __m128i rhs = _mm_set_epi32(10, 20, 30, 40);
  _mm_store_si128(reinterpret_cast<__m128i *>(sum), _mm_add_epi32(lhs, rhs));
  assert(sum[0] == 41);
  assert(sum[1] == 32);
  assert(sum[2] == 23);
  assert(sum[3] == 14);
}
// _mm_sub_epi32: element-wise 32-bit integer subtraction (first operand
// minus second).
void testSubEpi32() {
  int32_t diff[4] __attribute__((aligned(16)));
  const __m128i lhs = _mm_set_epi32(4, 3, 2, 1);
  const __m128i rhs = _mm_set_epi32(10, 20, 30, 40);
  _mm_store_si128(reinterpret_cast<__m128i *>(diff), _mm_sub_epi32(lhs, rhs));
  assert(diff[0] == -39);
  assert(diff[1] == -28);
  assert(diff[2] == -17);
  assert(diff[3] == -6);
}
// Runs every test in a fixed order and prints "DONE" (no trailing newline)
// once all of them have returned. The command-line arguments are not used.
int main(int argc, char **argv) {
  typedef void (*TestCase)();

  // Execution order is kept as listed; note that testMaxPs runs before
  // testMinPs.
  static const TestCase kCases[] = {
      // Construction, casts and conversions.
      testSetPs,
      testSet1Ps,
      testSetZeroPs,
      testSetEpi32,
      testSet1Epi32,
      testSetZeroSi128,
      testBitCasts,
      testConversions,
      testMoveMaskPs,
      // Float arithmetic.
      testAddPs,
      testSubPs,
      testMulPs,
      testDivPs,
      testMaxPs,
      testMinPs,
      testSqrtPs,
      // Float comparisons.
      testCmpLtPs,
      testCmpLePs,
      testCmpEqPs,
      testCmpGePs,
      testCmpGtPs,
      // Bitwise operations on float vectors.
      testAndPs,
      testAndNotPs,
      testOrPs,
      testXorPs,
      // Bitwise operations on integer vectors.
      testAndSi128,
      testAndNotSi128,
      testOrSi128,
      testXorSi128,
      // Integer arithmetic.
      testAddEpi32,
      testSubEpi32,
  };

  const unsigned caseCount = sizeof(kCases) / sizeof(kCases[0]);
  for (unsigned i = 0; i < caseCount; ++i) {
    kCases[i]();
  }

  printf("DONE");
  return 0;
}