Initial implementation of SIMD in the LLVM backend.

This commit is contained in:
Nick Lewycky 2019-07-02 15:50:33 -07:00
parent 3ba8100e98
commit fdc13563b8
3 changed files with 1980 additions and 16 deletions

File diff suppressed because it is too large Load Diff

View File

@ -3,7 +3,9 @@ use inkwell::{
builder::Builder, builder::Builder,
context::Context, context::Context,
module::Module, module::Module,
types::{BasicType, FloatType, FunctionType, IntType, PointerType, StructType, VoidType}, types::{
BasicType, FloatType, FunctionType, IntType, PointerType, StructType, VectorType, VoidType,
},
values::{BasicValue, BasicValueEnum, FloatValue, FunctionValue, IntValue, PointerValue}, values::{BasicValue, BasicValueEnum, FloatValue, FunctionValue, IntValue, PointerValue},
AddressSpace, AddressSpace,
}; };
@ -41,12 +43,18 @@ pub struct Intrinsics {
pub sqrt_f32: FunctionValue, pub sqrt_f32: FunctionValue,
pub sqrt_f64: FunctionValue, pub sqrt_f64: FunctionValue,
pub sqrt_f32x4: FunctionValue,
pub sqrt_f64x2: FunctionValue,
pub minimum_f32: FunctionValue, pub minimum_f32: FunctionValue,
pub minimum_f64: FunctionValue, pub minimum_f64: FunctionValue,
pub minimum_f32x4: FunctionValue,
pub minimum_f64x2: FunctionValue,
pub maximum_f32: FunctionValue, pub maximum_f32: FunctionValue,
pub maximum_f64: FunctionValue, pub maximum_f64: FunctionValue,
pub maximum_f32x4: FunctionValue,
pub maximum_f64x2: FunctionValue,
pub ceil_f32: FunctionValue, pub ceil_f32: FunctionValue,
pub ceil_f64: FunctionValue, pub ceil_f64: FunctionValue,
@ -62,10 +70,22 @@ pub struct Intrinsics {
pub fabs_f32: FunctionValue, pub fabs_f32: FunctionValue,
pub fabs_f64: FunctionValue, pub fabs_f64: FunctionValue,
pub fabs_f32x4: FunctionValue,
pub fabs_f64x2: FunctionValue,
pub copysign_f32: FunctionValue, pub copysign_f32: FunctionValue,
pub copysign_f64: FunctionValue, pub copysign_f64: FunctionValue,
pub sadd_sat_i8x16: FunctionValue,
pub sadd_sat_i16x8: FunctionValue,
pub uadd_sat_i8x16: FunctionValue,
pub uadd_sat_i16x8: FunctionValue,
pub ssub_sat_i8x16: FunctionValue,
pub ssub_sat_i16x8: FunctionValue,
pub usub_sat_i8x16: FunctionValue,
pub usub_sat_i16x8: FunctionValue,
pub expect_i1: FunctionValue, pub expect_i1: FunctionValue,
pub trap: FunctionValue, pub trap: FunctionValue,
@ -79,6 +99,14 @@ pub struct Intrinsics {
pub f32_ty: FloatType, pub f32_ty: FloatType,
pub f64_ty: FloatType, pub f64_ty: FloatType,
pub i1x128_ty: VectorType,
pub i8x16_ty: VectorType,
pub i16x8_ty: VectorType,
pub i32x4_ty: VectorType,
pub i64x2_ty: VectorType,
pub f32x4_ty: VectorType,
pub f64x2_ty: VectorType,
pub i8_ptr_ty: PointerType, pub i8_ptr_ty: PointerType,
pub i16_ptr_ty: PointerType, pub i16_ptr_ty: PointerType,
pub i32_ptr_ty: PointerType, pub i32_ptr_ty: PointerType,
@ -134,6 +162,14 @@ impl Intrinsics {
let f32_ty = context.f32_type(); let f32_ty = context.f32_type();
let f64_ty = context.f64_type(); let f64_ty = context.f64_type();
let i1x128_ty = i1_ty.vec_type(128);
let i8x16_ty = i8_ty.vec_type(16);
let i16x8_ty = i16_ty.vec_type(8);
let i32x4_ty = i32_ty.vec_type(4);
let i64x2_ty = i64_ty.vec_type(2);
let f32x4_ty = f32_ty.vec_type(4);
let f64x2_ty = f64_ty.vec_type(2);
let i8_ptr_ty = i8_ty.ptr_type(AddressSpace::Generic); let i8_ptr_ty = i8_ty.ptr_type(AddressSpace::Generic);
let i16_ptr_ty = i16_ty.ptr_type(AddressSpace::Generic); let i16_ptr_ty = i16_ty.ptr_type(AddressSpace::Generic);
let i32_ptr_ty = i32_ty.ptr_type(AddressSpace::Generic); let i32_ptr_ty = i32_ty.ptr_type(AddressSpace::Generic);
@ -154,6 +190,10 @@ impl Intrinsics {
let i64_ty_basic = i64_ty.as_basic_type_enum(); let i64_ty_basic = i64_ty.as_basic_type_enum();
let f32_ty_basic = f32_ty.as_basic_type_enum(); let f32_ty_basic = f32_ty.as_basic_type_enum();
let f64_ty_basic = f64_ty.as_basic_type_enum(); let f64_ty_basic = f64_ty.as_basic_type_enum();
let i8x16_ty_basic = i8x16_ty.as_basic_type_enum();
let i16x8_ty_basic = i16x8_ty.as_basic_type_enum();
let f32x4_ty_basic = f32x4_ty.as_basic_type_enum();
let f64x2_ty_basic = f64x2_ty.as_basic_type_enum();
let i8_ptr_ty_basic = i8_ptr_ty.as_basic_type_enum(); let i8_ptr_ty_basic = i8_ptr_ty.as_basic_type_enum();
let ctx_ty = context.opaque_struct_type("ctx"); let ctx_ty = context.opaque_struct_type("ctx");
@ -240,6 +280,9 @@ impl Intrinsics {
false, false,
); );
let ret_i8x16_take_i8x16_i8x16 = i8x16_ty.fn_type(&[i8x16_ty_basic, i8x16_ty_basic], false);
let ret_i16x8_take_i16x8_i16x8 = i16x8_ty.fn_type(&[i16x8_ty_basic, i16x8_ty_basic], false);
let ret_i32_take_i32_i1 = i32_ty.fn_type(&[i32_ty_basic, i1_ty_basic], false); let ret_i32_take_i32_i1 = i32_ty.fn_type(&[i32_ty_basic, i1_ty_basic], false);
let ret_i64_take_i64_i1 = i64_ty.fn_type(&[i64_ty_basic, i1_ty_basic], false); let ret_i64_take_i64_i1 = i64_ty.fn_type(&[i64_ty_basic, i1_ty_basic], false);
@ -248,9 +291,13 @@ impl Intrinsics {
let ret_f32_take_f32 = f32_ty.fn_type(&[f32_ty_basic], false); let ret_f32_take_f32 = f32_ty.fn_type(&[f32_ty_basic], false);
let ret_f64_take_f64 = f64_ty.fn_type(&[f64_ty_basic], false); let ret_f64_take_f64 = f64_ty.fn_type(&[f64_ty_basic], false);
let ret_f32x4_take_f32x4 = f32x4_ty.fn_type(&[f32x4_ty_basic], false);
let ret_f64x2_take_f64x2 = f64x2_ty.fn_type(&[f64x2_ty_basic], false);
let ret_f32_take_f32_f32 = f32_ty.fn_type(&[f32_ty_basic, f32_ty_basic], false); let ret_f32_take_f32_f32 = f32_ty.fn_type(&[f32_ty_basic, f32_ty_basic], false);
let ret_f64_take_f64_f64 = f64_ty.fn_type(&[f64_ty_basic, f64_ty_basic], false); let ret_f64_take_f64_f64 = f64_ty.fn_type(&[f64_ty_basic, f64_ty_basic], false);
let ret_f32x4_take_f32x4_f32x4 = f32x4_ty.fn_type(&[f32x4_ty_basic, f32x4_ty_basic], false);
let ret_f64x2_take_f64x2_f64x2 = f64x2_ty.fn_type(&[f64x2_ty_basic, f64x2_ty_basic], false);
let ret_i32_take_ctx_i32_i32 = i32_ty.fn_type( let ret_i32_take_ctx_i32_i32 = i32_ty.fn_type(
&[ctx_ptr_ty.as_basic_type_enum(), i32_ty_basic, i32_ty_basic], &[ctx_ptr_ty.as_basic_type_enum(), i32_ty_basic, i32_ty_basic],
@ -273,12 +320,34 @@ impl Intrinsics {
sqrt_f32: module.add_function("llvm.sqrt.f32", ret_f32_take_f32, None), sqrt_f32: module.add_function("llvm.sqrt.f32", ret_f32_take_f32, None),
sqrt_f64: module.add_function("llvm.sqrt.f64", ret_f64_take_f64, None), sqrt_f64: module.add_function("llvm.sqrt.f64", ret_f64_take_f64, None),
sqrt_f32x4: module.add_function("llvm.sqrt.v4f32", ret_f32x4_take_f32x4, None),
sqrt_f64x2: module.add_function("llvm.sqrt.v2f64", ret_f64x2_take_f64x2, None),
minimum_f32: module.add_function("llvm.minimum.f32", ret_f32_take_f32_f32, None), minimum_f32: module.add_function("llvm.minnum.f32", ret_f32_take_f32_f32, None),
minimum_f64: module.add_function("llvm.minimum.f64", ret_f64_take_f64_f64, None), minimum_f64: module.add_function("llvm.minnum.f64", ret_f64_take_f64_f64, None),
minimum_f32x4: module.add_function(
"llvm.minimum.v4f32",
ret_f32x4_take_f32x4_f32x4,
None,
),
minimum_f64x2: module.add_function(
"llvm.minimum.v2f64",
ret_f64x2_take_f64x2_f64x2,
None,
),
maximum_f32: module.add_function("llvm.maximum.f32", ret_f32_take_f32_f32, None), maximum_f32: module.add_function("llvm.maxnum.f32", ret_f32_take_f32_f32, None),
maximum_f64: module.add_function("llvm.maximum.f64", ret_f64_take_f64_f64, None), maximum_f64: module.add_function("llvm.maxnum.f64", ret_f64_take_f64_f64, None),
maximum_f32x4: module.add_function(
"llvm.maximum.v4f32",
ret_f32x4_take_f32x4_f32x4,
None,
),
maximum_f64x2: module.add_function(
"llvm.maximum.v2f64",
ret_f64x2_take_f64x2_f64x2,
None,
),
ceil_f32: module.add_function("llvm.ceil.f32", ret_f32_take_f32, None), ceil_f32: module.add_function("llvm.ceil.f32", ret_f32_take_f32, None),
ceil_f64: module.add_function("llvm.ceil.f64", ret_f64_take_f64, None), ceil_f64: module.add_function("llvm.ceil.f64", ret_f64_take_f64, None),
@ -294,10 +363,54 @@ impl Intrinsics {
fabs_f32: module.add_function("llvm.fabs.f32", ret_f32_take_f32, None), fabs_f32: module.add_function("llvm.fabs.f32", ret_f32_take_f32, None),
fabs_f64: module.add_function("llvm.fabs.f64", ret_f64_take_f64, None), fabs_f64: module.add_function("llvm.fabs.f64", ret_f64_take_f64, None),
fabs_f32x4: module.add_function("llvm.fabs.v4f32", ret_f32x4_take_f32x4, None),
fabs_f64x2: module.add_function("llvm.fabs.v2f64", ret_f64x2_take_f64x2, None),
copysign_f32: module.add_function("llvm.copysign.f32", ret_f32_take_f32_f32, None), copysign_f32: module.add_function("llvm.copysign.f32", ret_f32_take_f32_f32, None),
copysign_f64: module.add_function("llvm.copysign.f64", ret_f64_take_f64_f64, None), copysign_f64: module.add_function("llvm.copysign.f64", ret_f64_take_f64_f64, None),
sadd_sat_i8x16: module.add_function(
"llvm.sadd.sat.v16i8",
ret_i8x16_take_i8x16_i8x16,
None,
),
sadd_sat_i16x8: module.add_function(
"llvm.sadd.sat.v8i16",
ret_i16x8_take_i16x8_i16x8,
None,
),
uadd_sat_i8x16: module.add_function(
"llvm.uadd.sat.v16i8",
ret_i8x16_take_i8x16_i8x16,
None,
),
uadd_sat_i16x8: module.add_function(
"llvm.uadd.sat.v8i16",
ret_i16x8_take_i16x8_i16x8,
None,
),
ssub_sat_i8x16: module.add_function(
"llvm.ssub.sat.v16i8",
ret_i8x16_take_i8x16_i8x16,
None,
),
ssub_sat_i16x8: module.add_function(
"llvm.ssub.sat.v8i16",
ret_i16x8_take_i16x8_i16x8,
None,
),
usub_sat_i8x16: module.add_function(
"llvm.usub.sat.v16i8",
ret_i8x16_take_i8x16_i8x16,
None,
),
usub_sat_i16x8: module.add_function(
"llvm.usub.sat.v8i16",
ret_i16x8_take_i16x8_i16x8,
None,
),
expect_i1: module.add_function("llvm.expect.i1", ret_i1_take_i1_i1, None), expect_i1: module.add_function("llvm.expect.i1", ret_i1_take_i1_i1, None),
trap: module.add_function("llvm.trap", void_ty.fn_type(&[], false), None), trap: module.add_function("llvm.trap", void_ty.fn_type(&[], false), None),
@ -311,6 +424,14 @@ impl Intrinsics {
f32_ty, f32_ty,
f64_ty, f64_ty,
i1x128_ty,
i8x16_ty,
i16x8_ty,
i32x4_ty,
i64x2_ty,
f32x4_ty,
f64x2_ty,
i8_ptr_ty, i8_ptr_ty,
i16_ptr_ty, i16_ptr_ty,
i32_ptr_ty, i32_ptr_ty,

View File

@ -7,12 +7,7 @@ pub fn type_to_type(ty: WpType) -> Result<Type, BinaryReaderError> {
WpType::I64 => Type::I64, WpType::I64 => Type::I64,
WpType::F32 => Type::F32, WpType::F32 => Type::F32,
WpType::F64 => Type::F64, WpType::F64 => Type::F64,
WpType::V128 => { WpType::V128 => Type::V128,
return Err(BinaryReaderError {
message: "the wasmer llvm backend does not yet support the simd extension",
offset: -1isize as usize,
});
}
_ => { _ => {
return Err(BinaryReaderError { return Err(BinaryReaderError {
message: "that type is not supported as a wasmer type", message: "that type is not supported as a wasmer type",