mirror of
https://github.com/fluencelabs/wasmer
synced 2024-12-13 06:15:33 +00:00
Initial implementation of SIMD in the LLVM backend.
This commit is contained in:
parent
3ba8100e98
commit
fdc13563b8
File diff suppressed because it is too large
Load Diff
@ -3,7 +3,9 @@ use inkwell::{
|
|||||||
builder::Builder,
|
builder::Builder,
|
||||||
context::Context,
|
context::Context,
|
||||||
module::Module,
|
module::Module,
|
||||||
types::{BasicType, FloatType, FunctionType, IntType, PointerType, StructType, VoidType},
|
types::{
|
||||||
|
BasicType, FloatType, FunctionType, IntType, PointerType, StructType, VectorType, VoidType,
|
||||||
|
},
|
||||||
values::{BasicValue, BasicValueEnum, FloatValue, FunctionValue, IntValue, PointerValue},
|
values::{BasicValue, BasicValueEnum, FloatValue, FunctionValue, IntValue, PointerValue},
|
||||||
AddressSpace,
|
AddressSpace,
|
||||||
};
|
};
|
||||||
@ -41,12 +43,18 @@ pub struct Intrinsics {
|
|||||||
|
|
||||||
pub sqrt_f32: FunctionValue,
|
pub sqrt_f32: FunctionValue,
|
||||||
pub sqrt_f64: FunctionValue,
|
pub sqrt_f64: FunctionValue,
|
||||||
|
pub sqrt_f32x4: FunctionValue,
|
||||||
|
pub sqrt_f64x2: FunctionValue,
|
||||||
|
|
||||||
pub minimum_f32: FunctionValue,
|
pub minimum_f32: FunctionValue,
|
||||||
pub minimum_f64: FunctionValue,
|
pub minimum_f64: FunctionValue,
|
||||||
|
pub minimum_f32x4: FunctionValue,
|
||||||
|
pub minimum_f64x2: FunctionValue,
|
||||||
|
|
||||||
pub maximum_f32: FunctionValue,
|
pub maximum_f32: FunctionValue,
|
||||||
pub maximum_f64: FunctionValue,
|
pub maximum_f64: FunctionValue,
|
||||||
|
pub maximum_f32x4: FunctionValue,
|
||||||
|
pub maximum_f64x2: FunctionValue,
|
||||||
|
|
||||||
pub ceil_f32: FunctionValue,
|
pub ceil_f32: FunctionValue,
|
||||||
pub ceil_f64: FunctionValue,
|
pub ceil_f64: FunctionValue,
|
||||||
@ -62,10 +70,22 @@ pub struct Intrinsics {
|
|||||||
|
|
||||||
pub fabs_f32: FunctionValue,
|
pub fabs_f32: FunctionValue,
|
||||||
pub fabs_f64: FunctionValue,
|
pub fabs_f64: FunctionValue,
|
||||||
|
pub fabs_f32x4: FunctionValue,
|
||||||
|
pub fabs_f64x2: FunctionValue,
|
||||||
|
|
||||||
pub copysign_f32: FunctionValue,
|
pub copysign_f32: FunctionValue,
|
||||||
pub copysign_f64: FunctionValue,
|
pub copysign_f64: FunctionValue,
|
||||||
|
|
||||||
|
pub sadd_sat_i8x16: FunctionValue,
|
||||||
|
pub sadd_sat_i16x8: FunctionValue,
|
||||||
|
pub uadd_sat_i8x16: FunctionValue,
|
||||||
|
pub uadd_sat_i16x8: FunctionValue,
|
||||||
|
|
||||||
|
pub ssub_sat_i8x16: FunctionValue,
|
||||||
|
pub ssub_sat_i16x8: FunctionValue,
|
||||||
|
pub usub_sat_i8x16: FunctionValue,
|
||||||
|
pub usub_sat_i16x8: FunctionValue,
|
||||||
|
|
||||||
pub expect_i1: FunctionValue,
|
pub expect_i1: FunctionValue,
|
||||||
pub trap: FunctionValue,
|
pub trap: FunctionValue,
|
||||||
|
|
||||||
@ -79,6 +99,14 @@ pub struct Intrinsics {
|
|||||||
pub f32_ty: FloatType,
|
pub f32_ty: FloatType,
|
||||||
pub f64_ty: FloatType,
|
pub f64_ty: FloatType,
|
||||||
|
|
||||||
|
pub i1x128_ty: VectorType,
|
||||||
|
pub i8x16_ty: VectorType,
|
||||||
|
pub i16x8_ty: VectorType,
|
||||||
|
pub i32x4_ty: VectorType,
|
||||||
|
pub i64x2_ty: VectorType,
|
||||||
|
pub f32x4_ty: VectorType,
|
||||||
|
pub f64x2_ty: VectorType,
|
||||||
|
|
||||||
pub i8_ptr_ty: PointerType,
|
pub i8_ptr_ty: PointerType,
|
||||||
pub i16_ptr_ty: PointerType,
|
pub i16_ptr_ty: PointerType,
|
||||||
pub i32_ptr_ty: PointerType,
|
pub i32_ptr_ty: PointerType,
|
||||||
@ -134,6 +162,14 @@ impl Intrinsics {
|
|||||||
let f32_ty = context.f32_type();
|
let f32_ty = context.f32_type();
|
||||||
let f64_ty = context.f64_type();
|
let f64_ty = context.f64_type();
|
||||||
|
|
||||||
|
let i1x128_ty = i1_ty.vec_type(128);
|
||||||
|
let i8x16_ty = i8_ty.vec_type(16);
|
||||||
|
let i16x8_ty = i16_ty.vec_type(8);
|
||||||
|
let i32x4_ty = i32_ty.vec_type(4);
|
||||||
|
let i64x2_ty = i64_ty.vec_type(2);
|
||||||
|
let f32x4_ty = f32_ty.vec_type(4);
|
||||||
|
let f64x2_ty = f64_ty.vec_type(2);
|
||||||
|
|
||||||
let i8_ptr_ty = i8_ty.ptr_type(AddressSpace::Generic);
|
let i8_ptr_ty = i8_ty.ptr_type(AddressSpace::Generic);
|
||||||
let i16_ptr_ty = i16_ty.ptr_type(AddressSpace::Generic);
|
let i16_ptr_ty = i16_ty.ptr_type(AddressSpace::Generic);
|
||||||
let i32_ptr_ty = i32_ty.ptr_type(AddressSpace::Generic);
|
let i32_ptr_ty = i32_ty.ptr_type(AddressSpace::Generic);
|
||||||
@ -154,6 +190,10 @@ impl Intrinsics {
|
|||||||
let i64_ty_basic = i64_ty.as_basic_type_enum();
|
let i64_ty_basic = i64_ty.as_basic_type_enum();
|
||||||
let f32_ty_basic = f32_ty.as_basic_type_enum();
|
let f32_ty_basic = f32_ty.as_basic_type_enum();
|
||||||
let f64_ty_basic = f64_ty.as_basic_type_enum();
|
let f64_ty_basic = f64_ty.as_basic_type_enum();
|
||||||
|
let i8x16_ty_basic = i8x16_ty.as_basic_type_enum();
|
||||||
|
let i16x8_ty_basic = i16x8_ty.as_basic_type_enum();
|
||||||
|
let f32x4_ty_basic = f32x4_ty.as_basic_type_enum();
|
||||||
|
let f64x2_ty_basic = f64x2_ty.as_basic_type_enum();
|
||||||
let i8_ptr_ty_basic = i8_ptr_ty.as_basic_type_enum();
|
let i8_ptr_ty_basic = i8_ptr_ty.as_basic_type_enum();
|
||||||
|
|
||||||
let ctx_ty = context.opaque_struct_type("ctx");
|
let ctx_ty = context.opaque_struct_type("ctx");
|
||||||
@ -240,6 +280,9 @@ impl Intrinsics {
|
|||||||
false,
|
false,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let ret_i8x16_take_i8x16_i8x16 = i8x16_ty.fn_type(&[i8x16_ty_basic, i8x16_ty_basic], false);
|
||||||
|
let ret_i16x8_take_i16x8_i16x8 = i16x8_ty.fn_type(&[i16x8_ty_basic, i16x8_ty_basic], false);
|
||||||
|
|
||||||
let ret_i32_take_i32_i1 = i32_ty.fn_type(&[i32_ty_basic, i1_ty_basic], false);
|
let ret_i32_take_i32_i1 = i32_ty.fn_type(&[i32_ty_basic, i1_ty_basic], false);
|
||||||
let ret_i64_take_i64_i1 = i64_ty.fn_type(&[i64_ty_basic, i1_ty_basic], false);
|
let ret_i64_take_i64_i1 = i64_ty.fn_type(&[i64_ty_basic, i1_ty_basic], false);
|
||||||
|
|
||||||
@ -248,9 +291,13 @@ impl Intrinsics {
|
|||||||
|
|
||||||
let ret_f32_take_f32 = f32_ty.fn_type(&[f32_ty_basic], false);
|
let ret_f32_take_f32 = f32_ty.fn_type(&[f32_ty_basic], false);
|
||||||
let ret_f64_take_f64 = f64_ty.fn_type(&[f64_ty_basic], false);
|
let ret_f64_take_f64 = f64_ty.fn_type(&[f64_ty_basic], false);
|
||||||
|
let ret_f32x4_take_f32x4 = f32x4_ty.fn_type(&[f32x4_ty_basic], false);
|
||||||
|
let ret_f64x2_take_f64x2 = f64x2_ty.fn_type(&[f64x2_ty_basic], false);
|
||||||
|
|
||||||
let ret_f32_take_f32_f32 = f32_ty.fn_type(&[f32_ty_basic, f32_ty_basic], false);
|
let ret_f32_take_f32_f32 = f32_ty.fn_type(&[f32_ty_basic, f32_ty_basic], false);
|
||||||
let ret_f64_take_f64_f64 = f64_ty.fn_type(&[f64_ty_basic, f64_ty_basic], false);
|
let ret_f64_take_f64_f64 = f64_ty.fn_type(&[f64_ty_basic, f64_ty_basic], false);
|
||||||
|
let ret_f32x4_take_f32x4_f32x4 = f32x4_ty.fn_type(&[f32x4_ty_basic, f32x4_ty_basic], false);
|
||||||
|
let ret_f64x2_take_f64x2_f64x2 = f64x2_ty.fn_type(&[f64x2_ty_basic, f64x2_ty_basic], false);
|
||||||
|
|
||||||
let ret_i32_take_ctx_i32_i32 = i32_ty.fn_type(
|
let ret_i32_take_ctx_i32_i32 = i32_ty.fn_type(
|
||||||
&[ctx_ptr_ty.as_basic_type_enum(), i32_ty_basic, i32_ty_basic],
|
&[ctx_ptr_ty.as_basic_type_enum(), i32_ty_basic, i32_ty_basic],
|
||||||
@ -273,12 +320,34 @@ impl Intrinsics {
|
|||||||
|
|
||||||
sqrt_f32: module.add_function("llvm.sqrt.f32", ret_f32_take_f32, None),
|
sqrt_f32: module.add_function("llvm.sqrt.f32", ret_f32_take_f32, None),
|
||||||
sqrt_f64: module.add_function("llvm.sqrt.f64", ret_f64_take_f64, None),
|
sqrt_f64: module.add_function("llvm.sqrt.f64", ret_f64_take_f64, None),
|
||||||
|
sqrt_f32x4: module.add_function("llvm.sqrt.v4f32", ret_f32x4_take_f32x4, None),
|
||||||
|
sqrt_f64x2: module.add_function("llvm.sqrt.v2f64", ret_f64x2_take_f64x2, None),
|
||||||
|
|
||||||
minimum_f32: module.add_function("llvm.minimum.f32", ret_f32_take_f32_f32, None),
|
minimum_f32: module.add_function("llvm.minnum.f32", ret_f32_take_f32_f32, None),
|
||||||
minimum_f64: module.add_function("llvm.minimum.f64", ret_f64_take_f64_f64, None),
|
minimum_f64: module.add_function("llvm.minnum.f64", ret_f64_take_f64_f64, None),
|
||||||
|
minimum_f32x4: module.add_function(
|
||||||
|
"llvm.minimum.v4f32",
|
||||||
|
ret_f32x4_take_f32x4_f32x4,
|
||||||
|
None,
|
||||||
|
),
|
||||||
|
minimum_f64x2: module.add_function(
|
||||||
|
"llvm.minimum.v2f64",
|
||||||
|
ret_f64x2_take_f64x2_f64x2,
|
||||||
|
None,
|
||||||
|
),
|
||||||
|
|
||||||
maximum_f32: module.add_function("llvm.maximum.f32", ret_f32_take_f32_f32, None),
|
maximum_f32: module.add_function("llvm.maxnum.f32", ret_f32_take_f32_f32, None),
|
||||||
maximum_f64: module.add_function("llvm.maximum.f64", ret_f64_take_f64_f64, None),
|
maximum_f64: module.add_function("llvm.maxnum.f64", ret_f64_take_f64_f64, None),
|
||||||
|
maximum_f32x4: module.add_function(
|
||||||
|
"llvm.maximum.v4f32",
|
||||||
|
ret_f32x4_take_f32x4_f32x4,
|
||||||
|
None,
|
||||||
|
),
|
||||||
|
maximum_f64x2: module.add_function(
|
||||||
|
"llvm.maximum.v2f64",
|
||||||
|
ret_f64x2_take_f64x2_f64x2,
|
||||||
|
None,
|
||||||
|
),
|
||||||
|
|
||||||
ceil_f32: module.add_function("llvm.ceil.f32", ret_f32_take_f32, None),
|
ceil_f32: module.add_function("llvm.ceil.f32", ret_f32_take_f32, None),
|
||||||
ceil_f64: module.add_function("llvm.ceil.f64", ret_f64_take_f64, None),
|
ceil_f64: module.add_function("llvm.ceil.f64", ret_f64_take_f64, None),
|
||||||
@ -294,10 +363,54 @@ impl Intrinsics {
|
|||||||
|
|
||||||
fabs_f32: module.add_function("llvm.fabs.f32", ret_f32_take_f32, None),
|
fabs_f32: module.add_function("llvm.fabs.f32", ret_f32_take_f32, None),
|
||||||
fabs_f64: module.add_function("llvm.fabs.f64", ret_f64_take_f64, None),
|
fabs_f64: module.add_function("llvm.fabs.f64", ret_f64_take_f64, None),
|
||||||
|
fabs_f32x4: module.add_function("llvm.fabs.v4f32", ret_f32x4_take_f32x4, None),
|
||||||
|
fabs_f64x2: module.add_function("llvm.fabs.v2f64", ret_f64x2_take_f64x2, None),
|
||||||
|
|
||||||
copysign_f32: module.add_function("llvm.copysign.f32", ret_f32_take_f32_f32, None),
|
copysign_f32: module.add_function("llvm.copysign.f32", ret_f32_take_f32_f32, None),
|
||||||
copysign_f64: module.add_function("llvm.copysign.f64", ret_f64_take_f64_f64, None),
|
copysign_f64: module.add_function("llvm.copysign.f64", ret_f64_take_f64_f64, None),
|
||||||
|
|
||||||
|
sadd_sat_i8x16: module.add_function(
|
||||||
|
"llvm.sadd.sat.v16i8",
|
||||||
|
ret_i8x16_take_i8x16_i8x16,
|
||||||
|
None,
|
||||||
|
),
|
||||||
|
sadd_sat_i16x8: module.add_function(
|
||||||
|
"llvm.sadd.sat.v8i16",
|
||||||
|
ret_i16x8_take_i16x8_i16x8,
|
||||||
|
None,
|
||||||
|
),
|
||||||
|
uadd_sat_i8x16: module.add_function(
|
||||||
|
"llvm.uadd.sat.v16i8",
|
||||||
|
ret_i8x16_take_i8x16_i8x16,
|
||||||
|
None,
|
||||||
|
),
|
||||||
|
uadd_sat_i16x8: module.add_function(
|
||||||
|
"llvm.uadd.sat.v8i16",
|
||||||
|
ret_i16x8_take_i16x8_i16x8,
|
||||||
|
None,
|
||||||
|
),
|
||||||
|
|
||||||
|
ssub_sat_i8x16: module.add_function(
|
||||||
|
"llvm.ssub.sat.v16i8",
|
||||||
|
ret_i8x16_take_i8x16_i8x16,
|
||||||
|
None,
|
||||||
|
),
|
||||||
|
ssub_sat_i16x8: module.add_function(
|
||||||
|
"llvm.ssub.sat.v8i16",
|
||||||
|
ret_i16x8_take_i16x8_i16x8,
|
||||||
|
None,
|
||||||
|
),
|
||||||
|
usub_sat_i8x16: module.add_function(
|
||||||
|
"llvm.usub.sat.v16i8",
|
||||||
|
ret_i8x16_take_i8x16_i8x16,
|
||||||
|
None,
|
||||||
|
),
|
||||||
|
usub_sat_i16x8: module.add_function(
|
||||||
|
"llvm.usub.sat.v8i16",
|
||||||
|
ret_i16x8_take_i16x8_i16x8,
|
||||||
|
None,
|
||||||
|
),
|
||||||
|
|
||||||
expect_i1: module.add_function("llvm.expect.i1", ret_i1_take_i1_i1, None),
|
expect_i1: module.add_function("llvm.expect.i1", ret_i1_take_i1_i1, None),
|
||||||
trap: module.add_function("llvm.trap", void_ty.fn_type(&[], false), None),
|
trap: module.add_function("llvm.trap", void_ty.fn_type(&[], false), None),
|
||||||
|
|
||||||
@ -311,6 +424,14 @@ impl Intrinsics {
|
|||||||
f32_ty,
|
f32_ty,
|
||||||
f64_ty,
|
f64_ty,
|
||||||
|
|
||||||
|
i1x128_ty,
|
||||||
|
i8x16_ty,
|
||||||
|
i16x8_ty,
|
||||||
|
i32x4_ty,
|
||||||
|
i64x2_ty,
|
||||||
|
f32x4_ty,
|
||||||
|
f64x2_ty,
|
||||||
|
|
||||||
i8_ptr_ty,
|
i8_ptr_ty,
|
||||||
i16_ptr_ty,
|
i16_ptr_ty,
|
||||||
i32_ptr_ty,
|
i32_ptr_ty,
|
||||||
|
@ -7,12 +7,7 @@ pub fn type_to_type(ty: WpType) -> Result<Type, BinaryReaderError> {
|
|||||||
WpType::I64 => Type::I64,
|
WpType::I64 => Type::I64,
|
||||||
WpType::F32 => Type::F32,
|
WpType::F32 => Type::F32,
|
||||||
WpType::F64 => Type::F64,
|
WpType::F64 => Type::F64,
|
||||||
WpType::V128 => {
|
WpType::V128 => Type::V128,
|
||||||
return Err(BinaryReaderError {
|
|
||||||
message: "the wasmer llvm backend does not yet support the simd extension",
|
|
||||||
offset: -1isize as usize,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
_ => {
|
_ => {
|
||||||
return Err(BinaryReaderError {
|
return Err(BinaryReaderError {
|
||||||
message: "that type is not supported as a wasmer type",
|
message: "that type is not supported as a wasmer type",
|
||||||
|
Loading…
Reference in New Issue
Block a user