Skip to content

Penultimate wasm SIMD update (hopefully) #101

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions clang/include/clang/Basic/BuiltinsWebAssembly.def
Original file line number Diff line number Diff line change
Expand Up @@ -191,13 +191,6 @@ TARGET_BUILTIN(__builtin_wasm_narrow_u_i8x16_i16x8, "V16UcV8UsV8Us", "nc", "simd
TARGET_BUILTIN(__builtin_wasm_narrow_s_i16x8_i32x4, "V8sV4iV4i", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_narrow_u_i16x8_i32x4, "V8UsV4UiV4Ui", "nc", "simd128")

TARGET_BUILTIN(__builtin_wasm_extend_low_s_i32x4_i64x2, "V2LLiV4i", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_extend_high_s_i32x4_i64x2, "V2LLiV4i", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_extend_low_u_i32x4_i64x2, "V2LLUiV4Ui", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_extend_high_u_i32x4_i64x2, "V2LLUiV4Ui", "nc", "simd128")

TARGET_BUILTIN(__builtin_wasm_convert_low_s_i32x4_f64x2, "V2dV4i", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_convert_low_u_i32x4_f64x2, "V2dV4Ui", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_trunc_sat_zero_s_f64x2_i32x4, "V4iV2d", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_trunc_sat_zero_u_f64x2_i32x4, "V4UiV2d", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_demote_zero_f64x2_f32x4, "V4fV2d", "nc", "simd128")
Expand Down
70 changes: 20 additions & 50 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16756,8 +16756,8 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: {
Value *Src = EmitScalarExpr(E->getArg(0));
llvm::Type *ResT = ConvertType(E->getType());
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_signed,
{ResT, Src->getType()});
Function *Callee =
CGM.getIntrinsic(Intrinsic::fptosi_sat, {ResT, Src->getType()});
return Builder.CreateCall(Callee, {Src});
}
case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
Expand All @@ -16767,8 +16767,8 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: {
Value *Src = EmitScalarExpr(E->getArg(0));
llvm::Type *ResT = ConvertType(E->getType());
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_unsigned,
{ResT, Src->getType()});
Function *Callee =
CGM.getIntrinsic(Intrinsic::fptoui_sat, {ResT, Src->getType()});
return Builder.CreateCall(Callee, {Src});
}
case WebAssembly::BI__builtin_wasm_min_f32:
Expand Down Expand Up @@ -16819,19 +16819,19 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
switch (BuiltinID) {
case WebAssembly::BI__builtin_wasm_ceil_f32x4:
case WebAssembly::BI__builtin_wasm_ceil_f64x2:
IntNo = Intrinsic::wasm_ceil;
IntNo = Intrinsic::ceil;
break;
case WebAssembly::BI__builtin_wasm_floor_f32x4:
case WebAssembly::BI__builtin_wasm_floor_f64x2:
IntNo = Intrinsic::wasm_floor;
IntNo = Intrinsic::floor;
break;
case WebAssembly::BI__builtin_wasm_trunc_f32x4:
case WebAssembly::BI__builtin_wasm_trunc_f64x2:
IntNo = Intrinsic::wasm_trunc;
IntNo = Intrinsic::trunc;
break;
case WebAssembly::BI__builtin_wasm_nearest_f32x4:
case WebAssembly::BI__builtin_wasm_nearest_f64x2:
IntNo = Intrinsic::wasm_nearest;
IntNo = Intrinsic::nearbyint;
break;
default:
llvm_unreachable("unexpected builtin ID");
Expand Down Expand Up @@ -17158,58 +17158,28 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()});
return Builder.CreateCall(Callee, {Low, High});
}
case WebAssembly::BI__builtin_wasm_extend_low_s_i32x4_i64x2:
case WebAssembly::BI__builtin_wasm_extend_high_s_i32x4_i64x2:
case WebAssembly::BI__builtin_wasm_extend_low_u_i32x4_i64x2:
case WebAssembly::BI__builtin_wasm_extend_high_u_i32x4_i64x2: {
Value *Vec = EmitScalarExpr(E->getArg(0));
unsigned IntNo;
switch (BuiltinID) {
case WebAssembly::BI__builtin_wasm_extend_low_s_i32x4_i64x2:
IntNo = Intrinsic::wasm_extend_low_signed;
break;
case WebAssembly::BI__builtin_wasm_extend_high_s_i32x4_i64x2:
IntNo = Intrinsic::wasm_extend_high_signed;
break;
case WebAssembly::BI__builtin_wasm_extend_low_u_i32x4_i64x2:
IntNo = Intrinsic::wasm_extend_low_unsigned;
break;
case WebAssembly::BI__builtin_wasm_extend_high_u_i32x4_i64x2:
IntNo = Intrinsic::wasm_extend_high_unsigned;
break;
}
Function *Callee = CGM.getIntrinsic(IntNo);
return Builder.CreateCall(Callee, Vec);
}
case WebAssembly::BI__builtin_wasm_convert_low_s_i32x4_f64x2:
case WebAssembly::BI__builtin_wasm_convert_low_u_i32x4_f64x2: {
Value *Vec = EmitScalarExpr(E->getArg(0));
unsigned IntNo;
switch (BuiltinID) {
case WebAssembly::BI__builtin_wasm_convert_low_s_i32x4_f64x2:
IntNo = Intrinsic::wasm_convert_low_signed;
break;
case WebAssembly::BI__builtin_wasm_convert_low_u_i32x4_f64x2:
IntNo = Intrinsic::wasm_convert_low_unsigned;
break;
}
Function *Callee = CGM.getIntrinsic(IntNo);
return Builder.CreateCall(Callee, Vec);
}
case WebAssembly::BI__builtin_wasm_trunc_sat_zero_s_f64x2_i32x4:
case WebAssembly::BI__builtin_wasm_trunc_sat_zero_u_f64x2_i32x4: {
Value *Vec = EmitScalarExpr(E->getArg(0));
unsigned IntNo;
switch (BuiltinID) {
case WebAssembly::BI__builtin_wasm_trunc_sat_zero_s_f64x2_i32x4:
IntNo = Intrinsic::wasm_trunc_sat_zero_signed;
IntNo = Intrinsic::fptosi_sat;
break;
case WebAssembly::BI__builtin_wasm_trunc_sat_zero_u_f64x2_i32x4:
IntNo = Intrinsic::wasm_trunc_sat_zero_unsigned;
IntNo = Intrinsic::fptoui_sat;
break;
}
Function *Callee = CGM.getIntrinsic(IntNo);
return Builder.CreateCall(Callee, Vec);
llvm::Type *SrcT = Vec->getType();
llvm::Type *TruncT =
SrcT->getWithNewType(llvm::IntegerType::get(getLLVMContext(), 32));
Function *Callee = CGM.getIntrinsic(IntNo, {TruncT, SrcT});
Value *Trunc = Builder.CreateCall(Callee, Vec);
Value *Splat = Builder.CreateVectorSplat(2, Builder.getInt32(0));
Value *ConcatMask =
llvm::ConstantVector::get({Builder.getInt32(0), Builder.getInt32(1),
Builder.getInt32(2), Builder.getInt32(3)});
return Builder.CreateShuffleVector(Trunc, Splat, ConcatMask);
}
case WebAssembly::BI__builtin_wasm_demote_zero_f64x2_f32x4: {
Value *Vec = EmitScalarExpr(E->getArg(0));
Expand Down
82 changes: 24 additions & 58 deletions clang/test/CodeGen/builtins-wasm.c
Original file line number Diff line number Diff line change
Expand Up @@ -123,49 +123,49 @@ long long trunc_u_i64_f64(double f) {

int trunc_saturate_s_i32_f32(float f) {
return __builtin_wasm_trunc_saturate_s_i32_f32(f);
// WEBASSEMBLY: call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float %f)
// WEBASSEMBLY: call i32 @llvm.fptosi.sat.i32.f32(float %f)
// WEBASSEMBLY-NEXT: ret
}

int trunc_saturate_u_i32_f32(float f) {
return __builtin_wasm_trunc_saturate_u_i32_f32(f);
// WEBASSEMBLY: call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f32(float %f)
// WEBASSEMBLY: call i32 @llvm.fptoui.sat.i32.f32(float %f)
// WEBASSEMBLY-NEXT: ret
}

int trunc_saturate_s_i32_f64(double f) {
return __builtin_wasm_trunc_saturate_s_i32_f64(f);
// WEBASSEMBLY: call i32 @llvm.wasm.trunc.saturate.signed.i32.f64(double %f)
// WEBASSEMBLY: call i32 @llvm.fptosi.sat.i32.f64(double %f)
// WEBASSEMBLY-NEXT: ret
}

int trunc_saturate_u_i32_f64(double f) {
return __builtin_wasm_trunc_saturate_u_i32_f64(f);
// WEBASSEMBLY: call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f64(double %f)
// WEBASSEMBLY: call i32 @llvm.fptoui.sat.i32.f64(double %f)
// WEBASSEMBLY-NEXT: ret
}

long long trunc_saturate_s_i64_f32(float f) {
return __builtin_wasm_trunc_saturate_s_i64_f32(f);
// WEBASSEMBLY: call i64 @llvm.wasm.trunc.saturate.signed.i64.f32(float %f)
// WEBASSEMBLY: call i64 @llvm.fptosi.sat.i64.f32(float %f)
// WEBASSEMBLY-NEXT: ret
}

long long trunc_saturate_u_i64_f32(float f) {
return __builtin_wasm_trunc_saturate_u_i64_f32(f);
// WEBASSEMBLY: call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f32(float %f)
// WEBASSEMBLY: call i64 @llvm.fptoui.sat.i64.f32(float %f)
// WEBASSEMBLY-NEXT: ret
}

long long trunc_saturate_s_i64_f64(double f) {
return __builtin_wasm_trunc_saturate_s_i64_f64(f);
// WEBASSEMBLY: call i64 @llvm.wasm.trunc.saturate.signed.i64.f64(double %f)
// WEBASSEMBLY: call i64 @llvm.fptosi.sat.i64.f64(double %f)
// WEBASSEMBLY-NEXT: ret
}

long long trunc_saturate_u_i64_f64(double f) {
return __builtin_wasm_trunc_saturate_u_i64_f64(f);
// WEBASSEMBLY: call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f64(double %f)
// WEBASSEMBLY: call i64 @llvm.fptoui.sat.i64.f64(double %f)
// WEBASSEMBLY-NEXT: ret
}

Expand Down Expand Up @@ -792,49 +792,49 @@ f64x2 pmax_f64x2(f64x2 x, f64x2 y) {

f32x4 ceil_f32x4(f32x4 x) {
return __builtin_wasm_ceil_f32x4(x);
// WEBASSEMBLY: call <4 x float> @llvm.wasm.ceil.v4f32(<4 x float> %x)
// WEBASSEMBLY: call <4 x float> @llvm.ceil.v4f32(<4 x float> %x)
// WEBASSEMBLY: ret
}

f32x4 floor_f32x4(f32x4 x) {
return __builtin_wasm_floor_f32x4(x);
// WEBASSEMBLY: call <4 x float> @llvm.wasm.floor.v4f32(<4 x float> %x)
// WEBASSEMBLY: call <4 x float> @llvm.floor.v4f32(<4 x float> %x)
// WEBASSEMBLY: ret
}

f32x4 trunc_f32x4(f32x4 x) {
return __builtin_wasm_trunc_f32x4(x);
// WEBASSEMBLY: call <4 x float> @llvm.wasm.trunc.v4f32(<4 x float> %x)
// WEBASSEMBLY: call <4 x float> @llvm.trunc.v4f32(<4 x float> %x)
// WEBASSEMBLY: ret
}

f32x4 nearest_f32x4(f32x4 x) {
return __builtin_wasm_nearest_f32x4(x);
// WEBASSEMBLY: call <4 x float> @llvm.wasm.nearest.v4f32(<4 x float> %x)
// WEBASSEMBLY: call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %x)
// WEBASSEMBLY: ret
}

f64x2 ceil_f64x2(f64x2 x) {
return __builtin_wasm_ceil_f64x2(x);
// WEBASSEMBLY: call <2 x double> @llvm.wasm.ceil.v2f64(<2 x double> %x)
// WEBASSEMBLY: call <2 x double> @llvm.ceil.v2f64(<2 x double> %x)
// WEBASSEMBLY: ret
}

f64x2 floor_f64x2(f64x2 x) {
return __builtin_wasm_floor_f64x2(x);
// WEBASSEMBLY: call <2 x double> @llvm.wasm.floor.v2f64(<2 x double> %x)
// WEBASSEMBLY: call <2 x double> @llvm.floor.v2f64(<2 x double> %x)
// WEBASSEMBLY: ret
}

f64x2 trunc_f64x2(f64x2 x) {
return __builtin_wasm_trunc_f64x2(x);
// WEBASSEMBLY: call <2 x double> @llvm.wasm.trunc.v2f64(<2 x double> %x)
// WEBASSEMBLY: call <2 x double> @llvm.trunc.v2f64(<2 x double> %x)
// WEBASSEMBLY: ret
}

f64x2 nearest_f64x2(f64x2 x) {
return __builtin_wasm_nearest_f64x2(x);
// WEBASSEMBLY: call <2 x double> @llvm.wasm.nearest.v2f64(<2 x double> %x)
// WEBASSEMBLY: call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %x)
// WEBASSEMBLY: ret
}

Expand All @@ -852,13 +852,13 @@ f64x2 sqrt_f64x2(f64x2 x) {

i32x4 trunc_saturate_s_i32x4_f32x4(f32x4 f) {
return __builtin_wasm_trunc_saturate_s_i32x4_f32x4(f);
// WEBASSEMBLY: call <4 x i32> @llvm.wasm.trunc.saturate.signed.v4i32.v4f32(<4 x float> %f)
// WEBASSEMBLY: call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %f)
// WEBASSEMBLY-NEXT: ret
}

i32x4 trunc_saturate_u_i32x4_f32x4(f32x4 f) {
return __builtin_wasm_trunc_saturate_u_i32x4_f32x4(f);
// WEBASSEMBLY: call <4 x i32> @llvm.wasm.trunc.saturate.unsigned.v4i32.v4f32(<4 x float> %f)
// WEBASSEMBLY: call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %f)
// WEBASSEMBLY-NEXT: ret
}

Expand Down Expand Up @@ -890,52 +890,18 @@ u16x8 narrow_u_i16x8_i32x4(u32x4 low, u32x4 high) {
// WEBASSEMBLY: ret
}

i64x2 extend_low_s_i32x4_i64x2(i32x4 x) {
return __builtin_wasm_extend_low_s_i32x4_i64x2(x);
// WEBASSEMBLY: call <2 x i64> @llvm.wasm.extend.low.signed(<4 x i32> %x)
// WEBASSEMBLY: ret
}

i64x2 extend_high_s_i32x4_i64x2(i32x4 x) {
return __builtin_wasm_extend_high_s_i32x4_i64x2(x);
// WEBASSEMBLY: call <2 x i64> @llvm.wasm.extend.high.signed(<4 x i32> %x)
// WEBASSEMBLY: ret
}

u64x2 extend_low_u_i32x4_i64x2(u32x4 x) {
return __builtin_wasm_extend_low_u_i32x4_i64x2(x);
// WEBASSEMBLY: call <2 x i64> @llvm.wasm.extend.low.unsigned(<4 x i32> %x)
// WEBASSEMBLY: ret
}

u64x2 extend_high_u_i32x4_i64x2(u32x4 x) {
return __builtin_wasm_extend_high_u_i32x4_i64x2(x);
// WEBASSEMBLY: call <2 x i64> @llvm.wasm.extend.high.unsigned(<4 x i32> %x)
// WEBASSEMBLY: ret
}

f64x2 convert_low_s_i32x4_f64x2(i32x4 x) {
return __builtin_wasm_convert_low_s_i32x4_f64x2(x);
// WEBASSEMBLY: call <2 x double> @llvm.wasm.convert.low.signed(<4 x i32> %x)
// WEBASSEMBLY: ret
}

f64x2 convert_low_u_i32x4_f64x2(u32x4 x) {
return __builtin_wasm_convert_low_u_i32x4_f64x2(x);
// WEBASSEMBLY: call <2 x double> @llvm.wasm.convert.low.unsigned(<4 x i32> %x)
// WEBASSEMBLY: ret
}

i32x4 trunc_sat_zero_s_f64x2_i32x4(f64x2 x) {
return __builtin_wasm_trunc_sat_zero_s_f64x2_i32x4(x);
// WEBASSEMBLY: call <4 x i32> @llvm.wasm.trunc.sat.zero.signed(<2 x double> %x)
// WEBASSEMBLY: ret
// WEBASSEMBLY: %0 = tail call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> %x)
// WEBASSEMBLY: %1 = shufflevector <2 x i32> %0, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// WEBASSEMBLY: ret <4 x i32> %1
}

u32x4 trunc_sat_zero_u_f64x2_i32x4(f64x2 x) {
return __builtin_wasm_trunc_sat_zero_u_f64x2_i32x4(x);
// WEBASSEMBLY: call <4 x i32> @llvm.wasm.trunc.sat.zero.unsigned(<2 x double> %x)
// WEBASSEMBLY: ret
// WEBASSEMBLY: %0 = tail call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> %x)
// WEBASSEMBLY: %1 = shufflevector <2 x i32> %0, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// WEBASSEMBLY: ret <4 x i32> %1
}

f32x4 wasm_demote_zero_f64x2_f32x4(f64x2 x) {
Expand Down
43 changes: 0 additions & 43 deletions llvm/include/llvm/IR/IntrinsicsWebAssembly.td
Original file line number Diff line number Diff line change
Expand Up @@ -157,17 +157,6 @@ def int_wasm_narrow_unsigned :
[llvm_anyvector_ty, LLVMMatchType<1>],
[IntrNoMem, IntrSpeculatable]>;

// TODO: Replace these intrinsics with normal ISel patterns once i32x4 to i64x2
// extending is merged to the proposal.
def int_wasm_extend_low_signed :
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_wasm_extend_high_signed :
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_wasm_extend_low_unsigned :
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_wasm_extend_high_unsigned :
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem, IntrSpeculatable]>;

def int_wasm_q15mulr_sat_signed :
Intrinsic<[llvm_v8i16_ty],
[llvm_v8i16_ty, llvm_v8i16_ty],
Expand All @@ -183,26 +172,6 @@ def int_wasm_pmax :
[LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable]>;

// TODO: Replace these instrinsics with normal ISel patterns once the
// rounding instructions are merged to the proposal
// (https://github.com/WebAssembly/simd/pull/232).
def int_wasm_ceil :
Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable]>;
def int_wasm_floor :
Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable]>;
def int_wasm_trunc :
Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable]>;
def int_wasm_nearest :
Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable]>;

// TODO: Replace these intrinsic with normal ISel patterns once the
// load_zero instructions are merged to the proposal.
def int_wasm_load32_zero :
Expand Down Expand Up @@ -295,18 +264,6 @@ def int_wasm_extadd_pairwise_unsigned :
[IntrNoMem, IntrSpeculatable]>;

// TODO: Remove these if possible if they are merged to the spec.
def int_wasm_convert_low_signed :
Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty],
[IntrNoMem, IntrSpeculatable]>;
def int_wasm_convert_low_unsigned :
Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty],
[IntrNoMem, IntrSpeculatable]>;
def int_wasm_trunc_sat_zero_signed :
Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty],
[IntrNoMem, IntrSpeculatable]>;
def int_wasm_trunc_sat_zero_unsigned :
Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty],
[IntrNoMem, IntrSpeculatable]>;
def int_wasm_demote_zero :
Intrinsic<[llvm_v4f32_ty], [llvm_v2f64_ty],
[IntrNoMem, IntrSpeculatable]>;
Expand Down
Loading