From 70450305323ecd1055223110b0843d6f64861f08 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Thu, 11 Jul 2019 14:48:07 -0700 Subject: [PATCH 1/2] Try a new list of optimization passes. A few notes: a) the inliner doesn't help because all the calls are indirect and not even opt -O2 can figure out which functions they're actually calling. b) aggressive instruction combining is not a superset of the instruction combiner. Instcombine is made up of a large number (probably 10,000s) of patterns, and some particularly slow ones were taken out and moved to the aggressive instruction combiner. Aggressive instcombine *only* runs that handful of optimizations, which fired zero times on our example wasm files. c) NewGVN is not ready for production; it has asserts that fire when building sqlite or cowsay. This is why sqlite didn't build with the llvm backend. d) Scalar-replacement-of-aggregates (sroa) is a strict superset of promote-memory-to-registers (mem2reg), and you probably want sroa because it's usually faster. It also fires 10,000s more times than mem2reg on lua.wasm. e) Aggressive-dead-code-elimination was only deleting as much as regular dead-code-elimination, but is slower because it depends on a postdominator tree (PDT) analysis. Other passes don't need PDT, so we'd have to build it for just this one pass (as opposed to the regular dominator tree, which is reused by many passes). I've replaced this with bit-tracking dead-code-elimination, which deletes more code than dce/adce. 
--- lib/llvm-backend/src/code.rs | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/lib/llvm-backend/src/code.rs b/lib/llvm-backend/src/code.rs index 7fe98805f..f1962639d 100644 --- a/lib/llvm-backend/src/code.rs +++ b/lib/llvm-backend/src/code.rs @@ -2583,13 +2583,19 @@ impl ModuleCodeGenerator if cfg!(test) { pass_manager.add_verifier_pass(); } - pass_manager.add_function_inlining_pass(); - pass_manager.add_promote_memory_to_register_pass(); + pass_manager.add_lower_expect_intrinsic_pass(); + pass_manager.add_scalar_repl_aggregates_pass(); + pass_manager.add_instruction_combining_pass(); pass_manager.add_cfg_simplification_pass(); - pass_manager.add_aggressive_inst_combiner_pass(); - pass_manager.add_merged_load_store_motion_pass(); - pass_manager.add_new_gvn_pass(); - pass_manager.add_aggressive_dce_pass(); + pass_manager.add_gvn_pass(); + pass_manager.add_jump_threading_pass(); + pass_manager.add_correlated_value_propagation_pass(); + pass_manager.add_sccp_pass(); + pass_manager.add_instruction_combining_pass(); + pass_manager.add_reassociate_pass(); + pass_manager.add_cfg_simplification_pass(); + pass_manager.add_bit_tracking_dce_pass(); + pass_manager.add_slp_vectorize_pass(); pass_manager.run_on_module(&self.module); // self.module.print_to_stderr(); From 7fb88251d475eb8f0a3ef1e3d1c11f1223b9ed34 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Thu, 11 Jul 2019 16:24:09 -0700 Subject: [PATCH 2/2] Simplify trap_if_not_representable_as_int. Fix typo in function name. Use two fcmp instructions instead of unpacking the bits of the IEEE float and using integer arithmetic to determine details about its value. 
--- lib/llvm-backend/src/code.rs | 121 ++++++++--------------------------- 1 file changed, 28 insertions(+), 93 deletions(-) diff --git a/lib/llvm-backend/src/code.rs b/lib/llvm-backend/src/code.rs index f1962639d..2eb8d441e 100644 --- a/lib/llvm-backend/src/code.rs +++ b/lib/llvm-backend/src/code.rs @@ -60,102 +60,37 @@ fn type_to_llvm(intrinsics: &Intrinsics, ty: Type) -> BasicTypeEnum { } } -fn trap_if_not_representatable_as_int( +fn trap_if_not_representable_as_int( builder: &Builder, intrinsics: &Intrinsics, context: &Context, function: &FunctionValue, - lower_bounds: f64, + lower_bound: f64, upper_bound: f64, value: FloatValue, ) { - enum FloatSize { - Bits32, - Bits64, - } + let float_ty = value.get_type(); + + let lower_bound = float_ty.const_float(lower_bound); + let upper_bound = float_ty.const_float(upper_bound); + + // The 'U' in the float predicate is short for "unordered" which means that + // the comparison will compare true if either operand is a NaN. Thus, NaNs + // are out of bounds. 
+ let above_upper_bound_cmp = + builder.build_float_compare(FloatPredicate::UGT, value, upper_bound, "above_upper_bound"); + let below_lower_bound_cmp = + builder.build_float_compare(FloatPredicate::ULT, value, lower_bound, "below_lower_bound"); + let out_of_bounds = builder.build_or( + above_upper_bound_cmp, + below_lower_bound_cmp, + "out_of_bounds", + ); let failure_block = context.append_basic_block(function, "conversion_failure_block"); let continue_block = context.append_basic_block(function, "conversion_success_block"); - let float_ty = value.get_type(); - let (int_ty, float_size) = if float_ty == intrinsics.f32_ty { - (intrinsics.i32_ty, FloatSize::Bits32) - } else if float_ty == intrinsics.f64_ty { - (intrinsics.i64_ty, FloatSize::Bits64) - } else { - unreachable!() - }; - - let (exponent, invalid_exponent) = { - let float_bits = builder - .build_bitcast(value, int_ty, "float_bits") - .into_int_value(); - let (shift_amount, exponent_mask, invalid_exponent) = match float_size { - FloatSize::Bits32 => (23, 0b01111111100000000000000000000000, 0b11111111), - FloatSize::Bits64 => ( - 52, - 0b0111111111110000000000000000000000000000000000000000000000000000, - 0b11111111111, - ), - }; - - builder.build_and( - float_bits, - int_ty.const_int(exponent_mask, false), - "masked_bits", - ); - - ( - builder.build_right_shift( - float_bits, - int_ty.const_int(shift_amount, false), - false, - "exponent", - ), - invalid_exponent, - ) - }; - - let is_invalid_float = builder.build_or( - builder.build_int_compare( - IntPredicate::EQ, - exponent, - int_ty.const_int(invalid_exponent, false), - "is_not_normal", - ), - builder.build_or( - builder.build_float_compare( - FloatPredicate::ULT, - value, - float_ty.const_float(lower_bounds), - "less_than_lower_bounds", - ), - builder.build_float_compare( - FloatPredicate::UGT, - value, - float_ty.const_float(upper_bound), - "greater_than_upper_bounds", - ), - "float_not_in_bounds", - ), - "is_invalid_float", - ); - - let 
is_invalid_float = builder - .build_call( - intrinsics.expect_i1, - &[ - is_invalid_float.as_basic_value_enum(), - intrinsics.i1_ty.const_int(0, false).as_basic_value_enum(), - ], - "is_invalid_float_expect", - ) - .try_as_basic_value() - .left() - .unwrap() - .into_int_value(); - - builder.build_conditional_branch(is_invalid_float, &failure_block, &continue_block); + builder.build_conditional_branch(out_of_bounds, &failure_block, &continue_block); builder.position_at_end(&failure_block); builder.build_call( intrinsics.throw_trap, @@ -1722,7 +1657,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I32TruncSF32 => { let v1 = state.pop1()?.into_float_value(); - trap_if_not_representatable_as_int( + trap_if_not_representable_as_int( builder, intrinsics, context, @@ -1737,7 +1672,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I32TruncSF64 => { let v1 = state.pop1()?.into_float_value(); - trap_if_not_representatable_as_int( + trap_if_not_representable_as_int( builder, intrinsics, context, @@ -1758,7 +1693,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I64TruncSF32 => { let v1 = state.pop1()?.into_float_value(); - trap_if_not_representatable_as_int( + trap_if_not_representable_as_int( builder, intrinsics, context, @@ -1773,7 +1708,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I64TruncSF64 => { let v1 = state.pop1()?.into_float_value(); - trap_if_not_representatable_as_int( + trap_if_not_representable_as_int( builder, intrinsics, context, @@ -1794,7 +1729,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I32TruncUF32 => { let v1 = state.pop1()?.into_float_value(); - trap_if_not_representatable_as_int( + trap_if_not_representable_as_int( builder, intrinsics, context, @@ -1809,7 +1744,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I32TruncUF64 => { let v1 = state.pop1()?.into_float_value(); - 
trap_if_not_representatable_as_int( + trap_if_not_representable_as_int( builder, intrinsics, context, @@ -1830,7 +1765,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I64TruncUF32 => { let v1 = state.pop1()?.into_float_value(); - trap_if_not_representatable_as_int( + trap_if_not_representable_as_int( builder, intrinsics, context, @@ -1845,7 +1780,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I64TruncUF64 => { let v1 = state.pop1()?.into_float_value(); - trap_if_not_representatable_as_int( + trap_if_not_representable_as_int( builder, intrinsics, context,