Auto merge of #80200 - mahkoh:dst-offset, r=nagisa

bors · bors · commit dfdfaa1f0442 · 2021-01-07T03:13:21.000Z
Optimize DST field access

For

    struct X&lt;T: ?Sized&gt;(T)
    struct Y&lt;T: ?Sized&gt;(u8, T)

the offset of the unsized field is

    0
    mem::align_of_val(&amp;self.1)

respectively. This patch changes the expression used to compute these
offsets so that the optimizer can perform this optimization.

Consider

```rust
fn f(x: &amp;X&lt;dyn Any&gt;) -&gt; &amp;dyn Any {
    &amp;x.0
}
```

Before:

```asm
test:
	movq	%rsi, %rdx
	movq	16(%rsi), %rax
	leaq	-1(%rax), %rcx
	negq	%rax
	andq	%rcx, %rax
	addq	%rdi, %rax
	retq
```

After:

```asm
test:
	movq	%rsi, %rdx
	movq	%rdi, %rax
	retq
```
diff --git a/compiler/rustc_codegen_ssa/src/mir/place.rs b/compiler/rustc_codegen_ssa/src/mir/place.rs
@@ -178,16 +178,8 @@ impl<'a, 'tcx, V: CodegenObject> PlaceRef<'tcx, V> {
         // Get the alignment of the field
         let (_, unsized_align) = glue::size_and_align_of_dst(bx, field.ty, meta);
 
-        // Bump the unaligned offset up to the appropriate alignment using the
-        // following expression:
-        //
-        //     (unaligned offset + (align - 1)) & -align
-
-        // Calculate offset.
-        let align_sub_1 = bx.sub(unsized_align, bx.cx().const_usize(1u64));
-        let and_lhs = bx.add(unaligned_offset, align_sub_1);
-        let and_rhs = bx.neg(unsized_align);
-        let offset = bx.and(and_lhs, and_rhs);
+        // Bump the unaligned offset up to the appropriate alignment
+        let offset = round_up_const_value_to_alignment(bx, unaligned_offset, unsized_align);
 
         debug!("struct_field_ptr: DST field offset: {:?}", offset);
 
@@ -518,3 +510,45 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
         self.monomorphize(place_ty.ty)
     }
 }
+
+fn round_up_const_value_to_alignment<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
+    bx: &mut Bx,
+    value: Bx::Value,
+    align: Bx::Value,
+) -> Bx::Value {
+    // In pseudo code:
+    //
+    //     if value & (align - 1) == 0 {
+    //         value
+    //     } else {
+    //         (value & !(align - 1)) + align
+    //     }
+    //
+    // Usually this is written without branches as
+    //
+    //     (value + align - 1) & !(align - 1)
+    //
+    // But this formula cannot take advantage of constant `value`. E.g. if `value` is known
+    // at compile time to be `1`, this expression should be optimized to `align`. However,
+    // optimization only holds if `align` is a power of two. Since the optimizer doesn't know
+    // that `align` is a power of two, it cannot perform this optimization.
+    //
+    // Instead we use
+    //
+    //     value + (-value & (align - 1))
+    //
+    // Since `align` is used only once, the expression can be optimized. For `value = 0`
+    // its optimized to `0` even in debug mode.
+    //
+    // NB: The previous version of this code used
+    //
+    //     (value + align - 1) & -align
+    //
+    // Even though `-align == !(align - 1)`, LLVM failed to optimize this even for
+    // `value = 0`. Bug report: https://bugs.llvm.org/show_bug.cgi?id=48559
+    let one = bx.const_usize(1);
+    let align_minus_1 = bx.sub(align, one);
+    let neg_value = bx.neg(value);
+    let offset = bx.and(neg_value, align_minus_1);
+    bx.add(value, offset)
+}