Skip to content

Commit dfdfaa1

Browse files
committed
Auto merge of #80200 - mahkoh:dst-offset, r=nagisa
Optimize DST field access

For `struct X<T: ?Sized>(T)` and `struct Y<T: ?Sized>(u8, T)`, the offset of the unsized field is `0` and `mem::align_of_val(&self.1)`, respectively. This patch changes the expression used to compute these offsets so that the optimizer can perform this optimization.

Consider
```rust
fn f(x: &X<dyn Any>) -> &dyn Any { &x.0 }
```
Before:
```asm
test:
    movq %rsi, %rdx
    movq 16(%rsi), %rax
    leaq -1(%rax), %rcx
    negq %rax
    andq %rcx, %rax
    addq %rdi, %rax
    retq
```
After:
```asm
test:
    movq %rsi, %rdx
    movq %rdi, %rax
    retq
```
2 parents 5b3d524 + be15114 commit dfdfaa1

File tree

1 file changed

+44
-10
lines changed
  • compiler/rustc_codegen_ssa/src/mir

1 file changed

+44
-10
lines changed

compiler/rustc_codegen_ssa/src/mir/place.rs

+44-10
Original file line numberDiff line numberDiff line change
@@ -178,16 +178,8 @@ impl<'a, 'tcx, V: CodegenObject> PlaceRef<'tcx, V> {
178178
// Get the alignment of the field
179179
let (_, unsized_align) = glue::size_and_align_of_dst(bx, field.ty, meta);
180180

181-
// Bump the unaligned offset up to the appropriate alignment using the
182-
// following expression:
183-
//
184-
// (unaligned offset + (align - 1)) & -align
185-
186-
// Calculate offset.
187-
let align_sub_1 = bx.sub(unsized_align, bx.cx().const_usize(1u64));
188-
let and_lhs = bx.add(unaligned_offset, align_sub_1);
189-
let and_rhs = bx.neg(unsized_align);
190-
let offset = bx.and(and_lhs, and_rhs);
181+
// Bump the unaligned offset up to the appropriate alignment
182+
let offset = round_up_const_value_to_alignment(bx, unaligned_offset, unsized_align);
191183

192184
debug!("struct_field_ptr: DST field offset: {:?}", offset);
193185

@@ -518,3 +510,45 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
518510
self.monomorphize(place_ty.ty)
519511
}
520512
}
513+
514+
/// Emits builder instructions that round `value` up to the next multiple of `align`
/// and returns the resulting backend value.
///
/// The emitted expression is `value + (-value & (align - 1))`. The `align - 1` mask
/// only yields a correct result when `align` is a power of two, so callers are
/// expected to pass such a value. The form is chosen so that a compile-time constant
/// `value` can be folded by the optimizer (see the comment in the body).
fn round_up_const_value_to_alignment<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
515+
bx: &mut Bx,
516+
value: Bx::Value,
517+
align: Bx::Value,
518+
) -> Bx::Value {
519+
// In pseudo code:
520+
//
521+
// if value & (align - 1) == 0 {
522+
// value
523+
// } else {
524+
// (value & !(align - 1)) + align
525+
// }
526+
//
527+
// Usually this is written without branches as
528+
//
529+
// (value + align - 1) & !(align - 1)
530+
//
531+
// But this formula cannot take advantage of constant `value`. E.g. if `value` is known
532+
// at compile time to be `1`, this expression should be optimized to `align`. However,
533+
// this optimization only holds if `align` is a power of two. Since the optimizer doesn't
534+
// know that `align` is a power of two, it cannot perform this optimization.
535+
//
536+
// Instead we use
537+
//
538+
// value + (-value & (align - 1))
539+
//
540+
// Since `align` is used only once, the expression can be optimized. For `value = 0`
541+
// it's optimized to `0` even in debug mode.
542+
//
543+
// NB: The previous version of this code used
544+
//
545+
// (value + align - 1) & -align
546+
//
547+
// Even though `-align == !(align - 1)`, LLVM failed to optimize this even for
548+
// `value = 0`. Bug report: https://bugs.llvm.org/show_bug.cgi?id=48559
549+
let one = bx.const_usize(1);
550+
let align_minus_1 = bx.sub(align, one);
551+
let neg_value = bx.neg(value);
552+
// `-value & (align - 1)` is the padding needed to reach the next multiple of `align`.
let offset = bx.and(neg_value, align_minus_1);
553+
bx.add(value, offset)
554+
}

0 commit comments

Comments
 (0)