LLVM fails to eliminate bounds checking for fixed-size arrays #9024

zeux · 2013-09-06T15:35:27Z

Consider the following code:

fn add(a: [f32, ..3], b: [f32, ..3]) -> [f32, ..3] {
    let mut s: [f32, ..3] = [0.0, ..3];
    for i in range(0, 3) {
        s[i] = a[i] + b[i];
    }
    s
}

This function results in the following LLVM assembly when compiled with --opt-level 3:

define void @_ZN3bar16_d2fb2ba5dc6ea744v0.0E([3 x float]* nocapture, { i64, %tydesc*, i8*, i8*, i8 }* nocapture readnone, [3 x float]* nocapture readonly, [3 x float]* nocapture readonly) #1 {
"function top level":
  %s = alloca [3 x float], align 4
  %4 = bitcast [3 x float]* %s to i8*
  call void @llvm.memset.p0i8.i64(i8* %4, i8 0, i64 12, i32 4, i1 false)
  br label %match_else

match_else:                                       ; preds = %"function top level", %next6
  %.sroa.013.0.load1922 = phi i64 [ 0, %"function top level" ], [ %8, %next6 ]
  %5 = shl i64 %.sroa.013.0.load1922, 2
  %6 = icmp ugt i64 %5, 11
  br i1 %6, label %cond, label %next6

match_case:                                       ; preds = %next6
  %7 = bitcast [3 x float]* %0 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %7, i8* %4, i64 12, i32 4, i1 false)
  ret void

cond:                                             ; preds = %match_else
  tail call void @"_ZN8unstable4lang17fail_bounds_check17_7112ff25e39642a211v0.8$x2dpreE"({ i64, %tydesc*, i8*, i8*, i8 }* undef, i8* getelementptr inbounds ([8 x i8]* @str998, i64 0, i64 0), i64 5, i64 %.sroa.013.0.load1922, i64 3)
  unreachable

next6:                                            ; preds = %match_else
  %8 = add i64 %.sroa.013.0.load1922, 1
  %9 = getelementptr inbounds [3 x float]* %3, i64 0, i64 %.sroa.013.0.load1922
  %10 = getelementptr inbounds [3 x float]* %2, i64 0, i64 %.sroa.013.0.load1922
  %11 = load float* %9, align 4
  %12 = load float* %10, align 4
  %13 = fadd float %11, %12
  %14 = getelementptr inbounds [3 x float]* %s, i64 0, i64 %.sroa.013.0.load1922
  store float %13, float* %14, align 4
  %15 = icmp slt i64 %8, 3
  br i1 %15, label %match_else, label %match_case
}

What's weird is that LLVM's opt (from LLVM 3.2) can optimize this just fine. So this may be an optimization regression in newer LLVM, or we may be missing some optimization passes.

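Also, on a possibly unrelated note: the Mul used for the bounds check (index × element size, visible as the shl/icmp pair in the IR above) can overflow: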

let x = [0i32, ..16]; x[1u << 63]

The address computation overflows as well, yielding x[0] as a result.

The text was updated successfully, but these errors were encountered:

alexcrichton · 2013-09-07T02:34:39Z

Interesting. If you remove the alloca of %s at the top, the memcpy at the end, and store directly into %0 instead of %s, then the bounds check will be eliminated.

For some reason, it appears that the last memcpy in the match_case block is preventing the cond block from being eliminated...

I was testing this by passing the following code to llvm's opt command

(I added the noalias attributes myself; that didn't help.)

; ModuleID = 'foo.rc'

%tydesc = type { i64, i64, void ({}*, i8*)*, void ({}*, i8*)*, void ({}*, i8*)*, void ({}*, i8*)*, i64, { i8*, i64 } }

declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)

declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)

define void @_ZN3add16_d492e97021e66524v0.0E([3 x float]* nocapture noalias, {
        i64, %tydesc*, i8*, i8*, i8 }* nocapture readnone, [3 x float]*
        nocapture noalias readonly, [3 x float]* nocapture noalias readonly) {
"function top level":
  %s = alloca [3 x float], align 4
  %4 = bitcast [3 x float]* %s to i8*
  call void @llvm.memset.p0i8.i64(i8* %4, i8 0, i64 12, i32 4, i1 false)
  br label %match_else

match_else:                                       ; preds = %"function top level", %next6
  %.sroa.013.0.load1922 = phi i64 [ 0, %"function top level" ], [ %8, %next6 ]
  %5 = shl i64 %.sroa.013.0.load1922, 2
  %6 = icmp ugt i64 %.sroa.013.0.load1922, 2
  br i1 %6, label %cond, label %next6

match_case:                                       ; preds = %next6
  %7 = bitcast [3 x float]* %0 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %7, i8* %4, i64 12, i32 4, i1 false)
  ret void

cond:                                             ; preds = %match_else
    ret void

next6:                                            ; preds = %match_else
  %8 = add i64 %.sroa.013.0.load1922, 1
  %9 = getelementptr inbounds [3 x float]* %3, i64 0, i64 %.sroa.013.0.load1922
  %10 = getelementptr inbounds [3 x float]* %2, i64 0, i64 %.sroa.013.0.load1922
  %11 = load float* %9, align 4
  %12 = load float* %10, align 4
  %13 = fadd float %11, %12
  %14 = getelementptr inbounds [3 x float]* %s, i64 0, i64 %.sroa.013.0.load1922
  store float %13, float* %14, align 4
  %15 = icmp slt i64 %8, 3
  br i1 %15, label %match_else, label %match_case
}

thestinger · 2013-09-18T01:27:47Z

This is still an issue despite the return value being tagged as sret and noalias now.

dotdash · 2014-01-12T13:12:02Z

This looks like a regression that happened somewhere between LLVM 3.2 and 3.3. I reduced the code to just a loop with the bounds check:

; ModuleID = '<stdin>'

define void @test() {
"function top level":
  br label %loop

loop:                                             ; preds = %body, %"function top level"
  %0 = phi i64 [ 0, %"function top level" ], [ %2, %body ]
  %1 = icmp ugt i64 %0, 2
  br i1 %1, label %fail, label %body

fail:                                             ; preds = %loop
  tail call void @bounds_fail()
  unreachable

body:                                             ; preds = %loop
  %2 = add i64 %0, 1
  %3 = icmp slt i64 %2, 3
  br i1 %3, label %loop, label %out

out:                                              ; preds = %body
  ret void
}

declare void @bounds_fail()

and opt from LLVM 3.2 optimizes that to:

; ModuleID = '<stdin>'

define void @test() {
body.2:
  ret void
}

but opt from LLVM 3.3, 3.4, and a 3.5 snapshot as of svn197556 leave it unchanged.

dotdash · 2014-01-12T14:16:03Z

Filed a bug against LLVM at http://llvm.org/bugs/show_bug.cgi?id=18449

dotdash · 2014-03-02T18:03:37Z

Looks like this is fixed. Using rustc 0.10-pre (3d117cf 2014-03-01 03:06:31 -0800) I get:

; Function Attrs: uwtable
define void @_ZN6addf3220h7431f4d506cb9520daa4v0.0E([3 x float]* noalias nocapture sret, [3 x float]* noalias nocapture readonly, [3 x float]* noalias nocapture readonly) unnamed_addr #1 {
entry-block:
  %s = alloca [3 x float], align 4
  %3 = bitcast [3 x float]* %s to i8*
  call void @llvm.memset.p0i8.i64(i8* %3, i8 0, i64 12, i32 4, i1 false)
  br label %next7

next7:                                            ; preds = %next7, %entry-block
  %4 = phi i64 [ 0, %entry-block ], [ %5, %next7 ]
  %5 = add i64 %4, 1
  %6 = getelementptr inbounds [3 x float]* %2, i64 0, i64 %4
  %7 = getelementptr inbounds [3 x float]* %1, i64 0, i64 %4
  %8 = load float* %6, align 4
  %9 = load float* %7, align 4
  %10 = fadd float %8, %9
  %11 = getelementptr inbounds [3 x float]* %s, i64 0, i64 %4
  store float %10, float* %11, align 4
  %12 = icmp slt i64 %5, 3
  br i1 %12, label %next7, label %join9

join9:                                            ; preds = %next7
  %13 = bitcast [3 x float]* %0 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %13, i8* %3, i64 12, i32 4, i1 false)
  ret void
}

unused_async: lint async methods

Now lints:

```rust
impl Foo {
    async fn method(&self) -> &'static str {
        "no await here"
    }
}
```

changelog: [`unused_async`]: lint async methods

Fixes rust-lang#9024

thestinger closed this as completed Mar 2, 2014

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

LLVM fails to eliminate bounds checking for fixed-size arrays #9024

LLVM fails to eliminate bounds checking for fixed-size arrays #9024

zeux commented Sep 6, 2013

alexcrichton commented Sep 7, 2013

thestinger commented Sep 18, 2013

dotdash commented Jan 12, 2014

dotdash commented Jan 12, 2014

dotdash commented Mar 2, 2014

LLVM fails to eliminate bounds checking for fixed-size arrays #9024

LLVM fails to eliminate bounds checking for fixed-size arrays #9024

Comments

zeux commented Sep 6, 2013

alexcrichton commented Sep 7, 2013

thestinger commented Sep 18, 2013

dotdash commented Jan 12, 2014

dotdash commented Jan 12, 2014

dotdash commented Mar 2, 2014