JuliaLang · Jan 16, 2017
diff --git a/deps/llvm.mk b/deps/llvm.mk
+6 -1
diff --git a/deps/patches/llvm-PR276266.patch b/deps/patches/llvm-PR276266.patch
+51
diff --git a/deps/patches/llvm-PR277939.patch b/deps/patches/llvm-PR277939.patch
+169
@@ -488,7 +488,7 @@ $(eval $(call LLVM_PATCH,llvm-r282182)) # Remove for 4.0
 $(eval $(call LLVM_PATCH,llvm-3.9.0_cygwin)) # R283427, Remove for 4.0
 endif
 $(eval $(call LLVM_PATCH,llvm-PR22923)) # Remove for 4.0
-$(eval $(call LLVM_PATCH,llvm-arm-fix-prel31))
+$(eval $(call LLVM_PATCH,llvm-arm-fix-prel31)) # Remove for 4.0
 $(eval $(call LLVM_PATCH,llvm-D25865-cmakeshlib)) # Remove for 4.0
 # Cygwin and openSUSE still use win32-threads mingw, https://llvm.org/bugs/show_bug.cgi?id=26365
 $(eval $(call LLVM_PATCH,llvm-3.9.0_threads))
@@ -505,6 +505,11 @@ $(eval $(call LLVM_PATCH,llvm-D27397)) # Julia issue #19792, Remove for 4.0
 $(eval $(call LLVM_PATCH,llvm-D28009)) # Julia issue #19792, Remove for 4.0
 $(eval $(call LLVM_PATCH,llvm-D28215_FreeBSD_shlib))
 $(eval $(call LLVM_PATCH,llvm-D28221-avx512)) # mentioned in issue #19797
+$(eval $(call LLVM_PATCH,llvm-PR276266)) # Issue #19976, Remove for 4.0
+$(eval $(call LLVM_PATCH,llvm-PR278088)) # Issue #19976, Remove for 4.0
+$(eval $(call LLVM_PATCH,llvm-PR277939)) # Issue #19976, Remove for 4.0
+$(eval $(call LLVM_PATCH,llvm-PR278321)) # Issue #19976, Remove for 4.0
+$(eval $(call LLVM_PATCH,llvm-PR278923)) # Issue #19976, Remove for 4.0
 endif # LLVM_VER
 
 ifeq ($(LLVM_VER),3.7.1)
 
@@ -0,0 +1,51 @@
+From 64d1e8b748bca22ce205eab7634cc5418c827f18 Mon Sep 17 00:00:00 2001
+From: Marina Yatsina <marina.yatsina@intel.com>
+Date: Thu, 21 Jul 2016 12:37:07 +0000
+Subject: [PATCH 3/5] ExecutionDepsFix - Fix bug in clearance calculation
+
+The clearance calculation did not take into account registers defined as outputs or clobbers in inline assembly machine instructions because these register defs are implicit.
+
+Differential Revision: http://reviews.llvm.org/D22580
+
+
+
+git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@276266 91177308-0d34-0410-b5e6-96231b3b80d8
+---
+ lib/CodeGen/ExecutionDepsFix.cpp    |  2 --
+ test/CodeGen/X86/break-false-dep.ll | 10 ++++++++++
+ 2 files changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp
+index 566b8d507b2..1fe5f459b69 100644
+--- a/lib/CodeGen/ExecutionDepsFix.cpp
++++ b/lib/CodeGen/ExecutionDepsFix.cpp
+@@ -520,8 +520,6 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) {
+     MachineOperand &MO = MI->getOperand(i);
+     if (!MO.isReg())
+       continue;
+-    if (MO.isImplicit())
+-      break;
+     if (MO.isUse())
+       continue;
+     for (int rx : regIndices(MO.getReg())) {
+diff --git a/test/CodeGen/X86/break-false-dep.ll b/test/CodeGen/X86/break-false-dep.ll
+index 74a0728f918..a7cda499dab 100644
+--- a/test/CodeGen/X86/break-false-dep.ll
++++ b/test/CodeGen/X86/break-false-dep.ll
+@@ -199,3 +199,13 @@ for.end16:                                        ; preds = %for.inc14
+ ;AVX-NEXT: vmulsd {{.*}}, [[XMM0]], [[XMM0]]
+ ;AVX-NEXT: vmovsd [[XMM0]],
+ }
++
++define double @inlineasmdep(i64 %arg) {
++top:
++  tail call void asm sideeffect "", "~{xmm0},~{dirflag},~{fpsr},~{flags}"()
++  %tmp1 = sitofp i64 %arg to double
++  ret double %tmp1
++;AVX-LABEL:@inlineasmdep
++;AVX: vxorps  [[XMM0:%xmm[0-9]+]], [[XMM0]], [[XMM0]]
++;AVX-NEXT: vcvtsi2sdq {{.*}}, [[XMM0]], {{%xmm[0-9]+}}
++}
+-- 
+2.11.0
+
@@ -0,0 +1,169 @@
+From 9790ab8bccdbc71dfcc166860ab6ce9c369bf686 Mon Sep 17 00:00:00 2001
+From: Simon Pilgrim <llvm-dev@redking.me.uk>
+Date: Sat, 6 Aug 2016 21:21:12 +0000
+Subject: [PATCH 1/5] [X86][AVX2] Improve sign/zero extension on AVX2 targets
+
+Split extensions to large vectors into 256-bit chunks - the equivalent of the split into 128-bit chunks that we already do on pre-AVX2 targets.
+
+git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@277939 91177308-0d34-0410-b5e6-96231b3b80d8
+---
+ lib/Target/X86/X86ISelLowering.cpp | 22 +++++++++++++++-------
+ test/CodeGen/X86/vec_int_to_fp.ll  | 24 ++++++++----------------
+ test/CodeGen/X86/vector-sext.ll    | 10 ++--------
+ 3 files changed, 25 insertions(+), 31 deletions(-)
+
+diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
+index ca205335013..2bbedd4bd97 100644
+--- a/lib/Target/X86/X86ISelLowering.cpp
++++ b/lib/Target/X86/X86ISelLowering.cpp
+@@ -30164,11 +30164,9 @@ static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
+                : DAG.getZeroExtendVectorInReg(ExOp, DL, VT);
+   }
+ 
+-  // On pre-AVX2 targets, split into 128-bit nodes of
+-  // ISD::*_EXTEND_VECTOR_INREG.
+-  if (!Subtarget.hasInt256() && !(VT.getSizeInBits() % 128)) {
+-    unsigned NumVecs = VT.getSizeInBits() / 128;
+-    unsigned NumSubElts = 128 / SVT.getSizeInBits();
++  auto SplitAndExtendInReg = [&](unsigned SplitSize) {
++    unsigned NumVecs = VT.getSizeInBits() / SplitSize;
++    unsigned NumSubElts = SplitSize / SVT.getSizeInBits();
+     EVT SubVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumSubElts);
+     EVT InSubVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumSubElts);
+ 
+@@ -30176,14 +30174,24 @@ static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
+     for (unsigned i = 0, Offset = 0; i != NumVecs; ++i, Offset += NumSubElts) {
+       SDValue SrcVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InSubVT, N0,
+                                    DAG.getIntPtrConstant(Offset, DL));
+-      SrcVec = ExtendVecSize(DL, SrcVec, 128);
++      SrcVec = ExtendVecSize(DL, SrcVec, SplitSize);
+       SrcVec = Opcode == ISD::SIGN_EXTEND
+                    ? DAG.getSignExtendVectorInReg(SrcVec, DL, SubVT)
+                    : DAG.getZeroExtendVectorInReg(SrcVec, DL, SubVT);
+       Opnds.push_back(SrcVec);
+     }
+     return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
+-  }
++  };
++
++  // On pre-AVX2 targets, split into 128-bit nodes of
++  // ISD::*_EXTEND_VECTOR_INREG.
++  if (!Subtarget.hasInt256() && !(VT.getSizeInBits() % 128))
++    return SplitAndExtendInReg(128);
++
++  // On pre-AVX512 targets, split into 256-bit nodes of
++  // ISD::*_EXTEND_VECTOR_INREG.
++  if (!Subtarget.hasAVX512() && !(VT.getSizeInBits() % 256))
++    return SplitAndExtendInReg(256);
+ 
+   return SDValue();
+ }
+diff --git a/test/CodeGen/X86/vec_int_to_fp.ll b/test/CodeGen/X86/vec_int_to_fp.ll
+index 43f5318a607..5d8f91385c7 100644
+--- a/test/CodeGen/X86/vec_int_to_fp.ll
++++ b/test/CodeGen/X86/vec_int_to_fp.ll
+@@ -153,8 +153,7 @@ define <2 x double> @sitofp_16i8_to_2f64(<16 x i8> %a) {
+ ;
+ ; AVX2-LABEL: sitofp_16i8_to_2f64:
+ ; AVX2:       # BB#0:
+-; AVX2-NEXT:    vpmovsxbw %xmm0, %ymm0
+-; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
++; AVX2-NEXT:    vpmovsxbd %xmm0, %ymm0
+ ; AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
+ ; AVX2-NEXT:    # kill
+ ; AVX2-NEXT:    vzeroupper
+@@ -325,8 +324,7 @@ define <4 x double> @sitofp_16i8_to_4f64(<16 x i8> %a) {
+ ;
+ ; AVX2-LABEL: sitofp_16i8_to_4f64:
+ ; AVX2:       # BB#0:
+-; AVX2-NEXT:    vpmovsxbw %xmm0, %ymm0
+-; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
++; AVX2-NEXT:    vpmovsxbd %xmm0, %ymm0
+ ; AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
+ ; AVX2-NEXT:    retq
+   %cvt = sitofp <16 x i8> %a to <16 x double>
+@@ -543,8 +541,7 @@ define <2 x double> @uitofp_16i8_to_2f64(<16 x i8> %a) {
+ ;
+ ; AVX2-LABEL: uitofp_16i8_to_2f64:
+ ; AVX2:       # BB#0:
+-; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+-; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
++; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+ ; AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
+ ; AVX2-NEXT:    # kill
+ ; AVX2-NEXT:    vzeroupper
+@@ -778,8 +775,7 @@ define <4 x double> @uitofp_16i8_to_4f64(<16 x i8> %a) {
+ ;
+ ; AVX2-LABEL: uitofp_16i8_to_4f64:
+ ; AVX2:       # BB#0:
+-; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+-; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
++; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+ ; AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
+ ; AVX2-NEXT:    retq
+   %cvt = uitofp <16 x i8> %a to <16 x double>
+@@ -958,8 +954,7 @@ define <4 x float> @sitofp_16i8_to_4f32(<16 x i8> %a) {
+ ;
+ ; AVX2-LABEL: sitofp_16i8_to_4f32:
+ ; AVX2:       # BB#0:
+-; AVX2-NEXT:    vpmovsxbw %xmm0, %ymm0
+-; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
++; AVX2-NEXT:    vpmovsxbd %xmm0, %ymm0
+ ; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
+ ; AVX2-NEXT:    # kill
+ ; AVX2-NEXT:    vzeroupper
+@@ -1134,8 +1129,7 @@ define <8 x float> @sitofp_16i8_to_8f32(<16 x i8> %a) {
+ ;
+ ; AVX2-LABEL: sitofp_16i8_to_8f32:
+ ; AVX2:       # BB#0:
+-; AVX2-NEXT:    vpmovsxbw %xmm0, %ymm0
+-; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
++; AVX2-NEXT:    vpmovsxbd %xmm0, %ymm0
+ ; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
+ ; AVX2-NEXT:    retq
+   %cvt = sitofp <16 x i8> %a to <16 x float>
+@@ -1456,8 +1450,7 @@ define <4 x float> @uitofp_16i8_to_4f32(<16 x i8> %a) {
+ ;
+ ; AVX2-LABEL: uitofp_16i8_to_4f32:
+ ; AVX2:       # BB#0:
+-; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+-; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
++; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+ ; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
+ ; AVX2-NEXT:    # kill
+ ; AVX2-NEXT:    vzeroupper
+@@ -1813,8 +1806,7 @@ define <8 x float> @uitofp_16i8_to_8f32(<16 x i8> %a) {
+ ;
+ ; AVX2-LABEL: uitofp_16i8_to_8f32:
+ ; AVX2:       # BB#0:
+-; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+-; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
++; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+ ; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
+ ; AVX2-NEXT:    retq
+   %cvt = uitofp <16 x i8> %a to <16 x float>
+diff --git a/test/CodeGen/X86/vector-sext.ll b/test/CodeGen/X86/vector-sext.ll
+index 018c5922a43..e29f3e5f91f 100644
+--- a/test/CodeGen/X86/vector-sext.ll
++++ b/test/CodeGen/X86/vector-sext.ll
+@@ -407,15 +407,9 @@ define <8 x i64> @sext_16i8_to_8i64(<16 x i8> %A) nounwind uwtable readnone ssp
+ ;
+ ; AVX2-LABEL: sext_16i8_to_8i64:
+ ; AVX2:       # BB#0: # %entry
+-; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+-; AVX2-NEXT:    vpslld $24, %xmm1, %xmm1
+-; AVX2-NEXT:    vpsrad $24, %xmm1, %xmm1
+-; AVX2-NEXT:    vpmovsxdq %xmm1, %ymm2
++; AVX2-NEXT:    vpmovsxbq %xmm0, %ymm2
+ ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+-; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+-; AVX2-NEXT:    vpslld $24, %xmm0, %xmm0
+-; AVX2-NEXT:    vpsrad $24, %xmm0, %xmm0
+-; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm1
++; AVX2-NEXT:    vpmovsxbq %xmm0, %ymm1
+ ; AVX2-NEXT:    vmovdqa %ymm2, %ymm0
+ ; AVX2-NEXT:    retq
+ ;
+-- 
+2.11.0
+