diff --git a/base/inference.jl b/base/inference.jl
index cf6e2768cd87e..c5534d83c10f7 100644
--- a/base/inference.jl
+++ b/base/inference.jl
@@ -1129,6 +1129,9 @@ function tmerge(typea::ANY, typeb::ANY)
     if typeb <: typea
         return typea
     end
+    if typea <: Tuple && typeb <: Tuple
+        return Tuple
+    end
     u = Union(typea, typeb)
     if length(u.types) > MAX_TYPEUNION_LEN || type_too_complex(u, 0)
         # don't let type unions get too big
@@ -1824,7 +1827,7 @@ function without_linenums(a::Array{Any,1})
     l
 end
 
-const _pure_builtins = {tuple, tupleref, tuplelen, fieldtype, apply_type, is, isa, typeof} # known affect-free calls (also effect-free)
+const _pure_builtins = {tuple, tupleref, tuplelen, fieldtype, apply_type, is, isa, typeof, typeassert} # known affect-free calls (also effect-free)
 const _pure_builtins_volatile = {getfield, arrayref} # known effect-free calls (might not be affect-free)
 
 function is_pure_builtin(f)
@@ -1854,6 +1857,9 @@ function effect_free(e::ANY, sv, allow_volatile::Bool)
         isa(e,TopNode) || isa(e,QuoteNode) || isa(e,Type) || isa(e,Tuple)
         return true
     end
+    if isconstantfunc(e, sv) !== false
+        return true
+    end
     if isa(e,Expr)
         e = e::Expr
         if e.head === :static_typeof
@@ -1862,62 +1868,55 @@ function effect_free(e::ANY, sv, allow_volatile::Bool)
         ea = e.args
         if e.head === :call || e.head === :call1
             if is_known_call_p(e, is_pure_builtin, sv)
-                if !allow_volatile && is_known_call_p(e, (f)->contains_is(_pure_builtins_volatile, f), sv)
-                    # arguments must be immutable to ensure e is affect_free
-                    first = true
-                    for a in ea
-                        if first # first "arg" is the function name
-                            first = false
-                            continue
-                        end
-                        if isa(a,Symbol)
-                            return false
-                        end
-                        if isa(a,SymbolNode)
-                            typ = (a::SymbolNode).typ
-                            if !isa(typ,Tuple)
-                                if !isa(typ,DataType) || typ.mutable
-                                    return false
+                if !allow_volatile
+                    if is_known_call(e, arrayref, sv)
+                        return false
+                    elseif is_known_call(e, getfield, sv)
+                        # arguments must be immutable to ensure e is affect_free
+                        first = true
+                        for a in ea
+                            if first # first "arg" is the function name
+                                first = false
+                                continue
+                            end
+                            if isa(a,Symbol)
+                                return false
+                            end
+                            if isa(a,SymbolNode)
+                                typ = (a::SymbolNode).typ
+                                if !isa(typ,Tuple)
+                                    if !isa(typ,DataType) || typ.mutable
+                                        return false
+                                    end
                                 end
                             end
+                            if !effect_free(a,sv,allow_volatile)
+                                return false
+                            end
                         end
-                        if !effect_free(a,sv,allow_volatile)
-                            return false
-                        end
-                    end
-                else
-                    # arguments must also be effect_free
-                    for a in ea
-                        if !effect_free(a,sv,allow_volatile)
-                            return false
-                        end
+                        return true
                     end
                 end
-                return true
             end
-        elseif e.head == :new
-            first = !allow_volatile
-            for a in ea
-                if first
-                    first = false
-                    typ = exprtype(a)
-                    if !isType(typ) || !isa((typ::Type).parameters[1],DataType) || ((typ::Type).parameters[1]::DataType).mutable
-                        return false
-                    end
-                end
-                if !effect_free(a,sv,allow_volatile)
+        elseif e.head === :new
+            if !allow_volatile
+                a = ea[1]
+                typ = exprtype(a)
+                if !isType(typ) || !isa((typ::Type).parameters[1],DataType) || ((typ::Type).parameters[1]::DataType).mutable
                     return false
                 end
             end
-            return true
-        elseif e.head == :return
-            for a in ea
-                if !effect_free(a,sv,allow_volatile)
-                    return false
-                end
+        elseif e.head === :return
+            # pass
+        else
+            return false
+        end
+        for a in ea
+            if !effect_free(a,sv,allow_volatile)
+                return false
             end
-            return true
         end
+        return true
     end
     return false
 end
@@ -2005,12 +2004,6 @@ function inlineable(f, e::Expr, atypes, sv, enclosing_ast)
     #     end
     # end
 
-    # when 1 method matches the inferred types, there is still a chance
-    # of a no-method error at run time, unless the inferred types are a
-    # subset of the method signature.
-    if !(atypes <: meth[1])
-        return NF
-    end
     if !isa(linfo,LambdaStaticData) || meth[3].func.env !== ()
         return NF
     end
@@ -2026,7 +2019,16 @@ function inlineable(f, e::Expr, atypes, sv, enclosing_ast)
             spvals[i] = QuoteNode(spvals[i])
         end
     end
-    (ast, ty) = typeinf(linfo, meth[1], meth[2], linfo)
+
+    methargs = meth[1]::Tuple
+    nm = length(methargs)
+    if !(atypes <: methargs)
+        incompletematch = true
+    else
+        incompletematch = false
+    end
+
+    (ast, ty) = typeinf(linfo, methargs, meth[2]::Tuple, linfo)
     if is(ast,())
         return NF
     end
@@ -2046,8 +2048,40 @@ function inlineable(f, e::Expr, atypes, sv, enclosing_ast)
 
     body = Expr(:block)
     body.args = without_linenums(ast.args[3].args)::Array{Any,1}
-    if !inline_worthy(body)
-        return NF
+    need_mod_annotate = true
+    cost = 1.0
+    if incompletematch
+        cost *= 4
+    end
+    if is(f, next) || is(f, done)
+        cost /= 4
+    end
+    if !inline_worthy(body, cost)
+        if incompletematch
+            # inline a typeassert-based call-site, rather than a
+            # full generic lookup, using the inliner to handle
+            # all the fiddly details
+            numarg = length(argexprs)
+            newnames = unique_names(ast,numarg)
+            sp = ()
+            spvals = {}
+            meth = (methargs, sp)
+            locals = {}
+            newcall = Expr(:call, e.args[1])
+            newcall.typ = ty
+            for i = 1:numarg
+                name = newnames[i]
+                argtype = exprtype(argexprs[i])
+                argtype = typeintersect(argtype,Any)  # remove Undef
+                push!(locals, {name,argtype,0})
+                push!(newcall.args, argtype===Any ? name : SymbolNode(name, argtype))
+            end
+            body.args = {Expr(:return, newcall)}
+            ast = Expr(:lambda, newnames, {{}, locals, {}}, body)
+            need_mod_annotate = false
+        else
+            return NF
+        end
     end
 
     spnames = { sp[i].name for i=1:2:length(sp) }
@@ -2059,6 +2093,7 @@ function inlineable(f, e::Expr, atypes, sv, enclosing_ast)
     args = f_argnames(ast)
     na = length(args)
 
+    isva = false
     if na>0 && is_rest_arg(ast.args[1][na])
         vaname = args[na]
         len_argexprs = length(argexprs)
@@ -2093,8 +2128,17 @@ function inlineable(f, e::Expr, atypes, sv, enclosing_ast)
             # construct tuple-forming expression for argument tail
             vararg = mk_tuplecall(argexprs[na:end])
             argexprs = {argexprs[1:(na-1)]..., vararg}
-        end
+            isva = true
+        end
+    elseif na != length(argexprs)
+        # we have a method match only because an earlier
+        # inference step shortened our call args list, even
+        # though we have too many arguments to actually
+        # call this function
+        @assert isvarargtype(atypes[na])
+        return NF
     end
+
     @assert na == length(argexprs)
 
     if needcopy
@@ -2116,23 +2160,76 @@ function inlineable(f, e::Expr, atypes, sv, enclosing_ast)
     end
 
     # annotate variables in the body expression with their module
-    mfrom = linfo.module; mto = (inference_stack::CallStack).mod
-    try
-        body = resolve_globals(body, enc_locllist, enclosing_ast.args[1], mfrom, mto, args, spnames)
-    catch ex
-        if isa(ex,GetfieldNode)
-            return NF
+    if need_mod_annotate
+        mfrom = linfo.module; mto = (inference_stack::CallStack).mod
+        try
+            body = resolve_globals(body, enc_locllist, enclosing_ast.args[1], mfrom, mto, args, spnames)
+        catch ex
+            if isa(ex,GetfieldNode)
+                return NF
+            end
+            rethrow(ex)
         end
-        rethrow(ex)
     end
 
     # see if each argument occurs only once in the body expression
     stmts = {}
     stmts_free = true # true = all entries of stmts are effect_free
-    for i=length(args):-1:1 # stmts_free needs to be calculated in reverse-argument order
+
+    # when 1 method matches the inferred types, there is still a chance
+    # of a no-method error at run time, unless the inferred types are a
+    # subset of the method signature.
+    if incompletematch
+        t = Expr(:call) # tuple(args...)
+        t.typ = Tuple
+        argexprs2 = t.args
+        icall = LabelNode(label_counter(body.args)+1)
+        partmatch = Expr(:gotoifnot, false, icall.label)
+        thrw = Expr(:call, :throw, Expr(:call, Main.Base.MethodError, f, t))
+        thrw.typ = None
+    end
+
+    for i=na:-1:1 # stmts_free needs to be calculated in reverse-argument order
         a = args[i]
         aei = argexprs[i]
-        aeitype = exprtype(aei)
+        aeitype = argtype = exprtype(aei)
+        needtypeassert = false
+        if incompletematch
+            if isva
+                if nm == 0
+                    methitype = ()
+                elseif i > nm
+                    methitype = methargs[end]
+                    if isvarargtype(methitype)
+                        methitype = (methitype,)
+                    else
+                        methitype = ()
+                    end
+                else
+                    methitype = tuple(methargs[i:end]...)
+                end
+                isva = false
+            else
+                if i < nm
+                    methitype = methargs[i]
+                else
+                    methitype = methargs[end]
+                    if isvarargtype(methitype)
+                        methitype = (methitype::Vararg).parameters[1]
+                    else
+                        @assert i==nm
+                    end
+                end
+            end
+            if isa(methitype,TypeVar) # eliminate ANY
+                methitype = methitype.ub
+            end
+            if !(aeitype <: methitype)
+                #TODO: make Undef a faster special-case
+                needtypeassert = true
+                aeitype = methitype
+            end
+        end
 
         islocal = false # if the argument name is also used as a local variable,
                         # we need to keep it as a variable name
@@ -2151,26 +2248,51 @@ function inlineable(f, e::Expr, atypes, sv, enclosing_ast)
         # ok for argument to occur more than once if the actual argument
         # is a symbol or constant, or is not affected by previous statements
         # that will exist after the inlining pass finishes
-        affect_free = stmts_free && !islocal # false = previous statements might affect the result of evaluating argument
-        occ = 0
-        for j = length(body.args):-1:1
-            b = body.args[j]
-            if occ < 1
-                occ += occurs_more(b, x->is(x,a), 5)
-            end
-            if occ > 0 && affect_free && !effect_free(b, sv, true) #TODO: we could short-circuit this test better by memoizing effect_free(b) in the for loop over i
-                affect_free = false
-            end
-            if occ > 5
-                occ = 6
-                break
+        if needtypeassert
+            vnew1 = unique_name(enclosing_ast, ast)
+            add_variable(enclosing_ast, vnew1, aeitype, !islocal)
+            v1 = (aeitype===Any ? vnew1 : SymbolNode(vnew1,aeitype))
+            push!(spnames, a)
+            push!(spvals, v1)
+            vnew2 = unique_name(enclosing_ast, ast)
+            v2 = (argtype===Any ? vnew2 : SymbolNode(vnew2,argtype))
+            unshift!(body.args, Expr(:(=), a, v2))
+            args[i] = a = vnew2
+            islocal = false
+            aeitype = argtype
+            affect_free = stmts_free
+            occ = 3
+            # it's really late in codegen, so we expand the typeassert manually: cond = !isa(vnew2, methitype) | cond
+            cond = Expr(:call, Intrinsics.isa, v2, methitype)
+            cond.typ = Bool
+            cond = Expr(:call, Intrinsics.not_int, cond)
+            cond.typ = Bool
+            cond = Expr(:call, Intrinsics.or_int, cond, partmatch.args[1])
+            cond.typ = Bool
+            cond = Expr(:call, Intrinsics.box, Bool, cond)
+            cond.typ = Bool
+            partmatch.args[1] = cond
+        else
+            affect_free = stmts_free && !islocal # false = previous statements might affect the result of evaluating argument
+            occ = 0
+            for j = length(body.args):-1:1
+                b = body.args[j]
+                if occ < 1
+                    occ += occurs_more(b, x->is(x,a), 5)
+                end
+                if occ > 0 && affect_free && !effect_free(b, sv, true) #TODO: we could short-circuit this test better by memoizing effect_free(b) in the for loop over i
+                    affect_free = false
+                end
+                if occ > 5
+                    occ = 6
+                    break
+                end
             end
         end
         free = effect_free(aei,sv,true)
-        if ((occ==0 && is(aeitype,None)) || islocal || (occ > 1 && !inline_worthy(aei, occ)) ||
+        if ((occ==0 && is(aeitype,None)) || islocal || (occ > 1 && !inline_worthy(aei, occ*2)) ||
                 (affect_free && !free) || (!affect_free && !effect_free(aei,sv,false)))
-            if occ != 0 # islocal=true is implied
-                # introduce variable for this argument
+            if occ != 0 # islocal=true is implied by occ!=0
                 vnew = unique_name(enclosing_ast, ast)
                 add_variable(enclosing_ast, vnew, aeitype, !islocal)
                 unshift!(stmts, Expr(:(=), vnew, aei))
@@ -2181,6 +2303,15 @@ function inlineable(f, e::Expr, atypes, sv, enclosing_ast)
                 stmts_free = false
             end
         end
+        if incompletematch
+            unshift!(argexprs2, (argtype===Any ? a : SymbolNode(a,argtype)))
+        end
+    end
+    if incompletematch && partmatch.args[1] != false
+        unshift!(body.args, icall)
+        unshift!(body.args, thrw)
+        unshift!(body.args, partmatch)
+        unshift!(argexprs2, top_tuple)
     end
 
     # ok, substitute argument expressions for argument names in the body
@@ -2261,15 +2392,15 @@ function inlineable(f, e::Expr, atypes, sv, enclosing_ast)
     return (expr, stmts)
 end
 
-inline_worthy(body, occurences::Int) = true
-function inline_worthy(body::Expr, occurences::Int=1) # 0 < occurrences <= 6
+inline_worthy(body, cost::Real) = true
+function inline_worthy(body::Expr, cost::Real=1.0) # precondition: 0<cost
 #    if isa(body.args[1],QuoteNode) && (body.args[1]::QuoteNode).value === :inline
 #        shift!(body.args)
 #        return true
 #    end
-    symlim = div(6,occurences)
+    symlim = 1+5/cost
     if length(body.args) < symlim
-        symlim *= 6
+        symlim *= 16
         if occurs_more(body, e->true, symlim) < symlim
             return true
         end
@@ -2308,19 +2439,9 @@ function inlining_pass(e::Expr, sv, ast)
         return (e,())
     end
     arg1 = eargs[1]
-    # don't inline first (global) arguments of ccall, as this needs to be evaluated
-    # by the interpreter and inlining might put in something it can't handle,
-    # like another ccall (or try to move the variables out into the function)
-    if is_known_call(e, Core.Intrinsics.ccall, sv)
-        i0 = 5
-        isccall = true
-    else
-        i0 = 1
-        isccall = false
-    end
     stmts = {}
     if e.head === :body
-        i = i0
+        i = 1
         while i <= length(eargs)
             ei = eargs[i]
             if isa(ei,Expr)
@@ -2337,6 +2458,16 @@ function inlining_pass(e::Expr, sv, ast)
             i += 1
         end
     else
+        # don't inline first (global) arguments of ccall, as this needs to be evaluated
+        # by the interpreter and inlining might put in something it can't handle,
+        # like another ccall (or try to move the variables out into the function)
+        if is_known_call(e, Core.Intrinsics.ccall, sv)
+            i0 = 5
+            isccall = true
+        else
+            i0 = 1
+            isccall = false
+        end
         has_stmts = false # needed to preserve order-of-execution
         for i=length(eargs):-1:i0
             ei = eargs[i]
@@ -2377,94 +2508,94 @@ function inlining_pass(e::Expr, sv, ast)
                 end
             end
         end
-    end
-    if isccall
-        le = length(eargs)
-        for i=5:2:le
-            if i<le && (isa(eargs[i],Symbol) || isa(eargs[i],SymbolNode))
-                eargs[i+1] = 0
+        if isccall
+            le = length(eargs)
+            for i=5:2:le
+                if i<le && (isa(eargs[i],Symbol) || isa(eargs[i],SymbolNode))
+                    eargs[i+1] = 0
+                end
             end
         end
-    end
-    if is(e.head,:call1)
-        e.head = :call
-        ET = exprtype(arg1)
-        if isType(ET)
-            f = ET.parameters[1]
-        else
-            f = _ieval(arg1)
-        end
-
-        if is(f, ^) || is(f, .^)
-            if length(e.args) == 3 && isa(e.args[3],Union(Int32,Int64))
-                a1 = e.args[2]
-                if isa(a1,basenumtype) || ((isa(a1,Symbol) || isa(a1,SymbolNode)) &&
-                                           exprtype(a1) <: basenumtype)
-                    if e.args[3]==2
-                        e.args = {TopNode(:*), a1, a1}
-                        f = *
-                    elseif e.args[3]==3
-                        e.args = {TopNode(:*), a1, a1, a1}
-                        f = *
+        if is(e.head,:call1)
+            e.head = :call
+            ET = exprtype(arg1)
+            if isType(ET)
+                f = ET.parameters[1]
+            else
+                f = _ieval(arg1)
+            end
+
+            if is(f, ^) || is(f, .^)
+                if length(e.args) == 3 && isa(e.args[3],Union(Int32,Int64))
+                    a1 = e.args[2]
+                    if isa(a1,basenumtype) || ((isa(a1,Symbol) || isa(a1,SymbolNode)) &&
+                                               exprtype(a1) <: basenumtype)
+                        if e.args[3]==2
+                            e.args = {TopNode(:*), a1, a1}
+                            f = *
+                        elseif e.args[3]==3
+                            e.args = {TopNode(:*), a1, a1, a1}
+                            f = *
+                        end
                     end
                 end
             end
-        end
 
-        for ninline = 1:100
-            atypes = tuple(map(exprtype, e.args[2:end])...)
-            if length(atypes) > MAX_TUPLETYPE_LEN
-                atypes = limit_tuple_type(atypes)
-            end
-            res = inlineable(f, e, atypes, sv, ast)
-            if isa(res,Tuple)
-                if isa(res[2],Array)
-                    append!(stmts,res[2])
+            for ninline = 1:100
+                atypes = tuple(map(exprtype, e.args[2:end])...)
+                if length(atypes) > MAX_TUPLETYPE_LEN
+                    atypes = limit_tuple_type(atypes)
                 end
-                res = res[1]
-            end
-
-            if !is(res,NF)
-                # iteratively inline apply(f, tuple(...), tuple(...), ...) in order
-                # to simplify long vararg lists as in multi-arg +
-                if isa(res,Expr) && is_known_call(res, apply, sv)
-                    e = res::Expr
-                    f = apply
-                else
-                    return (res,stmts)
+                res = inlineable(f, e, atypes, sv, ast)
+                if isa(res,Tuple)
+                    if isa(res[2],Array)
+                        append!(stmts,res[2])
+                    end
+                    res = res[1]
                 end
-            end
 
-            if is(f,apply)
-                na = length(e.args)
-                newargs = cell(na-2)
-                for i = 3:na
-                    aarg = e.args[i]
-                    t = to_tuple_of_Types(exprtype(aarg))
-                    if isa(aarg,Expr) && is_known_call(aarg, tuple, sv)
-                        # apply(f,tuple(x,y,...)) => f(x,y,...)
-                        newargs[i-2] = aarg.args[2:end]
-                    elseif isa(aarg, Tuple)
-                        newargs[i-2] = { QuoteNode(x) for x in aarg }
-                    elseif isa(t,Tuple) && !isvatuple(t) && effect_free(aarg,sv,true)
-                        # apply(f,t::(x,y)) => f(t[1],t[2])
-                        newargs[i-2] = { mk_tupleref(aarg,j,t[j]) for j=1:length(t) }
+                if !is(res,NF)
+                    # iteratively inline apply(f, tuple(...), tuple(...), ...) in order
+                    # to simplify long vararg lists as in multi-arg +
+                    if isa(res,Expr) && is_known_call(res, apply, sv)
+                        e = res::Expr
+                        f = apply
                     else
-                        # not all args expandable
-                        return (e,stmts)
+                        return (res,stmts)
                     end
                 end
-                e.args = [{e.args[2]}, newargs...]
 
-                # now try to inline the simplified call
+                if is(f,apply)
+                    na = length(e.args)
+                    newargs = cell(na-2)
+                    for i = 3:na
+                        aarg = e.args[i]
+                        t = to_tuple_of_Types(exprtype(aarg))
+                        if isa(aarg,Expr) && is_known_call(aarg, tuple, sv)
+                            # apply(f,tuple(x,y,...)) => f(x,y,...)
+                            newargs[i-2] = aarg.args[2:end]
+                        elseif isa(aarg, Tuple)
+                            newargs[i-2] = { QuoteNode(x) for x in aarg }
+                        elseif isa(t,Tuple) && !isvatuple(t) && effect_free(aarg,sv,true)
+                            # apply(f,t::(x,y)) => f(t[1],t[2])
+                            newargs[i-2] = { mk_tupleref(aarg,j,t[j]) for j=1:length(t) }
+                        else
+                            # not all args expandable
+                            return (e,stmts)
+                        end
+                    end
+                    e.args = [{e.args[2]}, newargs...]
+
+                    # now try to inline the simplified call
 
-                f = isconstantfunc(e.args[1], sv)
-                if f===false
+                    f = isconstantfunc(e.args[1], sv)
+                    if f===false
+                        return (e,stmts)
+                    end
+                    f = _ieval(f)
+                else
                     return (e,stmts)
                 end
-                f = _ieval(f)
-            else
-                return (e,stmts)
             end
         end
     end
@@ -2483,32 +2614,39 @@ const some_names = {:_var0, :_var1, :_var2, :_var3, :_var4, :_var5, :_var6,
                     :_var7, :_var8, :_var9, :_var10, :_var11, :_var12,
                     :_var13, :_var14, :_var15, :_var16, :_var17, :_var18,
                     :_var19, :_var20, :_var21, :_var22, :_var23, :_var24}
-
+function contains_is1(vinflist::Array{Any,1}, x::Symbol) # true if some entry of vinflist has x as its first element
+    for y in vinflist
+        if is(y[1],x) # identity (`is`) test against the entry's first element only
+            return true
+        end
+    end
+    return false # no entry matched (also the result for an empty list)
+end
 function unique_name(ast)
-    locllist = ast.args[2][1]::Array{Any,1}
+    locllist = ast.args[2][2]::Array{Any,1} # now reads the second list of args[2]; presumably the {name,type,flags} varinfo entries (cf. `locals` in inlineable) -- confirm
     for g in some_names
-        if !contains_is(locllist, g)
+        if !contains_is1(locllist, g) # entries are matched on their first element, so prefer the first _varN not already used
             return g
         end
     end
     g = gensym()
-    while contains_is(locllist, g)
+    while contains_is1(locllist, g) # all predefined names taken: keep drawing gensyms until one is unused
         g = gensym()
     end
     g
 end
 function unique_name(ast1, ast2)
-    locllist1 = ast1.args[2][1]::Array{Any,1}
-    locllist2 = ast2.args[2][1]::Array{Any,1}
+    locllist1 = ast1.args[2][2]::Array{Any,1}
+    locllist2 = ast2.args[2][2]::Array{Any,1}
     for g in some_names
-        if !contains_is(locllist1, g) &&
-           !contains_is(locllist2, g)
+        if !contains_is1(locllist1, g) &&
+           !contains_is1(locllist2, g)
             return g
         end
     end
     g = gensym()
-    while contains_is(locllist1, g) |
-          contains_is(locllist2, g)
+    while contains_is1(locllist1, g) |
+          contains_is1(locllist2, g)
         g = gensym()
     end
     g
@@ -2516,9 +2654,9 @@ end
 
 function unique_names(ast, n)
     ns = {}
-    locllist = ast.args[2][1]::Array{Any,1}
+    locllist = ast.args[2][2]::Array{Any,1}
     for g in some_names
-        if !contains_is(locllist, g)
+        if !contains_is1(locllist, g)
             push!(ns, g)
             if length(ns)==n
                 return ns
@@ -2527,7 +2665,7 @@ function unique_names(ast, n)
     end
     while length(ns)<n
         g = gensym()
-        while contains_is(locllist, g) || contains_is(ns, g)
+        while contains_is1(locllist, g) || contains_is(ns, g)
             g = gensym()
         end
         push!(ns, g)
@@ -2583,13 +2721,30 @@ function remove_redundant_temp_vars(ast, sa)
                 # this transformation is not valid for vars used before def.
                 # we need to preserve the point of assignment to know where to
                 # throw errors (issue #4645).
-            delete_var!(ast, v)
-            sym_replace(ast.args[3], {v}, {}, {init}, {})
+
+                if (isa(init,SymbolNode) ? (init.typ<:local_typeof(v, varinfo)) : true)
+                    # the transformation is not ideal if the assignment
+                    # is present for the auto-unbox functionality
+                    # (from inlining improved type inference information)
+                    # and this transformation would worsen the type information
+                    # everywhere later in the function
+
+                    delete_var!(ast, v)
+                    sym_replace(ast.args[3], {v}, {}, {init}, {})
+                end
+            end
         end
     end
-    end
     ast
 end
+function local_typeof(v, varinfo) # return the type stored alongside variable v in varinfo
+    for (v2, typ, info) in varinfo # each entry destructures as (name, type, info); info is not used here
+        if v === v2
+            return typ
+        end
+    end
+    @assert false "v not in varinfo" # reached only when v has no entry; treated as an internal invariant violation
+end
 
 occurs_undef(var, expr) =
     occurs_more(expr,
diff --git a/base/printf.jl b/base/printf.jl
index b12eef368c149..515e618db5b60 100644
--- a/base/printf.jl
+++ b/base/printf.jl
@@ -3,10 +3,12 @@ using Base.Grisu
 export @printf, @sprintf
 
 ### printf formatter generation ###
+const SmallFloatingPoint = Union(Float64,Float32,Float16) # the three float types listed here; used below to build SmallNumber
+const SmallNumber = Union(SmallFloatingPoint,Base.Signed64,Base.Unsigned64,Uint128,Int128) # gen_e's padding code subtracts exactly 1 for a |exp|>=100 exponent when the value isa SmallNumber, and counts exponent digits otherwise
 
 function gen(s::String)
     args = {}
-    blk = Expr(:block, :(local neg, pt, len, exp))
+    blk = Expr(:block, :(local neg, pt, len, exp, do_out, args))
     for x in parse(s)
         if isa(x,String)
             push!(blk.args, :(write(out, $(length(x)==1 ? x[1] : x))))
@@ -161,10 +163,8 @@ function pad(m::Int, n, c::Char)
     end
 end
 
-function print_fixed(out, precision)
+function print_fixed(out, precision, pt, ndigits)
     pdigits = pointer(DIGITS)
-    ndigits = LEN[1]
-    pt = POINT[1]
     if pt <= 0
         # 0.0dddd0
         write(out, '0')
@@ -198,11 +198,17 @@ function print_fixed(out, precision)
     end
 end
 
-function print_exp(out, exp)
+function print_exp(out, exp::Integer)
     write(out, exp < 0 ? '-' : '+')
     exp = abs(exp)
     d = div(exp,100)
-    d > 0 && write(out, char('0'+d))
+    if d > 0
+        if d >= 10
+            print(out, exp)
+            return
+        end
+        write(out, char('0'+d))
+    end
     exp = rem(exp,100)
     write(out, char('0'+div(exp,10)))
     write(out, char('0'+rem(exp,10)))
@@ -226,19 +232,21 @@ function gen_d(flags::ASCIIString, width::Int, precision::Int, c::Char)
     # interpret the number
     prefix = ""
     if lowercase(c)=='o'
-        f = '#' in flags ? :int_0ct : :int_oct
-        push!(blk.args, :(($f)($x)))
+        fn = '#' in flags ? :decode_0ct : :decode_oct
     elseif c=='x'
         '#' in flags && (prefix = "0x")
-        push!(blk.args, :(int_hex($x)))
+        fn = :decode_hex
     elseif c=='X'
         '#' in flags && (prefix = "0X")
-        push!(blk.args, :(int_HEX($x)))
+        fn = :decode_HEX
     else
-        push!(blk.args, :(int_dec($x)))
+        fn = :decode_dec
     end
-    push!(blk.args, :(neg = NEG[1]))
-    push!(blk.args, :(pt  = POINT[1]))
+    push!(blk.args, :((do_out, args) = $fn(out, $x, $flags, $width, $precision, $c)))
+    ifblk = Expr(:if, :do_out, Expr(:block))
+    push!(blk.args, ifblk)
+    blk = ifblk.args[2]
+    push!(blk.args, :((len, pt, neg) = args))
     # calculate padding
     width -= length(prefix)
     space_pad = width > max(1,precision) && '-' in flags ||
@@ -301,10 +309,11 @@ function gen_f(flags::ASCIIString, width::Int, precision::Int, c::Char)
     x, ex, blk = special_handler(flags,width)
     # interpret the number
     if precision < 0; precision = 6; end
-    push!(blk.args, :(fix_dec($x,$precision)))
-    push!(blk.args, :(neg = NEG[1]))
-    push!(blk.args, :(pt  = POINT[1]))
-    push!(blk.args, :(len = LEN[1]))
+    push!(blk.args, :((do_out, args) = fix_dec(out, $x, $flags, $width, $precision, $c)))
+    ifblk = Expr(:if, :do_out, Expr(:block))
+    push!(blk.args, ifblk)
+    blk = ifblk.args[2]
+    push!(blk.args, :((len, pt, neg) = args))
     # calculate padding
     padding = nothing
     if precision > 0 || '#' in flags
@@ -334,7 +343,7 @@ function gen_f(flags::ASCIIString, width::Int, precision::Int, c::Char)
     end
     # print digits
     if precision > 0
-        push!(blk.args, :(print_fixed(out,$precision)))
+        push!(blk.args, :(print_fixed(out,$precision,pt,len)))
     else
         push!(blk.args, :(write(out, pointer(DIGITS), len)))
         push!(blk.args, :(while pt >= (len+=1) write(out,'0') end))
@@ -364,9 +373,12 @@ function gen_e(flags::ASCIIString, width::Int, precision::Int, c::Char)
     # interpret the number
     if precision < 0; precision = 6; end
     ndigits = min(precision+1,BUFLEN-1)
-    push!(blk.args, :(ini_dec($x,$ndigits)))
-    push!(blk.args, :(neg = NEG[1]))
-    push!(blk.args, :(exp = POINT[1]-1))
+    push!(blk.args, :((do_out, args) = ini_dec(out,$x,$ndigits, $flags, $width, $precision, $c)))
+    ifblk = Expr(:if, :do_out, Expr(:block))
+    push!(blk.args, ifblk)
+    blk = ifblk.args[2]
+    push!(blk.args, :((len, pt, neg) = args))
+    push!(blk.args, :(exp = pt-1))
     expmark = c=='E' ? "E" : "e"
     if precision==0 && '#' in flags
         expmark = string(".",expmark)
@@ -378,11 +390,31 @@ function gen_e(flags::ASCIIString, width::Int, precision::Int, c::Char)
     if '+' in flags || ' ' in flags
         width -= 1 # for the sign indicator
         if width > 0
-            padding = :($width-((exp<=-100)|(100<=exp)))
+            padding = quote
+                padn=$width
+                if (exp<=-100)|(100<=exp)
+                    if isa($x,SmallNumber)
+                        padn -= 1
+                    else
+                        padn -= Base.ndigits0z(exp) - 2
+                    end
+                end
+                padn
+            end
         end
     else
         if width > 0
-            padding = :($width-((exp<=-100)|(100<=exp))-neg)
+            padding = quote
+                padn=$width-neg
+                if (exp<=-100)|(100<=exp)
+                    if isa($x,SmallNumber)
+                        padn -= 1
+                    else
+                        padn -= Base.ndigits0z(exp) - 2
+                    end
+                end
+                padn
+            end
         end
     end
     # print space padding
@@ -504,88 +536,125 @@ end
 macro handle_zero(ex)
     quote
         if $(esc(ex)) == 0
-            POINT[1] = 1
             DIGITS[1] = '0'
-            return
+            return int32(1), int32(1), $(esc(:neg))
         end
     end
 end
 
-function decode_oct(x::Unsigned)
+decode_oct(out::IO, d, flags::ASCIIString, width::Int, precision::Int, c::Char) = (true, decode_oct(d))
+decode_0ct(out::IO, d, flags::ASCIIString, width::Int, precision::Int, c::Char) = (true, decode_0ct(d))
+decode_dec(out::IO, d, flags::ASCIIString, width::Int, precision::Int, c::Char) = (true, decode_dec(d))
+decode_hex(out::IO, d, flags::ASCIIString, width::Int, precision::Int, c::Char) = (true, decode_hex(d))
+decode_HEX(out::IO, d, flags::ASCIIString, width::Int, precision::Int, c::Char) = (true, decode_HEX(d))
+fix_dec(out::IO, d, flags::ASCIIString, width::Int, precision::Int, c::Char) = (true, fix_dec(d, precision))
+ini_dec(out::IO, d, ndigits::Int, flags::ASCIIString, width::Int, precision::Int, c::Char) = (true, ini_dec(d, ndigits))
+
+
+# fallbacks for Real types without explicit decode_* implementation
+decode_oct(d::Real) = decode_oct(integer(d))
+decode_0ct(d::Real) = decode_0ct(integer(d))
+decode_dec(d::Real) = decode_dec(integer(d))
+decode_hex(d::Real) = decode_hex(integer(d))
+decode_HEX(d::Real) = decode_HEX(integer(d))
+
+handlenegative(d::Unsigned) = (false, d)
+function handlenegative(d::Integer)
+    if d < 0
+        return true, unsigned(oftype(d,-d))
+    else
+        return false, unsigned(d)
+    end
+end
+
+function decode_oct(d::Integer)
+    neg, x = handlenegative(d)
     @handle_zero x
-    POINT[1] = i = div((sizeof(x)<<3)-leading_zeros(x)+2,3)
+    pt = i = div((sizeof(x)<<3)-leading_zeros(x)+2,3)
     while i > 0
         DIGITS[i] = '0'+(x&0x7)
         x >>= 3
         i -= 1
     end
+    return int32(pt), int32(pt), neg
 end
 
-function decode_0ct(x::Unsigned)
+function decode_0ct(d::Integer)
+    neg, x = handlenegative(d)
     # doesn't need special handling for zero
-    POINT[1] = i = div((sizeof(x)<<3)-leading_zeros(x)+5,3)
+    pt = i = div((sizeof(x)<<3)-leading_zeros(x)+5,3)
     while i > 0
         DIGITS[i] = '0'+(x&0x7)
         x >>= 3
         i -= 1
     end
+    return int32(pt), int32(pt), neg
 end
 
-function decode_dec(x::Unsigned)
+function decode_dec(d::Integer)
+    neg, x = handlenegative(d)
     @handle_zero x
-    POINT[1] = i = Base.ndigits0z(x)
+    pt = i = Base.ndigits0z(x)
     while i > 0
         DIGITS[i] = '0'+rem(x,10)
         x = div(x,10)
         i -= 1
     end
+    return int32(pt), int32(pt), neg
 end
 
-function decode_hex(x::Unsigned, symbols::Array{Uint8,1})
+function decode_hex(d::Integer, symbols::Array{Uint8,1})
+    neg, x = handlenegative(d)
     @handle_zero x
-    POINT[1] = i = (sizeof(x)<<1)-(leading_zeros(x)>>2)
+    pt = i = (sizeof(x)<<1)-(leading_zeros(x)>>2)
     while i > 0
         DIGITS[i] = symbols[(x&0xf)+1]
         x >>= 4
         i -= 1
     end
+    return int32(pt), int32(pt), neg
 end
 
 const hex_symbols = "0123456789abcdef".data
 const HEX_symbols = "0123456789ABCDEF".data
 
-decode_hex(x::Unsigned) = decode_hex(x,hex_symbols)
-decode_HEX(x::Unsigned) = decode_hex(x,HEX_symbols)
+decode_hex(x::Integer) = decode_hex(x,hex_symbols)
+decode_HEX(x::Integer) = decode_hex(x,HEX_symbols)
 
 function decode(b::Int, x::BigInt)
-    neg = NEG[1] = x.size < 0
-    pt = POINT[1] = Base.ndigits(x, abs(b))
+    neg = x.size < 0
+    pt = Base.ndigits(x, abs(b))
     length(DIGITS) < pt+1 && resize!(DIGITS, pt+1)
     neg && (x.size = -x.size)
     ccall((:__gmpz_get_str, :libgmp), Ptr{Uint8},
           (Ptr{Uint8}, Cint, Ptr{BigInt}), DIGITS, b, &x)
     neg && (x.size = -x.size)
+    return int32(pt), int32(pt), neg
 end
+decode_oct(x::BigInt) = decode(8, x)
+decode_dec(x::BigInt) = decode(10, x)
+decode_hex(x::BigInt) = decode(16, x)
+decode_HEX(x::BigInt) = decode(-16, x)
 
 function decode_0ct(x::BigInt)
-    neg = NEG[1] = x.size < 0
+    neg = x.size < 0  # sign is taken from the sign of the BigInt's size field
     DIGITS[1] = '0'
     if x.size == 0
-        POINT[1] = 1
-        return
+        return int32(1), int32(1), neg  # zero: only the leading '0' stored above; (len, pt, neg)
     end
-    pt = POINT[1] = Base.ndigits0z(x, 8) + 1
+    pt = Base.ndigits0z(x, 8) + 1  # octal digit count plus the forced leading '0'
     length(DIGITS) < pt+1 && resize!(DIGITS, pt+1)
     neg && (x.size = -x.size)
     p = convert(Ptr{Uint8}, DIGITS) + 1
     ccall((:__gmpz_get_str, :libgmp), Ptr{Uint8},
           (Ptr{Uint8}, Cint, Ptr{BigInt}), p, 8, &x)
     neg && (x.size = -x.size)
+    return int32(pt), int32(pt), neg  # (len, pt, neg): same order as the zero branch above
 end
 
 ### decoding functions directly used by printf generated code ###
 
-# int_*(x)   => fixed precision, to 0th place, filled out
+# decode_*(x)=> fixed precision, to 0th place, filled out
 # fix_*(x,n) => fixed precision, to nth place, not filled out
 # ini_*(x,n) => n initial digits, filled out
 
@@ -593,100 +662,63 @@ end
 #   *_0ct(x,n) => ensure that the first octal digits is zero
 #   *_HEX(x,n) => use uppercase digits for hexadecimal
 
-## "int" decoding functions ##
-#
-# - sets neg[1]
-# - sets point[1]
-# - implies len[1] = point[1]
+# - returns (len, point, neg)
+# - implies len = point
 #
 
-int_oct(x::Unsigned) = (NEG[1]=false; decode_oct(x))
-int_0ct(x::Unsigned) = (NEG[1]=false; decode_0ct(x))
-int_dec(x::Unsigned) = (NEG[1]=false; decode_dec(x))
-int_hex(x::Unsigned) = (NEG[1]=false; decode_hex(x))
-int_HEX(x::Unsigned) = (NEG[1]=false; decode_HEX(x))
-
-macro handle_negative()
-    quote
-        if $(esc(:x)) < 0
-            NEG[1] = true
-            $(esc(:x)) = oftype($(esc(:x)),-$(esc(:x)))
-        else
-            NEG[1] = false
-        end
-    end
-end
-
-int_oct(x::Integer) = (@handle_negative; decode_oct(unsigned(x)))
-int_0ct(x::Integer) = (@handle_negative; decode_0ct(unsigned(x)))
-int_dec(x::Integer) = (@handle_negative; decode_dec(unsigned(x)))
-int_hex(x::Integer) = (@handle_negative; decode_hex(unsigned(x)))
-int_HEX(x::Integer) = (@handle_negative; decode_HEX(unsigned(x)))
-
-int_oct(x::BigInt) = decode(8, x)
-int_0ct(x::BigInt) = decode_0ct(x)
-int_dec(x::BigInt) = decode(10, x)
-int_hex(x::BigInt) = decode(16, x)
-int_HEX(x::BigInt) = decode(-16, x)
-
-const SmallFloatingPoint = Union(Float64,Float32,Float16)
-
-function int_dec(x::SmallFloatingPoint)
+function decode_dec(x::SmallFloatingPoint)
     if x == 0.0
-        NEG[1] = false
-        POINT[1] = 1
         DIGITS[1] = '0'
-        return
+        return (int32(1), int32(1), false)
     end
     @grisu_ccall x Grisu.FIXED 0
     if LEN[1] == 0
-        NEG[1] = false
-        POINT[1] = 1
         DIGITS[1] = '0'
+        return (int32(1), int32(1), false)
     else
         for i = LEN[1]+1:POINT[1]
             DIGITS[i] = '0'
         end
     end
+    return LEN[1], POINT[1], NEG[1]
 end
-
-int_oct(x::Real) = int_oct(integer(x)) # TODO: real float decoding.
-int_0ct(x::Real) = int_0ct(integer(x)) # TODO: real float decoding.
-int_dec(x::Real) = int_dec(float(x))
-int_hex(x::Real) = int_hex(integer(x)) # TODO: real float decoding.
-int_HEX(x::Real) = int_HEX(integer(x)) # TODO: real float decoding.
+# TODO: implement decode_oct, decode_0ct, decode_hex, decode_HEX for SmallFloatingPoint
 
 ## fix decoding functions ##
 #
-# - sets neg[1]
-# - sets point[1]
-# - sets len[1]; if less than point[1], trailing zeros implied
+# - returns (len, point, neg)
+# - if len less than point, trailing zeros implied
 #
 
-fix_dec(x::Integer, n::Int) = (int_dec(x); LEN[1]=POINT[1])
+# fallback for Real types without explicit fix_dec implementation
 fix_dec(x::Real, n::Int) = fix_dec(float(x),n)
 
+fix_dec(x::Integer, n::Int) = decode_dec(x)
+
 function fix_dec(x::SmallFloatingPoint, n::Int)
     if n > BUFLEN-1; n = BUFLEN-1; end
     @grisu_ccall x Grisu.FIXED n
     if LEN[1] == 0
-        NEG[1] = false
-        POINT[1] = 1
         DIGITS[1] = '0'
+        return (int32(1), int32(1), NEG[1]) 
     end
+    return LEN[1], POINT[1], NEG[1]
 end
 
 ## ini decoding functions ##
 #
-# - sets neg[1]
-# - sets point[1]
-# - implies len[1] = n (requested digits)
+# - returns (len, point, neg)
+# - implies len = n (requested digits)
 #
 
-function ini_dec(x::Unsigned, n::Int)
+# fallback for Real types without explicit ini_dec implementation
+ini_dec(x::Real, n::Int) = ini_dec(float(x),n)
+
+function ini_dec(d::Integer, n::Int)
+    neg, x = handlenegative(d)
     k = ndigits(x)
     if k <= n
-        POINT[1] = k
+        pt = k
         for i = k:-1:1
             DIGITS[i] = '0'+rem(x,10)
             x = div(x,10)
@@ -704,45 +736,75 @@ function ini_dec(x::Unsigned, n::Int)
                 k += 1
             end
         end
-        POINT[1] = k
+        pt = k
         x = div(x,p)
         for i = n:-1:1
             DIGITS[i] = '0'+rem(x,10)
             x = div(x,10)
         end
     end
+    return n, pt, neg
 end
 
-ini_dec(x::Integer, n::Int) = (@handle_negative; ini_dec(unsigned(x),n))
-ini_dec(x::Real, n::Int) = ini_dec(float(x),n)
-
 function ini_dec(x::SmallFloatingPoint, n::Int)
     if x == 0.0
-        POINT[1] = 1
-        NEG[1] = signbit(x)
         ccall(:memset, Ptr{Void}, (Ptr{Void}, Cint, Csize_t), DIGITS, '0', n)
+        return int32(1), int32(1), bool(signbit(x))
     else
         @grisu_ccall x Grisu.PRECISION n
     end
+    return LEN[1], POINT[1], NEG[1]
 end
 
 function ini_dec(x::BigInt, n::Int)
     if x.size == 0
-        POINT[1] = 1
-        NEG[1] = false
         ccall(:memset, Ptr{Void}, (Ptr{Void}, Cint, Csize_t), DIGITS, '0', n)
-    else
-        d = Base.ndigits0z(x)
-        if d <= n
-            int_dec(x)
-            d == n && return
-            p = convert(Ptr{Void}, DIGITS) + POINT[1]
-            ccall(:memset, Ptr{Void}, (Ptr{Void}, Cint, Csize_t), p, '0', n - POINT[1])
-        else
-            int_dec(iround(x/big(10)^(d-n)))
-            POINT[1] = d
-        end
+        return int32(1), int32(1), false
+    end
+    d = Base.ndigits0z(x)
+    if d <= n
+        info = decode_dec(x)
+        d == n && return info
+        p = convert(Ptr{Void}, DIGITS) + info[2]
+        ccall(:memset, Ptr{Void}, (Ptr{Void}, Cint, Csize_t), p, '0', n - info[2])
+        return info
+    end
+    return (n, d, decode_dec(iround(x/big(10)^(d-n)))[3])
+end
+
+#BigFloat
+fix_dec(out::IO, d::BigFloat, flags::ASCIIString, width::Int, precision::Int, c::Char) = bigfloat_printf(out, d, flags, width, precision, c)
+ini_dec(out::IO, d::BigFloat, ndigits::Int, flags::ASCIIString, width::Int, precision::Int, c::Char) = bigfloat_printf(out, d, flags, width, precision, c)
+function bigfloat_printf(out::IO, d, flags::ASCIIString, width::Int, precision::Int, c::Char)
+    fmt_len = sizeof(flags)+4  # '%', 'R', the conversion char and the trailing NUL
+    if width > 0
+        fmt_len += ndigits(width)
+    end
+    if precision >= 0
+        fmt_len += ndigits(precision)+1  # digits of the precision plus the '.'
+    end
+    fmt = IOBuffer(fmt_len)
+    write(fmt, '%')
+    write(fmt, flags)
+    if width > 0
+        print(fmt, width)
+    end
+    if precision == 0
+        write(fmt, '.')
+        write(fmt, '0')
+    elseif precision > 0
+        write(fmt, '.')
+        print(fmt, precision)  # the requested precision itself, as sized in fmt_len above
+    end
+    write(fmt, 'R')
+    write(fmt, c)
+    write(fmt, uint8(0))
+    printf_fmt = takebuf_array(fmt)
+    @assert length(printf_fmt) == fmt_len
+    lng = ccall((:mpfr_snprintf,:libmpfr), Int32, (Ptr{Uint8}, Culong, Ptr{Uint8}, Ptr{BigFloat}...), DIGITS, BUFLEN-1, printf_fmt, &d)
+    lng > 0 || error("invalid printf formatting for BigFloat")
+    write(out, pointer(DIGITS), min(lng, BUFLEN-2))  # lng is the untruncated length; at most BUFLEN-2 chars + NUL were stored
+    return (false, ())  # first element false: the text has already been written to `out`
+end
 
 ### external printf interface ###
@@ -780,7 +842,9 @@ macro sprintf(args...)
     !isempty(args) || error("@sprintf: called with zero arguments")
     isa(args[1], String) || is_str_expr(args[1]) || 
         error("@sprintf: first argument must be a format string")
-    :(sprint(io->$(_printf("@sprintf", :io, args[1], args[2:end]))))
+    blk = _printf("@sprintf", :(IOBuffer()), args[1], args[2:end])
+    push!(blk.args, :(takebuf_string(out)))
+    blk
 end
 
 end # module