diff --git a/NEWS.md b/NEWS.md
index 17b984765d2c0..cfdcf06332243 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -260,6 +260,8 @@ Library improvements
       * new LAPACK wrappers
         - condition number estimate `cond(A::Triangular)` ([#5255])
 
+      * parametrize Triangular on matrix type ([#7064])
+
     * Dense linear algebra for generic matrix element types
 
       * LU factorization ([#5381] and [#5430])
diff --git a/base/array.jl b/base/array.jl
index 698b1a0132ee0..35ffa760642ce 100644
--- a/base/array.jl
+++ b/base/array.jl
@@ -168,7 +168,7 @@ for (fname, felt) in ((:zeros,:zero), (:ones,:one))
     @eval begin
         ($fname){T}(::Type{T}, dims...)  = fill!(Array(T, dims...), ($felt)(T))
         ($fname)(dims...)                = fill!(Array(Float64, dims...), ($felt)(Float64))
-        ($fname){T}(x::AbstractArray{T}) = ($fname)(T, size(x))
+        ($fname){T}(A::AbstractArray{T}) = fill!(similar(A), ($felt)(T))
     end
 end
 
diff --git a/base/deprecated.jl b/base/deprecated.jl
index 7028cd848fc3a..1fd77152368e7 100644
--- a/base/deprecated.jl
+++ b/base/deprecated.jl
@@ -198,6 +198,8 @@ end
 @deprecate (/)(x::Number,A::Array)          x ./ A
 @deprecate (\)(A::Array,x::Number)          A .\ x
 
+@deprecate Triangular(A::Matrix)    Triangular(A, istril(A) ? :L : (istriu(A) ? istriu(A) : throw(ArgumentError("Matrix is not triangular"))))
+
 deprecated_ls() = run(`ls -l`)
 deprecated_ls(args::Cmd) = run(`ls -l $args`)
 deprecated_ls(args::String...) = run(`ls -l $args`)
diff --git a/base/linalg/bitarray.jl b/base/linalg/bitarray.jl
index e20ab2b8ab3e1..4b023ce02e0fb 100644
--- a/base/linalg/bitarray.jl
+++ b/base/linalg/bitarray.jl
@@ -40,7 +40,7 @@ end
 
 #aCb{T, S}(A::BitMatrix{T}, B::BitMatrix{S}) = aTb(A, B)
 
-function triu(B::BitMatrix, k::Integer)
+function triu(B::BitMatrix, k::Integer=0)
     m,n = size(B)
     A = falses(m,n)
     Ac = A.chunks
@@ -52,7 +52,7 @@ function triu(B::BitMatrix, k::Integer)
     A
 end
 
-function tril(B::BitMatrix, k::Integer)
+function tril(B::BitMatrix, k::Integer=0)
     m,n = size(B)
     A = falses(m, n)
     Ac = A.chunks
diff --git a/base/linalg/blas.jl b/base/linalg/blas.jl
index 0f2d844c921c9..a42d273d7959d 100644
--- a/base/linalg/blas.jl
+++ b/base/linalg/blas.jl
@@ -400,7 +400,7 @@ for (fname, elty) in ((:dtrmv_,:Float64),
                 (Ptr{Uint8}, Ptr{Uint8}, Ptr{Uint8}, Ptr{BlasInt},
                  Ptr{$elty}, Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}),
                  &uplo, &trans, &diag, &n, 
-                 A, &max(1,stride(A,2)), x, &1)
+                 A, &max(1,stride(A,2)), x, &max(1,stride(x, 1)))
             x
         end
         function trmv(uplo::Char, trans::Char, diag::Char, A::StridedMatrix{$elty}, x::StridedVector{$elty})
diff --git a/base/linalg/dense.jl b/base/linalg/dense.jl
index 304ad8e788dca..e783d85d8da12 100644
--- a/base/linalg/dense.jl
+++ b/base/linalg/dense.jl
@@ -300,7 +300,7 @@ function sqrtm{T<:Real}(A::StridedMatrix{T})
     issym(A) && return sqrtm(Symmetric(A))
     n = chksquare(A)
     SchurF = schurfact(complex(A))
-    R = full(sqrtm(Triangular(SchurF[:T])))
+    R = full(sqrtm(Triangular(SchurF[:T], :U, false)))
     retmat = SchurF[:vectors]*R*SchurF[:vectors]'
     all(imag(retmat) .== 0) ? real(retmat) : retmat
 end
@@ -308,7 +308,7 @@ function sqrtm{T<:Complex}(A::StridedMatrix{T})
     ishermitian(A) && return sqrtm(Hermitian(A))
     n = chksquare(A)
     SchurF = schurfact(A)
-    R = full(sqrtm(Triangular(SchurF[:T])))
+    R = full(sqrtm(Triangular(SchurF[:T], :U, false)))
     SchurF[:vectors]*R*SchurF[:vectors]'
 end
 sqrtm(a::Number) = (b = sqrt(complex(a)); imag(b) == 0 ? real(b) : b)
diff --git a/base/linalg/diagonal.jl b/base/linalg/diagonal.jl
index b788c909285a2..309f8a3a8a8cc 100644
--- a/base/linalg/diagonal.jl
+++ b/base/linalg/diagonal.jl
@@ -8,6 +8,7 @@ Diagonal(A::Matrix) = Diagonal(diag(A))
 convert{T}(::Type{Diagonal{T}}, D::Diagonal{T}) = D
 convert{T}(::Type{Diagonal{T}}, D::Diagonal) = Diagonal{T}(convert(Vector{T}, D.diag))
 convert{T}(::Type{AbstractMatrix{T}}, D::Diagonal) = convert(Diagonal{T}, D)
+convert{T}(::Type{Triangular}, A::Diagonal{T}) = Triangular{T, Diagonal{T}, :U, false}(A)
 
 function similar{T}(D::Diagonal, ::Type{T}, d::(Int,Int))
     d[1] == d[2] || throw(ArgumentError("Diagonal matrix must be square"))
@@ -19,6 +20,8 @@ copy!(D1::Diagonal, D2::Diagonal) = (copy!(D1.diag, D2.diag); D1)
 size(D::Diagonal) = (length(D.diag),length(D.diag))
 size(D::Diagonal,d::Integer) = d<1 ? error("dimension out of range") : (d<=2 ? length(D.diag) : 1)
 
+fill!(D::Diagonal, x) = (fill!(D.diag, x); D)
+
 full(D::Diagonal) = diagm(D.diag)
 getindex(D::Diagonal, i::Integer, j::Integer) = i == j ? D.diag[i] : zero(eltype(D.diag))
 
@@ -33,6 +36,9 @@ ishermitian(D::Diagonal) = true
 issym(D::Diagonal) = true
 isposdef(D::Diagonal) = all(D.diag .> 0)
 
+tril!(D::Diagonal,i::Integer) = i == 0 ? D : zeros(D)
+triu!(D::Diagonal,i::Integer) = i == 0 ? D : zeros(D)
+
 ==(Da::Diagonal, Db::Diagonal) = Da.diag == Db.diag
 
 +(Da::Diagonal, Db::Diagonal) = Diagonal(Da.diag + Db.diag)
diff --git a/base/linalg/factorization.jl b/base/linalg/factorization.jl
index b60d9b2154c9e..9b9acdce7975e 100644
--- a/base/linalg/factorization.jl
+++ b/base/linalg/factorization.jl
@@ -191,17 +191,20 @@ convert{T}(::Type{QRPivoted{T}},A::QRPivoted) = QRPivoted(convert(AbstractMatrix
 convert{T}(::Type{Factorization{T}}, A::QRPivoted) = convert(QRPivoted{T}, A)
 
 function getindex(A::QR, d::Symbol)
-    d == :R && return triu(A.factors[1:minimum(size(A)),:])
+    m, n = size(A)
+    d == :R && return triu!(A.factors[1:min(m,n), 1:n])
     d == :Q && return QRPackedQ(A.factors,A.τ)
     throw(KeyError(d))
 end
 function getindex(A::QRCompactWY, d::Symbol)
-    d == :R && return triu(A.factors[1:minimum(size(A)),:])
+    m, n = size(A)
+    d == :R && return triu!(A.factors[1:min(m,n), 1:n])
     d == :Q && return QRCompactWYQ(A.factors,A.T)
     throw(KeyError(d))
 end
 function getindex{T}(A::QRPivoted{T}, d::Symbol)
-    d == :R && return triu(A.factors[1:minimum(size(A)),:])
+    m, n = size(A)
+    d == :R && return triu!(A.factors[1:min(m,n), 1:n])
     d == :Q && return QRPackedQ(A.factors,A.τ)
     d == :p && return A.jpvt
     if d == :P
diff --git a/base/linalg/generic.jl b/base/linalg/generic.jl
index 7b343b4c278d2..c164268fa87ce 100644
--- a/base/linalg/generic.jl
+++ b/base/linalg/generic.jl
@@ -22,15 +22,11 @@ scale!(s::Number, X::AbstractArray) = generic_scale!(X, s)
 
 cross(a::AbstractVector, b::AbstractVector) = [a[2]*b[3]-a[3]*b[2], a[3]*b[1]-a[1]*b[3], a[1]*b[2]-a[2]*b[1]]
 
-triu(M::AbstractMatrix) = triu(M,0)
-tril(M::AbstractMatrix) = tril(M,0)
-#triu{T}(M::AbstractMatrix{T}, k::Integer)
-#tril{T}(M::AbstractMatrix{T}, k::Integer)
+triu(M::AbstractMatrix) = triu!(copy(M))
+tril(M::AbstractMatrix) = tril!(copy(M))
 triu!(M::AbstractMatrix) = triu!(M,0)
 tril!(M::AbstractMatrix) = tril!(M,0)
 
-#diff(a::AbstractVector)
-#diff(a::AbstractMatrix, dim::Integer)
 diff(a::AbstractMatrix) = diff(a, 1)
 diff(a::AbstractVector) = [ a[i+1] - a[i] for i=1:length(a)-1 ]
 
@@ -45,10 +41,8 @@ end
 
 gradient(F::AbstractVector) = gradient(F, [1:length(F)])
 gradient(F::AbstractVector, h::Real) = gradient(F, [h*(1:length(F))])
-#gradient(F::AbstractVector, h::AbstractVector)
 
 diag(A::AbstractVector) = error("use diagm instead of diag to construct a diagonal matrix")
-#diag(A::AbstractMatrix)
 
 #diagm{T}(v::AbstractVecOrMat{T})
 
diff --git a/base/linalg/lu.jl b/base/linalg/lu.jl
index 2f2f0e39dddd0..d8e885023306d 100644
--- a/base/linalg/lu.jl
+++ b/base/linalg/lu.jl
@@ -93,8 +93,12 @@ end
 
 function getindex{T,S<:StridedMatrix}(A::LU{T,S}, d::Symbol)
     m, n = size(A)
-    d == :L && return tril(A.factors[1:m, 1:min(m,n)], -1) + eye(T, m, min(m,n))
-    d == :U && return triu(A.factors[1:min(m,n),1:n])
+    if d == :L
+        L = tril!(A.factors[1:m, 1:min(m,n)])
+        for i = 1:min(m,n); L[i,i] = one(T); end
+        return L
+    end
+    d == :U && return triu!(A.factors[1:min(m,n), 1:n])
     d == :p && return ipiv2perm(A.ipiv, m)
     if d == :P
         p = A[:p]
@@ -144,7 +148,7 @@ end
 
 inv{T<:BlasFloat,S<:StridedMatrix}(A::LU{T,S}) = @assertnonsingular LAPACK.getri!(copy(A.factors), A.ipiv) A.info
 
-cond{T<:BlasFloat,S<:StridedMatrix}(A::LU{T,S}, p::Number) = inv(LAPACK.gecon!(p == 1 ? '1' : 'I', A.factors, norm(A[:L][A[:p],:]*A[:U], p)))
+cond{T<:BlasFloat,S<:StridedMatrix}(A::LU{T,S}, p::Number) = inv(LAPACK.gecon!(p == 1 ? '1' : 'I', A.factors, norm((A[:L]*A[:U])[A[:p],:], p)))
 cond(A::LU, p::Number) = norm(A[:L]*A[:U],p)*norm(inv(A),p)
 
 # Tridiagonal
diff --git a/base/linalg/special.jl b/base/linalg/special.jl
index 4cf8797b40b75..a6b0ff942f76f 100644
--- a/base/linalg/special.jl
+++ b/base/linalg/special.jl
@@ -4,7 +4,8 @@
 convert{T}(::Type{Bidiagonal}, A::Diagonal{T})=Bidiagonal(A.diag, zeros(T, size(A.diag,1)-1), true)
 convert{T}(::Type{SymTridiagonal}, A::Diagonal{T})=SymTridiagonal(A.diag, zeros(T, size(A.diag,1)-1))
 convert{T}(::Type{Tridiagonal}, A::Diagonal{T})=Tridiagonal(zeros(T, size(A.diag,1)-1), A.diag, zeros(T, size(A.diag,1)-1))
-convert(::Type{Triangular}, A::Union(Diagonal, Bidiagonal, SymTridiagonal, Tridiagonal))=Triangular(full(A))
+convert(::Type{Triangular}, A::Diagonal) = Triangular(full(A), :L)
+convert(::Type{Triangular}, A::Bidiagonal) = Triangular(full(A), A.isupper ? :U : :L)
 convert(::Type{Matrix}, D::Diagonal) = diagm(D.diag)
 
 function convert(::Type{Diagonal}, A::Union(Bidiagonal, SymTridiagonal))
diff --git a/base/linalg/triangular.jl b/base/linalg/triangular.jl
index 1c0c18b2449e0..7f8a369c9e268 100644
--- a/base/linalg/triangular.jl
+++ b/base/linalg/triangular.jl
@@ -1,42 +1,46 @@
 ## Triangular
-immutable Triangular{T<:Number} <: AbstractMatrix{T}
-    UL::Matrix{T}
-    uplo::Char
-    unitdiag::Char
+immutable Triangular{T,S<:AbstractMatrix{T},UpLo,IsUnit} <: AbstractMatrix{T}
+    data::S
 end
-function Triangular{T<:Number}(A::Matrix{T}, uplo::Symbol, unitdiag::Bool)
-    if size(A, 1) != size(A, 2) throw(DimensionMismatch("matrix must be square")) end
-    return Triangular(A, string(uplo)[1], unitdiag ? 'U' : 'N')
-end
-Triangular(A::Matrix, uplo::Symbol) = Triangular(A, uplo, all(diag(A) .== 1) ? true : false)
-function Triangular(A::Matrix)
-    if istriu(A) return Triangular(A, :U) end
-    if istril(A) return Triangular(A, :L) end
-    throw(ArgumentError("matrix is not triangular"))
+function Triangular{T}(A::AbstractMatrix{T}, uplo::Symbol, isunit::Bool=false)
+    uplo != :L && uplo != :U && throw(ArgumentError("uplo argument must be either :U or :L"))
+    return Triangular{T,typeof(A),uplo,isunit}(A)
 end
 
 ######################
 # BlasFloat routines #
 ######################
 
+### Note! the BlasFloat restriction can be removed if generic triangular multiplication methods are written.
+for (func1, func2) in ((:*, :A_mul_B!), (:Ac_mul_B, :Ac_mul_B!), (:/, :A_rdiv_B!))
+    @eval begin
+        ($func1){T<:BlasFloat,S<:StridedMatrix,UpLo,IsUnit}(A::Triangular{T,S,UpLo,IsUnit}, B::Triangular{T,S,UpLo,IsUnit}) = ($func2)(A, full(B))
+        ($func1){T<:BlasFloat,S<:StridedMatrix,UpLo,IsUnit}(A::Triangular{T,S,UpLo,IsUnit}, B::StridedVecOrMat) = ($func2)(A, copy(B))
+    end
+end
+for (func1, func2) in ((:A_mul_Bc, :A_mul_Bc!), (:A_rdiv_Bc, :A_rdiv_Bc!))
+    @eval begin
+        ($func1){T<:BlasFloat,S<:StridedMatrix,UpLo,IsUnit}(A::StridedMatrix, B::Triangular{T,S,UpLo,IsUnit}) = ($func2)(copy(A), B)
+    end
+end
+
 # Vector multiplication
-*{T<:BlasFloat}(A::Triangular{T}, b::Vector{T}) = BLAS.trmv(A.uplo, 'N', A.unitdiag, A.UL, b)
-Ac_mul_B{T<:BlasComplex}(A::Triangular{T}, b::Vector{T}) = BLAS.trmv(A.uplo, 'C', A.unitdiag, A.UL, b)
-At_mul_B{T<:BlasReal}(A::Triangular{T}, b::Vector{T}) = BLAS.trmv(A.uplo, 'T', A.unitdiag, A.UL, b)
+A_mul_B!{T<:BlasFloat,S,UpLo,IsUnit}(A::Triangular{T,S,UpLo,IsUnit}, b::StridedVector{T}) = BLAS.trmv!(UpLo == :L ? 'L' : 'U', 'N', IsUnit ? 'U' : 'N', A.data, b)
+Ac_mul_B!{T<:BlasComplex,S,UpLo,IsUnit}(A::Triangular{T,S,UpLo,IsUnit}, b::StridedVector{T}) = BLAS.trmv!(UpLo == :L ? 'L' : 'U', 'C', IsUnit ? 'U' : 'N', A.data, b)
+At_mul_B!{T<:BlasReal,S,UpLo,IsUnit}(A::Triangular{T,S,UpLo,IsUnit}, b::StridedVector{T}) = BLAS.trmv!(UpLo == :L ? 'L' : 'U', 'T', IsUnit ? 'U' : 'N', A.data, b)
 
 # Matrix multiplication
-*{T<:BlasFloat}(A::Triangular{T}, B::StridedMatrix{T}) = BLAS.trmm('L', A.uplo, 'N', A.unitdiag, one(T), A.UL, B)
-*{T<:BlasFloat}(A::StridedMatrix{T}, B::Triangular{T}) = BLAS.trmm('R', B.uplo, 'N', B.unitdiag, one(T), B.UL, A)
-A_mul_B!{T<:BlasFloat}(A::Triangular{T},B::Matrix{T}) = BLAS.trmm!('L',A.uplo,'N',A.unitdiag,one(eltype(A)),A.UL,B)
-Ac_mul_B!{T<:BlasComplex}(A::Triangular{T}, B::StridedMatrix{T}) = BLAS.trmm('L', A.uplo, 'C', A.unitdiag, one(T), A.UL, B)
-Ac_mul_B!{T<:BlasReal}(A::Triangular{T}, B::StridedMatrix{T}) = BLAS.trmm('L', A.uplo, 'T', A.unitdiag, one(T), A.UL, B)
-A_mul_Bc!{T<:BlasComplex}(A::StridedMatrix{T}, B::Triangular{T}) = BLAS.trmm('R', B.uplo, 'C', B.unitdiag, one(T), B.UL, A)
-A_mul_Bc!{T<:BlasReal}(A::StridedMatrix{T}, B::Triangular{T}) = BLAS.trmm('R', B.uplo, 'T', B.unitdiag, one(T), B.UL, A)
-
-
-function \{T<:BlasFloat}(A::Triangular{T}, B::StridedVecOrMat{T})
-    x = LAPACK.trtrs!(A.uplo, 'N', A.unitdiag, A.UL, copy(B))
-    errors=LAPACK.trrfs!(A.uplo, 'N', A.unitdiag, A.UL, B, x)
+A_mul_B!{T<:BlasFloat,S,UpLo,IsUnit}(A::Triangular{T,S,UpLo,IsUnit}, B::StridedMatrix{T}) = BLAS.trmm!('L', UpLo == :L ? 'L' : 'U', 'N', IsUnit ? 'U' : 'N', one(T), A.data, B)
+A_mul_B!{T<:BlasFloat,S,UpLo,IsUnit}(A::StridedMatrix{T}, B::Triangular{T,S,UpLo,IsUnit}) = BLAS.trmm!('R', UpLo == :L ? 'L' : 'U', 'N', IsUnit ? 'U' : 'N', one(T), B.data, A)
+Ac_mul_B!{T<:BlasComplex,S,UpLo,IsUnit}(A::Triangular{T,S,UpLo,IsUnit}, B::StridedMatrix{T}) = BLAS.trmm!('L', UpLo == :L ? 'L' : 'U', 'C', IsUnit ? 'U' : 'N', one(T), A.data, B)
+Ac_mul_B!{T<:BlasReal,S,UpLo,IsUnit}(A::Triangular{T,S,UpLo,IsUnit}, B::StridedMatrix{T}) = BLAS.trmm!('L', UpLo == :L ? 'L' : 'U', 'T', IsUnit ? 'U' : 'N', one(T), A.data, B)
+A_mul_Bc!{T<:BlasComplex,S,UpLo,IsUnit}(A::StridedMatrix{T}, B::Triangular{T,S,UpLo,IsUnit}) = BLAS.trmm!('R', UpLo == :L ? 'L' : 'U', 'C', IsUnit ? 'U' : 'N', one(T), B.data, A)
+A_mul_Bc!{T<:BlasReal,S,UpLo,IsUnit}(A::StridedMatrix{T}, B::Triangular{T,S,UpLo,IsUnit}) = BLAS.trmm!('R', UpLo == :L ? 'L' : 'U', 'T', IsUnit ? 'U' : 'N', one(T), B.data, A)
+
+A_ldiv_B!{T<:BlasFloat,S<:AbstractMatrix,UpLo,IsUnit}(A::Triangular{T,S,UpLo,IsUnit}, B::StridedVecOrMat{T}) = LAPACK.trtrs!(UpLo == :L ? 'L' : 'U', 'N', IsUnit ? 'U' : 'N', A.data, B)
+function \{T<:BlasFloat,S<:AbstractMatrix,UpLo,IsUnit}(A::Triangular{T,S,UpLo,IsUnit}, B::StridedVecOrMat{T})
+    x = A_ldiv_B!(A, copy(B))
+    errors = LAPACK.trrfs!(UpLo == :L ? 'L' : 'U', 'N', IsUnit ? 'U' : 'N', A.data, B, x)
     all(isfinite, [errors...]) || all([errors...] .< one(T)/eps(T)) || warn("""Unreasonably large error in computed solution:
 forward errors:
 $(errors[1])
@@ -44,21 +48,21 @@ backward errors:
 $(errors[2])""")
     x
 end
-Ac_ldiv_B{T<:BlasReal}(A::Triangular{T}, B::StridedVecOrMat{T}) = LAPACK.trtrs!(A.uplo, 'T', A.unitdiag, A.UL, copy(B))
-Ac_ldiv_B{T<:BlasComplex}(A::Triangular{T}, B::StridedVecOrMat{T}) = LAPACK.trtrs!(A.uplo, 'C', A.unitdiag, A.UL, copy(B))
+Ac_ldiv_B!{T<:BlasReal,S<:AbstractMatrix,UpLo,IsUnit}(A::Triangular{T,S,UpLo,IsUnit}, B::StridedVecOrMat{T}) = LAPACK.trtrs!(UpLo == :L ? 'L' : 'U', 'T', IsUnit ? 'U' : 'N', A.data, B)
+Ac_ldiv_B!{T<:BlasComplex,S<:AbstractMatrix,UpLo,IsUnit}(A::Triangular{T,S,UpLo,IsUnit}, B::StridedVecOrMat{T}) = LAPACK.trtrs!(UpLo == :L ? 'L' : 'U', 'C', IsUnit ? 'U' : 'N', A.data, B)
 
-/{T<:BlasFloat}(A::StridedVecOrMat{T}, B::Triangular{T}) = BLAS.trsm!('R', B.uplo, 'N', B.unitdiag, one(T), B.UL, copy(A))
-A_rdiv_Bc{T<:BlasReal}(A::StridedVecOrMat{T}, B::Triangular{T}) = BLAS.trsm!('R', B.uplo, 'T', B.unitdiag, one(T), B.UL, copy(A))
-A_rdiv_Bc{T<:BlasComplex}(A::StridedVecOrMat{T}, B::Triangular{T}) = BLAS.trsm!('R', B.uplo, 'C', B.unitdiag, one(T), B.UL, copy(A))
+A_rdiv_B!{T<:BlasFloat,S<:AbstractMatrix,UpLo,IsUnit}(A::StridedVecOrMat{T}, B::Triangular{T,S,UpLo,IsUnit}) = BLAS.trsm!('R', UpLo == :L ? 'L' : 'U', 'N', IsUnit ? 'U' : 'N', one(T), B.data, A)
+A_rdiv_Bc!{T<:BlasReal,S<:AbstractMatrix,UpLo,IsUnit}(A::StridedMatrix{T}, B::Triangular{T,S,UpLo,IsUnit}) = BLAS.trsm!('R', UpLo == :L ? 'L' : 'U', 'T', IsUnit ? 'U' : 'N', one(T), B.data, A)
+A_rdiv_Bc!{T<:BlasComplex,S<:AbstractMatrix,UpLo,IsUnit}(A::StridedMatrix{T}, B::Triangular{T,S,UpLo,IsUnit}) = BLAS.trsm!('R', UpLo == :L ? 'L' : 'U', 'C', IsUnit ? 'U' : 'N', one(T), B.data, A)
 
-inv{T<:BlasFloat}(A::Triangular{T}) = LAPACK.trtri!(A.uplo, A.unitdiag, copy(A.UL))
+inv{T<:BlasFloat,S,UpLo,IsUnit}(A::Triangular{T,S,UpLo,IsUnit}) = Triangular{T,S,UpLo,IsUnit}(LAPACK.trtri!(UpLo == :L ? 'L' : 'U', IsUnit ? 'U' : 'N', copy(A.data)))
 
 #Eigensystems
-function eigvecs{T<:BlasFloat}(A::Triangular{T})
-    if A.uplo=='U'
-        V = LAPACK.trevc!('R', 'A', Array(Bool,1), A.UL)
-    else #A.uplo=='L'
-        V = LAPACK.trevc!('L', 'A', Array(Bool,1), A.UL')
+function eigvecs{T<:BlasFloat,S,UpLo,IsUnit}(A::Triangular{T,S,UpLo,IsUnit})
+    if UpLo == :U
+        V = LAPACK.trevc!('R', 'A', Array(Bool,1), A.data)
+    else # Uplo == :L
+        V = LAPACK.trevc!('L', 'A', Array(Bool,1), A.data')
     end
     for i=1:size(V,2) #Normalize
         V[:,i] /= norm(V[:,i])
@@ -66,12 +70,12 @@ function eigvecs{T<:BlasFloat}(A::Triangular{T})
     V
 end
 
-function cond{T<:BlasFloat}(A::Triangular{T}, p::Real=2)
+function cond{T<:BlasFloat,S,UpLo,IsUnit}(A::Triangular{T,S,UpLo,IsUnit}, p::Real=2)
     chksquare(A)
     if p==1
-        return inv(LAPACK.trcon!('O', A.uplo, A.unitdiag, A.UL))
+        return inv(LAPACK.trcon!('O', UpLo == :L ? 'L' : 'U', IsUnit ? 'U' : 'N', A.data))
     elseif p==Inf
-        return inv(LAPACK.trcon!('I', A.uplo, A.unitdiag, A.UL))
+        return inv(LAPACK.trcon!('I', UpLo == :L ? 'L' : 'U', IsUnit ? 'U' : 'N', A.data))
     else #use fallback
         return cond(full(A), p)
     end
@@ -81,45 +85,104 @@ end
 # Generic routines #
 ####################
 
-size(A::Triangular, args...) = size(A.UL, args...)
+size(A::Triangular, args...) = size(A.data, args...)
+
+convert{T,S,UpLo,IsUnit}(::Type{Triangular{T,S,UpLo,IsUnit}}, A::Triangular{T,S,UpLo,IsUnit}) = A
+convert{T,S,UpLo,IsUnit}(::Type{Triangular{T,S,UpLo,IsUnit}}, A::Triangular) = Triangular{T,S,UpLo,IsUnit}(convert(AbstractMatrix{T}, A.data))
+function convert{T,TA,S,UpLo,IsUnit}(::Type{AbstractMatrix{T}}, A::Triangular{TA,S,UpLo,IsUnit})
+    M = convert(AbstractMatrix{T}, A.data)
+    Triangular{T,typeof(M),UpLo,IsUnit}(M)
+end
+function convert{T,S,UpLo,IsUnit}(::Type{Matrix}, A::Triangular{T,S,UpLo,IsUnit})
+    B = Array(T, size(A, 1), size(A, 1))
+    copy!(B, A.data)
+    (UpLo == :L ? tril! : triu!)(B)
+    if IsUnit
+        for i = 1:size(B,1)
+            B[i,i] = 1
+        end
+    end
+    B
+end
+
+function full!(A::Triangular)
+    B = A.data
+    (UpLo == :L ? tril! : triu!)(B)
+    if IsUnit
+        for i = 1:size(A,1)
+            B[i,i] = 1
+        end
+    end
+    B
+end
+full{T,S,UpLo,IsUnit}(A::Triangular{T,S,UpLo,IsUnit}) = convert(Matrix, A)
 
-convert(::Type{Matrix}, A::Triangular) = full(A)
-convert{T}(::Type{Triangular{T}}, A::Triangular{T}) = A
-convert{T}(::Type{Triangular{T}}, A::Triangular) = Triangular(convert(AbstractMatrix{T}, A.UL), A.uplo, A.unitdiag)
-convert{T}(::Type{AbstractMatrix{T}}, A::Triangular) = convert(Triangular{T}, A)
+fill!(A::Triangular, x) = (fill!(A.data, x); A)
 
-full(A::Triangular) = A.uplo == 'U' ? triu!(A.UL) : tril!(A.UL)
+function similar{T,S,UpLo,IsUnit,Tnew}(A::Triangular{T,S,UpLo,IsUnit}, ::Type{Tnew}, dims::Dims)
+    dims[1] == dims[2] || throw(ArgumentError("a Triangular matrix must be square"))
+    length(dims) == 2 || throw(ArgumentError("a Traigular matrix must have two dimensions"))
+    A = similar(A.data, Tnew, dims)
+    return Triangular{Tnew, typeof(A), UpLo, IsUnit}(A)
+end
+
+getindex{T,S,UpLo,IsUnit}(A::Triangular{T,S,UpLo,IsUnit}, i::Integer, j::Integer) = i == j ? (IsUnit ? one(T) : A.data[i,j]) : ((UpLo == :U) == (i < j) ? getindex(A.data, i, j) : zero(T))
+getindex{T,S,UpLo,IsUnit}(A::Triangular{T,S,UpLo,IsUnit}, i::Integer) = ((m, n) = divrem(i - 1, size(A,1)); A[m + 1, n + 1])
 
-getindex{T}(A::Triangular{T}, i::Integer, j::Integer) = i == j ? (A.unitdiag == 'U' ? one(T) : A.UL[i,j]) : ((A.uplo == 'U') == (i < j) ? getindex(A.UL, i, j) : zero(T))
+istril{T,S,UpLo,IsUnit}(A::Triangular{T,S,UpLo,IsUnit}) = UpLo == :L
+istriu{T,S,UpLo,IsUnit}(A::Triangular{T,S,UpLo,IsUnit}) = UpLo == :U
 
-istril(A::Triangular) = A.uplo == 'L' || istriu(A.UL)
-istriu(A::Triangular) = A.uplo == 'U' || istril(A.UL)
+transpose{T,S,UpLo,IsUnit}(A::Triangular{T,S,UpLo,IsUnit}) = Triangular{T, S, UpLo == :U ? :L : :U, IsUnit}(transpose(A.data))
+ctranspose{T,S,UpLo,IsUnit}(A::Triangular{T,S,UpLo,IsUnit}) = Triangular{T, S, UpLo == :U ? :L : :U, IsUnit}(ctranspose(A.data))
+transpose!{T,S,UpLo,IsUnit}(A::Triangular{T,S,UpLo,IsUnit}) = Triangular{T, S, UpLo == :U ? :L : :U, IsUnit}(copytri!(A.data, UpLo == :L ? 'L' : 'U'))
+ctranspose!{T,S,UpLo,IsUnit}(A::Triangular{T,S,UpLo,IsUnit}) = Triangular{T, S, UpLo == :U ? :L : :U, IsUnit}(copytri!(A.data, UpLo == :L ? 'L' : 'U', true))
+diag{T,S,UpLo,IsUnit}(A::Triangular{T,S,UpLo,IsUnit}) = IsUnit ? ones(T, size(A,1)) : diag(A.data)
+function big{T,S,UpLo,IsUnit}(A::Triangular{T,S,UpLo,IsUnit})
+    M = big(A.data)
+    Triangular{T,typeof(M),UpLo,IsUnit}(M)
+end
 
-transpose(A::Triangular) = Triangular(transpose(A.UL), A.uplo=='U'?'L':'U', A.unitdiag)
-ctranspose(A::Triangular) = Triangular(ctranspose(A.UL), A.uplo=='U'?'L':'U', A.unitdiag)
-transpose!(A::Triangular) = Triangular(copytri!(A.UL, A.uplo), A.uplo=='U'?'L':'U', A.unitdiag)
-ctranspose!(A::Triangular) = Triangular(copytri!(A.UL, A.uplo, true), A.uplo=='U'?'L':'U', A.unitdiag)
-diag(A::Triangular) = diag(A.UL)
-big(A::Triangular) = Triangular(big(A.UL), A.uplo, A.unitdiag)
+function (*){T,S,UpLo,IsUnit}(A::Triangular{T,S,UpLo,IsUnit}, x::Number)
+    n = size(A,1)
+    for j = 1:n
+        for i = UpLo == :L ? j:n : 1:j
+            A.data[i,j] = i == j & IsUnit ? x : A.data[i,j]*x
+        end
+    end
+    A
+end
+function (*){T,S,UpLo,IsUnit}(x::Number, A::Triangular{T,S,UpLo,IsUnit})
+    n = size(A,1)
+    for j = 1:n
+        for i = UpLo == :L ? j:n : 1:j
+            A.data[i,j] = i == j & IsUnit ? x : x*A.data[i,j]
+        end
+    end
+    A
+end
 
-*(A::Tridiagonal, B::Triangular) = A*full(B)
-A_mul_Bc{TB}(A::Triangular, B::Union(QRCompactWYQ{TB},QRPackedQ{TB})) = A_mul_Bc(full(A),B)
+A_mul_B!{T,S,UpLo,IsUnit}(A::Triangular{T,S,UpLo,IsUnit}, B::Triangular{T,S,UpLo,IsUnit}) = Triangular{T,S,UpLo,IsUnit}(A*full!(B))
+A_mul_B!(A::Tridiagonal, B::Triangular) = A*full!(B)
+A_mul_Bc!(A::Triangular, B::QRCompactWYQ) = A_mul_Bc!(full!(A),B)
+A_mul_Bc!(A::Triangular, B::QRPackedQ) = A_mul_Bc!(full!(A),B)
 
 #Generic multiplication
+*(A::Tridiagonal, B::Triangular) = A_mul_B!(full(A), B)
+A_mul_Bc(A::Triangular, B::Union(QRCompactWYQ,QRPackedQ)) = A_mul_Bc(full(A), B)
 for func in (:*, :Ac_mul_B, :A_mul_Bc, :/, :A_rdiv_Bc)
     @eval begin
-        ($func)(A::Triangular, B::Triangular) = ($func)(A, full(B))
-        ($func)(A::Triangular, B::AbstractVecOrMat) = ($func)(full(A), B)
-        ($func)(A::AbstractMatrix, B::Triangular) = ($func)(A, full(B))
+        ($func){TA,TB,SA<:AbstractMatrix,SB<:AbstractMatrix,UpLoA,UpLoB,IsUnitA,IsUnitB}(A::Triangular{TA,SA,UpLoA,IsUnitA}, B::Triangular{TB,SB,UpLoB,IsUnitB}) = ($func)(A, full(B))
+        ($func){T,S<:AbstractMatrix,UpLo,IsUnit}(A::Triangular{T,S,UpLo,IsUnit}, B::AbstractVecOrMat) = ($func)(full(A), B)
+        ($func){T,S<:AbstractMatrix,UpLo,IsUnit}(A::AbstractMatrix, B::Triangular{T,S,UpLo,IsUnit}) = ($func)(A, full(B))
     end
 end
 
-function sqrtm{T}(A::Triangular{T})
+function sqrtm{T,S,UpLo,IsUnit}(A::Triangular{T,S,UpLo,IsUnit})
     n = size(A, 1)
     R = zeros(T, n, n)
-    if A.uplo == 'U'
+    if UpLo == :U
         for j = 1:n
-            (T<:Complex || A[j,j]>=0) ? (R[j,j]=sqrt(A[j,j])) : throw(SingularException(j))
+            (T<:Complex || A[j,j]>=0) ? (R[j,j] = IsUnit ? one(T) : sqrt(A[j,j])) : throw(SingularException(j))
             for i = j-1:-1:1
                 r = A[i,j]
                 for k = i+1:j-1
@@ -128,65 +191,65 @@ function sqrtm{T}(A::Triangular{T})
                 r==0 || (R[i,j] = r / (R[i,i] + R[j,j]))
             end
         end
-        return Triangular(R)
-    else #A.uplo == 'L' #Not the usual case
+        return Triangular{T,S,UpLo,IsUnit}(R)
+    else # UpLo == :L #Not the usual case
         return sqrtm(A.').'
     end
 end
 
 #Generic solver using naive substitution
-function naivesub!(A::Triangular, b::AbstractVector, x::AbstractVector=b)
+function naivesub!{T,S,UpLo,IsUnit}(A::Triangular{T,S,UpLo,IsUnit}, b::AbstractVector, x::AbstractVector=b)
     N = size(A, 2)
     N==length(b)==length(x) || throw(DimensionMismatch(""))
 
-    if A.uplo == 'L' #do forward substitution
+    if UpLo == :L #do forward substitution
         for j = 1:N
             x[j] = b[j]
             for k = 1:j-1
                 x[j] -= A[j,k] * x[k]
             end
-            if A.unitdiag=='N'
+            if !IsUnit
                 x[j]/= A[j,j]==0 ? throw(SingularException(j)) : A[j,j]
             end
         end
-    elseif A.uplo == 'U' #do backward substitution
+    elseif UpLo == :U #do backward substitution
         for j = N:-1:1
             x[j] = b[j]
             for k = j+1:1:N
                 x[j] -= A[j,k] * x[k]
             end
-            if A.unitdiag=='N'
+            if !IsUnit
                 x[j]/= A[j,j]==0 ? throw(SingularException(j)) : A[j,j]
             end
         end
     else
-        throw(ArgumentError("Unknown uplo=$(A.uplo)"))
+        throw(ArgumentError("Unknown UpLo=$(UpLo)"))
     end
     x
 end
 
 #Generic eigensystems
-eigvals(A::Triangular) = diag(A.UL)
+eigvals(A::Triangular) = diag(A.data)
 det(A::Triangular) = prod(eigvals(A))
 
-function eigvecs{T}(A::Triangular{T})
-    evecs = zeros(A)
+function eigvecs{T,S,UpLo,IsUnit}(A::Triangular{T,S,UpLo,IsUnit})
     N = size(A,1)
-    if A.unitdiag == 'U' #Trivial
+    evecs = zeros(T, N, N)
+    if IsUnit #Trivial
         return eye(A)
-    elseif A.uplo == 'L' #do forward substitution
+    elseif UpLo == :L #do forward substitution
         for i=1:N
             evecs[i,i] = one(T)
             for j = i+1:N
                 for k = i:j-1
                     evecs[j,i] -= A[j,k] * evecs[k,i]
                 end
-                evecs[j,i] /= A[j,j]-A[i,i]
+                evecs[j,i] /= A[j,j] - A[i,i]
             end
             evecs[i:N, i] /= norm(evecs[i:N, i])
         end
         evecs
-    elseif A.uplo == 'U' #do backward substitution
+    elseif UpLo == :U #do backward substitution
         for i=1:N
             evecs[i,i] = one(T)
             for j = i-1:-1:1
@@ -198,7 +261,7 @@ function eigvecs{T}(A::Triangular{T})
             evecs[1:i, i] /= norm(evecs[1:i, i])
         end
     else
-        throw(ArgumentError("Unknown uplo=$(A.uplo)"))
+        throw(ArgumentError("Unknown uplo=$(UpLo)"))
     end
     evecs
 end
diff --git a/test/linalg1.jl b/test/linalg1.jl
index 3ce67aed33a86..04c450d10569d 100644
--- a/test/linalg1.jl
+++ b/test/linalg1.jl
@@ -94,7 +94,7 @@ debug && println("(Automatic) Square LU decomposition")
     lua   = factorize(a)
     l,u,p = lua[:L], lua[:U], lua[:p]
     @test_approx_eq l*u a[p,:]
-    @test_approx_eq l[invperm(p),:]*u a
+    @test_approx_eq (l*u)[invperm(p),:] a
     @test_approx_eq a * inv(lua) eye(n)
     @test norm(a*(lua\b) - b, 1) < ε*κ*n*2 # Two because the right hand side has two columns
 
diff --git a/test/linalg4.jl b/test/linalg4.jl
index 1183bcc241c20..4885ffb5db8c0 100644
--- a/test/linalg4.jl
+++ b/test/linalg4.jl
@@ -31,8 +31,7 @@ for relty in (Float32, Float64, BigFloat), elty in (relty, Complex{relty})
         b += im*convert(Matrix{elty}, randn(n, 2))
     end
 
-    for M in (triu(A), tril(A))
-        TM = Triangular(M)
+    for (M, TM) in ((triu(A), Triangular(A, :U)), (tril(A), Triangular(A, :L)))
         
         ##Idempotent tests #XXX - not implemented
         #for func in (conj, transpose, ctranspose)
@@ -65,8 +64,7 @@ for relty in (Float32, Float64, BigFloat), elty in (relty, Complex{relty})
 
         #Binary operations
         B = convert(Matrix{elty}, randn(n, n))
-        for M2 in (triu(B), tril(B))
-            TM2 = Triangular(M2)
+        for (M2, TM2) in ((triu(B), Triangular(B, :U)), (tril(B), Triangular(B, :L)))
             for op in (*,) #+, - not implemented
                 @test_approx_eq full(op(TM, TM2)) op(M, M2)
                 @test_approx_eq full(op(TM, M2)) op(M, M2)
@@ -301,11 +299,11 @@ for newtype in [Tridiagonal, Matrix]
 end
 
 A=Tridiagonal(zeros(n-1), [1.0:n], zeros(n-1)) #morally Diagonal
-for newtype in [Diagonal, Bidiagonal, SymTridiagonal, Triangular, Matrix]
+for newtype in [Diagonal, Bidiagonal, SymTridiagonal, Matrix]
     @test full(convert(newtype, A)) == full(A)
 end
 
-A=Triangular(full(Diagonal(a))) #morally Diagonal
+A=Triangular(full(Diagonal(a)), :L) #morally Diagonal
 for newtype in [Diagonal, Bidiagonal, SymTridiagonal, Triangular, Matrix]
     @test full(convert(newtype, A)) == full(A)
 end