From af7b46c3c0386bec381779cf64f596d972c7ec08 Mon Sep 17 00:00:00 2001
From: Jameson Nash <vtjnash@gmail.com>
Date: Wed, 20 Jan 2016 12:07:33 -0500
Subject: [PATCH 1/3] forcibly inline the byte-write method for IOBuffer, since
 this tends to end up in the hot path

---
 base/iobuffer.jl | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/base/iobuffer.jl b/base/iobuffer.jl
index 2d31a9ed23098..f49eb9291bd17 100644
--- a/base/iobuffer.jl
+++ b/base/iobuffer.jl
@@ -171,7 +171,7 @@ function compact(io::AbstractIOBuffer)
     return io
 end
 
-function ensureroom(io::AbstractIOBuffer, nshort::Int)
+@inline function ensureroom(io::AbstractIOBuffer, nshort::Int)
     io.writable || throw(ArgumentError("ensureroom failed, IOBuffer is not writeable"))
     if !io.seekable
         nshort >= 0 || throw(ArgumentError("ensureroom failed, requested number of bytes must be ≥ 0, got $nshort"))
@@ -198,7 +198,7 @@ end
 
 eof(io::AbstractIOBuffer) = (io.ptr-1 == io.size)
 
-function close{T}(io::AbstractIOBuffer{T})
+@noinline function close{T}(io::AbstractIOBuffer{T})
     io.readable = false
     io.writable = false
     io.seekable = false
@@ -310,12 +310,12 @@ function write_sub{T}(to::AbstractIOBuffer, a::AbstractArray{T}, offs, nel)
             written += write(to, a[i])
         end
     end
-    written
+    return written
 end
 
 write(to::AbstractIOBuffer, a::Array) = write_sub(to, a, 1, length(a))
 
-function write(to::AbstractIOBuffer, a::UInt8)
+@inline function write(to::AbstractIOBuffer, a::UInt8)
     ensureroom(to, 1)
     ptr = (to.append ? to.size+1 : to.ptr)
     if ptr > to.maxsize
@@ -325,7 +325,7 @@ function write(to::AbstractIOBuffer, a::UInt8)
     end
     to.size = max(to.size, ptr)
     if !to.append to.ptr += 1 end
-    sizeof(UInt8)
+    return sizeof(UInt8)
 end
 
 function readbytes!(io::AbstractIOBuffer, b::Array{UInt8}, nb=length(b))

From fd49bb1925127ddef8042effc15b6534c17c2192 Mon Sep 17 00:00:00 2001
From: Jameson Nash <vtjnash@gmail.com>
Date: Fri, 22 Jan 2016 15:27:48 -0500
Subject: [PATCH 2/3] rename write(io, ptr::Ptr, nb) to unsafe_write(io,
 p::Ptr{UInt8}, nb::UInt)

This better clarifies the behavior of this method (the fallback for write(io, xs...) usually calls write on each element of xs)
and provides a fallback implementation for every subtype of IO.
This also allows the deletion of a substantial amount of duplicated and special-case code that is now redundant.

Once Ref can be stack-allocated, this approach should gain an extra speed boost.
---
 base/LineEdit.jl   |   2 +-
 base/Terminals.jl  |  52 ++++++-----------------
 base/base64.jl     |  17 ++++----
 base/deprecated.jl |   3 ++
 base/exports.jl    |   2 +
 base/filesystem.jl |  12 +-----
 base/grisu.jl      |  22 +++++-----
 base/io.jl         | 100 ++++++++++++++++++++++++++-------------------
 base/iobuffer.jl   |  68 ++++++++----------------------
 base/iostream.jl   |  18 ++------
 base/printf.jl     |  22 +++++-----
 base/serialize.jl  |   2 +-
 base/stream.jl     |  45 ++++++--------------
 base/sysimg.jl     |  10 ++---
 14 files changed, 149 insertions(+), 226 deletions(-)

diff --git a/base/LineEdit.jl b/base/LineEdit.jl
index 27cb6a04978b5..8bfa6625f244c 100644
--- a/base/LineEdit.jl
+++ b/base/LineEdit.jl
@@ -1125,7 +1125,7 @@ end
 
 function refresh_multi_line(termbuf::TerminalBuffer, s::SearchState)
     buf = IOBuffer()
-    write(buf, pointer(s.query_buffer.data), s.query_buffer.ptr-1)
+    unsafe_write(buf, pointer(s.query_buffer.data), s.query_buffer.ptr-1)
     write(buf, "': ")
     offset = buf.ptr
     ptr = s.response_buffer.ptr
diff --git a/base/Terminals.jl b/base/Terminals.jl
index dac425d0142bb..433a0933304b0 100644
--- a/base/Terminals.jl
+++ b/base/Terminals.jl
@@ -22,34 +22,30 @@ export
     getY,
     hascolor,
     pos,
-    raw!,
-    writepos
+    raw!
 
 import Base:
+    check_open, # stream.jl
+    displaysize,
     flush,
+    pipe_reader,
+    pipe_writer,
     read,
     readuntil,
-    displaysize,
-    start_reading,
-    stop_reading,
-    write,
-    writemime,
-    reseteof,
-    eof,
-    check_open # stream.jl
+    writemime
 
 ## TextTerminal ##
 
-abstract TextTerminal <: Base.IO
+abstract TextTerminal <: Base.AbstractPipe
 
 # INTERFACE
+pipe_reader(::TextTerminal) = error("Unimplemented")
+pipe_writer(::TextTerminal) = error("Unimplemented")
 displaysize(::TextTerminal) = error("Unimplemented")
-writepos(t::TextTerminal, x, y, s::Array{UInt8,1}) = error("Unimplemented")
 cmove(t::TextTerminal, x, y) = error("Unimplemented")
 getX(t::TextTerminal) = error("Unimplemented")
 getY(t::TextTerminal) = error("Unimplemented")
 pos(t::TextTerminal) = (getX(t), getY(t))
-reseteof(t::TextTerminal) = nothing
 
 # Relative moves (Absolute position fallbacks)
 cmove_up(t::TextTerminal, n) = cmove(getX(t), max(1, getY(t)-n))
@@ -76,18 +72,6 @@ cmove_col(t::TextTerminal, c) = cmove(c, getY(t))
 hascolor(::TextTerminal) = false
 
 # Utility Functions
-function writepos{T}(t::TextTerminal, x, y, b::Array{T})
-    if isbits(T)
-        writepos(t, x, y, reinterpret(UInt8, b))
-    else
-        cmove(t, x, y)
-        invoke(write, Tuple{IO, Array}, s, a)
-    end
-end
-function writepos(t::TextTerminal, x, y, args...)
-    cmove(t, x, y)
-    write(t, args...)
-end
 width(t::TextTerminal) = displaysize(t)[2]
 height(t::TextTerminal) = displaysize(t)[1]
 
@@ -108,6 +92,9 @@ disable_bracketed_paste(t::TextTerminal) = nothing
 
 abstract UnixTerminal <: TextTerminal
 
+pipe_reader(t::UnixTerminal) = t.in_stream
+pipe_writer(t::UnixTerminal) = t.out_stream
+
 type TerminalBuffer <: UnixTerminal
     out_stream::Base.IO
 end
@@ -119,8 +106,6 @@ type TTYTerminal <: UnixTerminal
     err_stream::Base.TTY
 end
 
-reseteof(t::TTYTerminal) = reseteof(t.in_stream)
-
 const CSI = "\x1b["
 
 cmove_up(t::UnixTerminal, n) = write(t.out_stream, "$(CSI)$(n)A")
@@ -166,19 +151,6 @@ clear(t::UnixTerminal) = write(t.out_stream, "\x1b[H\x1b[2J")
 clear_line(t::UnixTerminal) = write(t.out_stream, "\x1b[0G\x1b[0K")
 #beep(t::UnixTerminal) = write(t.err_stream,"\x7")
 
-write{T,N}(t::UnixTerminal, a::Array{T,N}) = write(t.out_stream, a)
-write(t::UnixTerminal, p::Ptr{UInt8}) = write(t.out_stream, p)
-write(t::UnixTerminal, p::Ptr{UInt8}, x::Integer) = write(t.out_stream, p, x)
-write(t::UnixTerminal, x::UInt8) = write(t.out_stream, x)
-read{T,N}(t::UnixTerminal, x::Array{T,N}) = read(t.in_stream, x)
-readuntil(t::UnixTerminal, s::AbstractString) = readuntil(t.in_stream, s)
-readuntil(t::UnixTerminal, c::Char) = readuntil(t.in_stream, c)
-readuntil(t::UnixTerminal, s) = readuntil(t.in_stream, s)
-read(t::UnixTerminal, ::Type{UInt8}) = read(t.in_stream, UInt8)
-start_reading(t::UnixTerminal) = start_reading(t.in_stream)
-stop_reading(t::UnixTerminal) = stop_reading(t.in_stream)
-eof(t::UnixTerminal) = eof(t.in_stream)
-
 @unix_only function hascolor(t::TTYTerminal)
     startswith(t.term_type, "xterm") && return true
     try
diff --git a/base/base64.jl b/base/base64.jl
index 90f0cde3514be..5b9856dfed1d5 100644
--- a/base/base64.jl
+++ b/base/base64.jl
@@ -85,16 +85,15 @@ end
 
 #############################################################################
 
-function write(b::Base64EncodePipe, x::AbstractVector{UInt8})
-    n = length(x)
+function unsafe_write(b::Base64EncodePipe, x::Ptr{UInt8}, n::UInt)
     s = 1 # starting index
     # finish any cached data to write:
     if b.nb == 1
         if n >= 2
-            write(b.io, b64(b.b0, x[1], x[2])...)
+            write(b.io, b64(b.b0, unsafe_load(x, 1), unsafe_load(x, 2))...)
             s = 3
         elseif n == 1
-            b.b1 = x[1]
+            b.b1 = unsafe_load(x, 1)
             b.nb = 2
             return
         else
@@ -102,7 +101,7 @@ function write(b::Base64EncodePipe, x::AbstractVector{UInt8})
         end
     elseif b.nb == 2
         if n >= 1
-            write(b.io, b64(b.b0, b.b1, x[1])...)
+            write(b.io, b64(b.b0, b.b1, unsafe_load(x, 1))...)
             s = 2
         else
             return
@@ -110,16 +109,16 @@ function write(b::Base64EncodePipe, x::AbstractVector{UInt8})
     end
     # write all groups of three bytes:
     while s + 2 <= n
-        write(b.io, b64(x[s], x[s+1], x[s+2])...)
+        write(b.io, b64(unsafe_load(x, s), unsafe_load(x, s + 1), unsafe_load(x, s + 2))...)
         s += 3
     end
     # cache any leftover bytes:
     if s + 1 == n
-        b.b0 = x[s]
-        b.b1 = x[s+1]
+        b.b0 = unsafe_load(x, s)
+        b.b1 = unsafe_load(x, s + 1)
         b.nb = 2
     elseif s == n
-        b.b0 = x[s]
+        b.b0 = unsafe_load(x, s)
         b.nb = 1
     else
         b.nb = 0
diff --git a/base/deprecated.jl b/base/deprecated.jl
index 5b9cbbf42fcce..86f3db60510bd 100644
--- a/base/deprecated.jl
+++ b/base/deprecated.jl
@@ -978,3 +978,6 @@ end
     end
     return offsets
 end
+
+# 14766
+@deprecate write(io::IO, p::Ptr, nb::Integer) unsafe_write(io, p, nb)
diff --git a/base/exports.jl b/base/exports.jl
index 93a871d79d7b7..1191e1a0d293a 100644
--- a/base/exports.jl
+++ b/base/exports.jl
@@ -1333,7 +1333,9 @@ export
     unsafe_copy!,
     unsafe_load,
     unsafe_pointer_to_objref,
+    #unsafe_read,
     unsafe_store!,
+    unsafe_write,
 
 # nullable types
     isnull,
diff --git a/base/filesystem.jl b/base/filesystem.jl
index 29a534b6d51dd..2f8ddf111e9f7 100644
--- a/base/filesystem.jl
+++ b/base/filesystem.jl
@@ -105,7 +105,7 @@ function sendfile(dst::File, src::File, src_offset::Int64, bytes::Int)
     nothing
 end
 
-function write(f::File, buf::Ptr{UInt8}, len::Integer, offset::Integer=-1)
+function unsafe_write(f::File, buf::Ptr{UInt8}, len::UInt, offset::Int64=Int64(-1))
     check_open(f)
     err = ccall(:jl_fs_write, Int32, (Int32, Ptr{UInt8}, Csize_t, Int64),
                 f.handle, buf, len, offset)
@@ -113,15 +113,7 @@ function write(f::File, buf::Ptr{UInt8}, len::Integer, offset::Integer=-1)
     return len
 end
 
-write(f::File, c::UInt8) = write(f, UInt8[c])
-
-function write{T}(f::File, a::Array{T})
-    if isbits(T)
-        write(f, pointer(a), sizeof(a))
-    else
-        invoke(write, Tuple{IO, Array}, f, a)
-    end
-end
+write(f::File, c::UInt8) = write(f, Ref{UInt8}(c))
 
 function truncate(f::File, n::Integer)
     check_open(f)
diff --git a/base/grisu.jl b/base/grisu.jl
index ffc984a206b9c..055baa5983217 100644
--- a/base/grisu.jl
+++ b/base/grisu.jl
@@ -79,10 +79,10 @@ function _show(io::IO, x::AbstractFloat, mode, n::Int, typed, nanstr, infstr)
     exp_form = exp_form || (pt >= len && abs(mod(x + 0.05, 10^(pt - len)) - 0.05) > 0.05) # see issue #6608
     if exp_form # .00001 to 100000.
         # => #.#######e###
-        write(io, pdigits, 1)
+        unsafe_write(io, pdigits, 1)
         write(io, '.')
         if len > 1
-            write(io, pdigits+1, len-1)
+            unsafe_write(io, pdigits+1, len-1)
         else
             write(io, '0')
         end
@@ -97,19 +97,19 @@ function _show(io::IO, x::AbstractFloat, mode, n::Int, typed, nanstr, infstr)
             write(io, '0')
             pt += 1
         end
-        write(io, pdigits, len)
+        unsafe_write(io, pdigits, len)
     elseif pt >= len
         # => ########00.0
-        write(io, pdigits, len)
+        unsafe_write(io, pdigits, len)
         while pt > len
             write(io, '0')
             len += 1
         end
         write(io, ".0")
     else # => ####.####
-        write(io, pdigits, pt)
+        unsafe_write(io, pdigits, pt)
         write(io, '.')
-        write(io, pdigits+pt, len-pt)
+        unsafe_write(io, pdigits+pt, len-pt)
     end
     typed && isa(x,Float32) && write(io, "f0")
     typed && isa(x,Float16) && write(io, ")")
@@ -144,7 +144,7 @@ function _print_shortest(io::IO, x::AbstractFloat, dot::Bool, mode, n::Int)
     k = -9<=e<=9 ? 1 : 2
     if -pt > k+1 || e+dot > k+1
         # => ########e###
-        write(io, pdigits+0, len)
+        unsafe_write(io, pdigits+0, len)
         write(io, 'e')
         write(io, dec(e))
         return
@@ -155,10 +155,10 @@ function _print_shortest(io::IO, x::AbstractFloat, dot::Bool, mode, n::Int)
             write(io, '0')
             pt += 1
         end
-        write(io, pdigits+0, len)
+        unsafe_write(io, pdigits+0, len)
     elseif e >= dot
         # => ########000.
-        write(io, pdigits+0, len)
+        unsafe_write(io, pdigits+0, len)
         while e > 0
             write(io, '0')
             e -= 1
@@ -167,9 +167,9 @@ function _print_shortest(io::IO, x::AbstractFloat, dot::Bool, mode, n::Int)
             write(io, '.')
         end
     else # => ####.####
-        write(io, pdigits+0, pt)
+        unsafe_write(io, pdigits+0, pt)
         write(io, '.')
-        write(io, pdigits+pt, len-pt)
+        unsafe_write(io, pdigits+pt, len-pt)
     end
     nothing
 end
diff --git a/base/io.jl b/base/io.jl
index 53123506d0d2b..4d95906c95422 100644
--- a/base/io.jl
+++ b/base/io.jl
@@ -1,6 +1,6 @@
 # This file is a part of Julia. License is MIT: http://julialang.org/license
 
-# Generic IO stubs
+# Generic IO stubs -- all subtypes should implement these (if meaningful)
 
 lock(::IO) = nothing
 unlock(::IO) = nothing
@@ -23,20 +23,24 @@ function iswritable end
 function copy end
 function eof end
 
-# all subtypes should implement this
 read(s::IO, ::Type{UInt8}) = error(typeof(s)," does not support byte I/O")
 write(s::IO, x::UInt8) = error(typeof(s)," does not support byte I/O")
 
+function unsafe_write(s::IO, p::Ptr{UInt8}, n::UInt)
+    local written::Int = 0
+    for i=1:n
+        written += write(s, unsafe_load(p, i))
+    end
+    return written
+end
+
 # Generic wrappers around other IO objects
 abstract AbstractPipe <: IO
 function pipe_reader end
 function pipe_writer end
 
 write(io::AbstractPipe, byte::UInt8) = write(pipe_writer(io), byte)
-write(io::AbstractPipe, bytes::Vector{UInt8}) = write(pipe_writer(io), bytes)
-write{T<:AbstractPipe}(io::T, args...) = write(pipe_writer(io), args...)
-write{S<:AbstractPipe}(io::S, a::Array) = write(pipe_writer(io), a)
-buffer_or_write(io::AbstractPipe, p::Ptr, n::Integer) = buffer_or_write(pipe_writer(io), p, n)
+unsafe_write(io::AbstractPipe, p::Ptr{UInt8}, nb::UInt) = unsafe_write(pipe_writer(io), p, nb)
 buffer_writes(io::AbstractPipe, args...) = buffer_writes(pipe_writer(io), args...)
 flush(io::AbstractPipe) = flush(pipe_writer(io))
 
@@ -100,34 +104,18 @@ function write(io::IO, xs...)
     for x in xs
         written += write(io, x)
     end
-    written
+    return written
 end
 
-if ENDIAN_BOM == 0x01020304
-    function write(s::IO, x::Union{Int8,Int16,UInt16,Int32,UInt32,Int64,UInt64,Int128,UInt128})
-        sz = sizeof(x)
-        local written::Int = 0
-        for n = sz:-1:1
-            written += write(s, (x>>>((n-1)<<3))%UInt8)
-        end
-        return written
-    end
-else
-    function write(s::IO, x::Union{Int8,Int16,UInt16,Int32,UInt32,Int64,UInt64,Int128,UInt128})
-        sz = sizeof(x)
-        local written::Int = 0
-        for n = 1:sz
-            written += write(s, (x>>>((n-1)<<3))%UInt8)
-        end
-        return written
-    end
+unsafe_write{T}(s::IO, p::Ref{T}, n::Integer) = unsafe_write(s, unsafe_convert(Ref{T}, p)::Ptr, n)
+unsafe_write(s::IO, p::Ptr, n::Integer) = unsafe_write(s, convert(Ptr{UInt8}, p), convert(UInt, n))
+write(s::IO, x::Ref) = unsafe_write(s, x, sizeof(eltype(x)))
+
+function write(s::IO, x::Union{Int8,Int16,UInt16,Int32,UInt32,Int64,UInt64,Int128,UInt128,Float16,Float32,Float64})
+    return write(s, Ref(x))
 end
 
 write(s::IO, x::Bool)    = write(s, UInt8(x))
-write(s::IO, x::Float16) = write(s, reinterpret(Int16,x))
-write(s::IO, x::Float32) = write(s, reinterpret(Int32,x))
-write(s::IO, x::Float64) = write(s, reinterpret(Int64,x))
-
 write(to::IO, p::Ptr) = write(to, convert(UInt, p))
 
 function write(s::IO, a::AbstractArray)
@@ -138,6 +126,19 @@ function write(s::IO, a::AbstractArray)
     return nb
 end
 
+function write{T}(s::IO, a::Array{T})
+    if isbits(T)
+        return unsafe_write(s, pointer(a), sizeof(a))
+    end
+
+    nb = 0
+    for x in a
+        nb += write(s, x)
+    end
+    return nb
+end
+
+
 function write(s::IO, ch::Char)
     c = reinterpret(UInt32, ch)
     if c < 0x80
@@ -159,17 +160,9 @@ function write(s::IO, ch::Char)
     end
 end
 
-function write(s::IO, p::Ptr, n::Integer)
-    local written::Int = 0
-    for i=1:n
-        written += write(s, unsafe_load(p, i))
-    end
-    return written
-end
-
 function write(io::IO, s::Symbol)
     pname = unsafe_convert(Ptr{UInt8}, s)
-    return write(io, pname, Int(ccall(:strlen, Csize_t, (Cstring,), pname)))
+    return unsafe_write(io, pname, Int(ccall(:strlen, Csize_t, (Cstring,), pname)))
 end
 
 function write(to::IO, from::IO)
@@ -237,7 +230,7 @@ function read(s::IO, ::Type{Char})
     end
     c += ch
     c -= Base.utf8_offset[trailing+1]
-    Char(c)
+    return Char(c)
 end
 
 function readuntil(s::IO, delim::Char)
@@ -254,7 +247,7 @@ function readuntil(s::IO, delim::Char)
             break
         end
     end
-    takebuf_string(out)
+    return takebuf_string(out)
 end
 
 function readuntil{T}(s::IO, delim::T)
@@ -266,7 +259,7 @@ function readuntil{T}(s::IO, delim::T)
             break
         end
     end
-    out
+    return out
 end
 
 # based on code by Glen Hertz
@@ -365,3 +358,28 @@ next(itr::EachLine, nada) = (readline(itr.stream), nothing)
 eltype(::Type{EachLine}) = ByteString
 
 readlines(s=STDIN) = collect(eachline(s))
+
+# IOStream Marking
+# Note that these functions expect that io.mark exists for
+# the concrete IO type. This may not be true for IO types
+# not in base.
+
+function mark(io::IO)
+    io.mark = position(io)
+end
+
+function unmark(io::IO)
+    !ismarked(io) && return false
+    io.mark = -1
+    return true
+end
+
+function reset{T<:IO}(io::T)
+    ismarked(io) || throw(ArgumentError("$(T) not marked"))
+    m = io.mark
+    seek(io, m)
+    io.mark = -1 # must be after seek, or seek may fail
+    return m
+end
+
+ismarked(io::IO) = io.mark >= 0
diff --git a/base/iobuffer.jl b/base/iobuffer.jl
index f49eb9291bd17..cd1d330734814 100644
--- a/base/iobuffer.jl
+++ b/base/iobuffer.jl
@@ -171,7 +171,8 @@ function compact(io::AbstractIOBuffer)
     return io
 end
 
-@inline function ensureroom(io::AbstractIOBuffer, nshort::Int)
+@inline ensureroom(io::AbstractIOBuffer, nshort::Int) = ensureroom(io, UInt(nshort))
+@inline function ensureroom(io::AbstractIOBuffer, nshort::UInt)
     io.writable || throw(ArgumentError("ensureroom failed, IOBuffer is not writeable"))
     if !io.seekable
         nshort >= 0 || throw(ArgumentError("ensureroom failed, requested number of bytes must be ≥ 0, got $nshort"))
@@ -236,7 +237,7 @@ function takebuf_array(io::AbstractIOBuffer)
         io.ptr = 1
         io.size = 0
     end
-    data
+    return data
 end
 function takebuf_array(io::IOBuffer)
     ismarked(io) && unmark(io)
@@ -258,7 +259,7 @@ function takebuf_array(io::IOBuffer)
         io.ptr = 1
         io.size = 0
     end
-    data
+    return data
 end
 function takebuf_string(io::AbstractIOBuffer)
     b = takebuf_array(io)
@@ -272,21 +273,21 @@ function write(to::AbstractIOBuffer, from::AbstractIOBuffer)
     end
     written::Int = write_sub(to, from.data, from.ptr, nb_available(from))
     from.ptr += written
-    written
+    return written
 end
 
-write(to::AbstractIOBuffer, p::Ptr, nb::Integer) = write(to, p, Int(nb))
-function write(to::AbstractIOBuffer, p::Ptr, nb::Int)
+function unsafe_write(to::AbstractIOBuffer, p::Ptr{UInt8}, nb::UInt)
     ensureroom(to, nb)
     ptr = (to.append ? to.size+1 : to.ptr)
     written = min(nb, length(to.data) - ptr + 1)
-    p_u8 = convert(Ptr{UInt8}, p)
     for i = 0:written - 1
-        @inbounds to.data[ptr + i] = unsafe_load(p_u8 + i)
+        @inbounds to.data[ptr + i] = unsafe_load(p + i)
     end
     to.size = max(to.size, ptr - 1 + written)
-    if !to.append to.ptr += written end
-    written
+    if !to.append
+        to.ptr += written
+    end
+    return written
 end
 
 function write_sub{T}(to::AbstractIOBuffer, a::AbstractArray{T}, offs, nel)
@@ -295,17 +296,11 @@ function write_sub{T}(to::AbstractIOBuffer, a::AbstractArray{T}, offs, nel)
     end
     local written::Int
     if isbits(T) && isa(a,Array)
-        nb = nel * sizeof(T)
-        ensureroom(to, Int(nb))
-        ptr = (to.append ? to.size+1 : to.ptr)
-        written = min(nb, length(to.data) - ptr + 1)
-        unsafe_copy!(pointer(to.data, ptr),
-                     convert(Ptr{UInt8}, pointer(a, offs)), written)
-        to.size = max(to.size, ptr - 1 + written)
-        if !to.append to.ptr += written end
+        nb = UInt(nel * sizeof(T))
+        written = unsafe_write(to, pointer(a, offs), nb)
     else
         written = 0
-        ensureroom(to, sizeof(a))
+        ensureroom(to, UInt(sizeof(a)))
         for i = offs:offs+nel-1
             written += write(to, a[i])
         end
@@ -313,10 +308,8 @@ function write_sub{T}(to::AbstractIOBuffer, a::AbstractArray{T}, offs, nel)
     return written
 end
 
-write(to::AbstractIOBuffer, a::Array) = write_sub(to, a, 1, length(a))
-
 @inline function write(to::AbstractIOBuffer, a::UInt8)
-    ensureroom(to, 1)
+    ensureroom(to, UInt(1))
     ptr = (to.append ? to.size+1 : to.ptr)
     if ptr > to.maxsize
         return 0
@@ -324,7 +317,9 @@ write(to::AbstractIOBuffer, a::Array) = write_sub(to, a, 1, length(a))
         to.data[ptr] = a
     end
     to.size = max(to.size, ptr)
-    if !to.append to.ptr += 1 end
+    if !to.append
+        to.ptr += 1
+    end
     return sizeof(UInt8)
 end
 
@@ -381,30 +376,3 @@ function readuntil(io::AbstractIOBuffer, delim::UInt8)
     end
     A
 end
-
-# IOStream Marking
-# Note that these functions expect that io.mark exists for
-# the concrete IO type.  This may not be true for IO types
-# not in base.
-
-# Note 2: these functions truly belong in io.jl, but serious massive performance issues with type-inference if they aren't available earlier
-
-function mark(io::IO)
-    io.mark = position(io)
-end
-
-function unmark(io::IO)
-    !ismarked(io) && return false
-    io.mark = -1
-    return true
-end
-
-function reset{T<:IO}(io::T)
-    ismarked(io) || throw(ArgumentError("$(T) not marked"))
-    m = io.mark
-    seek(io, m)
-    io.mark = -1 # must be after seek, or seek may fail
-    return m
-end
-
-ismarked(io::IO) = io.mark >= 0
diff --git a/base/iostream.jl b/base/iostream.jl
index 0168db9fc5d01..411ae8cd60424 100644
--- a/base/iostream.jl
+++ b/base/iostream.jl
@@ -121,19 +121,7 @@ end
 
 write(s::IOStream, b::UInt8) = Int(ccall(:ios_putc, Cint, (Cint, Ptr{Void}), b, s.ios))
 
-function write{T}(s::IOStream, a::Array{T})
-    if isbits(T)
-        if !iswritable(s)
-            throw(ArgumentError("write failed, IOStream is not writeable"))
-        end
-        Int(ccall(:ios_write, Csize_t, (Ptr{Void}, Ptr{Void}, Csize_t),
-                  s.ios, a, length(a)*sizeof(T)))
-    else
-        invoke(write, Tuple{IO, Array}, s, a)
-    end
-end
-
-function write(s::IOStream, p::Ptr, nb::Integer)
+function unsafe_write(s::IOStream, p::Ptr{UInt8}, nb::UInt)
     if !iswritable(s)
         throw(ArgumentError("write failed, IOStream is not writeable"))
     end
@@ -146,10 +134,10 @@ function write{T,N,A<:Array}(s::IOStream, a::SubArray{T,N,A})
     end
     colsz = size(a,1)*sizeof(T)
     if N<=1
-        return write(s, pointer(a, 1), colsz)
+        return unsafe_write(s, pointer(a, 1), colsz)
     else
         for idxs in CartesianRange((1, size(a)[2:end]...))
-            write(s, pointer(a, idxs.I), colsz)
+            unsafe_write(s, pointer(a, idxs.I), colsz)
         end
         return colsz*trailingsize(a,2)
     end
diff --git a/base/printf.jl b/base/printf.jl
index 8e2f677b6cfc8..08f61adead2f0 100644
--- a/base/printf.jl
+++ b/base/printf.jl
@@ -191,11 +191,11 @@ function print_fixed(out, precision, pt, ndigits, trailingzeros=true)
             write(out, '0')
             pt += 1
         end
-        write(out, pdigits, ndigits)
+        unsafe_write(out, pdigits, ndigits)
         precision -= ndigits
     elseif ndigits <= pt
         # dddd000.000000
-        write(out, pdigits, ndigits)
+        unsafe_write(out, pdigits, ndigits)
         while ndigits < pt
             write(out, '0')
             ndigits += 1
@@ -206,9 +206,9 @@ function print_fixed(out, precision, pt, ndigits, trailingzeros=true)
     else # 0 < pt < ndigits
         # dd.dd0000
         ndigits -= pt
-        write(out, pdigits, pt)
+        unsafe_write(out, pdigits, pt)
         write(out, '.')
-        write(out, pdigits+pt, ndigits)
+        unsafe_write(out, pdigits+pt, ndigits)
         precision -= ndigits
     end
     if trailingzeros
@@ -314,7 +314,7 @@ function gen_d(flags::ASCIIString, width::Int, precision::Int, c::Char)
         push!(blk.args, pad(width-1, zeros, '0'))
     end
     # print integer
-    push!(blk.args, :(write(out, pointer(DIGITS), pt)))
+    push!(blk.args, :(unsafe_write(out, pointer(DIGITS), pt)))
     # print padding
     if padding !== nothing && '-' in flags
         push!(blk.args, pad(width-precision, padding, ' '))
@@ -373,7 +373,7 @@ function gen_f(flags::ASCIIString, width::Int, precision::Int, c::Char)
     if precision > 0
         push!(blk.args, :(print_fixed(out,$precision,pt,len)))
     else
-        push!(blk.args, :(write(out, pointer(DIGITS), len)))
+        push!(blk.args, :(unsafe_write(out, pointer(DIGITS), len)))
         push!(blk.args, :(while pt >= (len+=1) write(out,'0') end))
         '#' in flags && push!(blk.args, :(write(out, '.')))
     end
@@ -473,12 +473,12 @@ function gen_e(flags::ASCIIString, width::Int, precision::Int, c::Char, inside_g
                               end;
                               if endidx > 1
                                   write(out, '.')
-                                  write(out, pointer(DIGITS)+1, endidx-1)
+                                  unsafe_write(out, pointer(DIGITS)+1, endidx-1)
                               end
                               ))
         else
             push!(blk.args, :(write(out, '.')))
-            push!(blk.args, :(write(out, pointer(DIGITS)+1, $(ndigits-1))))
+            push!(blk.args, :(unsafe_write(out, pointer(DIGITS)+1, $(ndigits-1))))
             if ndigits < precision+1
                 n = precision+1-ndigits
                 push!(blk.args, pad(n, n, '0'))
@@ -577,7 +577,7 @@ function gen_a(flags::ASCIIString, width::Int, precision::Int, c::Char)
     push!(blk.args, :(write(out, DIGITS[1])))
     if precision > 0
         push!(blk.args, :(write(out, '.')))
-        push!(blk.args, :(write(out, pointer(DIGITS)+1, $(ndigits-1))))
+        push!(blk.args, :(unsafe_write(out, pointer(DIGITS)+1, $(ndigits-1))))
         if ndigits < precision+1
             n = precision+1-ndigits
             push!(blk.args, pad(n, n, '0'))
@@ -590,7 +590,7 @@ function gen_a(flags::ASCIIString, width::Int, precision::Int, c::Char)
         else
             push!(vpblk.args, :(write(out, '.')))
         end
-        push!(vpblk.args, :(write(out, pointer(DIGITS)+1, len-1)))
+        push!(vpblk.args, :(unsafe_write(out, pointer(DIGITS)+1, len-1)))
         push!(blk.args, ifvpblk)
     end
     for ch in expmark
@@ -1103,7 +1103,7 @@ function bigfloat_printf(out, d, flags::ASCIIString, width::Int, precision::Int,
     bufsiz = length(DIGITS) - 1
     lng = ccall((:mpfr_snprintf,:libmpfr), Int32, (Ptr{UInt8}, Culong, Ptr{UInt8}, Ptr{BigFloat}...), DIGITS, bufsiz, printf_fmt, &d)
     lng > 0 || error("invalid printf formatting for BigFloat")
-    write(out, pointer(DIGITS), min(lng,bufsiz))
+    unsafe_write(out, pointer(DIGITS), min(lng,bufsiz))
     return (false, ())
 end
 
diff --git a/base/serialize.jl b/base/serialize.jl
index b2459c0bcc9ae..f608b724b80ad 100644
--- a/base/serialize.jl
+++ b/base/serialize.jl
@@ -143,7 +143,7 @@ function serialize(s::SerializationState, x::Symbol)
         writetag(s.io, LONGSYMBOL_TAG)
         write(s.io, Int32(ln))
     end
-    write(s.io, pname, ln)
+    unsafe_write(s.io, pname, ln)
 end
 
 function serialize_array_data(s::IO, a)
diff --git a/base/stream.jl b/base/stream.jl
index d49da0e39c936..ce8838bb758c6 100644
--- a/base/stream.jl
+++ b/base/stream.jl
@@ -960,8 +960,8 @@ function readuntil(this::LibuvStream, c::UInt8)
     readuntil(buf, c)
 end
 
-uv_write(s::LibuvStream, p::Vector{UInt8}) = uv_write(s, pointer(p), UInt(length(p)))
-function uv_write(s::LibuvStream, p::Ptr, n::UInt)
+uv_write(s::LibuvStream, p::Vector{UInt8}) = uv_write(s, pointer(p), UInt(sizeof(p)))
+function uv_write(s::LibuvStream, p::Ptr{UInt8}, n::UInt)
     check_open(s)
     uvw = Libc.malloc(_sizeof_uv_write)
     uv_req_set_data(uvw,C_NULL)
@@ -984,7 +984,7 @@ end
 # - smaller writes are buffered, final uv write on flush or when buffer full
 # - large isbits arrays are unbuffered and written directly
 
-function buffer_or_write(s::LibuvStream, p::Ptr, n::Integer)
+function unsafe_write(s::LibuvStream, p::Ptr{UInt8}, n::UInt)
     if isnull(s.sendbuf)
         return uv_write(s, p, UInt(n))
     end
@@ -992,13 +992,13 @@ function buffer_or_write(s::LibuvStream, p::Ptr, n::Integer)
     buf = get(s.sendbuf)
     totb = nb_available(buf) + n
     if totb < buf.maxsize
-        nb = write(buf, p, n)
+        nb = unsafe_write(buf, p, n)
     else
         flush(s)
         if n > buf.maxsize
             nb = uv_write(s, p, n)
         else
-            nb = write(buf, p, n)
+            nb = unsafe_write(buf, p, n)
         end
     end
     return nb
@@ -1013,26 +1013,14 @@ function flush(s::LibuvStream)
         arr = takebuf_array(buf)        # Array of UInt8s
         uv_write(s, arr)
     end
-    s
+    return s
 end
 
 buffer_writes(s::LibuvStream, bufsize) = (s.sendbuf=PipeBuffer(bufsize); s)
 
 ## low-level calls to libuv ##
 
-write(s::LibuvStream, b::UInt8) = write(s, [b])
-write(s::LibuvStream, c::Char) = write(s, string(c))
-function write{T}(s::LibuvStream, a::Array{T})
-    if isbits(T)
-        n = UInt(length(a) * sizeof(T))
-        return buffer_or_write(s, pointer(a), n)
-    else
-        check_open(s)
-        invoke(write, Tuple{IO, typeof(a)}, s, a)
-    end
-end
-
-write(s::LibuvStream, p::Ptr, n::Integer) = buffer_or_write(s, p, n)
+write(s::LibuvStream, b::UInt8) = write(s, Ref{UInt8}(b))
 
 function uv_writecb_task(req::Ptr{Void}, status::Cint)
     d = uv_req_data(req)
@@ -1224,25 +1212,18 @@ function wait_readbyte(s::BufferStream, c::UInt8)
     end
 end
 
-wait_close(s::BufferStream) = if isopen(s) wait(s.close_c); end
+wait_close(s::BufferStream) = if isopen(s); wait(s.close_c); end
 start_reading(s::BufferStream) = nothing
 
-write(s::BufferStream, b::UInt8) = write(s, [b])
-write(s::BufferStream, c::Char) = write(s, string(c))
-
-function write{T}(s::BufferStream, a::Array{T})
-    rv=write(s.buffer, a)
-    !(s.buffer_writes) && notify(s.r_c; all=true);
-    return rv
-end
-function write(s::BufferStream, p::Ptr, nb::Integer)
-    rv=write(s.buffer, p, nb)
-    !(s.buffer_writes) && notify(s.r_c; all=true);
+write(s::BufferStream, b::UInt8) = write(s, Ref{UInt8}(b))
+function unsafe_write(s::BufferStream, p::Ptr{UInt8}, nb::UInt)
+    rv = unsafe_write(s.buffer, p, nb)
+    !(s.buffer_writes) && notify(s.r_c; all=true)
     return rv
 end
 
 function eof(s::BufferStream)
-    wait_readnb(s,1)
+    wait_readnb(s, 1)
     return !isopen(s) && nb_available(s)<=0
 end
 
diff --git a/base/sysimg.jl b/base/sysimg.jl
index dd06024bf8efa..7a8126d6c9471 100644
--- a/base/sysimg.jl
+++ b/base/sysimg.jl
@@ -86,10 +86,14 @@ include(UTF8String(vcat(length(Core.ARGS)>=2?Core.ARGS[2].data:"".data, "version
 include("c.jl")
 include("osutils.jl")
 
+# Core I/O
+include("io.jl")
+include("iostream.jl")
+include("iobuffer.jl")
+
 # strings & printing
 include("char.jl")
 include("ascii.jl")
-include("iobuffer.jl")
 include("string.jl")
 include("unicode.jl")
 include("parse.jl")
@@ -98,10 +102,6 @@ include("regex.jl")
 include("base64.jl")
 importall .Base64
 
-# Core I/O
-include("io.jl")
-include("iostream.jl")
-
 # system & environment
 include("libc.jl")
 using .Libc: getpid, gethostname, time

From 8b9774331bac0eed4c5b8a1203fc73223f16ffd0 Mon Sep 17 00:00:00 2001
From: Jameson Nash <vtjnash@gmail.com>
Date: Sat, 23 Jan 2016 21:13:46 -0500
Subject: [PATCH 3/3] add unsafe_read(io, p::Ptr{UInt8}, nb::UInt) counterpart
 to unsafe_write

---
 base/exports.jl           |  2 +-
 base/filesystem.jl        | 13 ++----
 base/io.jl                | 93 +++++++++++++++++++++++----------------
 base/iobuffer.jl          | 34 +++++++-------
 base/iostream.jl          | 16 ++++---
 base/stream.jl            | 91 +++++++++++++++++++++++++-------------
 doc/stdlib/io-network.rst | 16 +++++++
 7 files changed, 164 insertions(+), 101 deletions(-)

diff --git a/base/exports.jl b/base/exports.jl
index 1191e1a0d293a..3b322a9d71914 100644
--- a/base/exports.jl
+++ b/base/exports.jl
@@ -1333,7 +1333,7 @@ export
     unsafe_copy!,
     unsafe_load,
     unsafe_pointer_to_objref,
-    #unsafe_read,
+    unsafe_read,
     unsafe_store!,
     unsafe_write,
 
diff --git a/base/filesystem.jl b/base/filesystem.jl
index 2f8ddf111e9f7..1818bbf78df91 100644
--- a/base/filesystem.jl
+++ b/base/filesystem.jl
@@ -144,18 +144,13 @@ function read(f::File, ::Type{UInt8})
     return ret % UInt8
 end
 
-function read!(f::File, a::Vector{UInt8}, nel=length(a))
+function unsafe_read(f::File, p::Ptr{UInt8}, nel::UInt) # reads nel bytes into the bare pointer p; no bounds check is possible here
     check_open(f)
-    if nel < 0 || nel > length(a)
-        throw(BoundsError())
-    end
     ret = ccall(:jl_fs_read, Int32, (Int32, Ptr{Void}, Csize_t),
-                f.handle, a, nel)
-    if ret < nel
-        throw(EOFError())
-    end
+                f.handle, p, nel)
     uv_error("read",ret)
-    return a
+    ret == nel || throw(EOFError()) # short-read check now runs after uv_error, so an error code in ret (presumably negative; confirm) is not reported as EOF
+    nothing # the data is delivered through p, so there is no value to return
 end
 
 nb_available(f::File) = filesize(f) - position(f)
diff --git a/base/io.jl b/base/io.jl
index 4d95906c95422..fd7db0f1daaef 100644
--- a/base/io.jl
+++ b/base/io.jl
@@ -26,14 +26,40 @@ function eof end
 read(s::IO, ::Type{UInt8}) = error(typeof(s)," does not support byte I/O")
 write(s::IO, x::UInt8) = error(typeof(s)," does not support byte I/O")
 
+"""
+    unsafe_write(io, ref, nbytes)
+
+Copy nbytes from ref (converted to a pointer) into the IO stream object.
+
+It is recommended that IO subtypes override the exact method signature below
+to provide more efficient implementations:
+`unsafe_write(s::IO, p::Ptr{UInt8}, n::UInt)`
+"""
 function unsafe_write(s::IO, p::Ptr{UInt8}, n::UInt)
     local written::Int = 0
-    for i=1:n
+    for i = 1:n
         written += write(s, unsafe_load(p, i))
     end
     return written
 end
 
+"""
+    unsafe_read(io, ref, nbytes)
+
+Copy nbytes from the IO stream object into ref (converted to a pointer).
+
+It is recommended that IO subtypes override the exact method signature below
+to provide more efficient implementations:
+`unsafe_read(s::IO, p::Ptr{UInt8}, n::UInt)`
+"""
+function unsafe_read(s::IO, p::Ptr{UInt8}, n::UInt)
+    for offset in 1:n  # generic fallback: one byte is read and stored per iteration
+        unsafe_store!(p, read(s, UInt8)::UInt8, offset)
+    end
+    return nothing
+end
+
+
 # Generic wrappers around other IO objects
 abstract AbstractPipe <: IO
 function pipe_reader end
@@ -45,11 +71,9 @@ buffer_writes(io::AbstractPipe, args...) = buffer_writes(pipe_writer(io), args..
 flush(io::AbstractPipe) = flush(pipe_writer(io))
 
 read(io::AbstractPipe, byte::Type{UInt8}) = read(pipe_reader(io), byte)
-read!(io::AbstractPipe, bytes::Vector{UInt8}) = read!(pipe_reader(io), bytes)
-read{T<:AbstractPipe}(io::T, args...) = read(pipe_reader(io), args...)
-read!{T<:AbstractPipe}(io::T, args...) = read!(pipe_reader(io), args...)
-readuntil{T<:AbstractPipe}(io::T, args...) = readuntil(pipe_reader(io), args...)
+unsafe_read(io::AbstractPipe, p::Ptr{UInt8}, nb::UInt) = unsafe_read(pipe_reader(io), p, nb)
 read(io::AbstractPipe) = read(pipe_reader(io))
+readuntil{T<:AbstractPipe}(io::T, args...) = readuntil(pipe_reader(io), args...)
 readavailable(io::AbstractPipe) = readavailable(pipe_reader(io))
 
 isreadable(io::AbstractPipe) = isreadable(pipe_reader(io))
@@ -107,11 +131,11 @@ function write(io::IO, xs...)
     return written
 end
 
-unsafe_write{T}(s::IO, p::Ref{T}, n::Integer) = unsafe_write(s, unsafe_convert(Ref{T}, p)::Ptr, n)
+@noinline unsafe_write{T}(s::IO, p::Ref{T}, n::Integer) = unsafe_write(s, unsafe_convert(Ref{T}, p)::Ptr, n) # mark noinline to ensure ref is gc-rooted somewhere (by the caller)
 unsafe_write(s::IO, p::Ptr, n::Integer) = unsafe_write(s, convert(Ptr{UInt8}, p), convert(UInt, n))
-write(s::IO, x::Ref) = unsafe_write(s, x, sizeof(eltype(x)))
-
-function write(s::IO, x::Union{Int8,Int16,UInt16,Int32,UInt32,Int64,UInt64,Int128,UInt128,Float16,Float32,Float64})
+write{T}(s::IO, x::Ref{T}) = unsafe_write(s, x, Core.sizeof(T))
+write(s::IO, x::Int8) = write(s, reinterpret(UInt8, x))
+function write(s::IO, x::Union{Int16,UInt16,Int32,UInt32,Int64,UInt64,Int128,UInt128,Float16,Float32,Float64})
     return write(s, Ref(x))
 end
 
@@ -126,16 +150,20 @@ function write(s::IO, a::AbstractArray)
     return nb
 end
 
-function write{T}(s::IO, a::Array{T})
+@noinline function write(s::IO, a::Array{UInt8}) # mark noinline to ensure the array is gc-rooted somewhere (by the caller)
+    return unsafe_write(s, pointer(a), sizeof(a))
+end
+
+@noinline function write{T}(s::IO, a::Array{T}) # mark noinline to ensure the array is gc-rooted somewhere (by the caller)
     if isbits(T)
         return unsafe_write(s, pointer(a), sizeof(a))
+    else # T is not isbits, so the array memory cannot be handed to unsafe_write as one chunk
+        nb = 0 # running total of the values returned by the per-element writes
+        for i in eachindex(a)
+            nb += write(s, a[i]) # each element goes through its own write method
+        end
+        return nb
     end
-
-    nb = 0
-    for x in a
-        nb += write(s, x)
-    end
-    return nb
 end
 
 
@@ -171,23 +199,17 @@ function write(to::IO, from::IO)
     end
 end
 
+@noinline unsafe_read{T}(s::IO, p::Ref{T}, n::Integer) = unsafe_read(s, unsafe_convert(Ref{T}, p)::Ptr, n) # mark noinline to ensure ref is gc-rooted somewhere (by the caller)
+unsafe_read(s::IO, p::Ptr, n::Integer) = unsafe_read(s, convert(Ptr{UInt8}, p), convert(UInt, n))
+read{T}(s::IO, x::Ref{T}) = (unsafe_read(s, x, Core.sizeof(T)); x)
 
-read(s::IO, ::Type{Int8}) = reinterpret(Int8, read(s,UInt8))
-
-function read{T <: Union{Int16,UInt16,Int32,UInt32,Int64,UInt64,Int128,UInt128}}(s::IO, ::Type{T})
-    x = zero(T)
-    for n = 1:sizeof(x)
-        x |= (convert(T,read(s,UInt8))<<((n-1)<<3))
-    end
-    return x
+read(s::IO, ::Type{Int8}) = reinterpret(Int8, read(s, UInt8))
+function read(s::IO, T::Union{Type{Int16},Type{UInt16},Type{Int32},Type{UInt32},Type{Int64},Type{UInt64},Type{Int128},Type{UInt128},Type{Float16},Type{Float32},Type{Float64}})
+    return read(s, Ref{T}(0))[]::T # fill a zero-initialised Ref{T} through read(s, ::Ref), then unwrap the stored value
 end
 
 read(s::IO, ::Type{Bool})    = (read(s,UInt8)!=0)
-read(s::IO, ::Type{Float16}) = box(Float16,unbox(Int16,read(s,Int16)))
-read(s::IO, ::Type{Float32}) = box(Float32,unbox(Int32,read(s,Int32)))
-read(s::IO, ::Type{Float64}) = box(Float64,unbox(Int64,read(s,Int64)))
-
-read{T}(s::IO, ::Type{Ptr{T}}) = convert(Ptr{T}, read(s,UInt))
+read{T}(s::IO, ::Type{Ptr{T}}) = convert(Ptr{T}, read(s, UInt))
 
 read{T}(s::IO, t::Type{T}, d1::Int, dims::Int...) = read(s, t, tuple(d1,dims...))
 read{T}(s::IO, t::Type{T}, d1::Integer, dims::Integer...) =
@@ -195,17 +217,14 @@ read{T}(s::IO, t::Type{T}, d1::Integer, dims::Integer...) =
 
 read{T}(s::IO, ::Type{T}, dims::Dims) = read!(s, Array(T, dims))
 
-function read!(s::IO, a::Vector{UInt8})
-    for i in 1:length(a)
-        a[i] = read(s, UInt8)
-    end
+@noinline function read!(s::IO, a::Array{UInt8}) # mark noinline to ensure the array is gc-rooted somewhere (by the caller)
+    unsafe_read(s, pointer(a), sizeof(a))
     return a
 end
 
-function read!{T}(s::IO, a::Array{T})
+@noinline function read!{T}(s::IO, a::Array{T}) # mark noinline to ensure the array is gc-rooted somewhere (by the caller)
     if isbits(T)
-        nb::Int = length(a) * sizeof(T)
-        read!(s, reinterpret(UInt8, a, (nb,)))
+        unsafe_read(s, pointer(a), sizeof(a))
     else
         for i in eachindex(a)
             a[i] = read(s, T)
@@ -324,7 +343,7 @@ function read(s::IO, nb=typemax(Int))
     # instead of taking of risk of over-allocating
     b = Array(UInt8, nb == typemax(Int) ? 1024 : nb)
     nr = readbytes!(s, b, nb)
-    resize!(b, nr)
+    return resize!(b, nr)
 end
 
 function readstring(s::IO)
diff --git a/base/iobuffer.jl b/base/iobuffer.jl
index cd1d330734814..9395e0bd0ff83 100644
--- a/base/iobuffer.jl
+++ b/base/iobuffer.jl
@@ -39,7 +39,7 @@ function copy(b::AbstractIOBuffer)
                     b.readable, b.writable, b.seekable, b.append, b.maxsize)
     ret.size = b.size
     ret.ptr  = b.ptr
-    ret
+    return ret
 end
 
 show(io::IO, b::AbstractIOBuffer) = print(io, "IOBuffer(data=UInt8[...], ",
@@ -52,8 +52,17 @@ show(io::IO, b::AbstractIOBuffer) = print(io, "IOBuffer(data=UInt8[...], ",
                                       "ptr=",      b.ptr, ", ",
                                       "mark=",     b.mark, ")")
 
-read!(from::AbstractIOBuffer, a::Vector{UInt8}) = read_sub(from, a, 1, length(a))
-read!(from::AbstractIOBuffer, a::Array) = read_sub(from, a, 1, length(a))
+function unsafe_read(from::AbstractIOBuffer, p::Ptr{UInt8}, nb::UInt)
+    from.readable || throw(ArgumentError("read failed, IOBuffer is not readable"))
+    remaining = nb_available(from)
+    ncopy = min(remaining, nb)
+    # Hand over whatever is buffered first; a short read is only reported
+    # after the read position has been advanced past the copied bytes.
+    unsafe_copy!(p, pointer(from.data, from.ptr), ncopy)
+    from.ptr += ncopy
+    nb > remaining && throw(EOFError())
+    return nothing
+end
 
 function read_sub{T}(from::AbstractIOBuffer, a::AbstractArray{T}, offs, nel)
     from.readable || throw(ArgumentError("read failed, IOBuffer is not readable"))
@@ -61,18 +70,8 @@ function read_sub{T}(from::AbstractIOBuffer, a::AbstractArray{T}, offs, nel)
         throw(BoundsError())
     end
     if isbits(T) && isa(a,Array)
-        nb = nel * sizeof(T)
-        avail = nb_available(from)
-        adv = min(avail, nb)
-        copy!(pointer_to_array(convert(Ptr{UInt8},pointer(a)), sizeof(a)), # reinterpret(UInt8,a) but without setting the shared data property on a
-              1 + (1 - offs) * sizeof(T),
-              from.data,
-              from.ptr,
-              adv)
-        from.ptr += adv
-        if nb > avail
-            throw(EOFError())
-        end
+        nb = UInt(nel * sizeof(T))
+        unsafe_read(from, pointer(a, offs), nb)
     else
         for i = offs:offs+nel-1
             a[i] = read(to, T)
@@ -82,9 +81,9 @@ function read_sub{T}(from::AbstractIOBuffer, a::AbstractArray{T}, offs, nel)
 end
 
 @inline function read(from::AbstractIOBuffer, ::Type{UInt8})
+    from.readable || throw(ArgumentError("read failed, IOBuffer is not readable"))
     ptr = from.ptr
     size = from.size
-    from.readable || throw(ArgumentError("read failed, IOBuffer is not readable"))
     if ptr > size
         throw(EOFError())
     end
@@ -323,7 +322,8 @@ end
     return sizeof(UInt8)
 end
 
-function readbytes!(io::AbstractIOBuffer, b::Array{UInt8}, nb=length(b))
+readbytes!(io::AbstractIOBuffer, b::Array{UInt8}, nb=length(b)) = readbytes!(io, b, Int(nb))
+function readbytes!(io::AbstractIOBuffer, b::Array{UInt8}, nb::Int)
     nr = min(nb, nb_available(io))
     if length(b) < nr
         resize!(b, nr)
diff --git a/base/iostream.jl b/base/iostream.jl
index 411ae8cd60424..31cf086933789 100644
--- a/base/iostream.jl
+++ b/base/iostream.jl
@@ -125,7 +125,7 @@ function unsafe_write(s::IOStream, p::Ptr{UInt8}, nb::UInt)
     if !iswritable(s)
         throw(ArgumentError("write failed, IOStream is not writeable"))
     end
-    Int(ccall(:ios_write, Csize_t, (Ptr{Void}, Ptr{Void}, Csize_t), s.ios, p, nb))
+    return Int(ccall(:ios_write, Csize_t, (Ptr{Void}, Ptr{Void}, Csize_t), s.ios, p, nb))
 end
 
 function write{T,N,A<:Array}(s::IOStream, a::SubArray{T,N,A})
@@ -153,19 +153,21 @@ function read(s::IOStream, ::Type{UInt8})
     if b == -1
         throw(EOFError())
     end
-    b % UInt8
+    return b % UInt8
 end
 
-function read{T<:Union{UInt16, Int16, UInt32, Int32, UInt64, Int64}}(s::IOStream, ::Type{T})
-    ccall(:jl_ios_get_nbyte_int, UInt64, (Ptr{Void}, Csize_t), s.ios, sizeof(T)) % T
+if ENDIAN_BOM == 0x04030201 # ENDIAN_BOM has this value on little-endian hosts, so the method below is only defined there
+function read(s::IOStream, T::Union{Type{Int16},Type{UInt16},Type{Int32},Type{UInt32},Type{Int64},Type{UInt64}}) # NOTE(review): other hosts presumably fall back to the generic read(::IO, T); confirm
+    return ccall(:jl_ios_get_nbyte_int, UInt64, (Ptr{Void}, Csize_t), s.ios, sizeof(T)) % T # the C helper returns a UInt64 that is truncated to T
+end
 end
 
-function read!(s::IOStream, a::Vector{UInt8})
+function unsafe_read(s::IOStream, p::Ptr{UInt8}, nb::UInt) # reads exactly nb bytes from the stream into p
     if ccall(:ios_readall, Csize_t,
-             (Ptr{Void}, Ptr{Void}, Csize_t), s.ios, a, sizeof(a)) < sizeof(a)
+             (Ptr{Void}, Ptr{Void}, Csize_t), s, p, nb) != nb # NOTE(review): passes s where the old code passed s.ios; relies on IOStream converting to Ptr{Void}, confirm
         throw(EOFError())
     end
-    a
+    nothing # the bytes were written through p, so there is nothing to return
 end
 
 ## text I/O ##
diff --git a/base/stream.jl b/base/stream.jl
index ce8838bb758c6..236dbf3c10f0d 100644
--- a/base/stream.jl
+++ b/base/stream.jl
@@ -664,7 +664,7 @@ type Timer
         ccall(:uv_timer_start, Cint, (Ptr{Void},Ptr{Void},UInt64,UInt64),
               this.handle, uv_jl_timercb::Ptr{Void},
               UInt64(round(timeout*1000))+1, UInt64(round(repeat*1000)))
-        this
+        return this
     end
 end
 
@@ -678,6 +678,7 @@ function close(t::Timer)
         ccall(:uv_timer_stop, Cint, (Ptr{Void},), t.handle)
         ccall(:jl_close_uv, Void, (Ptr{Void},), t.handle)
     end
+    nothing
 end
 
 function _uv_hook_close(t::Timer)
@@ -723,7 +724,7 @@ function Timer(cb::Function, timeout::Real, repeat::Real=0.0)
     # we re-enter the event loop. this avoids a race condition. see issue #12719
     enq_work(current_task())
     yieldto(waiter)
-    t
+    return t
 end
 
 ## event loop ##
@@ -736,9 +737,9 @@ end
 function process_events(block::Bool)
     loop = eventloop()
     if block
-        ccall(:jl_run_once,Int32,(Ptr{Void},),loop)
+        return ccall(:jl_run_once,Int32,(Ptr{Void},),loop)
     else
-        ccall(:jl_process_events,Int32,(Ptr{Void},),loop)
+        return ccall(:jl_process_events,Int32,(Ptr{Void},),loop)
     end
 end
 
@@ -758,7 +759,7 @@ function init_pipe!(pipe::LibuvPipe;
         (Ptr{Void}, Int32, Int32, Int32),
         pipe.handle, writable, readable, julia_only))
     pipe.status = StatusInit
-    pipe
+    return pipe
 end
 
 function malloc_julia_pipe!(x::LibuvPipe)
@@ -766,11 +767,13 @@ function malloc_julia_pipe!(x::LibuvPipe)
     x.handle = Libc.malloc(_sizeof_uv_named_pipe)
     associate_julia_struct(x.handle, x)
     finalizer(x, uvfinalize)
+    nothing
 end
 
 function _link_pipe(read_end::Ptr{Void}, write_end::Ptr{Void})
     uv_error("pipe_link",
         ccall(:uv_pipe_link, Int32, (Ptr{Void}, Ptr{Void}), read_end, write_end))
+    nothing
 end
 
 function link_pipe(read_end::Ptr{Void}, readable_julia_only::Bool,
@@ -783,6 +786,7 @@ function link_pipe(read_end::Ptr{Void}, readable_julia_only::Bool,
     uv_error("init_pipe(write)",
         ccall(:jl_init_pipe, Cint, (Ptr{Void},Int32,Int32,Int32), write_end, 1, 0, writable_julia_only))
     _link_pipe(read_end, write_end)
+    nothing
 end
 
 function link_pipe(read_end::Ptr{Void}, readable_julia_only::Bool,
@@ -792,6 +796,7 @@ function link_pipe(read_end::Ptr{Void}, readable_julia_only::Bool,
     uv_error("init_pipe(write)",
         ccall(:jl_init_pipe, Cint, (Ptr{Void},Int32,Int32,Int32), write_end, 1, 0, writable_julia_only))
     _link_pipe(read_end,write_end)
+    nothing
 end
 
 function link_pipe(read_end::PipeEndpoint, readable_julia_only::Bool,
@@ -805,6 +810,7 @@ function link_pipe(read_end::PipeEndpoint, readable_julia_only::Bool,
         ccall(:jl_init_pipe, Cint, (Ptr{Void},Int32,Int32,Int32), write_end, 1, 0, writable_julia_only))
     _link_pipe(read_end.handle, write_end)
     read_end.status = StatusOpen
+    nothing
 end
 
 function link_pipe(read_end::Ptr{Void}, readable_julia_only::Bool,
@@ -818,6 +824,7 @@ function link_pipe(read_end::Ptr{Void}, readable_julia_only::Bool,
         readable = false, writable = true, julia_only = writable_julia_only)
     _link_pipe(read_end, write_end.handle)
     write_end.status = StatusOpen
+    nothing
 end
 
 function link_pipe(read_end::PipeEndpoint, readable_julia_only::Bool,
@@ -845,7 +852,7 @@ function close_pipe_sync(p::PipeEndpoint)
 end
 
 function close_pipe_sync(handle::Ptr{Void})
-    ccall(:uv_pipe_close_sync, Void, (Ptr{Void},), handle)
+    return ccall(:uv_pipe_close_sync, Void, (Ptr{Void},), handle)
 end
 
 ## Functions for any LibuvStream ##
@@ -858,11 +865,11 @@ function start_reading(stream::LibuvStream)
         ret = ccall(:uv_read_start, Cint, (Ptr{Void}, Ptr{Void}, Ptr{Void}),
                     stream, uv_jl_alloc_buf::Ptr{Void}, uv_jl_readcb::Ptr{Void})
         stream.status = StatusActive
-        ret
+        return ret
     elseif stream.status == StatusActive
-        Int32(0)
+        return Int32(0)
     else
-        Int32(-1)
+        return Int32(-1)
     end
 end
 
@@ -886,21 +893,45 @@ function stop_reading(stream::LibuvStream)
     if stream.status == StatusActive
         ret = ccall(:uv_read_stop, Cint, (Ptr{Void},), stream)
         stream.status = StatusOpen
-        ret
+        return ret
     elseif stream.status == StatusOpen
-        Int32(0)
+        return Int32(0)
     else
-        Int32(-1)
+        return Int32(-1)
     end
 end
 
-function read!(s::LibuvStream, b::Vector{UInt8})
-    nb = length(b)
-    r = readbytes!(s, b, nb)
-    if r < nb
-        throw(EOFError())
+readbytes!(s::LibuvStream, a::Vector{UInt8}, nb = length(a)) = readbytes!(s, a, Int(nb)) # normalise nb to Int and forward to the method below
+function readbytes!(s::LibuvStream, a::Vector{UInt8}, nb::Int)
+    sbuf = s.buffer
+    @assert sbuf.seekable == false
+    @assert sbuf.maxsize >= nb
+
+    if nb_available(sbuf) >= nb # fast path: the stream buffer already holds enough bytes
+        return readbytes!(sbuf, a, nb)
+    end
+
+    if nb <= SZ_UNBUFFERED_IO # Under this limit we are OK with copying the array from the stream's buffer
+        wait_readnb(s, nb)
+        return readbytes!(sbuf, a, nb)
+    else
+        try
+            stop_reading(s) # Just playing it safe, since we are going to switch buffers.
+            newbuf = PipeBuffer(a, #=maxsize=# nb)
+            newbuf.size = 0 # reset the write pointer to the beginning
+            s.buffer = newbuf # temporarily install the buffer built from a as the stream's buffer
+            write(newbuf, sbuf) # carry over the bytes that were already sitting in the old buffer
+            wait_readnb(s, Int(nb))
+            compact(newbuf)
+            return nb_available(newbuf) # result is the byte count now held by newbuf
+        finally
+            s.buffer = sbuf # always put the stream's own buffer back, even if an exception is propagating
+            if !isempty(s.readnotify.waitq)
+                start_reading(s) # resume reading iff there are currently other read clients of the stream
+            end
+        end
     end
-    return b
+    @assert false # unreachable
 end
 
 function read(stream::LibuvStream)
@@ -908,27 +939,27 @@ function read(stream::LibuvStream)
     return takebuf_array(stream.buffer)
 end
 
-function readbytes!(s::LibuvStream, a::Vector{UInt8}, nb = length(a))
+function unsafe_read(s::LibuvStream, p::Ptr{UInt8}, nb::UInt)
     sbuf = s.buffer
     @assert sbuf.seekable == false
     @assert sbuf.maxsize >= nb
 
     if nb_available(sbuf) >= nb
-        return readbytes!(sbuf, a, nb)
+        return unsafe_read(sbuf, p, nb)
     end
 
     if nb <= SZ_UNBUFFERED_IO # Under this limit we are OK with copying the array from the stream's buffer
-        wait_readnb(s, nb)
-        r = readbytes!(sbuf, a, nb)
+        wait_readnb(s, Int(nb))
+        unsafe_read(sbuf, p, nb)
     else
         try
             stop_reading(s) # Just playing it safe, since we are going to switch buffers.
-            newbuf = PipeBuffer(a, #=maxsize=# nb)
+            newbuf = PipeBuffer(pointer_to_array(p, nb), #=maxsize=# Int(nb))
             newbuf.size = 0 # reset the write pointer to the beginning
             s.buffer = newbuf
             write(newbuf, sbuf)
-            wait_readnb(s, nb)
-            r = nb_available(newbuf)
+            wait_readnb(s, Int(nb))
+            nb == nb_available(newbuf) || throw(EOFError())
         finally
             s.buffer = sbuf
             if !isempty(s.readnotify.waitq)
@@ -936,28 +967,28 @@ function readbytes!(s::LibuvStream, a::Vector{UInt8}, nb = length(a))
             end
         end
     end
-    return r
+    nothing
 end
 
 function read(this::LibuvStream, ::Type{UInt8})
     wait_readnb(this, 1)
     buf = this.buffer
     @assert buf.seekable == false
-    read(buf, UInt8)
+    return read(buf, UInt8)
 end
 
 function readavailable(this::LibuvStream)
     wait_readnb(this, 1)
     buf = this.buffer
     @assert buf.seekable == false
-    takebuf_array(buf)
+    return takebuf_array(buf)
 end
 
 function readuntil(this::LibuvStream, c::UInt8)
     wait_readbyte(this, c)
     buf = this.buffer
     @assert buf.seekable == false
-    readuntil(buf, c)
+    return readuntil(buf, c)
 end
 
 uv_write(s::LibuvStream, p::Vector{UInt8}) = uv_write(s, pointer(p), UInt(sizeof(p)))
@@ -1192,7 +1223,7 @@ end
 isopen(s::BufferStream) = s.is_open
 close(s::BufferStream) = (s.is_open = false; notify(s.r_c; all=true); notify(s.close_c; all=true); nothing)
 read(s::BufferStream, ::Type{UInt8}) = (wait_readnb(s, 1); read(s.buffer, UInt8))
-read!(s::BufferStream, a::Vector{UInt8}) = (wait_readnb(s, length(a)); read!(s.buffer, a))
+unsafe_read(s::BufferStream, a::Ptr{UInt8}, nb::UInt) = (wait_readnb(s, Int(nb)); unsafe_read(s.buffer, a, nb)) # wait_readnb runs first (presumably blocks until nb bytes are buffered; confirm), then the copy is delegated to the wrapped buffer
 nb_available(s::BufferStream) = nb_available(s.buffer)
 
 isreadable(s::BufferStream) = s.buffer.readable
diff --git a/doc/stdlib/io-network.rst b/doc/stdlib/io-network.rst
index e285c9fad78ca..601e716d37be3 100644
--- a/doc/stdlib/io-network.rst
+++ b/doc/stdlib/io-network.rst
@@ -168,6 +168,22 @@ General I/O
 
    If ``all`` is ``true`` (the default), this function will block repeatedly trying to read all requested bytes, until an error or end-of-file occurs. If ``all`` is ``false``\ , at most one ``read`` call is performed, and the amount of data returned is device-dependent. Note that not all stream types support the ``all`` option.
 
+.. function:: unsafe_read(io, ref, nbytes)
+
+   .. Docstring generated from Julia source
+
+   Copy nbytes from the IO stream object into ref (converted to a pointer).
+
+   It is recommended that IO subtypes override the exact method signature below to provide more efficient implementations: ``unsafe_read(s::IO, p::Ptr{UInt8}, n::UInt)``
+
+.. function:: unsafe_write(io, ref, nbytes)
+
+   .. Docstring generated from Julia source
+
+   Copy nbytes from ref (converted to a pointer) into the IO stream object.
+
+   It is recommended that IO subtypes override the exact method signature below to provide more efficient implementations: ``unsafe_write(s::IO, p::Ptr{UInt8}, n::UInt)``
+
 .. function:: position(s)
 
    .. Docstring generated from Julia source