@@ -140,7 +140,43 @@ function utf8_to_utf16(src::Vector{UInt8})
140
140
end
141
141
142
142
# Convert a vector of UTF-16 code units into a vector of UTF-8 bytes.
#
# `src` holds UTF-16 code units. The returned `Vector{UInt8}` is freshly
# allocated; `src` is not modified. An empty input yields an empty output.
#
# A high surrogate (0xd800-0xdbff) immediately followed by a low surrogate
# (0xdc00-0xdfff) is combined into one 4-byte UTF-8 sequence. Any other code
# unit >= 0x0800 -- including an unpaired high or low surrogate -- is emitted
# as a 3-byte sequence on its own, so no input unit is ever dropped and no
# error is thrown for ill-formed UTF-16.
function utf16_to_utf8(src::Vector{UInt16})
    dst = UInt8[]
    n = length(src)
    n > 0 || return dst
    # Lower bound on the output size: every code unit yields at least one byte.
    sizehint!(dst, n)
    i = 1
    while i <= n
        a = src[i]
        i += 1
        if a < 0x0080
            # ASCII: one byte
            push!(dst, a % UInt8)
        elseif a < 0x0800
            # 2-byte UTF-8: 110xxxxx 10xxxxxx
            push!(dst, 0xc0 | ((a >> 6) % UInt8),
                       0x80 | ((a % UInt8) & 0x3f))
        elseif (a & 0xfc00) == 0xd800 && i <= n && (src[i] & 0xfc00) == 0xdc00
            # Surrogate pair => 4-byte UTF-8: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            b = src[i]
            i += 1
            # Top 11 bits of the code point (code point >> 10): the 10 payload
            # bits of the high surrogate plus the 0x10000 offset (0x40 << 10).
            hi = (a & 0x03ff) + 0x0040
            push!(dst, 0xf0 | ((hi >> 8) % UInt8),
                       0x80 | ((hi % UInt8) >> 2),
                       0x80 | (((hi % UInt8) << 4) & 0x30) | (((b >> 6) % UInt8) & 0x0f),
                       0x80 | ((b % UInt8) & 0x3f))
        else
            # Remaining BMP code unit, or an unpaired surrogate; either way
            # encode it as a 3-byte UTF-8 sequence: 1110xxxx 10xxxxxx 10xxxxxx
            push!(dst, 0xe0 | ((a >> 12) % UInt8),
                       0x80 | (((a >> 6) % UInt8) & 0x3f),
                       0x80 | ((a % UInt8) & 0x3f))
        end
    end
    return dst
end
145
181
146
182
# deferring (or un-deferring) ctrl-c handler for external C code that
0 commit comments