@@ -638,8 +638,7 @@ impl char {
638
638
#[ rustc_const_stable( feature = "const_char_len_utf" , since = "1.52.0" ) ]
639
639
#[ inline]
640
640
pub const fn len_utf16 ( self ) -> usize {
641
- let ch = self as u32 ;
642
- if ( ch & 0xFFFF ) == ch { 1 } else { 2 }
641
+ len_utf16 ( self as u32 )
643
642
}
644
643
645
644
/// Encodes this character as UTF-8 into the provided byte buffer,
@@ -709,8 +708,9 @@ impl char {
709
708
/// '𝕊'.encode_utf16(&mut b);
710
709
/// ```
711
710
#[ stable( feature = "unicode_encode_char" , since = "1.15.0" ) ]
711
+ #[ rustc_const_unstable( feature = "const_char_encode_utf16" , issue = "130660" ) ]
712
712
#[ inline]
713
- pub fn encode_utf16 ( self , dst : & mut [ u16 ] ) -> & mut [ u16 ] {
713
+ pub const fn encode_utf16 ( self , dst : & mut [ u16 ] ) -> & mut [ u16 ] {
714
714
encode_utf16_raw ( self as u32 , dst)
715
715
}
716
716
@@ -1745,7 +1745,12 @@ const fn len_utf8(code: u32) -> usize {
1745
1745
}
1746
1746
}
1747
1747
1748
- /// Encodes a raw u32 value as UTF-8 into the provided byte buffer,
1748
+ #[ inline]
1749
+ const fn len_utf16 ( code : u32 ) -> usize {
1750
+ if ( code & 0xFFFF ) == code { 1 } else { 2 }
1751
+ }
1752
+
1753
+ /// Encodes a raw `u32` value as UTF-8 into the provided byte buffer,
1749
1754
/// and then returns the subslice of the buffer that contains the encoded character.
1750
1755
///
1751
1756
/// Unlike `char::encode_utf8`, this method also handles codepoints in the surrogate range.
@@ -1799,7 +1804,7 @@ pub const fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
1799
1804
unsafe { slice:: from_raw_parts_mut ( dst. as_mut_ptr ( ) , len) }
1800
1805
}
1801
1806
1802
- /// Encodes a raw u32 value as UTF-16 into the provided `u16` buffer,
1807
+ /// Encodes a raw `u32` value as UTF-16 into the provided `u16` buffer,
1803
1808
/// and then returns the subslice of the buffer that contains the encoded character.
1804
1809
///
1805
1810
/// Unlike `char::encode_utf16`, this method also handles codepoints in the surrogate range.
@@ -1810,28 +1815,33 @@ pub const fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
1810
1815
/// Panics if the buffer is not large enough.
1811
1816
/// A buffer of length 2 is large enough to encode any `char`.
1812
1817
#[ unstable( feature = "char_internals" , reason = "exposed only for libstd" , issue = "none" ) ]
1818
+ #[ rustc_const_unstable( feature = "const_char_encode_utf16" , issue = "130660" ) ]
1813
1819
#[ doc( hidden) ]
1814
1820
#[ inline]
1815
- pub fn encode_utf16_raw ( mut code : u32 , dst : & mut [ u16 ] ) -> & mut [ u16 ] {
1816
- // SAFETY: each arm checks whether there are enough bits to write into
1817
- unsafe {
1818
- if ( code & 0xFFFF ) == code && !dst. is_empty ( ) {
1819
- // The BMP falls through
1820
- * dst. get_unchecked_mut ( 0 ) = code as u16 ;
1821
- slice:: from_raw_parts_mut ( dst. as_mut_ptr ( ) , 1 )
1822
- } else if dst. len ( ) >= 2 {
1823
- // Supplementary planes break into surrogates.
1821
+ pub const fn encode_utf16_raw ( mut code : u32 , dst : & mut [ u16 ] ) -> & mut [ u16 ] {
1822
+ const fn panic_at_const ( _code : u32 , _len : usize , _dst_len : usize ) {
1823
+ // Note that we cannot format in constant expressions.
1824
+ panic ! ( "encode_utf16: buffer does not have enough bytes to encode code point" ) ;
1825
+ }
1826
+ fn panic_at_rt ( code : u32 , len : usize , dst_len : usize ) {
1827
+ panic ! (
1828
+ "encode_utf16: need {len} bytes to encode U+{code:04X} but buffer has just {dst_len}" ,
1829
+ ) ;
1830
+ }
1831
+ let len = len_utf16 ( code) ;
1832
+ match ( len, & mut * dst) {
1833
+ ( 1 , [ a, ..] ) => {
1834
+ * a = code as u16 ;
1835
+ }
1836
+ ( 2 , [ a, b, ..] ) => {
1824
1837
code -= 0x1_0000 ;
1825
- * dst. get_unchecked_mut ( 0 ) = 0xD800 | ( ( code >> 10 ) as u16 ) ;
1826
- * dst. get_unchecked_mut ( 1 ) = 0xDC00 | ( ( code as u16 ) & 0x3FF ) ;
1827
- slice:: from_raw_parts_mut ( dst. as_mut_ptr ( ) , 2 )
1828
- } else {
1829
- panic ! (
1830
- "encode_utf16: need {} units to encode U+{:X}, but the buffer has {}" ,
1831
- char :: from_u32_unchecked( code) . len_utf16( ) ,
1832
- code,
1833
- dst. len( ) ,
1834
- )
1838
+
1839
+ * a = ( code >> 10 ) as u16 | 0xD800 ;
1840
+ * b = ( code & 0x3FF ) as u16 | 0xDC00 ;
1835
1841
}
1836
- }
1842
+ // FIXME(const-hack): We would prefer to have streamlined panics when formatters become const-friendly.
1843
+ _ => const_eval_select ( ( code, len, dst. len ( ) ) , panic_at_const, panic_at_rt) ,
1844
+ } ;
1845
+ // SAFETY: `<&mut [u16]>::as_mut_ptr` is guaranteed to return a valid pointer and `len` has been tested to be within bounds.
1846
+ unsafe { slice:: from_raw_parts_mut ( dst. as_mut_ptr ( ) , len) }
1837
1847
}
0 commit comments