Skip to content

Commit 62a85df

Browse files
authored
Rollup merge of rust-lang#71854 - eduardosm:assoc-char-funcs-and-consts, r=Amanieu
Make `std::char` functions and constants associated to `char`. First step to fix rust-lang#71763.
2 parents e89a99d + 0e12a9d commit 62a85df

File tree

2 files changed

+239
-2
lines changed

2 files changed

+239
-2
lines changed

src/libcore/char/methods.rs

+237
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,243 @@ use super::*;
99

1010
#[lang = "char"]
1111
impl char {
12+
/// The highest valid code point a `char` can have.
13+
///
14+
/// A `char` is a [Unicode Scalar Value], which means that it is a [Code
15+
/// Point], but only ones within a certain range. `MAX` is the highest valid
16+
/// code point that's a valid [Unicode Scalar Value].
17+
///
18+
/// [Unicode Scalar Value]: http://www.unicode.org/glossary/#unicode_scalar_value
19+
/// [Code Point]: http://www.unicode.org/glossary/#code_point
20+
#[unstable(feature = "assoc_char_consts", reason = "recently added", issue = "71763")]
21+
pub const MAX: char = '\u{10ffff}';
22+
23+
/// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
24+
/// decoding error.
25+
///
26+
/// It can occur, for example, when giving ill-formed UTF-8 bytes to
27+
/// [`String::from_utf8_lossy`](string/struct.String.html#method.from_utf8_lossy).
28+
#[unstable(feature = "assoc_char_consts", reason = "recently added", issue = "71763")]
29+
pub const REPLACEMENT_CHARACTER: char = '\u{FFFD}';
30+
31+
/// The version of [Unicode](http://www.unicode.org/) that the Unicode parts of
32+
/// `char` and `str` methods are based on.
33+
///
34+
/// New versions of Unicode are released regularly and subsequently all methods
35+
/// in the standard library depending on Unicode are updated. Therefore the
36+
/// behavior of some `char` and `str` methods and the value of this constant
37+
/// changes over time. This is *not* considered to be a breaking change.
38+
///
39+
/// The version numbering scheme is explained in
40+
/// [Unicode 11.0 or later, Section 3.1 Versions of the Unicode Standard](https://www.unicode.org/versions/Unicode11.0.0/ch03.pdf#page=4).
41+
#[unstable(feature = "assoc_char_consts", reason = "recently added", issue = "71763")]
42+
pub const UNICODE_VERSION: (u8, u8, u8) = crate::unicode::UNICODE_VERSION;
43+
44+
/// Creates an iterator over the UTF-16 encoded code points in `iter`,
45+
/// returning unpaired surrogates as `Err`s.
46+
///
47+
/// # Examples
48+
///
49+
/// Basic usage:
50+
///
51+
/// ```
52+
/// use std::char::decode_utf16;
53+
///
54+
/// // 𝄞mus<invalid>ic<invalid>
55+
/// let v = [
56+
/// 0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
57+
/// ];
58+
///
59+
/// assert_eq!(
60+
/// decode_utf16(v.iter().cloned())
61+
/// .map(|r| r.map_err(|e| e.unpaired_surrogate()))
62+
/// .collect::<Vec<_>>(),
63+
/// vec![
64+
/// Ok('𝄞'),
65+
/// Ok('m'), Ok('u'), Ok('s'),
66+
/// Err(0xDD1E),
67+
/// Ok('i'), Ok('c'),
68+
/// Err(0xD834)
69+
/// ]
70+
/// );
71+
/// ```
72+
///
73+
/// A lossy decoder can be obtained by replacing `Err` results with the replacement character:
74+
///
75+
/// ```
76+
/// use std::char::{decode_utf16, REPLACEMENT_CHARACTER};
77+
///
78+
/// // 𝄞mus<invalid>ic<invalid>
79+
/// let v = [
80+
/// 0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
81+
/// ];
82+
///
83+
/// assert_eq!(
84+
/// decode_utf16(v.iter().cloned())
85+
/// .map(|r| r.unwrap_or(REPLACEMENT_CHARACTER))
86+
/// .collect::<String>(),
87+
/// "𝄞mus�ic�"
88+
/// );
89+
/// ```
90+
#[unstable(feature = "assoc_char_funcs", reason = "recently added", issue = "71763")]
91+
#[inline]
92+
pub fn decode_utf16<I: IntoIterator<Item = u16>>(iter: I) -> DecodeUtf16<I::IntoIter> {
93+
// Thin forwarding wrapper: hands `iter` unchanged to `decode_utf16` in the
// sibling `decode` module and returns its iterator; no logic is added here.
super::decode::decode_utf16(iter)
94+
}
95+
96+
/// Converts a `u32` to a `char`.
97+
///
98+
/// Note that all `char`s are valid [`u32`]s, and can be cast to one with
99+
/// `as`:
100+
///
101+
/// ```
102+
/// let c = '💯';
103+
/// let i = c as u32;
104+
///
105+
/// assert_eq!(128175, i);
106+
/// ```
107+
///
108+
/// However, the reverse is not true: not all valid [`u32`]s are valid
109+
/// `char`s. `from_u32()` will return `None` if the input is not a valid value
110+
/// for a `char`.
111+
///
112+
/// [`u32`]: primitive.u32.html
113+
///
114+
/// For an unsafe version of this function which ignores these checks, see
115+
/// [`from_u32_unchecked`].
116+
///
117+
/// [`from_u32_unchecked`]: #method.from_u32_unchecked
118+
///
119+
/// # Examples
120+
///
121+
/// Basic usage:
122+
///
123+
/// ```
124+
/// use std::char;
125+
///
126+
/// let c = char::from_u32(0x2764);
127+
///
128+
/// assert_eq!(Some('❤'), c);
129+
/// ```
130+
///
131+
/// Returning `None` when the input is not a valid `char`:
132+
///
133+
/// ```
134+
/// use std::char;
135+
///
136+
/// let c = char::from_u32(0x110000);
137+
///
138+
/// assert_eq!(None, c);
139+
/// ```
140+
#[unstable(feature = "assoc_char_funcs", reason = "recently added", issue = "71763")]
141+
#[inline]
142+
pub fn from_u32(i: u32) -> Option<char> {
143+
// Thin forwarding wrapper: the result comes straight from `from_u32` in the
// sibling `convert` module; this associated function adds no checks of its own.
super::convert::from_u32(i)
144+
}
145+
146+
/// Converts a `u32` to a `char`, ignoring validity.
147+
///
148+
/// Note that all `char`s are valid [`u32`]s, and can be cast to one with
149+
/// `as`:
150+
///
151+
/// ```
152+
/// let c = '💯';
153+
/// let i = c as u32;
154+
///
155+
/// assert_eq!(128175, i);
156+
/// ```
157+
///
158+
/// However, the reverse is not true: not all valid [`u32`]s are valid
159+
/// `char`s. `from_u32_unchecked()` will ignore this, and blindly cast to
160+
/// `char`, possibly creating an invalid one.
161+
///
162+
/// [`u32`]: primitive.u32.html
163+
///
164+
/// # Safety
165+
///
166+
/// This function is unsafe, as it may construct invalid `char` values.
167+
///
168+
/// For a safe version of this function, see the [`from_u32`] function.
169+
///
170+
/// [`from_u32`]: #method.from_u32
171+
///
172+
/// # Examples
173+
///
174+
/// Basic usage:
175+
///
176+
/// ```
177+
/// use std::char;
178+
///
179+
/// let c = unsafe { char::from_u32_unchecked(0x2764) };
180+
///
181+
/// assert_eq!('❤', c);
182+
/// ```
183+
#[unstable(feature = "assoc_char_funcs", reason = "recently added", issue = "71763")]
184+
#[inline]
185+
pub unsafe fn from_u32_unchecked(i: u32) -> char {
186+
// `i` is passed straight through to `convert::from_u32_unchecked`; this wrapper
// performs no validity check of its own, so the caller of this `unsafe fn` must
// ensure that `i` is a valid `char` value.
super::convert::from_u32_unchecked(i)
187+
}
188+
189+
/// Converts a digit in the given radix to a `char`.
190+
///
191+
/// A 'radix' here is sometimes also called a 'base'. A radix of two
192+
/// indicates a binary number, a radix of ten, decimal, and a radix of
193+
/// sixteen, hexadecimal, to give some common values. Arbitrary
194+
/// radices up to 36 are supported.
195+
///
196+
/// `from_digit()` will return `None` if the input is not a digit in
197+
/// the given radix.
198+
///
199+
/// # Panics
200+
///
201+
/// Panics if given a radix larger than 36.
202+
///
203+
/// # Examples
204+
///
205+
/// Basic usage:
206+
///
207+
/// ```
208+
/// use std::char;
209+
///
210+
/// let c = char::from_digit(4, 10);
211+
///
212+
/// assert_eq!(Some('4'), c);
213+
///
214+
/// // Decimal 11 is a single digit in base 16
215+
/// let c = char::from_digit(11, 16);
216+
///
217+
/// assert_eq!(Some('b'), c);
218+
/// ```
219+
///
220+
/// Returning `None` when the input is not a digit:
221+
///
222+
/// ```
223+
/// use std::char;
224+
///
225+
/// let c = char::from_digit(20, 10);
226+
///
227+
/// assert_eq!(None, c);
228+
/// ```
229+
///
230+
/// Passing a large radix, causing a panic:
231+
///
232+
/// ```
233+
/// use std::thread;
234+
/// use std::char;
235+
///
236+
/// let result = thread::spawn(|| {
237+
/// // this panics
238+
/// let c = char::from_digit(1, 37);
239+
/// }).join();
240+
///
241+
/// assert!(result.is_err());
242+
/// ```
243+
#[unstable(feature = "assoc_char_funcs", reason = "recently added", issue = "71763")]
244+
#[inline]
245+
pub fn from_digit(num: u32, radix: u32) -> Option<char> {
246+
// Thin forwarding wrapper: both arguments go unchanged to `from_digit` in the
// sibling `convert` module; this associated function itself performs no radix
// or digit checks.
super::convert::from_digit(num, radix)
247+
}
248+
12249
/// Checks if a `char` is a digit in the given radix.
13250
///
14251
/// A 'radix' here is sometimes also called a 'base'. A radix of two

src/libcore/char/mod.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -92,15 +92,15 @@ const MAX_THREE_B: u32 = 0x10000;
9292
/// [Unicode Scalar Value]: http://www.unicode.org/glossary/#unicode_scalar_value
9393
/// [Code Point]: http://www.unicode.org/glossary/#code_point
9494
#[stable(feature = "rust1", since = "1.0.0")]
95-
pub const MAX: char = '\u{10ffff}';
95+
pub const MAX: char = char::MAX;
9696

9797
/// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
9898
/// decoding error.
9999
///
100100
/// It can occur, for example, when giving ill-formed UTF-8 bytes to
101101
/// [`String::from_utf8_lossy`](../../std/string/struct.String.html#method.from_utf8_lossy).
102102
#[stable(feature = "decode_utf16", since = "1.9.0")]
103-
pub const REPLACEMENT_CHARACTER: char = '\u{FFFD}';
103+
pub const REPLACEMENT_CHARACTER: char = char::REPLACEMENT_CHARACTER;
104104

105105
/// Returns an iterator that yields the hexadecimal Unicode escape of a
106106
/// character, as `char`s.

0 commit comments

Comments
 (0)