38
38
//! - Has the [`Emoji_Presentation`] property, and
39
39
//! - Is not in the [Enclosed Ideographic Supplement] block.
40
40
//! 3. The sequence `"\r\n"` has width 1.
41
- //! 4. [`'\u{115F}'` HANGUL CHOSEONG FILLER](https://util.unicode.org/UnicodeJsps/character.jsp?a=115F) has width 2.
42
- //! 5. The following have width 0:
41
+ //! 4. [Lisu tone letter] combinations consisting of a character in the range `'\u{A4F8}'..='\u{A4FB}'`
42
+ //! followed by a character in the range `'\u{A4FC}'..='\u{A4FD}'` have width 1.
43
+ //! 5. [`'\u{115F}'` HANGUL CHOSEONG FILLER](https://util.unicode.org/UnicodeJsps/character.jsp?a=115F) has width 2.
44
+ //! 6. The following have width 0:
43
45
//! - [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BDefault_Ignorable_Code_Point%7D)
44
46
//! with the [`Default_Ignorable_Code_Point`] property.
45
47
//! - [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BGrapheme_Extend%7D)
55
57
//! - [`'\u{1B43}'` BALINESE VOWEL SIGN PEPET TEDUNG](https://util.unicode.org/UnicodeJsps/character.jsp?a=1B43).
56
58
//! - [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BHangul_Syllable_Type%3DV%7D%5Cp%7BHangul_Syllable_Type%3DT%7D)
57
59
//! with a [`Hangul_Syllable_Type`] of `Vowel_Jamo` (`V`) or `Trailing_Jamo` (`T`).
58
- //! 6. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DF%7D%5Cp%7BEast_Asian_Width%3DW%7D)
60
+ //! 7. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DF%7D%5Cp%7BEast_Asian_Width%3DW%7D)
59
61
//! with an [`East_Asian_Width`] of [`Fullwidth`] or [`Wide`] have width 2.
60
- //! 7. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DA%7D)
62
+ //! 8. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DA%7D)
61
63
//! with an [`East_Asian_Width`] of [`Ambiguous`] have width 2 in an East Asian context, and width 1 otherwise.
62
- //! 8. All other characters have width 1.
64
+ //! 9. All other characters have width 1.
63
65
//!
64
66
//! [`Default_Ignorable_Code_Point`]: https://www.unicode.org/versions/Unicode15.0.0/ch05.pdf#G40095
65
67
//! [`East_Asian_Width`]: https://www.unicode.org/reports/tr11/#ED1
76
78
//!
77
79
//! [Enclosed Ideographic Supplement]: https://unicode.org/charts/PDF/U1F200.pdf
78
80
//!
81
+ //! [Lisu tone letter]: https://www.unicode.org/versions/Unicode15.0.0/ch18.pdf#G42078
82
+ //!
79
83
//! ## Canonical equivalence
80
84
//!
81
85
//! The non-CJK width methods guarantee that canonically equivalent strings are assigned the same width.
@@ -185,8 +189,14 @@ impl UnicodeWidthStr for str {
185
189
enum NextCharInfo {
186
190
#[ default]
187
191
Default ,
192
+ /// `'\n'`
188
193
LineFeed = 0x0A ,
194
+ /// `'\u{A4FC}'..='\u{A4FD}'`
195
+ /// <https://www.unicode.org/versions/Unicode15.0.0/ch18.pdf#G42078>
196
+ TrailingLisuToneLetter ,
197
+ /// `'\u{FE0E}'`
189
198
Vs15 = 0x0E ,
199
+ /// `'\u{FE0F}'`
190
200
Vs16 = 0x0F ,
191
201
}
192
202
@@ -204,25 +214,28 @@ fn str_width(s: &str, is_cjk: bool) -> usize {
204
214
/// they're treated as single width.
205
215
#[ inline]
206
216
fn width_in_str ( c : char , is_cjk : bool , next_info : NextCharInfo ) -> ( usize , NextCharInfo ) {
207
- match next_info {
208
- NextCharInfo :: Vs15 if !is_cjk && cw:: starts_non_ideographic_text_presentation_seq ( c) => {
209
- ( 1 , NextCharInfo :: Default )
217
+ if next_info == NextCharInfo :: Vs16 && cw:: starts_emoji_presentation_seq ( c) {
218
+ ( 2 , NextCharInfo :: Default )
219
+ } else if c <= '\u{A0}' {
220
+ match c {
221
+ '\n' => ( 1 , NextCharInfo :: LineFeed ) ,
222
+ '\r' if next_info == NextCharInfo :: LineFeed => ( 0 , NextCharInfo :: Default ) ,
223
+ _ => ( 1 , NextCharInfo :: Default ) ,
210
224
}
211
- NextCharInfo :: Vs16 if cw:: starts_emoji_presentation_seq ( c) => ( 2 , NextCharInfo :: Default ) ,
212
- _ => {
213
- if c <= '\u{A0}' {
214
- match c {
215
- '\n' => ( 1 , NextCharInfo :: LineFeed ) ,
216
- '\r' if next_info == NextCharInfo :: LineFeed => ( 0 , NextCharInfo :: Default ) ,
217
- _ => ( 1 , NextCharInfo :: Default ) ,
218
- }
219
- } else {
220
- match c {
221
- '\u{FE0E}' => ( 0 , NextCharInfo :: Vs15 ) ,
222
- '\u{FE0F}' => ( 0 , NextCharInfo :: Vs16 ) ,
223
- _ => ( cw:: lookup_width ( c, is_cjk) , NextCharInfo :: Default ) ,
224
- }
225
+ } else {
226
+ match ( c, next_info) {
227
+ ( '\u{A4F8}' ..='\u{A4FB}' , NextCharInfo :: TrailingLisuToneLetter ) => {
228
+ ( 0 , NextCharInfo :: Default )
229
+ }
230
+ ( '\u{A4FC}' ..='\u{A4FD}' , _) => ( 1 , NextCharInfo :: TrailingLisuToneLetter ) ,
231
+ ( '\u{FE0E}' , _) => ( 0 , NextCharInfo :: Vs15 ) ,
232
+ ( '\u{FE0F}' , _) => ( 0 , NextCharInfo :: Vs16 ) ,
233
+ ( _, NextCharInfo :: Vs15 )
234
+ if !is_cjk && cw:: starts_non_ideographic_text_presentation_seq ( c) =>
235
+ {
236
+ ( 1 , NextCharInfo :: Default )
225
237
}
238
+ _ => ( cw:: lookup_width ( c, is_cjk) , NextCharInfo :: Default ) ,
226
239
}
227
240
}
228
241
}
0 commit comments