@@ -74,6 +74,7 @@ def load_unicode_data(f):
74
74
gencats = {}
75
75
to_lower = {}
76
76
to_upper = {}
77
+ to_title = {}
77
78
combines = {}
78
79
canon_decomp = {}
79
80
compat_decomp = {}
@@ -110,6 +111,10 @@ def load_unicode_data(f):
110
111
if upcase != "" and code_org != upcase :
111
112
to_upper [code ] = (int (upcase , 16 ), 0 , 0 )
112
113
114
+ # store titlecase mapping, if given and different from the character's own code
115
+ if titlecase .strip () != "" and code_org != titlecase :
116
+ to_title [code ] = (int (titlecase , 16 ), 0 , 0 )
117
+
113
118
# store decomposition, if given
114
119
if decomp != "" :
115
120
if decomp .startswith ('<' ):
@@ -144,9 +149,9 @@ def load_unicode_data(f):
144
149
gencats = group_cats (gencats )
145
150
combines = to_combines (group_cats (combines ))
146
151
147
- return (canon_decomp , compat_decomp , gencats , combines , to_upper , to_lower )
152
+ return (canon_decomp , compat_decomp , gencats , combines , to_upper , to_lower , to_title )
148
153
149
- def load_special_casing (f , to_upper , to_lower ):
154
+ def load_special_casing (f , to_upper , to_lower , to_title ):
150
155
fetch (f )
151
156
for line in fileinput .input (f ):
152
157
data = line .split ('#' )[0 ].split (';' )
@@ -163,7 +168,7 @@ def load_special_casing(f, to_upper, to_lower):
163
168
title = title .strip ()
164
169
upper = upper .strip ()
165
170
key = int (code , 16 )
166
- for (map_ , values ) in [(to_lower , lower ), (to_upper , upper )]:
171
+ for (map_ , values ) in [(to_lower , lower ), (to_upper , upper ), ( to_title , title ) ]:
167
172
if values != code :
168
173
values = [int (i , 16 ) for i in values .split ()]
169
174
for _ in range (len (values ), 3 ):
@@ -344,7 +349,7 @@ def emit_property_module(f, mod, tbl, emit):
344
349
f .write (" }\n \n " )
345
350
f .write ("}\n \n " )
346
351
347
- def emit_conversions_module (f , to_upper , to_lower ):
352
+ def emit_conversions_module (f , to_upper , to_lower , to_title ):
348
353
f .write ("pub mod conversions {" )
349
354
f .write ("""
350
355
use core::cmp::Ordering::{Equal, Less, Greater};
@@ -367,6 +372,13 @@ def emit_conversions_module(f, to_upper, to_lower):
367
372
}
368
373
}
369
374
375
+ pub fn to_title(c: char) -> [char; 3] {
376
+ match bsearch_case_table(c, to_titlecase_table) {
377
+ None => [c, '\\ 0', '\\ 0'],
378
+ Some(index) => to_titlecase_table[index].1
379
+ }
380
+ }
381
+
370
382
fn bsearch_case_table(c: char, table: &'static [(char, [char; 3])]) -> Option<usize> {
371
383
match table.binary_search_by(|&(key, _)| {
372
384
if c == key { Equal }
@@ -379,18 +391,18 @@ def emit_conversions_module(f, to_upper, to_lower):
379
391
}
380
392
381
393
""" )
394
+ t_type = "&'static [(char, [char; 3])]"
395
+ pfun = lambda x : "(%s,[%s,%s,%s])" % (
396
+ escape_char (x [0 ]), escape_char (x [1 ][0 ]), escape_char (x [1 ][1 ]), escape_char (x [1 ][2 ]))
382
397
emit_table (f , "to_lowercase_table" ,
383
398
sorted (to_lower .iteritems (), key = operator .itemgetter (0 )),
384
- is_pub = False ,
385
- t_type = "&'static [(char, [char; 3])]" ,
386
- pfun = lambda x : "(%s,[%s,%s,%s])" % (
387
- escape_char (x [0 ]), escape_char (x [1 ][0 ]), escape_char (x [1 ][1 ]), escape_char (x [1 ][2 ])))
399
+ is_pub = False , t_type = t_type , pfun = pfun )
388
400
emit_table (f , "to_uppercase_table" ,
389
401
sorted (to_upper .iteritems (), key = operator .itemgetter (0 )),
390
- is_pub = False ,
391
- t_type = "&'static [(char, [char; 3])] " ,
392
- pfun = lambda x : "(%s,[%s,%s,%s])" % (
393
- escape_char ( x [ 0 ]), escape_char ( x [ 1 ][ 0 ]), escape_char ( x [ 1 ][ 1 ]), escape_char ( x [ 1 ][ 2 ])) )
402
+ is_pub = False , t_type = t_type , pfun = pfun )
403
+ emit_table ( f , "to_titlecase_table " ,
404
+ sorted ( to_title . iteritems (), key = operator . itemgetter ( 0 )),
405
+ is_pub = False , t_type = t_type , pfun = pfun )
394
406
f .write ("}\n \n " )
395
407
396
408
def emit_grapheme_module (f , grapheme_table , grapheme_cats ):
@@ -624,8 +636,8 @@ def optimize_width_table(wtable):
624
636
pub const UNICODE_VERSION: (u64, u64, u64) = (%s, %s, %s);
625
637
""" % unicode_version )
626
638
(canon_decomp , compat_decomp , gencats , combines ,
627
- to_upper , to_lower ) = load_unicode_data ("UnicodeData.txt" )
628
- load_special_casing ("SpecialCasing.txt" , to_upper , to_lower )
639
+ to_upper , to_lower , to_title ) = load_unicode_data ("UnicodeData.txt" )
640
+ load_special_casing ("SpecialCasing.txt" , to_upper , to_lower , to_title )
629
641
want_derived = ["XID_Start" , "XID_Continue" , "Alphabetic" , "Lowercase" , "Uppercase" ]
630
642
derived = load_properties ("DerivedCoreProperties.txt" , want_derived )
631
643
scripts = load_properties ("Scripts.txt" , [])
@@ -645,7 +657,7 @@ def optimize_width_table(wtable):
645
657
646
658
# normalizations and conversions module
647
659
emit_norm_module (rf , canon_decomp , compat_decomp , combines , norm_props )
648
- emit_conversions_module (rf , to_upper , to_lower )
660
+ emit_conversions_module (rf , to_upper , to_lower , to_title )
649
661
650
662
### character width module
651
663
width_table = []
0 commit comments