@@ -40,11 +40,67 @@ value_t fl_skipws(value_t *args, u_int32_t nargs)
40
40
return skipped ;
41
41
}
42
42
43
+ static int is_wc_cat_id_start (uint32_t wc , utf8proc_propval_t cat )
44
+ {
45
+ return (cat == UTF8PROC_CATEGORY_LU || cat == UTF8PROC_CATEGORY_LL ||
46
+ cat == UTF8PROC_CATEGORY_LT || cat == UTF8PROC_CATEGORY_LM ||
47
+ cat == UTF8PROC_CATEGORY_LO || cat == UTF8PROC_CATEGORY_NL ||
48
+ // allow currency symbols
49
+ cat == UTF8PROC_CATEGORY_SC ||
50
+ // allow all latin-1 characters except math symbols and quotes
51
+ (wc <= 0xff && cat != UTF8PROC_CATEGORY_SM &&
52
+ cat != UTF8PROC_CATEGORY_PF && cat != UTF8PROC_CATEGORY_PI ) ||
53
+ // Other_ID_Start
54
+ wc == 0x2118 || wc == 0x212E || (wc >= 0x309B && wc <= 0x309C ));
55
+ }
56
+
57
+ static int jl_id_start_char (uint32_t wc )
58
+ {
59
+ if ((wc >= 'A' && wc <= 'Z' ) || (wc >= 'a' && wc <= 'z' ) || wc == '_' )
60
+ return 1 ;
61
+ if (wc < 0xA1 || wc > 0x10ffff )
62
+ return 0 ;
63
+ const utf8proc_property_t * prop = utf8proc_get_property (wc );
64
+ return is_wc_cat_id_start (wc , prop -> category );
65
+ }
66
+
43
67
static int jl_id_char (uint32_t wc )
44
68
{
45
- return ((wc >= 'A' && wc <= 'Z' ) || (wc >= 'a' && wc <= 'z' ) ||
46
- (wc >= '0' && wc <= '9' ) || (wc >= 0xA1 ) ||
47
- wc == '!' || wc == '_' );
69
+ if ((wc >= 'A' && wc <= 'Z' ) || (wc >= 'a' && wc <= 'z' ) || wc == '_' ||
70
+ (wc >= '0' && wc <= '9' ) || wc == '!' )
71
+ return 1 ;
72
+ if (wc < 0xA1 || wc > 0x10ffff )
73
+ return 0 ;
74
+ const utf8proc_property_t * prop = utf8proc_get_property (wc );
75
+ utf8proc_propval_t cat = prop -> category ;
76
+ if (is_wc_cat_id_start (wc , cat )) return 1 ;
77
+ if (cat == UTF8PROC_CATEGORY_MN || cat == UTF8PROC_CATEGORY_MC ||
78
+ cat == UTF8PROC_CATEGORY_ND || cat == UTF8PROC_CATEGORY_PC ||
79
+ cat == UTF8PROC_CATEGORY_SK ||
80
+ // primes
81
+ (wc >= 0x2032 && wc <= 0x2034 ) ||
82
+ // Other_ID_Continue
83
+ wc == 0x0387 || wc == 0x19da || (wc >= 0x1369 && wc <= 0x1371 ))
84
+ return 1 ;
85
+ return 0 ;
86
+ }
87
+
88
+ value_t fl_julia_identifier_char (value_t * args , u_int32_t nargs )
89
+ {
90
+ argcount ("identifier-char?" , nargs , 1 );
91
+ if (!iscprim (args [0 ]) || ((cprim_t * )ptr (args [0 ]))-> type != wchartype )
92
+ type_error ("identifier-char?" , "wchar" , args [0 ]);
93
+ uint32_t wc = * (uint32_t * )cp_data ((cprim_t * )ptr (args [0 ]));
94
+ return jl_id_char (wc );
95
+ }
96
+
97
+ value_t fl_julia_identifier_start_char (value_t * args , u_int32_t nargs )
98
+ {
99
+ argcount ("identifier-start-char?" , nargs , 1 );
100
+ if (!iscprim (args [0 ]) || ((cprim_t * )ptr (args [0 ]))-> type != wchartype )
101
+ type_error ("identifier-start-char?" , "wchar" , args [0 ]);
102
+ uint32_t wc = * (uint32_t * )cp_data ((cprim_t * )ptr (args [0 ]));
103
+ return jl_id_start_char (wc );
48
104
}
49
105
50
106
// return NFC-normalized UTF8-encoded version of s
@@ -105,6 +161,8 @@ value_t fl_accum_julia_symbol(value_t *args, u_int32_t nargs)
105
161
static builtinspec_t julia_flisp_func_info [] = {
106
162
{ "skip-ws" , fl_skipws },
107
163
{ "accum-julia-symbol" , fl_accum_julia_symbol },
164
+ { "identifier-char?" , fl_julia_identifier_char },
165
+ { "identifier-start-char?" , fl_julia_identifier_start_char },
108
166
{ NULL , NULL }
109
167
};
110
168
0 commit comments