Skip to content

Commit d50eac6

Browse files
stevengj authored and JeffBezanson committed
allow operator suffixes — combining characters and primes (#22089)
1 parent 4913cc4 commit d50eac6

File tree

8 files changed

+269
-45
lines changed

8 files changed

+269
-45
lines changed

NEWS.md

+5
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,11 @@ New language features
1111
a function argument name, the argument is unpacked into local variables `x` and `y`
1212
as in the assignment `(x, y) = arg` ([#6614]).
1313

14+
* Custom infix operators can now be defined by appending Unicode
15+
combining marks, primes, and sub/superscripts to other operators.
16+
For example, `+̂ₐ″` is parsed as an infix operator with the same
17+
precedence as `+` ([#22089]).
18+
1419
Language changes
1520
----------------
1621

doc/src/manual/variables.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,8 @@ Operators like `+` are also valid identifiers, but are parsed specially. In some
104104
can be used just like variables; for example `(+)` refers to the addition function, and `(+) = f`
105105
will reassign it. Most of the Unicode infix operators (in category Sm), such as `⊕`, are parsed
106106
as infix operators and are available for user-defined methods (e.g. you can use `const ⊗ = kron`
107-
to define `⊗` as an infix Kronecker product).
107+
to define `⊗` as an infix Kronecker product). Operators can also be suffixed with modifying marks,
108+
primes, and sub/superscripts, e.g. `+̂ₐ″` is parsed as an infix operator with the same precedence as `+`.
108109

109110
The only explicitly disallowed names for variables are the names of built-in statements:
110111

src/flisp/flisp.c

-2
Original file line numberDiff line numberDiff line change
@@ -2304,7 +2304,6 @@ static const builtinspec_t core_builtin_info[] = {
23042304

23052305
extern void builtins_init(fl_context_t *fl_ctx);
23062306
extern void comparehash_init(fl_context_t *fl_ctx);
2307-
extern void jl_charmap_init(fl_context_t *fl_ctx);
23082307

23092308
static void lisp_init(fl_context_t *fl_ctx, size_t initial_heapsize)
23102309
{
@@ -2337,7 +2336,6 @@ static void lisp_init(fl_context_t *fl_ctx, size_t initial_heapsize)
23372336
fl_ctx->consflags = bitvector_new(fl_ctx->heapsize/sizeof(cons_t), 1);
23382337
fl_print_init(fl_ctx);
23392338
comparehash_init(fl_ctx);
2340-
jl_charmap_init(fl_ctx);
23412339
fl_ctx->N_STACK = 262144;
23422340
fl_ctx->Stack = (value_t*)malloc(fl_ctx->N_STACK*sizeof(value_t));
23432341
CHECK_ALIGN8(fl_ctx->Stack);

src/flisp/flisp.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,7 @@ int fl_load_system_image_str(fl_context_t *fl_ctx, char* str, size_t len);
378378
/* julia extensions */
379379
JL_DLLEXPORT int jl_id_char(uint32_t wc);
380380
JL_DLLEXPORT int jl_id_start_char(uint32_t wc);
381+
JL_DLLEXPORT int jl_op_suffix_char(uint32_t wc);
381382

382383
struct _fl_context_t {
383384
symbol_t *symtab;
@@ -406,7 +407,6 @@ struct _fl_context_t {
406407
fltype_t *builtintype;
407408

408409
htable_t equal_eq_hashtable;
409-
htable_t jl_charmap;
410410

411411
value_t tablesym;
412412
fltype_t *tabletype;

src/flisp/julia_extensions.c

+77-24
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,11 @@
1212
extern "C" {
1313
#endif
1414

15+
#define _equal_wchar_(x, y, ctx) ((x) == (y))
16+
#define _hash_wchar_(x, ctx) inthash((uint32_t) ((uintptr_t) (x)))
17+
#include "htable.inc"
18+
HTIMPL_R(wcharhash, _hash_wchar_, _equal_wchar_)
19+
1520
static int is_uws(uint32_t wc)
1621
{
1722
return (wc==9 || wc==10 || wc==11 || wc==12 || wc==13 || wc==32 ||
@@ -134,6 +139,28 @@ JL_DLLEXPORT int jl_id_char(uint32_t wc)
134139
return 0;
135140
}
136141

142+
#include "julia_opsuffs.h"
143+
144+
// chars that can follow an operator (e.g. +) and be parsed as part of the operator
145+
int jl_op_suffix_char(uint32_t wc)
146+
{
147+
static htable_t jl_opsuffs;
148+
if (!jl_opsuffs.size) { // initialize hash table of suffixes
149+
size_t i, opsuffs_len = sizeof(opsuffs) / (sizeof(uint32_t));
150+
htable_t *h = htable_new(&jl_opsuffs, opsuffs_len);
151+
assert(sizeof(uint32_t) <= sizeof(void*));
152+
for (i = 0; i < opsuffs_len; ++i)
153+
wcharhash_put_r(h, (void*)((uintptr_t)opsuffs[i]), NULL, NULL);
154+
}
155+
if (wc < 0xA1 || wc > 0x10ffff) return 0;
156+
utf8proc_category_t cat = utf8proc_category((utf8proc_int32_t) wc);
157+
if (cat == UTF8PROC_CATEGORY_MN || cat == UTF8PROC_CATEGORY_MC ||
158+
cat == UTF8PROC_CATEGORY_ME)
159+
return 1;
160+
// use hash table of other allowed characters: primes and sub/superscripts
161+
return HT_NOTFOUND != wcharhash_get_r(&jl_opsuffs, (void*)((uintptr_t)wc), NULL);
162+
}
163+
137164
value_t fl_julia_identifier_char(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
138165
{
139166
argcount(fl_ctx, "identifier-char?", nargs, 1);
@@ -152,33 +179,57 @@ value_t fl_julia_identifier_start_char(fl_context_t *fl_ctx, value_t *args, uint
152179
return jl_id_start_char(wc) ? fl_ctx->T : fl_ctx->F;
153180
}
154181

155-
#include "julia_charmap.h"
156-
#define _equal_wchar_(x, y, ctx) ((x) == (y))
157-
#define _hash_wchar_(x, ctx) inthash((uint32_t) ((uintptr_t) (x)))
158-
#include "htable.inc"
159-
HTIMPL_R(wcharhash, _hash_wchar_, _equal_wchar_)
182+
value_t fl_julia_op_suffix_char(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
183+
{
184+
argcount(fl_ctx, "op-suffix-char?", nargs, 1);
185+
if (!iscprim(args[0]) || ((cprim_t*)ptr(args[0]))->type != fl_ctx->wchartype)
186+
type_error(fl_ctx, "op-suffix-char?", "wchar", args[0]);
187+
uint32_t wc = *(uint32_t*)cp_data((cprim_t*)ptr(args[0]));
188+
return jl_op_suffix_char(wc) ? fl_ctx->T : fl_ctx->F;
189+
}
160190

161-
void jl_charmap_init(fl_context_t *fl_ctx)
191+
value_t fl_julia_strip_op_suffix(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
162192
{
163-
size_t charmap_len = sizeof(charmap) / (2*sizeof(uint32_t));
164-
size_t i;
165-
htable_t *h = htable_new(&fl_ctx->jl_charmap, charmap_len);
166-
assert(sizeof(uint32_t) <= sizeof(void*));
167-
for (i = 0; i < charmap_len; ++i) {
168-
/* Store charmap in a hash table. Typecasting codepoints
169-
directly to pointer keys works because pointers are at
170-
least 32 bits on all Julia-supported systems, and because
171-
we never map anything to U+0001 (since HT_NOTFOUND is (void*)1). */
172-
assert((void*)(uintptr_t)charmap[i][1] != HT_NOTFOUND);
173-
wcharhash_put_r(h, (void*)((uintptr_t)charmap[i][0]),
174-
(void*)((uintptr_t)charmap[i][1]), (void*)fl_ctx);
193+
argcount(fl_ctx, "strip-op-suffix", nargs, 1);
194+
if (!issymbol(args[0]))
195+
type_error(fl_ctx, "strip-op-suffix", "symbol", args[0]);
196+
char *op = symbol_name(fl_ctx, args[0]);
197+
size_t i = 0;
198+
while (op[i]) {
199+
size_t j = i;
200+
if (jl_op_suffix_char(u8_nextchar(op, &j)))
201+
break;
202+
i = j;
175203
}
204+
if (!op[i]) return args[0]; // no suffix to strip
205+
if (!i) lerror(fl_ctx, symbol(fl_ctx, "error"), "invalid operator");
206+
char *opnew = strncpy(malloc(i+1), op, i);
207+
opnew[i] = 0;
208+
value_t opnew_symbol = symbol(fl_ctx, opnew);
209+
free(opnew);
210+
return opnew_symbol;
176211
}
177-
utf8proc_int32_t jl_charmap_map(utf8proc_int32_t c, void *fl_ctx_)
212+
213+
#include "julia_charmap.h"
214+
215+
utf8proc_int32_t jl_charmap_map(utf8proc_int32_t c, void *ctx)
178216
{
179-
fl_context_t *fl_ctx = (fl_context_t *) fl_ctx_;
180-
htable_t *h = &fl_ctx->jl_charmap;
181-
void *v = wcharhash_get_r(h, (void*)((uintptr_t)c), (void*) fl_ctx);
217+
static htable_t jl_charmap;
218+
if (!jl_charmap.size) { // initialize hash table
219+
size_t i, charmap_len = sizeof(charmap) / (2*sizeof(uint32_t));
220+
htable_t *h = htable_new(&jl_charmap, charmap_len);
221+
assert(sizeof(uint32_t) <= sizeof(void*));
222+
for (i = 0; i < charmap_len; ++i) {
223+
/* Store charmap in a hash table. Typecasting codepoints
224+
directly to pointer keys works because pointers are at
225+
least 32 bits on all Julia-supported systems, and because
226+
we never map anything to U+0001 (since HT_NOTFOUND is (void*)1). */
227+
assert((void*)(uintptr_t)charmap[i][1] != HT_NOTFOUND);
228+
wcharhash_put_r(h, (void*)((uintptr_t)charmap[i][0]),
229+
(void*)((uintptr_t)charmap[i][1]), NULL);
230+
}
231+
}
232+
void *v = wcharhash_get_r(&jl_charmap, (void*)((uintptr_t)c), NULL);
182233
return v == HT_NOTFOUND ? c : (utf8proc_int32_t) ((uintptr_t) v);
183234
}
184235

@@ -191,7 +242,7 @@ static char *normalize(fl_context_t *fl_ctx, char *s)
191242
ssize_t result;
192243
size_t newlen;
193244
result = utf8proc_decompose_custom((uint8_t*) s, 0, NULL, 0, (utf8proc_option_t)options,
194-
jl_charmap_map, (void*) fl_ctx);
245+
jl_charmap_map, NULL);
195246
if (result < 0) goto error;
196247
newlen = result * sizeof(int32_t) + 1;
197248
if (newlen > fl_ctx->jlbuflen) {
@@ -200,7 +251,7 @@ static char *normalize(fl_context_t *fl_ctx, char *s)
200251
if (!fl_ctx->jlbuf) lerror(fl_ctx, fl_ctx->OutOfMemoryError, "error allocating UTF8 buffer");
201252
}
202253
result = utf8proc_decompose_custom((uint8_t*)s,0, (int32_t*)fl_ctx->jlbuf,result, (utf8proc_option_t)options,
203-
jl_charmap_map, (void*) fl_ctx);
254+
jl_charmap_map, NULL);
204255
if (result < 0) goto error;
205256
result = utf8proc_reencode((int32_t*)fl_ctx->jlbuf,result, (utf8proc_option_t)options);
206257
if (result < 0) goto error;
@@ -245,6 +296,8 @@ static const builtinspec_t julia_flisp_func_info[] = {
245296
{ "accum-julia-symbol", fl_accum_julia_symbol },
246297
{ "identifier-char?", fl_julia_identifier_char },
247298
{ "identifier-start-char?", fl_julia_identifier_start_char },
299+
{ "op-suffix-char?", fl_julia_op_suffix_char },
300+
{ "strip-op-suffix", fl_julia_strip_op_suffix },
248301
{ NULL, NULL }
249302
};
250303

src/flisp/julia_opsuffs.h

+127
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
/* Array of codepoints allowed as operator suffixes in Julia:
2+
primes and Latin/Greek/math super/subscripts.
3+
4+
produced by:
5+
6+
for c in sort(unique(collect("₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎²³¹ʰʲʳʷʸˡˢˣᴬᴮᴰᴱᴳᴴᴵᴶᴷᴸᴹᴺᴼᴾᴿᵀᵁᵂᵃᵇᵈᵉᵍᵏᵐᵒᵖᵗᵘᵛᵝᵞᵟᵠᵡᵢᵣᵤᵥᵦᵧᵨᵩᵪᶜᶠᶥᶦᶫᶰᶸᶻᶿ ⁰ⁱ⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ⁿₐₑₒₓₕₖₗₘₙₚₛₜⱼⱽ" * "′″‴‵‶‷⁗")))
7+
println(" ", repr(UInt32(c)), ", // ", c)
8+
end
9+
*/
10+
11+
static const uint32_t opsuffs[] = {
12+
0x000000b2, // ²
13+
0x000000b3, // ³
14+
0x000000b9, // ¹
15+
0x000002b0, // ʰ
16+
0x000002b2, // ʲ
17+
0x000002b3, // ʳ
18+
0x000002b7, // ʷ
19+
0x000002b8, // ʸ
20+
0x000002e1, // ˡ
21+
0x000002e2, // ˢ
22+
0x000002e3, // ˣ
23+
0x00001d2c, // ᴬ
24+
0x00001d2e, // ᴮ
25+
0x00001d30, // ᴰ
26+
0x00001d31, // ᴱ
27+
0x00001d33, // ᴳ
28+
0x00001d34, // ᴴ
29+
0x00001d35, // ᴵ
30+
0x00001d36, // ᴶ
31+
0x00001d37, // ᴷ
32+
0x00001d38, // ᴸ
33+
0x00001d39, // ᴹ
34+
0x00001d3a, // ᴺ
35+
0x00001d3c, // ᴼ
36+
0x00001d3e, // ᴾ
37+
0x00001d3f, // ᴿ
38+
0x00001d40, // ᵀ
39+
0x00001d41, // ᵁ
40+
0x00001d42, // ᵂ
41+
0x00001d43, // ᵃ
42+
0x00001d47, // ᵇ
43+
0x00001d48, // ᵈ
44+
0x00001d49, // ᵉ
45+
0x00001d4d, // ᵍ
46+
0x00001d4f, // ᵏ
47+
0x00001d50, // ᵐ
48+
0x00001d52, // ᵒ
49+
0x00001d56, // ᵖ
50+
0x00001d57, // ᵗ
51+
0x00001d58, // ᵘ
52+
0x00001d5b, // ᵛ
53+
0x00001d5d, // ᵝ
54+
0x00001d5e, // ᵞ
55+
0x00001d5f, // ᵟ
56+
0x00001d60, // ᵠ
57+
0x00001d61, // ᵡ
58+
0x00001d62, // ᵢ
59+
0x00001d63, // ᵣ
60+
0x00001d64, // ᵤ
61+
0x00001d65, // ᵥ
62+
0x00001d66, // ᵦ
63+
0x00001d67, // ᵧ
64+
0x00001d68, // ᵨ
65+
0x00001d69, // ᵩ
66+
0x00001d6a, // ᵪ
67+
0x00001d9c, // ᶜ
68+
0x00001da0, // ᶠ
69+
0x00001da5, // ᶥ
70+
0x00001da6, // ᶦ
71+
0x00001dab, // ᶫ
72+
0x00001db0, // ᶰ
73+
0x00001db8, // ᶸ
74+
0x00001dbb, // ᶻ
75+
0x00001dbf, // ᶿ
76+
0x00002009, //  
77+
0x00002032, // ′
78+
0x00002033, // ″
79+
0x00002034, // ‴
80+
0x00002035, // ‵
81+
0x00002036, // ‶
82+
0x00002037, // ‷
83+
0x00002057, // ⁗
84+
0x00002070, // ⁰
85+
0x00002071, // ⁱ
86+
0x00002074, // ⁴
87+
0x00002075, // ⁵
88+
0x00002076, // ⁶
89+
0x00002077, // ⁷
90+
0x00002078, // ⁸
91+
0x00002079, // ⁹
92+
0x0000207a, // ⁺
93+
0x0000207b, // ⁻
94+
0x0000207c, // ⁼
95+
0x0000207d, // ⁽
96+
0x0000207e, // ⁾
97+
0x0000207f, // ⁿ
98+
0x00002080, // ₀
99+
0x00002081, // ₁
100+
0x00002082, // ₂
101+
0x00002083, // ₃
102+
0x00002084, // ₄
103+
0x00002085, // ₅
104+
0x00002086, // ₆
105+
0x00002087, // ₇
106+
0x00002088, // ₈
107+
0x00002089, // ₉
108+
0x0000208a, // ₊
109+
0x0000208b, // ₋
110+
0x0000208c, // ₌
111+
0x0000208d, // ₍
112+
0x0000208e, // ₎
113+
0x00002090, // ₐ
114+
0x00002091, // ₑ
115+
0x00002092, // ₒ
116+
0x00002093, // ₓ
117+
0x00002095, // ₕ
118+
0x00002096, // ₖ
119+
0x00002097, // ₗ
120+
0x00002098, // ₘ
121+
0x00002099, // ₙ
122+
0x0000209a, // ₚ
123+
0x0000209b, // ₛ
124+
0x0000209c, // ₜ
125+
0x00002c7c, // ⱼ
126+
0x00002c7d // ⱽ
127+
};

0 commit comments

Comments
 (0)