Skip to content

Commit 3b37219

Browse files
committed
Feature #6798 - Add built-in functions UNICODE_CHAR and UNICODE_VAL to convert between Unicode code point and character.
1 parent d0eac02 commit 3b37219

File tree

4 files changed

+153
-7
lines changed

4 files changed

+153
-7
lines changed

doc/sql.extensions/README.builtin_functions.txt

+42-7
Original file line numberDiff line numberDiff line change
@@ -721,22 +721,22 @@ Notes:
721721
1) If the first argument (relation) is a string expression or literal, then
722722
it's treated as a relation name and the engine searches for the
723723
corresponding relation ID. The search is case-sensitive.
724-
In the case of string literal, relation ID is evaluated at prepare time.
725-
In the case of expression, relation ID is evaluated at execution time.
724+
In the case of string literal, relation ID is evaluated at prepare time.
725+
In the case of expression, relation ID is evaluated at execution time.
726726
If the relation couldn't be found, then isc_relnotdef error is raised.
727-
2) If the first argument (relation) is a numeric expression or literal, then
727+
2) If the first argument (relation) is a numeric expression or literal, then
728728
it's treated as a relation ID and used "as is", without verification
729729
against existing relations.
730730
If the argument value is negative or greater than the maximum allowed
731731
relation ID (65535 currently), then NULL is returned.
732-
3) Second argument (recnum) represents an absolute record number in relation
732+
3) Second argument (recnum) represents an absolute record number in relation
733733
(if the next arguments -- dpnum and ppnum -- are missing), or a record
734734
number relative to the first record, specified by the next arguments.
735-
4) Third argument (dpnum) is a logical number of data page in relation (if
735+
4) Third argument (dpnum) is a logical number of data page in relation (if
736736
the next argument -- ppnum -- is missing), or number of data page
737737
relative to the first data page addressed by the given ppnum.
738738
5) Fourth argument (ppnum) is a logical number of pointer page in relation.
739-
6) All numbers are zero-based.
739+
6) All numbers are zero-based.
740740
Maximum allowed value for dpnum and ppnum is 2^32 (4294967296).
741741
If dpnum is specified, then recnum could be negative.
742742
If dpnum is missing and recnum is negative then NULL is returned.
@@ -763,7 +763,7 @@ Examples:
763763
where rdb$db_key >= make_dbkey(6, 0, 0)
764764
and rdb$db_key < make_dbkey(6, 0, 1)
765765

766-
4) Select all records that physically reside at first data page of 6th pointer
766+
4) Select all records that physically reside at first data page of 6th pointer
767767
page at relation
768768

769769
select * from SOMETABLE
@@ -1255,6 +1255,41 @@ Example:
12551255
3) select trunc(987.65, 1), trunc(987.65, -1) from rdb$database; -- returns 987.60, 980.00
12561256

12571257

1258+
------------
1259+
UNICODE_CHAR
1260+
------------
1261+
1262+
Function:
1263+
Returns the Unicode character with the specified code point.
1264+
1265+
Format:
1266+
UNICODE_CHAR( <number> )
1267+
1268+
Notes:
1269+
Argument to UNICODE_CHAR must be a valid UTF-32 code point not in the range of
1270+
high/low surrogates (0xD800 to 0xDFFF). Otherwise it throws an error.
1271+
1272+
Example:
1273+
select unicode_char(x) from y;
1274+
1275+
1276+
-----------
1277+
UNICODE_VAL
1278+
-----------
1279+
1280+
Function:
1281+
Returns the UTF-32 code point of the first character of the specified string.
1282+
1283+
Format:
1284+
UNICODE_VAL( <string> )
1285+
1286+
Notes:
1287+
Returns 0 if the string is empty.
1288+
1289+
Example:
1290+
select unicode_val(x) from y;
1291+
1292+
12581293
------------
12591294
UUID_TO_CHAR
12601295
------------

src/common/keywords.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -507,6 +507,8 @@ static const TOK tokens[] =
507507
{TOK_UNBOUNDED, "UNBOUNDED", false},
508508
{TOK_UNCOMMITTED, "UNCOMMITTED", true},
509509
{TOK_UNDO, "UNDO", true},
510+
{TOK_UNICODE_CHAR, "UNICODE_CHAR", true},
511+
{TOK_UNICODE_VAL, "UNICODE_VAL", true},
510512
{TOK_UNION, "UNION", false},
511513
{TOK_UNIQUE, "UNIQUE", false},
512514
{TOK_UNKNOWN, "UNKNOWN", false},

src/dsql/parse.y

+9
Original file line numberDiff line numberDiff line change
@@ -677,6 +677,11 @@ using namespace Firebird;
677677
%token <metaNamePtr> CLEAR
678678
%token <metaNamePtr> OLDEST
679679

680+
// tokens added for Firebird 5.0
681+
682+
%token <metaNamePtr> UNICODE_CHAR
683+
%token <metaNamePtr> UNICODE_VAL
684+
680685
// precedence declarations for expression evaluation
681686

682687
%left OR
@@ -8134,6 +8139,8 @@ system_function_std_syntax
81348139
| TAN
81358140
| TANH
81368141
| TRUNC
8142+
| UNICODE_CHAR
8143+
| UNICODE_VAL
81378144
| UUID_TO_CHAR
81388145
| QUANTIZE
81398146
| TOTALORDER
@@ -9040,6 +9047,8 @@ non_reserved_word
90409047
| TOTALORDER
90419048
| TRAPS
90429049
| ZONE
9050+
| UNICODE_CHAR // added in FB 5.0
9051+
| UNICODE_VAL
90439052
;
90449053

90459054
%%

src/jrd/SysFunction.cpp

+100
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,7 @@ void setParamsRsaEncrypt(DataTypeUtilBase*, const SysFunction*, int argsCount, d
237237
void setParamsRsaPublic(DataTypeUtilBase*, const SysFunction*, int argsCount, dsc** args);
238238
void setParamsRsaSign(DataTypeUtilBase*, const SysFunction*, int argsCount, dsc** args);
239239
void setParamsRsaVerify(DataTypeUtilBase*, const SysFunction*, int argsCount, dsc** args);
240+
void setParamsUnicodeVal(DataTypeUtilBase* dataTypeUtil, const SysFunction* function, int argsCount, dsc** args);
240241
void setParamsUuidToChar(DataTypeUtilBase* dataTypeUtil, const SysFunction* function, int argsCount, dsc** args);
241242

242243
// generic make functions
@@ -280,6 +281,7 @@ void makeRsaPrivate(DataTypeUtilBase* dataTypeUtil, const SysFunction* function,
280281
void makeRsaPublic(DataTypeUtilBase* dataTypeUtil, const SysFunction* function, dsc* result, int argsCount, const dsc** args);
281282
void makeRsaSign(DataTypeUtilBase* dataTypeUtil, const SysFunction* function, dsc* result, int argsCount, const dsc** args);
282283
void makeTrunc(DataTypeUtilBase* dataTypeUtil, const SysFunction* function, dsc* result, int argsCount, const dsc** args);
284+
void makeUnicodeChar(DataTypeUtilBase* dataTypeUtil, const SysFunction* function, dsc* result, int argsCount, const dsc** args);
283285
void makeUuid(DataTypeUtilBase* dataTypeUtil, const SysFunction* function, dsc* result, int argsCount, const dsc** args);
284286
void makeUuidToChar(DataTypeUtilBase* dataTypeUtil, const SysFunction* function, dsc* result, int argsCount, const dsc** args);
285287

@@ -338,6 +340,8 @@ dsc* evlSign(thread_db* tdbb, const SysFunction* function, const NestValueArray&
338340
dsc* evlSqrt(thread_db* tdbb, const SysFunction* function, const NestValueArray& args, impure_value* impure);
339341
dsc* evlSystemPrivilege(thread_db* tdbb, const SysFunction* function, const NestValueArray& args, impure_value* impure);
340342
dsc* evlTrunc(thread_db* tdbb, const SysFunction* function, const NestValueArray& args, impure_value* impure);
343+
dsc* evlUnicodeChar(thread_db* tdbb, const SysFunction* function, const NestValueArray& args, impure_value* impure);
344+
dsc* evlUnicodeVal(thread_db* tdbb, const SysFunction* function, const NestValueArray& args, impure_value* impure);
341345
dsc* evlUuidToChar(thread_db* tdbb, const SysFunction* function, const NestValueArray& args, impure_value* impure);
342346

343347

@@ -645,6 +649,13 @@ void setParamsDateDiff(DataTypeUtilBase*, const SysFunction*, int argsCount, dsc
645649
}
646650

647651

652+
// Parameter-typing callback registered for UNICODE_VAL in SysFunction::functions.
// If at least one argument is supplied and the first argument's descriptor
// reports isUnknown(), that descriptor is set to text of length 4 in CS_UTF8.
// Arguments whose type is already known are left untouched.
void setParamsUnicodeVal(DataTypeUtilBase*, const SysFunction*, int argsCount, dsc** args)
653+
{
654+
// NOTE(review): length 4 presumably matches the maximum byte length of a
// single UTF-8 character -- confirm.
if (argsCount >= 1 && args[0]->isUnknown())
655+
args[0]->makeText(4, CS_UTF8);
656+
}
657+
658+
648659
void setParamVarying(dsc* param, USHORT textType, bool condition)
649660
{
650661
if (!param)
@@ -1734,6 +1745,24 @@ void makeTrunc(DataTypeUtilBase*, const SysFunction* function, dsc* result,
17341745
}
17351746

17361747

1748+
// Result-descriptor callback registered for UNICODE_CHAR in SysFunction::functions.
// Describes the function result from its single argument descriptor:
//  - if the argument descriptor isNull(), the result is made a null string;
//  - otherwise the result is text of length 4 in ttype_utf8, and its
//    nullability is copied from the argument.
void makeUnicodeChar(DataTypeUtilBase*, const SysFunction* function, dsc* result,
1749+
int argsCount, const dsc** args)
1750+
{
1751+
// UNICODE_CHAR is registered with identical min/max argument counts (1, 1).
fb_assert(argsCount == function->minArgCount);
1752+
1753+
const dsc* value = args[0];
1754+
1755+
if (value->isNull())
1756+
{
1757+
result->makeNullString();
1758+
return;
1759+
}
1760+
1761+
// NOTE(review): length 4 presumably reserves room for the longest UTF-8
// encoding of one code point -- confirm.
result->makeText(4, ttype_utf8);
1762+
result->setNullable(value->isNullable());
1763+
}
1764+
1765+
17371766
void makeUuid(DataTypeUtilBase*, const SysFunction* function, dsc* result,
17381767
int argsCount, const dsc** args)
17391768
{
@@ -6331,6 +6360,75 @@ dsc* evlSystemPrivilege(thread_db* tdbb, const SysFunction*, const NestValueArra
63316360
return &impure->vlu_desc;
63326361
}
63336362

6363+
6364+
// Evaluation callback registered for UNICODE_CHAR in SysFunction::functions.
// Evaluates the single argument, reads it as a code point via MOV_get_long,
// encodes it as UTF-8 and stores the resulting text in the impure value.
// Returns NULL when the request's req_null flag is set after evaluating the
// argument; otherwise returns a pointer to impure->vlu_desc.
// Raises isc_arith_except / isc_malformed_string when U8_LENGTH(code) is 0.
dsc* evlUnicodeChar(thread_db* tdbb, const SysFunction*, const NestValueArray& args,
6365+
impure_value* impure)
6366+
{
6367+
fb_assert(args.getCount() == 1);
6368+
6369+
jrd_req* request = tdbb->getRequest();
6370+
6371+
const dsc* value = EVL_expr(tdbb, request, args[0]);
6372+
if (request->req_flags & req_null)	// return NULL if value is NULL
6373+
return NULL;
6374+
6375+
const UChar32 code = MOV_get_long(tdbb, value, 0);
6376+
6377+
// A zero length from U8_LENGTH is treated as "not encodable". The README
// entry for UNICODE_CHAR documents that surrogates (0xD800 to 0xDFFF) and
// values that are not valid UTF-32 code points raise an error.
if (U8_LENGTH(code) == 0)
6378+
status_exception::raise(Arg::Gds(isc_arith_except) << Arg::Gds(isc_malformed_string));
6379+
6380+
// NOTE(review): U8_APPEND_UNSAFE is the unchecked variant; the 4-byte buffer
// is presumably sufficient because the code point was validated above --
// confirm against the ICU macro documentation.
UCHAR buffer[4];
6381+
int len = 0;
6382+
U8_APPEND_UNSAFE(buffer, len, code);
6383+
6384+
// Wrap the encoded bytes in a temporary descriptor and copy them into the
// impure area, so the returned descriptor does not point at the local buffer.
// NOTE(review): assumes EVL_make_value copies the data -- confirm.
dsc result;
6385+
result.makeText(len, ttype_utf8, buffer);
6386+
EVL_make_value(tdbb, &result, impure);
6387+
6388+
return &impure->vlu_desc;
6389+
}
6390+
6391+
6392+
// Evaluation callback registered for UNICODE_VAL in SysFunction::functions.
// Evaluates the single argument, obtains it as a CS_UTF8 string, converts the
// start of that string to UTF-16 and stores the first code point as a long in
// the impure value. Stores 0 when the conversion produced no output.
// Returns NULL when the request's req_null flag is set after evaluating the
// argument; otherwise returns a pointer to impure->vlu_desc.
// Raises isc_arith_except / isc_transliteration_failed on any conversion error
// other than CS_TRUNCATION_ERROR.
dsc* evlUnicodeVal(thread_db* tdbb, const SysFunction*, const NestValueArray& args,
6393+
impure_value* impure)
6394+
{
6395+
fb_assert(args.getCount() == 1);
6396+
6397+
jrd_req* request = tdbb->getRequest();
6398+
6399+
const dsc* value = EVL_expr(tdbb, request, args[0]);
6400+
if (request->req_flags & req_null)	// return NULL if value is NULL
6401+
return NULL;
6402+
6403+
MoveBuffer buffer;
6404+
UCHAR* str;
6405+
int len = MOV_make_string2(tdbb, value, CS_UTF8, &str, buffer);
6406+
6407+
// Two UTF-16 units: enough for one BMP character or one surrogate pair.
USHORT dst[2];
6408+
USHORT errCode = 0;
6409+
ULONG errPosition;
6410+
ULONG dstLen = UnicodeUtil::utf8ToUtf16(len, str, sizeof(dst), dst, &errCode, &errPosition);
6411+
6412+
// Truncation is tolerated here -- presumably because only the first
// character is needed and longer input cannot fit in dst (confirm).
if (errCode != 0 && errCode != CS_TRUNCATION_ERROR)
6413+
status_exception::raise(Arg::Gds(isc_arith_except) << Arg::Gds(isc_transliteration_failed));
6414+
6415+
// NOTE(review): dstLen is compared against 2 and 4 and the capacity passed
// above is sizeof(dst), so it appears to be a byte count -- confirm.
if (dstLen == 0)
6416+
impure->vlu_misc.vlu_long = 0;
6417+
else if (dstLen == 2 || !U_IS_SURROGATE(dst[0]))
6418+
impure->vlu_misc.vlu_long = dst[0];
6419+
else if (dstLen == 4 && U16_IS_LEAD(dst[0]) && U16_IS_TRAIL(dst[1]))
6420+
impure->vlu_misc.vlu_long = U16_GET_SUPPLEMENTARY(dst[0], dst[1]);
6421+
else
6422+
{
6423+
// Not expected to be reached: asserts in debug builds, otherwise yields 0.
fb_assert(false);
6424+
impure->vlu_misc.vlu_long = 0;
6425+
}
6426+
6427+
impure->vlu_desc.makeLong(0, &impure->vlu_misc.vlu_long);
6428+
6429+
return &impure->vlu_desc;
6430+
}
6431+
63346432
} // anonymous namespace
63356433

63366434

@@ -6417,6 +6515,8 @@ const SysFunction SysFunction::functions[] =
64176515
{"TANH", 1, 1, setParamsDouble, makeDoubleResult, evlStdMath, (void*) trfTanh},
64186516
{"TOTALORDER", 2, 2, setParamsDecFloat, makeShortResult, evlCompare, (void*) funTotalOrd},
64196517
{"TRUNC", 1, 2, setParamsRoundTrunc, makeTrunc, evlTrunc, NULL},
6518+
{"UNICODE_CHAR", 1, 1, setParamsInteger, makeUnicodeChar, evlUnicodeChar, NULL},
6519+
{"UNICODE_VAL", 1, 1, setParamsUnicodeVal, makeLongResult, evlUnicodeVal, NULL},
64206520
{"UUID_TO_CHAR", 1, 1, setParamsUuidToChar, makeUuidToChar, evlUuidToChar, NULL},
64216521
{"", 0, 0, NULL, NULL, NULL, NULL}
64226522
};

0 commit comments

Comments
 (0)