Skip to content

Postgresql boolean (t/f) JSON parsing #443

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
WTM-Jarryd opened this issue Mar 16, 2023 · 1 comment
Closed

Postgresql boolean (t/f) JSON parsing #443

WTM-Jarryd opened this issue Mar 16, 2023 · 1 comment

Comments

@WTM-Jarryd
Copy link

Hello,

This isn't so much of an issue, but rather an addition to the JSON parser for GetJSONFieldOrObjectOrArray (Ln 57200) and GetJSONField (Ln 56833).

I am using mORMot v1.18 (I don't have an exact build number, but I have compared against latest SynCommons file).

I was having difficulty handling boolean values returned from Postgresql 10.19.
PG was returning booleans as t or f

The values were not wrapped in quotes in the JSON so they were not being handled as text.

I have modified my local SynCommons.pas to parse these t / f values the same way true / false are parsed.
Apologies for the full method posts. I'm not sure how to submit it otherwise.
The changes are near to the bottom sections of the methods. Comments start with // added

I hope this helps others with handling Postgresql integration.

GetJSONField

// decode a JSON field into an UTF-8 encoded buffer, stored inplace of input buffer
function GetJSONField(P: PUTF8Char; out PDest: PUTF8Char;
  wasString: PBoolean; EndOfObject: PUTF8Char; Len: PInteger): PUTF8Char;
var D: PUTF8Char;
    c4,surrogate,j: integer;
    c: AnsiChar;
    b: byte;
    jsonset: PJsonCharSet;
    {$ifdef CPUX86NOTPIC} tab: TNormTableByte absolute ConvertHexToBin;
    {$else} tab: PNormTableByte; {$endif}
label slash,num,lit;
begin // see http://www.ietf.org/rfc/rfc4627.txt
  if wasString<>nil then
    wasString^ := false; // not a string by default
  if Len<>nil then
    Len^ := 0; // avoid buffer overflow on parsing error
  PDest := nil; // PDest=nil indicates parsing error (e.g. unexpected #0 end)
  result := nil;
  if P=nil then exit;
  if P^<=' ' then repeat inc(P); if P^=#0 then exit; until P^>' ';
  case P^ of
  '"': begin // " -> unescape P^ into D^
    if wasString<>nil then
      wasString^ := true;
    inc(P);
    result := P;
    D := P;
    repeat
      c := P^;
      if c=#0 then exit else
      if c='"' then break else
      if c='\' then goto slash;
      inc(P);
      D^ := c;
      inc(D);
      continue;
slash:inc(P); // unescape JSON string
      c := P^;
      if (c='"') or (c='\') then begin
lit:    inc(P);
        D^ := c; // most common case
        inc(D);
        continue;
      end else
      if c=#0 then
        exit else // to avoid potential buffer overflow issue on \#0
      if c='b' then
        c := #8 else
      if c='t' then
        c := #9 else
      if c='n' then
        c := #10 else
      if c='f' then
        c := #12 else
      if c='r' then
        c := #13 else
      if c='u' then begin
        // inlined decoding of '\u0123' UTF-16 codepoint(s) into UTF-8
        {$ifndef CPUX86NOTPIC}tab := @ConvertHexToBin;{$endif}
        c4 := tab[ord(P[1])];
        if c4<=15 then begin
          b := tab[ord(P[2])];
          if b<=15 then begin
            c4 := c4 shl 4;
            c4 := c4 or b;
            b := tab[ord(P[3])];
            if b<=15 then begin
              c4 := c4 shl 4;
              c4 := c4 or b;
              b := tab[ord(P[4])];
              if b<=15 then begin
                c4 := c4 shl 4;
                c4 := c4 or b;
                case c4 of
                0: begin
                  D^ := '?'; // \u0000 is an invalid value
                  inc(D);
                end;
                1..$7f: begin
                  D^ := AnsiChar(c4);
                  inc(D);
                end;
                $80..$7ff: begin
                  D[0] := AnsiChar($C0 or (c4 shr 6));
                  D[1] := AnsiChar($80 or (c4 and $3F));
                  inc(D,2);
                end;
                UTF16_HISURROGATE_MIN..UTF16_LOSURROGATE_MAX:
                  if PWord(P+5)^=ord('\')+ord('u') shl 8 then begin
                    inc(P,6); // optimistic conversion (no check)
                    surrogate := (ConvertHexToBin[ord(P[1])] shl 12)+
                                 (ConvertHexToBin[ord(P[2])] shl 8)+
                                 (ConvertHexToBin[ord(P[3])] shl 4)+
                                  ConvertHexToBin[ord(P[4])];
                    case c4 of // inlined UTF16CharToUtf8()
                    UTF16_HISURROGATE_MIN..UTF16_HISURROGATE_MAX:
                      c4 := ((c4-$D7C0)shl 10)+(surrogate xor UTF16_LOSURROGATE_MIN);
                    UTF16_LOSURROGATE_MIN..UTF16_LOSURROGATE_MAX:
                      c4 := ((surrogate-$D7C0)shl 10)+(c4 xor UTF16_LOSURROGATE_MIN);
                    end;
                    case c4 of
                    0..$7ff: b := 2;
                    $800..$ffff: b := 3;
                    $10000..$1FFFFF: b := 4;
                    $200000..$3FFFFFF: b := 5;
                    else b := 6;
                    end;
                    for j := b-1 downto 1 do begin
                      D[j] := AnsiChar((c4 and $3f)+$80);
                      c4 := c4 shr 6;
                    end;
                    D^ := AnsiChar(Byte(c4) or UTF8_FIRSTBYTE[b]);
                    inc(D,b);
                  end else begin
                    D^ := '?'; // unexpected surrogate without its pair
                    inc(D);
                  end;
                else begin
                  D[0] := AnsiChar($E0 or (c4 shr 12));
                  D[1] := AnsiChar($80 or ((c4 shr 6) and $3F));
                  D[2] := AnsiChar($80 or (c4 and $3F));
                  inc(D,3);
                end;
                end;
                inc(P,5);
                continue;
                end;
              end;
            end;
          end;
          c := '?'; // bad formated hexa number -> '?0123'
        end;
      goto lit;
    until false;
    // here P^='"'
    D^ := #0; // make zero-terminated
    if Len<>nil then
      Len^ := D-result;
    inc(P);
    if P^=#0 then
      exit;
  end;
  '0':
    if P[1] in ['0'..'9'] then // 0123 excluded by JSON!
      exit else // leave PDest=nil for unexpected end
      goto num;// may be 0.123
  '-','1'..'9': begin // numerical field: all chars before end of field
num:result := P;
    jsonset := @JSON_CHARS;
    repeat
      if not (jcDigitFloatChar in jsonset[P^]) then
        break;
      inc(P);
    until false;
    if P^=#0 then
      exit;
    if Len<>nil then
      Len^ := P-result;
    if P^<=' ' then begin
      P^ := #0; // force numerical field with no trailing ' '
      inc(P);
    end;
  end;
  'n':
    if (PInteger(P)^=NULL_LOW) and (jcEndOfJSONValueField in JSON_CHARS[P[4]])  then begin
      result := nil; // null -> returns nil and wasString=false
      if Len<>nil then
        Len^ := 0; // when result is converted to string
      inc(P,4);
    end else
      exit;
  'f':
    if (PInteger(P+1)^=FALSE_LOW2) and (jcEndOfJSONValueField in JSON_CHARS[P[5]]) then begin
      result := P; // false -> returns 'false' and wasString=false
      if Len<>nil then
        Len^ := 5;
      inc(P,5);
    end else
    if (jcEndOfJSONValueField in JSON_CHARS[P[1]]) then begin // added parsing for singular unquoted 'f' boolean value
      Result := P;
      if Len<>nil then
        Len^ := 1;
      inc(P, 1);
    end else
      exit;
  't':
    if (PInteger(P)^=TRUE_LOW) and (jcEndOfJSONValueField in JSON_CHARS[P[4]]) then begin
      result := P; // true -> returns 'true' and wasString=false
      if Len<>nil then
        Len^ := 4;
      inc(P,4);
    end else
    if (jcEndOfJSONValueField in JSON_CHARS[P[1]]) then begin // added parsing for singular unquoted 't' boolean value
      Result := P;
      if Len<>nil then
        Len^ := 1;
      inc(P, 1);
    end else
      exit;
  else
    exit; // PDest=nil to indicate error
  end;
  jsonset := @JSON_CHARS;
  while not (jcEndOfJSONField in jsonset[P^]) do begin
    if P^=#0 then
      exit; // leave PDest=nil for unexpected end
    inc(P);
  end;
  if EndOfObject<>nil then
    EndOfObject^ := P^;
  P^ := #0; // make zero-terminated
  PDest := @P[1];
  if P[1]=#0 then
    PDest := nil;
end;

GetJSONFieldOrObjectOrArray

function GetJSONFieldOrObjectOrArray(var P: PUTF8Char; wasString: PBoolean;
  EndOfObject: PUTF8Char; HandleValuesAsObjectOrArray: Boolean;
  NormalizeBoolean: Boolean; Len: PInteger): PUTF8Char;
var Value: PUTF8Char;
    wStr: boolean;
begin
  result := nil;
  if P=nil then
    exit;
  while ord(P^) in [1..32] do inc(P);
  if HandleValuesAsObjectOrArray and (P^ in ['{','[']) then begin
    Value := P;
    P := GotoNextJSONObjectOrArray(P);
    if P=nil then
      exit; // invalid content
    if Len<>nil then
      Len^ := P-Value;
    if wasString<>nil then
      wasString^ := false; // was object or array
    while ord(P^) in [1..32] do inc(P);
    if EndOfObject<>nil then
     EndOfObject^ := P^;
    P^ := #0; // make zero-terminated
    if P[1]=#0 then
      P := nil else
      inc(P);
    result := Value;
  end else begin
    result := GetJSONField(P,P,@wStr,EndOfObject,Len);
    if wasString<>nil then
      wasString^ := wStr;
    if not wStr and NormalizeBoolean and (result<>nil) then begin
      if (PInteger(result)^=TRUE_LOW) or (result^='t') then // added check for 't' char boolean normalisation
        result := pointer(SmallUInt32UTF8[1]) else   // normalize true -> 1
      if (PInteger(result)^=FALSE_LOW) or (result^='f') then // added check for 'f' char boolean normalisation
        result := pointer(SmallUInt32UTF8[0]) else   // normalize false -> 0
        exit;
      if Len<>nil then
        Len^ := 1;
    end;
  end;
end;
@synopse
Copy link
Owner

synopse commented Mar 16, 2023

IMHO this is not the way to fix the problem.
t and f are no valid JSON values.
The fix should be not in the JSON parsing, but in the JSON emitting.

How is the JSON generated from PostgreSQL?

Please use the forum for discussion on this subject. I will close this issue by now.

@synopse synopse closed this as completed Mar 16, 2023
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants