andialbrecht · Oct 8, 2020 · Oct 8, 2020 · Oct 19, 2020 · Oct 19, 2020 · Dec 12, 2020
Showing with 69 additions and 8 deletions.

+1 −0 AUTHORS

+24 −0 CHANGELOG

+1 −1 sqlparse/__init__.py

+4 −1 sqlparse/filters/others.py

+5 −2 sqlparse/keywords.py

+17 −0 tests/test_format.py

+10 −4 tests/test_parse.py

+7 −0 tests/test_regressions.py
diff --git a/AUTHORS b/AUTHORS
@@ -30,6 +30,7 @@ Alphabetical list of contributors:
 * hurcy <cinyoung.hur@gmail.com>
 * Ian Robertson <ian.robertson@capitalone.com>
 * JacekPliszka <Jacek.Pliszka@gmail.com>
+* Jean-Martin Archer <jm@jmartin.ca>
 * Jesús Leganés Combarro "Piranna" <piranna@gmail.com>
 * Johannes Hoff <johshoff@gmail.com>
 * John Bodley <john.bodley@airbnb.com>

diff --git a/CHANGELOG b/CHANGELOG
@@ -1,7 +1,31 @@
+Release 0.4.2 (Sep 10, 2021)
+----------------------------
+
+Notable Changes
+
+* IMPORTANT: This release fixes a security vulnerability in the
+  strip comments filter. The filter used a regular expression that
+  was vulnerable to ReDoS (Regular Expression Denial of Service).
+  See the security advisory for details: https://github.com/andialbrecht/sqlparse/security/advisories/GHSA-p5w8-wqhj-9hhf
+  The vulnerability was discovered by @erik-krogh and @yoff from
+  GitHub Security Lab (GHSL). Thanks for reporting!
+
+Enhancements
+
+* Add ELSIF as keyword (issue584).
+* Add CONFLICT and ON_ERROR_STOP keywords (pr595, by j-martin).
+
+Bug Fixes
+
+* Fix parsing of backticks (issue588).
+* Fix parsing of scientific numbers (issue399).
+
+
 Release 0.4.1 (Oct 08, 2020)
 ----------------------------
 
 Bug Fixes
+
 * Just removed a debug print statement, sorry...
 
 

diff --git a/sqlparse/__init__.py b/sqlparse/__init__.py
@@ -16,7 +16,7 @@
 from sqlparse import formatter
 
 
-__version__ = '0.4.1'
+__version__ = '0.4.2'
 __all__ = ['engine', 'filters', 'formatter', 'sql', 'tokens', 'cli']
 
 

diff --git a/sqlparse/filters/others.py b/sqlparse/filters/others.py
@@ -22,7 +22,10 @@ def get_next_comment():
         def _get_insert_token(token):
             """Returns either a whitespace or the line breaks from token."""
             # See issue484 why line breaks should be preserved.
-            m = re.search(r'((\r\n|\r|\n)+) *$', token.value)
+            # Note: The actual value for a line break is replaced by \n
+            # in SerializerUnicode, which is executed in the
+            # postprocessing stage.
+            m = re.search(r'((\r|\n)+) *$', token.value)
             if m is not None:
                 return sql.Token(T.Whitespace.Newline, m.groups()[0])
             else:

diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py
@@ -62,7 +62,7 @@ def is_keyword(value):
         (r'(?<=\.)[A-ZÀ-Ü]\w*', tokens.Name),  # .'Name'
         (r'[A-ZÀ-Ü]\w*(?=\()', tokens.Name),  # side effect: change kw to func
         (r'-?0x[\dA-F]+', tokens.Number.Hexadecimal),
-        (r'-?\d*(\.\d+)?E-?\d+', tokens.Number.Float),
+        (r'-?\d+(\.\d+)?E-?\d+', tokens.Number.Float),
         (r'(?![_A-ZÀ-Ü])-?(\d+(\.\d*)|\.\d+)(?![_A-ZÀ-Ü])',
          tokens.Number.Float),
         (r'(?![_A-ZÀ-Ü])-?\d+(?![_A-ZÀ-Ü])', tokens.Number.Integer),
@@ -93,7 +93,7 @@ def is_keyword(value):
         (r'[0-9_A-ZÀ-Ü][_$#\w]*', is_keyword),
         (r'[;:()\[\],\.]', tokens.Punctuation),
         (r'[<>=~!]+', tokens.Operator.Comparison),
-        (r'[+/@#%^&|`?^-]+', tokens.Operator),
+        (r'[+/@#%^&|^-]+', tokens.Operator),
     ]}
 
 FLAGS = re.IGNORECASE | re.UNICODE
@@ -745,6 +745,7 @@ def is_keyword(value):
     'DOUBLE': tokens.Keyword,
     'DUMP': tokens.Keyword,
 
+    'ELSIF': tokens.Keyword,
     'EVENTS': tokens.Keyword,
     'EXCEPTIONS': tokens.Keyword,
     'EXPLAIN': tokens.Keyword,
@@ -833,6 +834,7 @@ def is_keyword(value):
 
 # PostgreSQL Syntax
 KEYWORDS_PLPGSQL = {
+    'CONFLICT': tokens.Keyword,
     'WINDOW': tokens.Keyword,
     'PARTITION': tokens.Keyword,
     'OVER': tokens.Keyword,
@@ -841,6 +843,7 @@ def is_keyword(value):
     'PLPGSQL': tokens.Keyword,
     'INHERIT': tokens.Keyword,
     'INDEXES': tokens.Keyword,
+    'ON_ERROR_STOP': tokens.Keyword,
 
     'BYTEA': tokens.Keyword,
     'BIGSERIAL': tokens.Keyword,

diff --git a/tests/test_format.py b/tests/test_format.py
@@ -84,6 +84,23 @@ def test_strip_comments_multi(self):
         res = sqlparse.format(sql, strip_comments=True)
         assert res == 'select (select 2)'
 
+    def test_strip_comments_preserves_linebreak(self):
+        sql = 'select * -- a comment\r\nfrom foo'
+        res = sqlparse.format(sql, strip_comments=True)
+        assert res == 'select *\nfrom foo'
+        sql = 'select * -- a comment\nfrom foo'
+        res = sqlparse.format(sql, strip_comments=True)
+        assert res == 'select *\nfrom foo'
+        sql = 'select * -- a comment\rfrom foo'
+        res = sqlparse.format(sql, strip_comments=True)
+        assert res == 'select *\nfrom foo'
+        sql = 'select * -- a comment\r\n\r\nfrom foo'
+        res = sqlparse.format(sql, strip_comments=True)
+        assert res == 'select *\n\nfrom foo'
+        sql = 'select * -- a comment\n\nfrom foo'
+        res = sqlparse.format(sql, strip_comments=True)
+        assert res == 'select *\n\nfrom foo'
+
     def test_strip_ws(self):
         f = lambda sql: sqlparse.format(sql, strip_whitespace=True)
         s = 'select\n* from      foo\n\twhere  ( 1 = 2 )\n'

diff --git a/tests/test_parse.py b/tests/test_parse.py
@@ -188,11 +188,16 @@ def test_placeholder(ph):
     assert p[0].ttype is T.Name.Placeholder
 
 
-@pytest.mark.parametrize('num', ['6.67428E-8', '1.988e33', '1e-12'])
-def test_scientific_numbers(num):
+@pytest.mark.parametrize('num, expected', [
+    ('6.67428E-8', T.Number.Float),
+    ('1.988e33', T.Number.Float),
+    ('1e-12', T.Number.Float),
+    ('e1', None),
+])
+def test_scientific_numbers(num, expected):
     p = sqlparse.parse(num)[0].tokens
     assert len(p) == 1
-    assert p[0].ttype is T.Number.Float
+    assert p[0].ttype is expected
 
 
 def test_single_quotes_are_strings():
@@ -336,7 +341,8 @@ def test_pprint():
         "|  |  `- 0 Name 'd0'",
         "|  |- 10 Punctuation ','",
         "|  |- 11 Whitespace ' '",
-        "|  `- 12 Float 'e0'",
+        "|  `- 12 Identifier 'e0'",
+        "|     `- 0 Name 'e0'",
         "|- 3 Whitespace ' '",
         "|- 4 Keyword 'from'",
         "|- 5 Whitespace ' '",

diff --git a/tests/test_regressions.py b/tests/test_regressions.py
@@ -411,3 +411,10 @@ def test_format_invalid_where_clause():
     # did raise ValueError
     formatted = sqlparse.format('where, foo', reindent=True)
     assert formatted == 'where, foo'
+
+
+def test_splitting_at_and_backticks_issue588():
+    splitted = sqlparse.split(
+        'grant foo to user1@`myhost`; grant bar to user1@`myhost`;')
+    assert len(splitted) == 2
+    assert splitted[-1] == 'grant bar to user1@`myhost`;'