Skip to content

Commit ebde45a

Browse files
stephenLYZofekisr
authored andcommitted
fix(sql): unable to filter text with quotes (apache#17881)
1 parent ae4fb29 commit ebde45a

File tree

5 files changed

+116
-62
lines changed

5 files changed

+116
-62
lines changed

superset/connectors/base/models.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -398,7 +398,7 @@ def handle_single_value(value: Optional[FilterValue]) -> Optional[FilterValue]:
398398
):
399399
return datetime.utcfromtimestamp(value / 1000)
400400
if isinstance(value, str):
401-
value = value.strip("\t\n'\"")
401+
value = value.strip("\t\n")
402402

403403
if target_column_type == utils.GenericDataType.NUMERIC:
404404
# For backwards compatibility and edge cases

tests/integration_tests/druid_func_tests.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,7 @@ def test_get_filters_extracts_values_in_quotes(self):
359359
col = DruidColumn(column_name="A")
360360
column_dict = {"A": col}
361361
res = DruidDatasource.get_filters([filtr], [], column_dict)
362-
self.assertEqual("a", res.filter["filter"]["value"])
362+
self.assertEqual('"a"', res.filter["filter"]["value"])
363363

364364
@unittest.skipUnless(
365365
SupersetTestCase.is_module_installed("pydruid"), "pydruid not installed"

tests/integration_tests/druid_func_tests_sip38.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -364,7 +364,7 @@ def test_get_filters_extracts_values_in_quotes(self):
364364
col = DruidColumn(column_name="A")
365365
column_dict = {"A": col}
366366
res = DruidDatasource.get_filters([filtr], [], column_dict)
367-
self.assertEqual("a", res.filter["filter"]["value"])
367+
self.assertEqual('"a"', res.filter["filter"]["value"])
368368

369369
@unittest.skipUnless(
370370
SupersetTestCase.is_module_installed("pydruid"), "pydruid not installed"

tests/integration_tests/query_context_tests.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -380,7 +380,7 @@ def test_query_response_type(self):
380380
assert re.search(r'[`"\[]?num[`"\]]? IS NOT NULL', sql_text)
381381
assert re.search(
382382
r"""NOT \([`"\[]?name[`"\]]? IS NULL[\s\n]* """
383-
r"""OR [`"\[]?name[`"\]]? IN \('abc'\)\)""",
383+
r"""OR [`"\[]?name[`"\]]? IN \('"abc"'\)\)""",
384384
sql_text,
385385
)
386386

tests/integration_tests/sqla_models_tests.py

+112-58
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
load_birth_names_dashboard_with_slices,
4848
load_birth_names_data,
4949
)
50+
from tests.integration_tests.test_app import app
5051

5152
from .base_tests import SupersetTestCase
5253

@@ -475,80 +476,133 @@ def test_labels_expected_on_mutated_query(self):
475476
db.session.delete(database)
476477
db.session.commit()
477478

478-
def test_values_for_column(self):
479+
480+
@pytest.fixture
481+
def text_column_table():
482+
with app.app_context():
479483
table = SqlaTable(
480-
table_name="test_null_in_column",
484+
table_name="text_column_table",
481485
sql=(
482486
"SELECT 'foo' as foo "
483487
"UNION SELECT '' "
484488
"UNION SELECT NULL "
485-
"UNION SELECT 'null'"
489+
"UNION SELECT 'null' "
490+
"UNION SELECT '\"text in double quotes\"' "
491+
"UNION SELECT '''text in single quotes''' "
492+
"UNION SELECT 'double quotes \" in text' "
493+
"UNION SELECT 'single quotes '' in text' "
486494
),
487495
database=get_example_database(),
488496
)
489497
TableColumn(column_name="foo", type="VARCHAR(255)", table=table)
490498
SqlMetric(metric_name="count", expression="count(*)", table=table)
499+
yield table
491500

492-
# null value, empty string and text should be retrieved
493-
with_null = table.values_for_column(column_name="foo", limit=10000)
494-
assert None in with_null
495-
assert len(with_null) == 4
496501

497-
# null value should be replaced
498-
result_object = table.query(
499-
{
500-
"metrics": ["count"],
501-
"filter": [{"col": "foo", "val": [NULL_STRING], "op": "IN"}],
502-
"is_timeseries": False,
503-
}
504-
)
505-
assert result_object.df["count"][0] == 1
502+
def test_values_for_column_on_text_column(text_column_table):
503+
# null value, empty string and text should be retrieved
504+
with_null = text_column_table.values_for_column(column_name="foo", limit=10000)
505+
assert None in with_null
506+
assert len(with_null) == 8
506507

507-
# also accept None value
508-
result_object = table.query(
509-
{
510-
"metrics": ["count"],
511-
"filter": [{"col": "foo", "val": [None], "op": "IN"}],
512-
"is_timeseries": False,
513-
}
514-
)
515-
assert result_object.df["count"][0] == 1
516508

517-
# empty string should be replaced
518-
result_object = table.query(
519-
{
520-
"metrics": ["count"],
521-
"filter": [{"col": "foo", "val": [EMPTY_STRING], "op": "IN"}],
522-
"is_timeseries": False,
523-
}
524-
)
525-
assert result_object.df["count"][0] == 1
509+
def test_filter_on_text_column(text_column_table):
510+
table = text_column_table
511+
# null value should be replaced
512+
result_object = table.query(
513+
{
514+
"metrics": ["count"],
515+
"filter": [{"col": "foo", "val": [NULL_STRING], "op": "IN"}],
516+
"is_timeseries": False,
517+
}
518+
)
519+
assert result_object.df["count"][0] == 1
526520

527-
# also accept "" string
528-
result_object = table.query(
529-
{
530-
"metrics": ["count"],
531-
"filter": [{"col": "foo", "val": [""], "op": "IN"}],
532-
"is_timeseries": False,
533-
}
534-
)
535-
assert result_object.df["count"][0] == 1
521+
# also accept None value
522+
result_object = table.query(
523+
{
524+
"metrics": ["count"],
525+
"filter": [{"col": "foo", "val": [None], "op": "IN"}],
526+
"is_timeseries": False,
527+
}
528+
)
529+
assert result_object.df["count"][0] == 1
536530

537-
# both replaced
538-
result_object = table.query(
539-
{
540-
"metrics": ["count"],
541-
"filter": [
542-
{
543-
"col": "foo",
544-
"val": [EMPTY_STRING, NULL_STRING, "null", "foo"],
545-
"op": "IN",
546-
}
547-
],
548-
"is_timeseries": False,
549-
}
550-
)
551-
assert result_object.df["count"][0] == 4
531+
# empty string should be replaced
532+
result_object = table.query(
533+
{
534+
"metrics": ["count"],
535+
"filter": [{"col": "foo", "val": [EMPTY_STRING], "op": "IN"}],
536+
"is_timeseries": False,
537+
}
538+
)
539+
assert result_object.df["count"][0] == 1
540+
541+
# also accept "" string
542+
result_object = table.query(
543+
{
544+
"metrics": ["count"],
545+
"filter": [{"col": "foo", "val": [""], "op": "IN"}],
546+
"is_timeseries": False,
547+
}
548+
)
549+
assert result_object.df["count"][0] == 1
550+
551+
# both replaced
552+
result_object = table.query(
553+
{
554+
"metrics": ["count"],
555+
"filter": [
556+
{
557+
"col": "foo",
558+
"val": [EMPTY_STRING, NULL_STRING, "null", "foo"],
559+
"op": "IN",
560+
}
561+
],
562+
"is_timeseries": False,
563+
}
564+
)
565+
assert result_object.df["count"][0] == 4
566+
567+
# should filter text in double quotes
568+
result_object = table.query(
569+
{
570+
"metrics": ["count"],
571+
"filter": [{"col": "foo", "val": ['"text in double quotes"'], "op": "IN",}],
572+
"is_timeseries": False,
573+
}
574+
)
575+
assert result_object.df["count"][0] == 1
576+
577+
# should filter text in single quotes
578+
result_object = table.query(
579+
{
580+
"metrics": ["count"],
581+
"filter": [{"col": "foo", "val": ["'text in single quotes'"], "op": "IN",}],
582+
"is_timeseries": False,
583+
}
584+
)
585+
assert result_object.df["count"][0] == 1
586+
587+
# should filter text with double quote
588+
result_object = table.query(
589+
{
590+
"metrics": ["count"],
591+
"filter": [{"col": "foo", "val": ['double quotes " in text'], "op": "IN",}],
592+
"is_timeseries": False,
593+
}
594+
)
595+
assert result_object.df["count"][0] == 1
596+
597+
# should filter text with single quote
598+
result_object = table.query(
599+
{
600+
"metrics": ["count"],
601+
"filter": [{"col": "foo", "val": ["single quotes ' in text"], "op": "IN",}],
602+
"is_timeseries": False,
603+
}
604+
)
605+
assert result_object.df["count"][0] == 1
552606

553607

554608
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)