Skip to content

Commit 4d56e12

Browse files
Do not hoist expressions from if bodies (MaterializeInc#3395)
Scalar CSE was hoisting common expressions in then and els expressions, without certainty that they would be evaluated. This led to queries like ``` SELECT CASE WHEN b = 0 THEN 0 ELSE 1/b END, CASE WHEN b != 0 THEN 1/b ELSE 0 END FROM x ``` unconditionally evaluating 1/b, because it would be hoisted prematurely. We don't do that now. We could be more advanced and determine the expressions in the intersection of the then and els expressions, but that is future work for now.
1 parent 8b666b7 commit 4d56e12

File tree

5 files changed

+70
-27
lines changed

5 files changed

+70
-27
lines changed

ci/slt/slt.sh

+1
Original file line numberDiff line numberDiff line change
@@ -37,5 +37,6 @@ sqllogictest \
3737
sqllogictest \
3838
-v \
3939
test/sqllogictest/*.slt \
40+
test/sqllogictest/transform/*.slt \
4041
test/sqllogictest/sqlite/test \
4142
| tee -a target/slt.log

ci/test/slt-fast.sh

+1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ export RUST_BACKTRACE=full
2525

2626
tests=(
2727
test/sqllogictest/*.slt
28+
test/sqllogictest/transform/*.slt
2829
# test/sqllogictest/sqlite/test/evidence/in1.test
2930
test/sqllogictest/sqlite/test/evidence/in2.test
3031
test/sqllogictest/sqlite/test/evidence/slt_lang_aggfunc.test

src/transform/src/cse/map.rs

+52-26
Original file line numberDiff line numberDiff line change
@@ -62,39 +62,18 @@ impl Map {
6262
/// This method extracts all expression AST nodes as individual expressions,
6363
/// and builds others out of column references to them, avoiding any duplication.
6464
/// Once complete, expressions which are referred to only once are re-inlined.
65+
///
66+
/// Some care is taken to ensure that `if` expressions only memoize expresions
67+
/// they are certain to evaluate.
6568
fn memoize_and_reuse(
6669
exprs: &mut [ScalarExpr],
6770
input_arity: usize,
6871
) -> (Vec<ScalarExpr>, Vec<usize>) {
6972
let mut projection = (0..input_arity).collect::<Vec<_>>();
7073
let mut scalars = Vec::new();
7174
for expr in exprs.iter_mut() {
72-
expr.visit_mut(&mut |node| {
73-
match node {
74-
ScalarExpr::Column(index) => {
75-
// Column references need to be rewritten, but do not
76-
// need to be memoized.
77-
*index = projection[*index];
78-
}
79-
ScalarExpr::Literal(_, _) => {
80-
// Literals do not need to be memoized.
81-
}
82-
_ => {
83-
if let Some(position) = scalars.iter().position(|e| e == node) {
84-
// Any complex expression that already exists as a prior column can
85-
// be replaced by a reference to that column.
86-
*node = ScalarExpr::Column(input_arity + position);
87-
} else {
88-
// A complex expression that does not exist should be memoized, and
89-
// replaced by a reference to the column.
90-
scalars.push(std::mem::replace(
91-
node,
92-
ScalarExpr::Column(input_arity + scalars.len()),
93-
));
94-
}
95-
}
96-
}
97-
});
75+
// Carefully memoize expressions that will certainly be evaluated.
76+
memoize(expr, &mut scalars, &mut projection, input_arity);
9877

9978
// At this point `expr` should be a column reference or a literal. It may not have
10079
// been added to `scalars` and we should do so if it is a literal to make sure we
@@ -115,6 +94,53 @@ fn memoize_and_reuse(
11594
(scalars, projection)
11695
}
11796

97+
/// Visit and memoize expression nodes.
98+
///
99+
/// Importantly, we should not memoize expressions that may not be excluded by virtue of
100+
/// being guarded by `if` expressions.
101+
fn memoize(
102+
expr: &mut ScalarExpr,
103+
scalars: &mut Vec<ScalarExpr>,
104+
projection: &mut Vec<usize>,
105+
input_arity: usize,
106+
) {
107+
match expr {
108+
ScalarExpr::Column(index) => {
109+
// Column references need to be rewritten, but do not need to be memoized.
110+
*index = projection[*index];
111+
}
112+
ScalarExpr::Literal(_, _) => {
113+
// Literals do not need to be memoized.
114+
}
115+
_ => {
116+
// We should not eagerly memoize `if` branches that might not be taken.
117+
// TODO: Memoize expressions in the intersection of `then` and `els`.
118+
if let ScalarExpr::If {
119+
cond,
120+
then: _,
121+
els: _,
122+
} = expr
123+
{
124+
memoize(cond, scalars, projection, input_arity);
125+
} else {
126+
expr.visit1_mut(|e| memoize(e, scalars, projection, input_arity));
127+
}
128+
if let Some(position) = scalars.iter().position(|e| e == expr) {
129+
// Any complex expression that already exists as a prior column can
130+
// be replaced by a reference to that column.
131+
*expr = ScalarExpr::Column(input_arity + position);
132+
} else {
133+
// A complex expression that does not exist should be memoized, and
134+
// replaced by a reference to the column.
135+
scalars.push(std::mem::replace(
136+
expr,
137+
ScalarExpr::Column(input_arity + scalars.len()),
138+
));
139+
}
140+
}
141+
}
142+
}
143+
118144
/// Replaces column references that occur once with the referenced expression.
119145
///
120146
/// This method in-lines expressions that are only referenced once, and then

src/transform/src/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -252,9 +252,9 @@ impl Default for Optimizer {
252252
Box::new(crate::map_lifting::LiteralLifting),
253253
],
254254
}),
255-
Box::new(crate::fusion::project::Project),
256255
Box::new(crate::reduction_pushdown::ReductionPushdown),
257256
Box::new(crate::cse::map::Map),
257+
Box::new(crate::fusion::project::Project),
258258
Box::new(crate::reduction::FoldConstants),
259259
];
260260
Self { transforms }

test/sqllogictest/transform/scalar_cse.slt

+15
Original file line numberDiff line numberDiff line change
@@ -60,3 +60,18 @@ FROM x
6060
| Project (#4, #5, #7, #8)
6161

6262
EOF
63+
64+
65+
# Ensure we don't inline if-guarded expressions
66+
query T multiline
67+
EXPLAIN PLAN FOR SELECT
68+
CASE WHEN b = 0 THEN 0 ELSE 1/b END,
69+
CASE WHEN b != 0 THEN 1/b ELSE 0 END
70+
FROM x
71+
----
72+
%0 =
73+
| Get materialize.public.x (u1)
74+
| Map if (#1 = 0) then {0} else {(1 / #1)}, if (#1 != 0) then {(1 / #1)} else {0}
75+
| Project (#2, #3)
76+
77+
EOF

0 commit comments

Comments
 (0)