Skip to content

Commit 42232b2

Browse files
authored
Merge pull request #1728 from dolthub/james/cte3
hashjoin indexing
2 parents f515f22 + b85582c commit 42232b2

File tree

9 files changed

+383
-31
lines changed

9 files changed

+383
-31
lines changed

enginetest/queries/queries.go

+88
Original file line numberDiff line numberDiff line change
@@ -7522,6 +7522,71 @@ SELECT * FROM my_cte;`,
75227522
{2},
75237523
},
75247524
},
7525+
{
7526+
// Original Issue: https://github.com/dolthub/dolt/issues/5714
7527+
Query: `
7528+
7529+
SELECT COUNT(*)
7530+
FROM keyless
7531+
WHERE keyless.c0 IN (
7532+
7533+
WITH RECURSIVE cte(depth, i, j) AS (
7534+
SELECT 0, T1.c0, T1.c1
7535+
FROM keyless T1
7536+
WHERE T1.c0 = 0
7537+
7538+
UNION ALL
7539+
7540+
SELECT cte.depth + 1, cte.i, T2.c1 + 1
7541+
FROM cte, keyless T2
7542+
WHERE cte.depth = T2.c0
7543+
)
7544+
7545+
SELECT U0.c0
7546+
FROM keyless U0, cte
7547+
WHERE cte.j = keyless.c0
7548+
7549+
)
7550+
ORDER BY c0;
7551+
;`,
7552+
7553+
Expected: []sql.Row{
7554+
{4},
7555+
},
7556+
},
7557+
{
7558+
// Original Issue: https://github.com/dolthub/dolt/issues/5714
7559+
// Similar, but this time the subquery is on the left
7560+
Query: `
7561+
7562+
SELECT COUNT(*)
7563+
FROM keyless
7564+
WHERE keyless.c0 IN (
7565+
7566+
WITH RECURSIVE cte(depth, i, j) AS (
7567+
SELECT 0, T1.c0, T1.c1
7568+
FROM keyless T1
7569+
WHERE T1.c0 = 0
7570+
7571+
UNION ALL
7572+
7573+
SELECT cte.depth + 1, cte.i, T2.c1 + 1
7574+
FROM cte, keyless T2
7575+
WHERE cte.depth = T2.c0
7576+
)
7577+
7578+
SELECT U0.c0
7579+
FROM cte, keyless U0
7580+
WHERE cte.j = keyless.c0
7581+
7582+
)
7583+
ORDER BY c0;
7584+
;`,
7585+
7586+
Expected: []sql.Row{
7587+
{4},
7588+
},
7589+
},
75257590
}
75267591

75277592
var KeylessQueries = []QueryTest{
@@ -7946,6 +8011,29 @@ var BrokenQueries = []QueryTest{
79468011
Query: "select 2000.0 / 250000000.0 * (24.0 * 6.0 * 6.25 * 10.0);",
79478012
Expected: []sql.Row{{"0.0720000000"}},
79488013
},
8014+
{
8015+
// This panics
8016+
// The non-recursive part of the UNION ALL returns too many rows, causing index out of bounds errors
8017+
// Without the join on mytable and cte, this error is caught
8018+
Query: `
8019+
WITH RECURSIVE cte(i, j) AS (
8020+
SELECT 0, 1, 2
8021+
FROM mytable
8022+
8023+
UNION ALL
8024+
8025+
SELECT *
8026+
FROM mytable, cte
8027+
WHERE cte.i = mytable.i
8028+
)
8029+
SELECT *
8030+
FROM mytable;`,
8031+
Expected: []sql.Row{
8032+
{1, "first row"},
8033+
{2, "second row"},
8034+
{3, "third row"},
8035+
},
8036+
},
79498037
}
79508038

79518039
var VersionedQueries = []QueryTest{

enginetest/queries/query_plans.go

+156
Original file line numberDiff line numberDiff line change
@@ -7694,6 +7694,162 @@ With c as (
76947694
" └─ columns: [i]\n" +
76957695
"",
76967696
},
7697+
{
7698+
Query: `
7699+
SELECT COUNT(*)
7700+
FROM keyless
7701+
WHERE keyless.c0 IN (
7702+
WITH RECURSIVE cte(depth, i, j) AS (
7703+
SELECT 0, T1.c0, T1.c1
7704+
FROM keyless T1
7705+
WHERE T1.c0 = 0
7706+
7707+
UNION ALL
7708+
7709+
SELECT cte.depth + 1, cte.i, T2.c1 + 1
7710+
FROM cte, keyless T2
7711+
WHERE cte.depth = T2.c0
7712+
)
7713+
7714+
SELECT U0.c0
7715+
FROM keyless U0, cte
7716+
WHERE cte.j = keyless.c0
7717+
);`,
7718+
ExpectedPlan: "Project\n" +
7719+
" ├─ columns: [COUNT(1):0!null as COUNT(*)]\n" +
7720+
" └─ GroupBy\n" +
7721+
" ├─ select: COUNT(1 (bigint))\n" +
7722+
" ├─ group: \n" +
7723+
" └─ Filter\n" +
7724+
" ├─ InSubquery\n" +
7725+
" │ ├─ left: keyless.c0:0\n" +
7726+
" │ └─ right: Subquery\n" +
7727+
" │ ├─ cacheable: false\n" +
7728+
" │ └─ Project\n" +
7729+
" │ ├─ columns: [U0.c0:2]\n" +
7730+
" │ └─ Filter\n" +
7731+
" │ ├─ Eq\n" +
7732+
" │ │ ├─ cte.j:5\n" +
7733+
" │ │ └─ keyless.c0:0\n" +
7734+
" │ └─ CrossJoin\n" +
7735+
" │ ├─ TableAlias(U0)\n" +
7736+
" │ │ └─ Table\n" +
7737+
" │ │ ├─ name: keyless\n" +
7738+
" │ │ └─ columns: [c0]\n" +
7739+
" │ └─ SubqueryAlias\n" +
7740+
" │ ├─ name: cte\n" +
7741+
" │ ├─ outerVisibility: true\n" +
7742+
" │ ├─ cacheable: true\n" +
7743+
" │ └─ RecursiveCTE\n" +
7744+
" │ └─ Union all\n" +
7745+
" │ ├─ Project\n" +
7746+
" │ │ ├─ columns: [0 (tinyint), T1.c0:2, T1.c1:3]\n" +
7747+
" │ │ └─ Filter\n" +
7748+
" │ │ ├─ Eq\n" +
7749+
" │ │ │ ├─ T1.c0:2\n" +
7750+
" │ │ │ └─ 0 (tinyint)\n" +
7751+
" │ │ └─ TableAlias(T1)\n" +
7752+
" │ │ └─ Table\n" +
7753+
" │ │ ├─ name: keyless\n" +
7754+
" │ │ └─ columns: [c0 c1]\n" +
7755+
" │ └─ Project\n" +
7756+
" │ ├─ columns: [(cte.depth:3!null + 1 (tinyint)), cte.i:4, (T2.c1:7 + 1 (tinyint))]\n" +
7757+
" │ └─ HashJoin\n" +
7758+
" │ ├─ Eq\n" +
7759+
" │ │ ├─ cte.depth:3!null\n" +
7760+
" │ │ └─ T2.c0:6\n" +
7761+
" │ ├─ RecursiveTable(cte)\n" +
7762+
" │ └─ HashLookup\n" +
7763+
" │ ├─ source: TUPLE(cte.depth:3!null)\n" +
7764+
" │ ├─ target: TUPLE(T2.c0:2)\n" +
7765+
" │ └─ CachedResults\n" +
7766+
" │ └─ TableAlias(T2)\n" +
7767+
" │ └─ Table\n" +
7768+
" │ ├─ name: keyless\n" +
7769+
" │ └─ columns: [c0 c1]\n" +
7770+
" └─ Table\n" +
7771+
" ├─ name: keyless\n" +
7772+
" └─ columns: [c0 c1]\n" +
7773+
"",
7774+
},
7775+
{
7776+
Query: `
7777+
SELECT COUNT(*)
7778+
FROM keyless
7779+
WHERE keyless.c0 IN (
7780+
WITH RECURSIVE cte(depth, i, j) AS (
7781+
SELECT 0, T1.c0, T1.c1
7782+
FROM keyless T1
7783+
WHERE T1.c0 = 0
7784+
7785+
UNION ALL
7786+
7787+
SELECT cte.depth + 1, cte.i, T2.c1 + 1
7788+
FROM cte, keyless T2
7789+
WHERE cte.depth = T2.c0
7790+
)
7791+
7792+
SELECT U0.c0
7793+
FROM cte, keyless U0
7794+
WHERE cte.j = keyless.c0
7795+
);`,
7796+
ExpectedPlan: "Project\n" +
7797+
" ├─ columns: [COUNT(1):0!null as COUNT(*)]\n" +
7798+
" └─ GroupBy\n" +
7799+
" ├─ select: COUNT(1 (bigint))\n" +
7800+
" ├─ group: \n" +
7801+
" └─ Filter\n" +
7802+
" ├─ InSubquery\n" +
7803+
" │ ├─ left: keyless.c0:0\n" +
7804+
" │ └─ right: Subquery\n" +
7805+
" │ ├─ cacheable: false\n" +
7806+
" │ └─ Project\n" +
7807+
" │ ├─ columns: [U0.c0:5]\n" +
7808+
" │ └─ Filter\n" +
7809+
" │ ├─ Eq\n" +
7810+
" │ │ ├─ cte.j:4\n" +
7811+
" │ │ └─ keyless.c0:0\n" +
7812+
" │ └─ CrossJoin\n" +
7813+
" │ ├─ SubqueryAlias\n" +
7814+
" │ │ ├─ name: cte\n" +
7815+
" │ │ ├─ outerVisibility: true\n" +
7816+
" │ │ ├─ cacheable: true\n" +
7817+
" │ │ └─ RecursiveCTE\n" +
7818+
" │ │ └─ Union all\n" +
7819+
" │ │ ├─ Project\n" +
7820+
" │ │ │ ├─ columns: [0 (tinyint), T1.c0:2, T1.c1:3]\n" +
7821+
" │ │ │ └─ Filter\n" +
7822+
" │ │ │ ├─ Eq\n" +
7823+
" │ │ │ │ ├─ T1.c0:2\n" +
7824+
" │ │ │ │ └─ 0 (tinyint)\n" +
7825+
" │ │ │ └─ TableAlias(T1)\n" +
7826+
" │ │ │ └─ Table\n" +
7827+
" │ │ │ ├─ name: keyless\n" +
7828+
" │ │ │ └─ columns: [c0 c1]\n" +
7829+
" │ │ └─ Project\n" +
7830+
" │ │ ├─ columns: [(cte.depth:2!null + 1 (tinyint)), cte.i:3, (T2.c1:6 + 1 (tinyint))]\n" +
7831+
" │ │ └─ HashJoin\n" +
7832+
" │ │ ├─ Eq\n" +
7833+
" │ │ │ ├─ cte.depth:2!null\n" +
7834+
" │ │ │ └─ T2.c0:5\n" +
7835+
" │ │ ├─ RecursiveTable(cte)\n" +
7836+
" │ │ └─ HashLookup\n" +
7837+
" │ │ ├─ source: TUPLE(cte.depth:2!null)\n" +
7838+
" │ │ ├─ target: TUPLE(T2.c0:2)\n" +
7839+
" │ │ └─ CachedResults\n" +
7840+
" │ │ └─ TableAlias(T2)\n" +
7841+
" │ │ └─ Table\n" +
7842+
" │ │ ├─ name: keyless\n" +
7843+
" │ │ └─ columns: [c0 c1]\n" +
7844+
" │ └─ TableAlias(U0)\n" +
7845+
" │ └─ Table\n" +
7846+
" │ ├─ name: keyless\n" +
7847+
" │ └─ columns: [c0]\n" +
7848+
" └─ Table\n" +
7849+
" ├─ name: keyless\n" +
7850+
" └─ columns: [c0 c1]\n" +
7851+
"",
7852+
},
76977853
}
76987854

76997855
// QueryPlanTODOs are queries where the query planner produces a correct (results) but suboptimal plan.

sql/analyzer/analyzer.go

-1
Original file line numberDiff line numberDiff line change
@@ -528,7 +528,6 @@ func postPrepareRuleSelector(id RuleId) bool {
528528
stripTableNameInDefaultsId,
529529
resolvePreparedInsertId,
530530
finalizeSubqueriesId,
531-
finalizeUnionsId,
532531

533532
// DefaultValidationRules
534533
validateResolvedId,

sql/analyzer/exec_builder.go

+5-1
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,11 @@ func (b *ExecBuilder) buildHashJoin(j *hashJoin, input sql.Schema, children ...s
180180
if err != nil {
181181
return nil, err
182182
}
183-
outerAttrs, err := b.buildFilters(j.g.m.scope, j.right.relProps.OutputCols(), expression.Tuple(j.outerAttrs))
183+
tmpScope := j.g.m.scope
184+
if tmpScope != nil {
185+
tmpScope = tmpScope.newScopeNoJoin()
186+
}
187+
outerAttrs, err := b.buildFilters(tmpScope, j.right.relProps.OutputCols(), expression.Tuple(j.outerAttrs))
184188
if err != nil {
185189
return nil, err
186190
}

sql/analyzer/indexed_joins.go

+1
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ func inOrderReplanJoin(
117117

118118
// two different base cases, depending on whether we reorder or not
119119
if reorder {
120+
scope.SetJoin(true)
120121
ret, err := replanJoin(ctx, j, a, scope)
121122
if err != nil {
122123
return nil, transform.SameTree, fmt.Errorf("failed to replan join: %w", err)

sql/analyzer/pushdown.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -953,7 +953,8 @@ func convertIsNullForIndexes(ctx *sql.Context, e sql.Expression) sql.Expression
953953
// pushdownFixIndices fixes field indices for non-join expressions (replanJoin
954954
// is responsible for join filters and conditions.)
955955
func pushdownFixIndices(a *Analyzer, n sql.Node, scope *Scope) (sql.Node, transform.TreeIdentity, error) {
956-
if _, ok := n.(*plan.JoinNode); ok {
956+
switch n := n.(type) {
957+
case *plan.JoinNode, *plan.HashLookup:
957958
return n, transform.SameTree, nil
958959
}
959960
return FixFieldIndexesForExpressions(a, n, scope)

0 commit comments

Comments
 (0)