Skip to content

Commit e3a1717

Browse files
committed
RCTE
1 parent d043dcd commit e3a1717

File tree

2 files changed

+136
-129
lines changed

2 files changed

+136
-129
lines changed

Patterns RCTEs Modify.md

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
---
2+
layout: default
3+
title: Recursive CTEs
4+
nav_order: 9
5+
parent: Design Patterns
6+
permalink: /patterns/rcte-modify
7+
---
8+
9+
Recursive CTEs enable the implementation of the following task. Suppose we have a set of file system directory paths and an ordered list of COPY operations, each defined by an original root path and a new root path. We need an SQL query that applies these operations to the paths. Because a later operation may act on paths created by an earlier one (successive modifications), the RCTE feature is the only way to implement this transformation in SQL/SQLite for an arbitrary number of operations. The snippet below shows only the core code (the LOOP_COPY block) without further processing. Note that a straightforward use of the _replace_ routine would be incorrect because it could also match the original root in the middle of a path.
10+
11+
~~~sql
12+
-- Apply an ordered list of folder COPY operations to a set of directory paths.
--   folders   : the input directory paths.
--   ops       : the COPY operations; opid gives the order and must be
--               consecutive starting at 1. In this data set every rootpath_old
--               ends with '/' and no rootpath_new does.
--   LOOP_COPY : one (opid, path_new) row per path that exists after operation
--               opid has run; opid = 0 marks the untouched input.
-- The loop body has two recursive terms, which requires SQLite 3.34.0 or later.
WITH RECURSIVE
    folders(path_old) AS (
        VALUES
            ('doc/thesis/exp'),
            ('doc/thesis/theory'),
            ('doc/app/job/lor'),
            ('code/scripts/py'),
            ('code/scripts/bas')
    ),
    ops(opid, rootpath_old, rootpath_new) AS (
        VALUES
            (1, 'doc/',            'docABC'                 ),
            (2, 'docABC/thesis/',  'docABC/master'          ),
            (3, 'docABC/app/job/', 'docABC/app/academic_job'),
            (4, 'code/',           'prog'                   )
    ),
    LOOP_COPY AS (
        -- Initial SELECT: seed the processing queue with every input path.
        SELECT
            0                AS opid,
            folders.path_old AS path_new
        FROM folders

        UNION ALL -- LOOP body starts here --

        -- COPY never deletes anything, so every path seen so far is carried
        -- over unchanged to the next operation. The loop ends when no
        -- operation with the next opid exists.
        SELECT
            ops.opid        AS opid,
            BUFFER.path_new AS path_new
        FROM LOOP_COPY AS BUFFER
        INNER JOIN ops
            ON ops.opid = BUFFER.opid + 1

        UNION ALL

        -- Add the copies produced by the current operation.
        -- length(rootpath_old) is the 1-based position of the old root's
        -- trailing '/', so substr() returns the remainder of the path
        -- including its leading '/', which is appended to the new root.
        SELECT
            ops.opid AS opid,
            ops.rootpath_new
                || substr(BUFFER.path_new, length(ops.rootpath_old)) AS path_new
        FROM LOOP_COPY AS BUFFER
        INNER JOIN ops
            ON ops.opid = BUFFER.opid + 1
        -- Literal, case-sensitive prefix test. LIKE would treat '_' and '%'
        -- inside a folder name as wildcards and ignore ASCII case. Append
        -- COLLATE NOCASE to the comparison for case-insensitive file systems.
        WHERE substr(BUFFER.path_new, 1, length(ops.rootpath_old)) = ops.rootpath_old
    )
SELECT
    opid,
    path_new
FROM LOOP_COPY;
46+
~~~
47+
48+
Note that this task is still not particularly well suited for SQL. The initial SELECT places all paths in the processing queue, making them available in the first loop cycle. Because the COPY operation does not delete any folders, the entire input must be placed in the processing queue for the next loop cycle, meaning that the initial set and all previously created paths are duplicated during each loop cycle.
49+
50+
Even though the RCTE loop body processes one row at a time, it can be helpful to treat the RCTE loop as if each cycle processed the entire row set produced by the preceding cycle. This treatment is appropriate when the processing queue acts as a FIFO (the default behavior), as illustrated by the two processing-queue tables on the Recursive CTEs page (`/patterns/rec-cte`): compare the output of *Loop Cycle #1* shown in the first table with the column *Cycle #3* from the second table. The query below shows an equivalent implementation of the RCTE block (valid only for the given input, because the number of steps is fixed), which unravels the above *LOOP_COPY* RCTE.
51+
52+
~~~sql
53+
-- Unrolled (non-recursive) version of the LOOP_COPY recursive CTE.
-- Each LOOP_COPY_STEP_n CTE is one loop cycle: it reads the rows of the
-- previous step and applies the operation with opid = n. The number of steps
-- is fixed, so this form only works for this particular ops list.
-- No CTE refers to itself, hence plain WITH instead of WITH RECURSIVE.
WITH
    folders(path_old) AS (
        VALUES
            ('doc/thesis/exp'),
            ('doc/thesis/theory'),
            ('doc/app/job/lor'),
            ('code/scripts/py'),
            ('code/scripts/bas')
    ),
    -- In this data set every rootpath_old ends with '/' and no rootpath_new does.
    ops(opid, rootpath_old, rootpath_new) AS (
        VALUES
            (1, 'doc/',            'docABC'                 ),
            (2, 'docABC/thesis/',  'docABC/master'          ),
            (3, 'docABC/app/job/', 'docABC/app/academic_job'),
            (4, 'code/',           'prog'                   )
    ),
    -- Counterpart of the initial SELECT: the untouched input, tagged opid = 0.
    LOOP_COPY_INIT AS (
        SELECT
            0                AS opid,
            folders.path_old AS path_new
        FROM folders
    ),
    LOOP_COPY_STEP_1 AS (
        -- Carry every existing path over unchanged (COPY deletes nothing).
        SELECT
            ops.opid        AS opid,
            BUFFER.path_new AS path_new
        FROM LOOP_COPY_INIT AS BUFFER
        INNER JOIN ops
            ON ops.opid = BUFFER.opid + 1
        UNION ALL
        -- Add the copies created by this operation. length(rootpath_old) is the
        -- 1-based position of the old root's trailing '/', so substr() keeps the
        -- separator in front of the remainder.
        SELECT
            ops.opid AS opid,
            ops.rootpath_new
                || substr(BUFFER.path_new, length(ops.rootpath_old)) AS path_new
        FROM LOOP_COPY_INIT AS BUFFER
        INNER JOIN ops
            ON ops.opid = BUFFER.opid + 1
        -- Literal, case-sensitive prefix test; LIKE would treat '_' and '%' in
        -- folder names as wildcards and ignore ASCII case.
        WHERE substr(BUFFER.path_new, 1, length(ops.rootpath_old)) = ops.rootpath_old
    ),
    LOOP_COPY_STEP_2 AS (
        SELECT
            ops.opid        AS opid,
            BUFFER.path_new AS path_new
        FROM LOOP_COPY_STEP_1 AS BUFFER
        INNER JOIN ops
            ON ops.opid = BUFFER.opid + 1
        UNION ALL
        SELECT
            ops.opid AS opid,
            ops.rootpath_new
                || substr(BUFFER.path_new, length(ops.rootpath_old)) AS path_new
        FROM LOOP_COPY_STEP_1 AS BUFFER
        INNER JOIN ops
            ON ops.opid = BUFFER.opid + 1
        WHERE substr(BUFFER.path_new, 1, length(ops.rootpath_old)) = ops.rootpath_old
    ),
    LOOP_COPY_STEP_3 AS (
        SELECT
            ops.opid        AS opid,
            BUFFER.path_new AS path_new
        FROM LOOP_COPY_STEP_2 AS BUFFER
        INNER JOIN ops
            ON ops.opid = BUFFER.opid + 1
        UNION ALL
        SELECT
            ops.opid AS opid,
            ops.rootpath_new
                || substr(BUFFER.path_new, length(ops.rootpath_old)) AS path_new
        FROM LOOP_COPY_STEP_2 AS BUFFER
        INNER JOIN ops
            ON ops.opid = BUFFER.opid + 1
        WHERE substr(BUFFER.path_new, 1, length(ops.rootpath_old)) = ops.rootpath_old
    ),
    LOOP_COPY_STEP_4 AS (
        SELECT
            ops.opid        AS opid,
            BUFFER.path_new AS path_new
        FROM LOOP_COPY_STEP_3 AS BUFFER
        INNER JOIN ops
            ON ops.opid = BUFFER.opid + 1
        UNION ALL
        SELECT
            ops.opid AS opid,
            ops.rootpath_new
                || substr(BUFFER.path_new, length(ops.rootpath_old)) AS path_new
        FROM LOOP_COPY_STEP_3 AS BUFFER
        INNER JOIN ops
            ON ops.opid = BUFFER.opid + 1
        WHERE substr(BUFFER.path_new, 1, length(ops.rootpath_old)) = ops.rootpath_old
    ),
    -- ops has no row with opid = 5, so this step yields no rows; an empty
    -- cycle is what terminates the recursive loop.
    LOOP_COPY_STEP_5_STOP AS (
        SELECT
            ops.opid        AS opid,
            BUFFER.path_new AS path_new
        FROM LOOP_COPY_STEP_4 AS BUFFER
        INNER JOIN ops
            ON ops.opid = BUFFER.opid + 1
        UNION ALL
        SELECT
            ops.opid AS opid,
            ops.rootpath_new
                || substr(BUFFER.path_new, length(ops.rootpath_old)) AS path_new
        FROM LOOP_COPY_STEP_4 AS BUFFER
        INNER JOIN ops
            ON ops.opid = BUFFER.opid + 1
        WHERE substr(BUFFER.path_new, 1, length(ops.rootpath_old)) = ops.rootpath_old
    )
-- The recursive CTE returns the rows of every cycle, so the unrolled steps are
-- concatenated to yield the same set of rows. To inspect a single cycle,
-- select from that step's CTE alone; opid identifies the cycle of each row.
SELECT opid, path_new FROM LOOP_COPY_INIT
UNION ALL
SELECT opid, path_new FROM LOOP_COPY_STEP_1
UNION ALL
SELECT opid, path_new FROM LOOP_COPY_STEP_2
UNION ALL
SELECT opid, path_new FROM LOOP_COPY_STEP_3
UNION ALL
SELECT opid, path_new FROM LOOP_COPY_STEP_4
UNION ALL
SELECT opid, path_new FROM LOOP_COPY_STEP_5_STOP;
135+
~~~

Patterns Recursive CTEs.md

Lines changed: 1 addition & 129 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ parent: Design Patterns
66
permalink: /patterns/rec-cte
77
---
88

9-
### Recursive CTE structure vs. While/Repeat Loop
9+
### Recursive CTE structure vs. While/Repeat loop
1010

1111
By design, SQL delegates statement-level flow control to the database engine, so the standard SQL grammar does not include any flow control structures. The only exception is expression-level branching control. This control structure has operator and function representations, so it naturally integrates within an expression (similar to other functions/operators). Adding statement-level flow controls, such as branching or loops, necessitates grammar extension or special conventions. [Recursive CTEs][] (RCTEs) follow the latter approach and implement the while/repeat loop structure using the standard SELECT grammar/syntax (except for the self-reference) and a special convention. The result is quite contrived, so comprehending and mastering RCTEs may not be straightforward.
1212

@@ -44,134 +44,6 @@ The table below shows a proper representation of the processing queue. This tabl
4444
| | row 1.2 | row 1.3 | row 2.1 |
4545
| | row 1.3 | row 2.1 | row 3.1 |
4646

47-
Recursive CTEs enable the implementation of the following task. Suppose we have a set of file system directory paths and a set of COPY operations defined as the original and new paths. We need an SQL query to perform this transformation. The RCTE feature is the only way to implement this transformation in SQL/SQLite because of potential successive modifications. The snippet below shows only the core code (the LOOP_COPY block) without further processing. Note that straightforward use of the _replace_ routine would be incorrect because of possible matches in the middle of the path.
48-
49-
~~~sql
50-
-- Apply an ordered list of folder COPY operations to a set of directory paths.
--   folders   : the input directory paths.
--   ops       : the COPY operations; opid gives the order and must be
--               consecutive starting at 1. In this data set every rootpath_old
--               ends with '/' and no rootpath_new does.
--   LOOP_COPY : one (opid, path_new) row per path that exists after operation
--               opid has run; opid = 0 marks the untouched input.
-- The loop body has two recursive terms, which requires SQLite 3.34.0 or later.
WITH RECURSIVE
    folders(path_old) AS (
        VALUES
            ('doc/thesis/exp'),
            ('doc/thesis/theory'),
            ('doc/app/job/lor'),
            ('code/scripts/py'),
            ('code/scripts/bas')
    ),
    ops(opid, rootpath_old, rootpath_new) AS (
        VALUES
            (1, 'doc/',            'docABC'                 ),
            (2, 'docABC/thesis/',  'docABC/master'          ),
            (3, 'docABC/app/job/', 'docABC/app/academic_job'),
            (4, 'code/',           'prog'                   )
    ),
    LOOP_COPY AS (
        -- Initial SELECT: seed the processing queue with every input path.
        SELECT
            0                AS opid,
            folders.path_old AS path_new
        FROM folders

        UNION ALL -- LOOP body starts here --

        -- COPY never deletes anything, so every path seen so far is carried
        -- over unchanged to the next operation. The loop ends when no
        -- operation with the next opid exists.
        SELECT
            ops.opid        AS opid,
            BUFFER.path_new AS path_new
        FROM LOOP_COPY AS BUFFER
        INNER JOIN ops
            ON ops.opid = BUFFER.opid + 1

        UNION ALL

        -- Add the copies produced by the current operation.
        -- length(rootpath_old) is the 1-based position of the old root's
        -- trailing '/', so substr() returns the remainder of the path
        -- including its leading '/', which is appended to the new root.
        SELECT
            ops.opid AS opid,
            ops.rootpath_new
                || substr(BUFFER.path_new, length(ops.rootpath_old)) AS path_new
        FROM LOOP_COPY AS BUFFER
        INNER JOIN ops
            ON ops.opid = BUFFER.opid + 1
        -- Literal, case-sensitive prefix test. LIKE would treat '_' and '%'
        -- inside a folder name as wildcards and ignore ASCII case. Append
        -- COLLATE NOCASE to the comparison for case-insensitive file systems.
        WHERE substr(BUFFER.path_new, 1, length(ops.rootpath_old)) = ops.rootpath_old
    )
SELECT
    opid,
    path_new
FROM LOOP_COPY;
84-
~~~
85-
86-
Note that this task is still not particularly well suited for SQL. The initial SELECT places all paths in the processing queue, making them available in the first loop cycle. Because the COPY operation does not delete any folders, the entire input must be placed in the processing queue for the next loop cycle, meaning that the initial set and all previously created paths are duplicated during each loop cycle.
87-
88-
Even though the RCTE loop body processes one row at a time, when the processing queue acts as FIFO (the default behavior), it might be helpful to treat the RCTE loop as if it processed the entire row set produced by the preceding cycle. When the processing queue acts as FIFO, this treatment is appropriate, as illustrated by the two tables above (compare the output of *Loop Cycle #1* shown in the first table with the column *Cycle #3* from the second table). The query below shows an equivalent implementation of the RCTE block (only valid for the given input), which unravels the above *LOOP_COPY* RCTE.
89-
90-
~~~sql
91-
-- Unrolled (non-recursive) version of the LOOP_COPY recursive CTE.
-- Each LOOP_COPY_STEP_n CTE is one loop cycle: it reads the rows of the
-- previous step and applies the operation with opid = n. The number of steps
-- is fixed, so this form only works for this particular ops list.
-- No CTE refers to itself, hence plain WITH instead of WITH RECURSIVE.
WITH
    folders(path_old) AS (
        VALUES
            ('doc/thesis/exp'),
            ('doc/thesis/theory'),
            ('doc/app/job/lor'),
            ('code/scripts/py'),
            ('code/scripts/bas')
    ),
    -- In this data set every rootpath_old ends with '/' and no rootpath_new does.
    ops(opid, rootpath_old, rootpath_new) AS (
        VALUES
            (1, 'doc/',            'docABC'                 ),
            (2, 'docABC/thesis/',  'docABC/master'          ),
            (3, 'docABC/app/job/', 'docABC/app/academic_job'),
            (4, 'code/',           'prog'                   )
    ),
    -- Counterpart of the initial SELECT: the untouched input, tagged opid = 0.
    LOOP_COPY_INIT AS (
        SELECT
            0                AS opid,
            folders.path_old AS path_new
        FROM folders
    ),
    LOOP_COPY_STEP_1 AS (
        -- Carry every existing path over unchanged (COPY deletes nothing).
        SELECT
            ops.opid        AS opid,
            BUFFER.path_new AS path_new
        FROM LOOP_COPY_INIT AS BUFFER
        INNER JOIN ops
            ON ops.opid = BUFFER.opid + 1
        UNION ALL
        -- Add the copies created by this operation. length(rootpath_old) is the
        -- 1-based position of the old root's trailing '/', so substr() keeps the
        -- separator in front of the remainder.
        SELECT
            ops.opid AS opid,
            ops.rootpath_new
                || substr(BUFFER.path_new, length(ops.rootpath_old)) AS path_new
        FROM LOOP_COPY_INIT AS BUFFER
        INNER JOIN ops
            ON ops.opid = BUFFER.opid + 1
        -- Literal, case-sensitive prefix test; LIKE would treat '_' and '%' in
        -- folder names as wildcards and ignore ASCII case.
        WHERE substr(BUFFER.path_new, 1, length(ops.rootpath_old)) = ops.rootpath_old
    ),
    LOOP_COPY_STEP_2 AS (
        SELECT
            ops.opid        AS opid,
            BUFFER.path_new AS path_new
        FROM LOOP_COPY_STEP_1 AS BUFFER
        INNER JOIN ops
            ON ops.opid = BUFFER.opid + 1
        UNION ALL
        SELECT
            ops.opid AS opid,
            ops.rootpath_new
                || substr(BUFFER.path_new, length(ops.rootpath_old)) AS path_new
        FROM LOOP_COPY_STEP_1 AS BUFFER
        INNER JOIN ops
            ON ops.opid = BUFFER.opid + 1
        WHERE substr(BUFFER.path_new, 1, length(ops.rootpath_old)) = ops.rootpath_old
    ),
    LOOP_COPY_STEP_3 AS (
        SELECT
            ops.opid        AS opid,
            BUFFER.path_new AS path_new
        FROM LOOP_COPY_STEP_2 AS BUFFER
        INNER JOIN ops
            ON ops.opid = BUFFER.opid + 1
        UNION ALL
        SELECT
            ops.opid AS opid,
            ops.rootpath_new
                || substr(BUFFER.path_new, length(ops.rootpath_old)) AS path_new
        FROM LOOP_COPY_STEP_2 AS BUFFER
        INNER JOIN ops
            ON ops.opid = BUFFER.opid + 1
        WHERE substr(BUFFER.path_new, 1, length(ops.rootpath_old)) = ops.rootpath_old
    ),
    LOOP_COPY_STEP_4 AS (
        SELECT
            ops.opid        AS opid,
            BUFFER.path_new AS path_new
        FROM LOOP_COPY_STEP_3 AS BUFFER
        INNER JOIN ops
            ON ops.opid = BUFFER.opid + 1
        UNION ALL
        SELECT
            ops.opid AS opid,
            ops.rootpath_new
                || substr(BUFFER.path_new, length(ops.rootpath_old)) AS path_new
        FROM LOOP_COPY_STEP_3 AS BUFFER
        INNER JOIN ops
            ON ops.opid = BUFFER.opid + 1
        WHERE substr(BUFFER.path_new, 1, length(ops.rootpath_old)) = ops.rootpath_old
    ),
    -- ops has no row with opid = 5, so this step yields no rows; an empty
    -- cycle is what terminates the recursive loop.
    LOOP_COPY_STEP_5_STOP AS (
        SELECT
            ops.opid        AS opid,
            BUFFER.path_new AS path_new
        FROM LOOP_COPY_STEP_4 AS BUFFER
        INNER JOIN ops
            ON ops.opid = BUFFER.opid + 1
        UNION ALL
        SELECT
            ops.opid AS opid,
            ops.rootpath_new
                || substr(BUFFER.path_new, length(ops.rootpath_old)) AS path_new
        FROM LOOP_COPY_STEP_4 AS BUFFER
        INNER JOIN ops
            ON ops.opid = BUFFER.opid + 1
        WHERE substr(BUFFER.path_new, 1, length(ops.rootpath_old)) = ops.rootpath_old
    )
-- The recursive CTE returns the rows of every cycle, so the unrolled steps are
-- concatenated to yield the same set of rows. To inspect a single cycle,
-- select from that step's CTE alone; opid identifies the cycle of each row.
SELECT opid, path_new FROM LOOP_COPY_INIT
UNION ALL
SELECT opid, path_new FROM LOOP_COPY_STEP_1
UNION ALL
SELECT opid, path_new FROM LOOP_COPY_STEP_2
UNION ALL
SELECT opid, path_new FROM LOOP_COPY_STEP_3
UNION ALL
SELECT opid, path_new FROM LOOP_COPY_STEP_4
UNION ALL
SELECT opid, path_new FROM LOOP_COPY_STEP_5_STOP;
173-
~~~
174-
17547

17648
<!-- References -->
17749

0 commit comments

Comments
 (0)