8 Solutions to Puzzle of the Week #21

Puzzle of the Week #21:

Produce a report that shows employee name, his/her immediate manager name, and the next level manager name. The following conditions should be met:

  • Use Single SELECT statement only
  • Use mgr column to identify employee’s immediate manager
  • The query should work in Oracle 11g.
  • A preferred solution should use only a single instance of emp table.

Expected Result:

NAME1      NAME2      NAME3
---------- ---------- ------
SMITH      FORD       JONES
ALLEN      BLAKE      KING
WARD       BLAKE      KING
JONES      KING
MARTIN     BLAKE      KING
BLAKE      KING
CLARK      KING
SCOTT      JONES      KING
KING
TURNER     BLAKE      KING
ADAMS      SCOTT      JONES
JAMES      BLAKE      KING
FORD       JONES      KING
MILLER     CLARK      KING

Solutions:

#1. Using connect_by_root, sys_connect_by_path, and regexp_substr functions

col name1 for a10
col name2 for a10
col name3 for a10
WITH x AS(
SELECT CONNECT_BY_ROOT(ename) name,
       SYS_CONNECT_BY_PATH(ename, ',') path,
       CONNECT_BY_ROOT(empno) empno
FROM emp
WHERE LEVEL<=3
CONNECT BY empno=PRIOR mgr
)
SELECT name, REGEXP_SUBSTR(MAX(path), '[^,]+', 1, 2) name2,
             REGEXP_SUBSTR(MAX(path), '[^,]+', 1, 3) name3
FROM x
GROUP BY name, empno
ORDER BY empno;

#2. Using CONNECT BY twice

WITH x AS (
SELECT ename, PRIOR ename mname, empno, mgr
FROM emp
WHERE LEVEL=2 OR mgr IS NULL
CONNECT BY PRIOR empno=mgr
)
SELECT ename name1, mname name2, MAX(PRIOR mname) name3
FROM x
WHERE LEVEL<=2
CONNECT BY PRIOR empno=mgr
GROUP BY ename, mname, empno
ORDER BY empno

#3. Using CONNECT BY and Self Outer Join

WITH x AS (
SELECT ename, PRIOR ename mname, PRIOR mgr AS mgr, empno
FROM emp
WHERE LEVEL=2 OR mgr IS NULL
CONNECT BY PRIOR empno=mgr
)
SELECT x.ename name1, x.mname name2, e.ename name3
FROM x LEFT JOIN emp e ON x.mgr=e.empno
ORDER BY x.empno

#4. Using 2 Self Outer Joins

SELECT a.ename name1, b.ename name2, c.ename name3
FROM emp a LEFT JOIN emp b ON a.mgr=b.empno
           LEFT JOIN emp c ON b.mgr=c.empno
ORDER BY a.empno

#5. Using CONNECT BY and PIVOT

SELECT name1, name2, name3
FROM (
SELECT ename, LEVEL lvl, CONNECT_BY_ROOT(empno) empno
FROM emp
WHERE LEVEL<=3
CONNECT BY empno=PRIOR mgr
)
PIVOT(
MAX(ename)
FOR lvl IN (1 AS name1, 2 AS name2, 3 AS name3)
)
ORDER BY empno;

#6. PIVOT Simulation

WITH x AS (
SELECT ename, LEVEL lvl, CONNECT_BY_ROOT(empno) empno
FROM emp
WHERE LEVEL<=3
CONNECT BY empno=PRIOR mgr
)
SELECT MAX(DECODE(lvl, 1, ename)) name1,
       MAX(DECODE(lvl, 2, ename)) name2,
       MAX(DECODE(lvl, 3, ename)) name3
FROM x
GROUP BY empno
ORDER BY empno;

#7. Using CONNECT BY and no WITH/Subqueries (Credit to Krishna Jamal)

SELECT ename Name1, PRIOR ename Name2,
DECODE(LEVEL, 
    3, CONNECT_BY_ROOT(ename), 
    4, TRIM(BOTH ' ' FROM 
        REPLACE(
            REPLACE(SYS_CONNECT_BY_PATH(PRIOR ename, ' '), PRIOR ename), 
        CONNECT_BY_ROOT(ename)))
        ) Name3
FROM emp
START WITH mgr IS NULL
CONNECT BY PRIOR empno = mgr
ORDER BY ROWID;

#8. A composition of Methods 1 and 7:

SELECT ename Name1, PRIOR ename Name2,
       CASE WHEN LEVEL IN (3,4) 
          THEN REGEXP_SUBSTR(SYS_CONNECT_BY_PATH(ename, ','),'[^,]+',1,LEVEL-2) 
       END AS Name3
FROM emp
START WITH mgr IS NULL
CONNECT BY PRIOR empno = mgr
ORDER BY ROWID;

My Oracle Group on Facebook:

If you like this post, you may want to join my new Oracle group on Facebook: https://www.facebook.com/groups/sqlpatterns/

Would you like to read about many more tricks and puzzles?

For more tricks and cool techniques check my book “Oracle SQL Tricks and Workarounds” for instructions.

Solutions to Puzzle of the Week #13

Puzzle of the Week #13:

Table Setup and Puzzle description can be located here

Expected Result:

  ID FULL_NAME                             GROUP_ID
---- ----------------------------------- ----------
   8 Oscar Pedro Fernando Rodriguez               1
   9 Rodriguez, Oscar Pedro Fernando              1
  10 Oscar Fernando Rodriguez Pedro               1
   1 John Smith                                   2
   2 John L. Smith                                2
   4 Smith, John                                  2
   5 Tom Khan                                     3
  11 KHAN, TOM S.                                 3

Solutions:

#1. Using CTE (Recursive WITH) and LISTAGG

WITH x AS (
SELECT name_id, UPPER(REGEXP_REPLACE(full_name,'[[:punct:]]')) full_name
FROM name_list
), y(id, token, lvl) AS (
SELECT name_id, REGEXP_SUBSTR(full_name, '[^ ]+', 1, 1), 1 
FROM x
UNION ALL
SELECT x.name_id, REGEXP_SUBSTR(full_name, '[^ ]+', 1, y.lvl+1), y.lvl+1
FROM x JOIN y ON x.name_id=y.id AND REGEXP_SUBSTR(full_name, '[^ ]+', 1, y.lvl+1) IS NOT NULL
), z AS (
SELECT id, LISTAGG(token, ' ') WITHIN GROUP(ORDER BY token) ordered_name, 
       COUNT(*)OVER(PARTITION BY LISTAGG(token, ' ') WITHIN GROUP(ORDER BY token)) cnt,
       DENSE_RANK()OVER(ORDER BY LISTAGG(token, ' ') WITHIN GROUP(ORDER BY token)) group_id
FROM y
WHERE LENGTH(token)>1
GROUP BY id
)
SELECT z.id, n.full_name, DENSE_RANK()OVER(ORDER BY group_id) group_id
FROM z JOIN name_list n ON z.id=n.name_id
WHERE z.cnt>1
ORDER BY 3, 1;

  ID FULL_NAME                                  GROUP_ID
--- ---------------------------------------- ----------
  8 Oscar Pedro Fernando Rodrigues                    1
  9 Rodrigues, Oscar Pedro Fernando                   1
 10 Oscar Fernando Rodrigues Pedro                    1
  1 John Smith                                        2
  2 John L. Smith                                     2
  4 Smith, John                                       2
  5 Tom Khan                                          3
 11 KHAN, TOM S.                                      3

Explanation:

The key idea is to split each name into multiple name tokens, then sort and merge them back into a single line. Matching (duplicate) names will have the same merged line so we could use it to identify duplicates. DENSE_RANK analytic function is used to generate sequential group id values.

The same idea is used in the solution below. The only difference is the way to split the names into tokens.

#2: Using CONNECT BY and TABLE/CAST/MULTISET functions

 WITH x AS (
SELECT name_id, UPPER(REGEXP_REPLACE(full_name,'[[:punct:]]')) full_name
FROM name_list
), y AS (
SELECT name_id AS id, y.column_value AS token
FROM x,
     TABLE(CAST(MULTISET(SELECT REGEXP_SUBSTR(x.full_name, '[^ ]+', 1, LEVEL) token
                    FROM dual
                    CONNECT BY LEVEL <= LENGTH(full_name)-LENGTH(REPLACE(full_name,' '))+1
                        )
                AS sys.odcivarchar2list)
          ) y
WHERE LENGTH(y.column_value)>1
), z AS (
SELECT id, LISTAGG(token, ' ') WITHIN GROUP(ORDER BY token) ordered_name,
       COUNT(*)OVER(PARTITION BY LISTAGG(token, ' ') WITHIN GROUP(ORDER BY token)) cnt,
       DENSE_RANK()OVER(ORDER BY LISTAGG(token, ' ') WITHIN GROUP(ORDER BY token)) group_id
FROM y
WHERE LENGTH(token)>1
GROUP BY id
)
SELECT z.id, n.full_name, DENSE_RANK()OVER(ORDER BY group_id) group_id
FROM z JOIN name_list n ON z.id=n.name_id
WHERE z.cnt>1
ORDER BY 3, 1;

  ID FULL_NAME                                  GROUP_ID
---- ---------------------------------------- ----------
   8 Oscar Pedro Fernando Rodrigues                    1
   9 Rodrigues, Oscar Pedro Fernando                   1
  10 Oscar Fernando Rodrigues Pedro                    1
   1 John Smith                                        2
   2 John L. Smith                                     2
   4 Smith, John                                       2
   5 Tom Khan                                          3
  11 KHAN, TOM S.                                      3

 

7 Solutions to Puzzle of the Week #11

Puzzle of the Week #11

Produce the Employee Roll Report that satisfies the following list of requirements:

  • Use single SELECT statement
  • Single column “Names” should have a list of the employee names separated by comma
  • The maximum size of the values in the “Names” column should be 23
  • The report should have as few rows as possible
  • All the employee names should be concatenated in the alphabetical order

Expected Result:

(the Length column is added for length verification only)

Names                                        Length
---------------------------------------- ----------
ADAMS,ALLEN,BLAKE,CLARK                          23
FORD,JAMES,JONES,KING                            21
MARTIN,MILLER,SCOTT                              19
SMITH,TURNER,WARD                                17

Solutions

#1 – Using Recursive WITH clause (Common Table Expression) – to contactenate names

WITH e AS (
SELECT ename, ROW_NUMBER()OVER(ORDER BY ename) rn, 23 AS maxlen
FROM emp
), x (rn, txt, grp) AS (
SELECT 1, CAST(ename AS VARCHAR2(100)), 1
FROM e
WHERE rn=1
UNION ALL
SELECT e.rn,
       CASE WHEN LENGTH(x.txt||','||e.ename)>e.maxlen THEN e.ename
            ELSE x.txt||','||e.ename
       END,
       CASE WHEN LENGTH(x.txt||','||e.ename)>e.maxlen THEN x.grp+1
            ELSE x.grp
       END
FROM e JOIN x ON e.rn=x.rn+1
)
SELECT MAX(txt) "Names", LENGTH(MAX(txt)) "Length"
FROM x
GROUP BY grp
ORDER BY grp;

Names                         Length
------------------------- ----------
ADAMS,ALLEN,BLAKE,CLARK           23
FORD,JAMES,JONES,KING             21
MARTIN,MILLER,SCOTT               19
SMITH,TURNER,WARD                 17

#2 – Using Recursive WITH clause (Common Table Expression) – to group names AND LISTAGG function

WITH t (ename, len, rn) AS (  
SELECT ename, LENGTH(ename) + 1, ROW_NUMBER() OVER(ORDER BY ename)  
FROM emp  
), r (ename, running_len, rn, gp) AS (  
SELECT ename, len, rn, 1 
FROM t 
WHERE rn = 1  
UNION ALL  
SELECT t.ename,  
       CASE WHEN t.len > 24 - r.running_len THEN t.len ELSE r.running_len + t.len END,  
       t.rn,  
       r.gp + CASE WHEN t.len > 24 - r.running_len THEN 1 ELSE 0 END  
FROM t JOIN r ON t.rn = r.rn + 1  
)  
SELECT LISTAGG(ename, ',') WITHIN GROUP(ORDER BY rn) AS "Names", MAX(running_len) - 1 AS "Length"  
FROM r  
GROUP BY gp  
ORDER BY gp
/

#3: Using Recursive WITH clause (CTE) – to group names in a different way

WITH data (ename, grp, pass) AS (  
SELECT ename,  
    CASE WHEN SUM(LENGTH(ename) + 1) OVER(ORDER BY  ename) - 1 <= 23  
   THEN 1  
   ELSE 0  
    END, 1  
FROM emp  
UNION ALL  
SELECT ename,  
    CASE WHEN SUM(LENGTH(ename) + 1) OVER (ORDER BY  ename) - 1 <= 23  
   THEN 1  
    END, pass + 1  
FROM data  
WHERE (grp = 0 AND pass = 1) OR grp IS NULL  
), x AS (
SELECT LISTAGG(ename, ',') WITHIN GROUP(ORDER BY ename) AS names, pass  
FROM data  
WHERE grp = 1  
GROUP BY pass 
)  
SELECT names "Names", LENGTH(names) AS "Length"  
FROM x 
ORDER BY 1;

#4: Using XMLAGG with Regular Expressions

WITH t AS (
SELECT TRIM(',' FROM XMLAGG(xmlelement(e, ename||',') ORDER BY ename).EXTRACT('//text()')) AS txt
FROM  emp
), x AS (
SELECT LEVEL AS l,
       TRIM(',' FROM TRIM(REGEXP_SUBSTR(txt,'.{1,23}(,|$)',1,LEVEL))) AS names
       FROM t
       CONNECT BY TRIM(',' FROM TRIM(REGEXP_SUBSTR(txt,'.{1,23}(,|$)',1,LEVEL))) IS NOT NULL
)
SELECT names "Names", LENGTH(names) "Length"
FROM x
/

#5: Using LISTAGG with Regular Expressions

WITH  x AS (
SELECT LISTAGG (ename, ',') WITHIN GROUP (ORDER BY 1) str
FROM emp
)
SELECT RTRIM(REGEXP_SUBSTR (str, '.{1,23}(,|$)', 1, LEVEL), ',')  "Names",
       LENGTH(RTRIM(REGEXP_SUBSTR (str, '.{1,23}(,|$)', 1, LEVEL), ',')) "Length"
FROM x
CONNECT BY RTRIM(REGEXP_SUBSTR (str, '.{1,23}(,|$)', 1, LEVEL), ',') IS NOT NULL

#6: Using MODEL clause for grouping names

WITH m AS (
SELECT i, ename, grp, len, prevlen  
FROM emp  
MODEL  
   DIMENSION BY (ROW_number() OVER (ORDER BY  ename) AS i)  
   MEASURES 
    (
       ename AS ename, 
    CAST('' AS VARCHAR2(24)) AS names,
    0 AS grp,
    0 AS len, 
    0 AS prevlen
 )  
    RULES 
 (
   len[i] = LENGTH(ename[CV()]),
   prevlen[i] = CASE WHEN (CASE WHEN NVL(prevlen[CV()-1],0) = 0 THEN NVL(len[CV()-1],0) 
           ELSE NVL(prevlen[CV()-1],0) + 1 +  NVL(len[CV()-1],0) 
         END) > 23  
         THEN NVL(len[CV()-1],0)  
         ELSE CASE WHEN NVL(prevlen[CV()-1],0) = 0 THEN NVL(len[CV()-1],0) 
          ELSE NVL(prevlen[CV()-1],0) + 1 +  NVL(len[CV()-1],0) 
           END  
       END,
   grp[i] = NVL(grp[CV()-1],0) + CASE WHEN prevlen[CV()+1] < prevlen[CV()] THEN 1 ELSE 0 END   
 )  
)             
SELECT LISTAGG(ename,',') WITHIN GROUP (ORDER BY ename) AS "Names" , LENGTH(listagg(ename,',') WITHIN GROUP (ORDER BY  ename)) AS "Length"  
FROM m  
GROUP BY grp;  
 

#7: Oracle 12c Solution – Using MATCH_RECOGNIZE clause

SELECT  LISTAGG(name,',') WITHIN GROUP(ORDER BY name) "Names",
        LENGTH(LISTAGG(name,',') WITHIN GROUP(ORDER BY name)) "Length"
FROM  EMP
MATCH_RECOGNIZE
 (
  ORDER BY ENAME
  MEASURES
 MATCH_NUMBER() rn,
 UP.ENAME name
 ALL ROWS PER MATCH
 PATTERN (UP+)
 DEFINE
   UP AS SUM(LENGTH(UP.ENAME || ',')) <= 24
  )
GROUP BY RN
ORDER BY RN

My Oracle Group on Facebook:

If you like this post, you may want to join my new Oracle group on Facebook: https://www.facebook.com/groups/sqlpatterns/

Would you like to read about many more tricks and puzzles?

For more tricks and cool techniques check my book “Oracle SQL Tricks and Workarounds” for instructions.