Suppose I have two tables, t1 and t2 which are identical in layout but which may contain different data.
What\'s the best way to diff these two tables?
Below is my solution - taking into account that the diffed tables can have duplicate rows. The accepted answer does not take this into account which would give you wrong results in case of duplicates. I am taking care of duplicate rows by numbering them using row_number() and then comparing the numbered rows:
-- TEST TABLES
create table t1 (col_num number,col_date date,col_varchar varchar2(400));
create table t2 (col_num number,col_date date,col_varchar varchar2(400));
-- TEST DATA
insert into t1 values (1,TO_DATE ('01.JAN.3000 00:00:00', 'DD.MON.YYYY HH24:MI:SS'),'I am in both');
insert into t2 values (1,TO_DATE ('01.JAN.3000 00:00:00', 'DD.MON.YYYY HH24:MI:SS'),'I am in both');
insert into t1 values (null,null,'I am in both with nulls');
insert into t2 values (null,null,'I am in both with nulls');
insert into t1 values (1,TO_DATE ('01.JAN.3000 00:00:00', 'DD.MON.YYYY HH24:MI:SS'),'I am in T1 only');
insert into t2 values (1,TO_DATE ('01.JAN.3000 00:00:00', 'DD.MON.YYYY HH24:MI:SS'),'I am in T2 only');
insert into t1 values (null,null,'I am in T1 only with nulls');
insert into t2 values (null,null,'I am in T2 only with nulls');
insert into t1 values (1,TO_DATE ('01.JAN.3000 00:00:00', 'DD.MON.YYYY HH24:MI:SS'),'I am twice in T1 but not in T2');
insert into t1 values (1,TO_DATE ('01.JAN.3000 00:00:00', 'DD.MON.YYYY HH24:MI:SS'),'I am twice in T1 but not in T2');
insert into t2 values (1,TO_DATE ('01.JAN.3000 00:00:00', 'DD.MON.YYYY HH24:MI:SS'),'I am twice in T2 but not in T1');
insert into t2 values (1,TO_DATE ('01.JAN.3000 00:00:00', 'DD.MON.YYYY HH24:MI:SS'),'I am twice in T2 but not in T1');
insert into t1 values (1,TO_DATE ('01.JAN.3000 00:00:00', 'DD.MON.YYYY HH24:MI:SS'),'I am twice in T1 and once in T2');
insert into t1 values (1,TO_DATE ('01.JAN.3000 00:00:00', 'DD.MON.YYYY HH24:MI:SS'),'I am twice in T1 and once in T2');
insert into t2 values (1,TO_DATE ('01.JAN.3000 00:00:00', 'DD.MON.YYYY HH24:MI:SS'),'I am twice in T1 and once in T2');
insert into t2 values (1,TO_DATE ('01.JAN.3000 00:00:00', 'DD.MON.YYYY HH24:MI:SS'),'I am twice in T2 and once in T1');
insert into t2 values (1,TO_DATE ('01.JAN.3000 00:00:00', 'DD.MON.YYYY HH24:MI:SS'),'I am twice in T2 and once in T1');
insert into t1 values (1,TO_DATE ('01.JAN.3000 00:00:00', 'DD.MON.YYYY HH24:MI:SS'),'I am twice in T2 and once in T1');
-- THE DIFF
-- All columns need to be named in the partition by clause, it is not possible to just say 'partition by *'
-- The column used in the order by clause does not matter in terms of functionality
(
select 'In T1 but not in T2' diff,s.* from (
select row_number() over (partition by col_num,col_date,col_varchar order by col_num) rn,t.* from t1 t
minus
select row_number() over (partition by col_num,col_date,col_varchar order by col_num) rn,t.* from t2 t
) s
) union all (
select 'In T2 but not in T1' diff,s.* from (
select row_number() over (partition by col_num,col_date,col_varchar order by col_num) rn,t.* from t2 t
minus
select row_number() over (partition by col_num,col_date,col_varchar order by col_num) rn,t.* from t1 t
) s
);