I need to join table A and table B to create table C.
Table A and Table B store status flags for the IDs. The status flags (A_Flag and B_Flag) can change from time to time, so each row records the date range (Start to End) over which a flag value applied.
One possible SAS solution is to perform a partial join and then create the necessary additional rows in a DATA step. This should work assuming tableA has all possible records; if that is not the case (that is, if tableB can start before tableA), some additional logic may be needed to handle that possibility (for example, a check such as `if first.id and start gt b_start`). Additional logic may also be needed for situations that do not occur in the example data - I don't have a lot of time this morning and did not debug this beyond the example data cases, but the concept should be evident.
/* Sample input: one row per ID and date range, carrying the A flag for that range. */
data tableA;
    /* Dates are read as dd/mm/yyyy and displayed as ddMONyyyy. */
    attrib start informat=DDMMYY10. format=DATE9.
           end   informat=DDMMYY10. format=DATE9.;
    input ID Start End A_Flag;
    datalines;
1 01/01/2008 23/03/2008 1
1 23/03/2008 15/06/2008 0
1 15/06/2008 18/08/2008 1
;
run;
/* Sample input: one row per ID and date range, carrying the B flag for that range. */
data tableB;
    /* Dates are read as dd/mm/yyyy and displayed as ddMONyyyy. */
    attrib start informat=DDMMYY10. format=DATE9.
           end   informat=DDMMYY10. format=DATE9.;
    input ID Start End B_Flag;
    datalines;
1 19/01/2008 17/02/2008 1
1 17/02/2008 15/06/2008 0
1 15/06/2008 18/08/2008 1
;
run;
/* Build c_temp: one row per pair of overlapping tableA / tableB intervals
   that share an ID, sorted for BY-group processing on id and start. */
proc sql;
    create table c_temp as
    /* Explicit column list: selecting * from both sides would bring in id
       twice and make SAS warn about a duplicate variable. */
    select
        A.start,
        A.end,
        A.id,
        A.a_flag,
        B.start as b_start,
        B.end as b_end,
        B.b_flag
    from tableA as A
    /* Inner join: a tableA interval with no overlapping tableB interval is
       excluded. The earlier left join behaved the same way, because its
       where clause on the tableB columns discarded unmatched rows. */
    inner join tableB as B
        on A.id = B.id
        /* Half-open overlap test. Unlike a test on the tableB end points
           alone, it also matches a tableB interval that starts before and
           ends after the tableA interval. */
        and A.start lt B.end
        and B.start lt A.end
    order by A.id, A.start, B.start;
quit;
/* Build tableC: split every tableA interval in c_temp into consecutive
   sub-intervals (start_fin to end_fin), each carrying the A flag and the
   B flag that apply over that span.
   Input: c_temp, one row per overlapping (tableA, tableB) interval pair,
   sorted by id, start, b_start. */
data tableC;
    set c_temp;
    by id start;
    /* b_flag_ret: B flag reported for spans that no tableB interval covers
                   (reset to 0 for every id and never changed afterwards).
       cursor:     end of the last span already written for the current
                   tableA interval; helper only, not kept in tableC. */
    retain b_flag_ret cursor;
    format start_fin end_fin DATE9.;
    drop cursor;

    if first.id then b_flag_ret = 0;
    if first.start then cursor = start;

    a_flag_fin = a_flag;

    /* Uncovered span in front of this B interval: either the leading gap of
       the A interval or a hole between two consecutive B intervals. */
    if cursor lt b_start then do;
        start_fin = cursor;
        end_fin = min(b_start, end);
        b_flag_fin = b_flag_ret;
        output;
        cursor = end_fin;
    end;

    /* Part of the A interval that this B interval covers, clipped to the
       A interval and to what has already been written. */
    start_fin = max(cursor, b_start);
    end_fin = min(b_end, end);
    if start_fin lt end_fin then do;
        b_flag_fin = b_flag;
        output;
        cursor = end_fin;
    end;

    /* Uncovered tail: the last B interval stops before the A interval ends. */
    if last.start and cursor lt end then do;
        start_fin = cursor;
        end_fin = end;
        b_flag_fin = b_flag_ret;
        output;
    end;
run;