How to recode variables in table 1 using info from table 2 (in SAS)

陌路散爱 提交于 2019-12-02 10:08:42

A different option - PROC RANK. You could probably make it more 'automated' but it's pretty straightforward. Using PROC RANK you could also specify different ways of dealing with ties. Note that it would go from 0 to 9 rather than 1 to 10 but that's trivial to change.

/* Build WORK.TEST: 1000 rows of pseudo-random sample data.              */
/* Each value is a uniform(1) draw scaled by a per-column factor and     */
/* rounded to two decimals. The three draws per row stay in the order    */
/* a, b, c so the random stream is consumed exactly as before.           */
data test;
  drop row;
  do row = 1 to 1000;
    a = round(uniform(1) * 4,   0.01);
    b = round(uniform(1) * 10,  0.01);
    c = round(uniform(1) * 7.5, 0.01);
    output;
  end;
  /* No SET/INPUT statement, so the step body runs once and ends here. */
run;

/* Split a, b and c from WORK.TEST into 10 rank groups each.             */
/* The group numbers are written to rankA, rankB and rankC in WORK.WANT; */
/* as noted in the text above, they are numbered 0-9 rather than 1-10.   */
proc rank data=test
          groups=10
          out=want;
  var   a     b     c;
  ranks rankA rankB rankC;
run;

The following should work for you dynamically with no hard-coding -- I edited it to compact everything into a single macro. Essentially, it puts your desired variables into a list, builds a dataset from the percentile output, and then uses that dataset's contents to generate the if/else statements as long strings. These strings are then stored in a macro variable, which you reference in your final data step. Again, no hard-coding involved.

/*
 * stratify: recode every numeric variable of &library..&input into a
 * bin number (percentile/10) using generated if / else-if statements.
 *
 * Parameters:
 *   library= libref that holds the input dataset (e.g. work)
 *   input=   input dataset name (without libref)
 *   output=  name of the dataset to create
 *
 * Flow:
 *   1. Read the variable names from dictionary.columns.
 *   2. Call the external macro %percentiles with the comma-separated list.
 *      NOTE(review): %percentiles is not defined in this file. The code
 *      below expects it to leave a dataset PCTS that has the columns
 *      RECODE and PERCENTILE plus one column per variable, sorted by
 *      RECODE PERCENTILE -- confirm against its definition.
 *   3. Transpose PCTS to one row per variable/percentile and turn each row
 *      into one "if <var> <= <cut> then recode_<var> = <bin>;" statement.
 *   4. Concatenate the statements into a macro variable and run them in
 *      the final data step.
 *
 * Side effects: creates WORK datasets PCTS_LONG and STR, and prints the
 * first five observations of &output.
 */
%MACRO stratify(library=,input=,output=);
    %local varlist varlist_space data_step_list;

    /* One pass over dictionary.columns fills both lists.                */
    /* Only numeric columns are kept: the generated statements compare   */
    /* against numeric cut points. ORDER BY keeps both lists in dataset  */
    /* column order.                                                     */
    proc sql noprint;
        select NAME, NAME
        into :varlist       separated by ",",
             :varlist_space separated by " "
        from dictionary.columns
        where libname=upcase("&library.")
          and memname=upcase("&input.")
          and type="num"
        order by varnum;
    quit;

    /* Nothing selected means the dataset is missing or has no numeric   */
    /* columns; stop before generating invalid code.                     */
    %if %length(&varlist_space.) = 0 %then %do;
        %put ERROR: stratify found no numeric variables in &library..&input..;
        %return;
    %end;

    %percentiles(%bquote(&varlist.));

    /* Long format: one row per (variable, percentile); value in COL1. */
    proc transpose data = pcts out=pcts_long;
        by recode percentile;
        var &varlist_space.;
    run;

    /* Ascending percentile within variable = order of the if/else chain. */
    proc sort data = pcts_long;
        by _NAME_ percentile;
    run;

    /* Build one statement per row. $200 leaves room for two 32-character */
    /* names plus the literals, and BEST32. keeps the full precision of   */
    /* the cut point (BEST. alone is only 12 characters wide).            */
    data str;
        length STR $200 keyword $7;
        set pcts_long;
        by _NAME_;
        bin = percentile/10;
        if first._NAME_ then keyword = "if";
        else keyword = "else if";
        STR = catx(" ", keyword, _NAME_, "<=", put(COL1,best32.), "then",
                   catx("_","recode",_NAME_), "=",
                   cats(put(bin,best.),";"));
        drop keyword;
    run;

    /* The chain is order-sensitive, so request the order explicitly     */
    /* instead of relying on the physical row order of STR.              */
    proc sql noprint;
        select STR
        into :data_step_list separated by " "
        from STR
        order by _NAME_, percentile;
    quit;

    /* Run the generated statements; each one already ends with ";". */
    data &output.;
        set &library..&input.;
        &data_step_list.
    run;

    proc print data = &output.(obs=5);
    run;

%MEND stratify;

%stratify(library=work,input=test,output=final);

No need for all of that code generation. Just use an array. Basically, load the percentiles from the dataset generated by PROC UNIVARIATE into a two-dimensional array and then find the decile rank for each of your actual values.

/*
 * stratify: add a decile code (1-10) for each listed numeric variable.
 *
 * Parameters:
 *   varlist  space-separated list of numeric variables to recode
 *   in=      input dataset
 *   out=     output dataset; when omitted, WANT is used (the name the
 *            data step previously always wrote to)
 *   pcts=    dataset that receives the percentiles (default PCTS)
 *
 * Result: &out holds every row of &in plus one recode_<var> column per
 * variable. recode_<var> is the position of the first of the 10th, 20th,
 * ..., 100th percentiles that is >= the value; it is missing when the
 * value itself is missing.
 */
%macro stratify(varlist,in=,out=,pcts=pcts);
%local nvars pctls recodes pctvars i j v;

%let varlist=%sysfunc(compbl(&varlist));
%if %length(&varlist)=0 %then %do;
  %put ERROR: stratify needs at least one variable name in VARLIST.;
  %return;
%end;
%if %length(&out)=0 %then %let out=want;

%let nvars=%sysfunc(countw(&varlist,%str( )));

/* Build, per variable: its percentile prefix, its recode column, and   */
/* the ten percentile column names PROC UNIVARIATE creates for it       */
/* (prefix followed by the percentile point, e.g. pctl_a10 ... pctl_a100). */
%do i=1 %to &nvars;
  %let v=%scan(&varlist,&i,%str( ));
  %let pctls=&pctls pctl_&v;
  %let recodes=&recodes recode_&v;
  %do j=10 %to 100 %by 10;
    %let pctvars=&pctvars pctl_&v.&j;
  %end;
%end;

proc univariate data=&in noprint ;
  var &varlist;
  output out=&pcts pctlpre=&pctls
         pctlpts = 10 20 30 40 50 60 70 80 90 100
  ;
run;

data &out ;
  /* The single percentile row is read once and retained for every row. */
  if _n_=1 then set &pcts ;
  set &in;
  /* Percentile columns are bound by name as (variable, decile), so the */
  /* lookup does not depend on the column order of &pcts or on which    */
  /* numeric variables happen to be defined at this point.              */
  array _pcts (&nvars,10) &pctvars;
  array _in  (*) &varlist ;
  array _out (*) &recodes ;
  do _i=1 to dim(_in);
    if missing(_in(_i)) then _out(_i)=.;
    else do;
      /* Stop at 9: a value above the 90th percentile leaves _j at 10  */
      /* without ever evaluating subscript 11.                         */
      do _j=1 to 9 while(_in(_i) > _pcts(_i,_j));
      end;
      _out(_i)=_j;
    end;
  end;
  /* Drop exactly the helper columns, not every name sharing a prefix. */
  drop _i _j &pctvars;
run;
%mend stratify;

So if I use your generated sample here is what the log looks like with the MPRINT option turned on.

1093  %stratify(a b c,in=test,out=want);
MPRINT(STRATIFY):   proc univariate data=test noprint ;
MPRINT(STRATIFY):   var a b c;
MPRINT(STRATIFY):   output out=pcts pctlpre=pctl_a pctl_b pctl_c pctlpts = 10 20 30 40 50 
60 70 80 90 100 ;
MPRINT(STRATIFY):   run;

NOTE: The data set WORK.PCTS has 1 observations and 30 variables.
NOTE: PROCEDURE UNIVARIATE used (Total process time):
      real time           0.01 seconds
      cpu time            0.01 seconds


MPRINT(STRATIFY):   data want ;
MPRINT(STRATIFY):   if _n_=1 then set pcts ;
MPRINT(STRATIFY):   array _pcts (10,3) _numeric_;
MPRINT(STRATIFY):   set test;
MPRINT(STRATIFY):   array _in a b c ;
MPRINT(STRATIFY):   array out recode_a recode_b recode_c ;
MPRINT(STRATIFY):   do i=1 to dim(_in);
MPRINT(STRATIFY):   do j=1 to 10 while(_in(i) > _pcts(j,i));
MPRINT(STRATIFY):   end;
MPRINT(STRATIFY):   out(i)=j;
MPRINT(STRATIFY):   end;
MPRINT(STRATIFY):   drop i j pctl_a: pctl_b: pctl_c:;
MPRINT(STRATIFY):   run;

NOTE: There were 1 observations read from the data set WORK.PCTS.
NOTE: There were 1000 observations read from the data set WORK.TEST.
NOTE: The data set WORK.WANT has 1000 observations and 6 variables

And the first five observations are:

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!