How to summarize all possible combinations of variables?

后端 未结 3 637
北恋
北恋 2021-02-11 11:44

I am trying to summarize counts for all possible combinations of variables. Here is some example data:

3条回答
  •  梦如初夏
    2021-02-11 12:29

    Poshan:

    As Robert stated, SUMMARY can be used to count combinations, and a second SUMMARY can then count the computed types. One difficulty is ignoring the combinations that involve a zero value; if the zeros can be converted to missing values, the processing is much cleaner. Presuming the zeros have been converted to missing, this code counts the distinct combinations:

    /* Pass 1: summarize HAVE by the class variables v2-v4 and s1.          */
    /* No NWAY option is given, so the output holds rows for every subset   */
    /* of the class variables; _TYPE_ identifies which subset a row is for. */
    /* Rows with a missing class value are left out (no MISSING option),    */
    /* which is why zeros are presumed to have been converted to missing.   */
    proc summary noprint data=have;
      class v2-v4 s1;
      output out=counts_eachCombo;
    run;

    /* Pass 2: count the pass-1 rows per subset. _TYPE_ is renamed on input */
    /* so it can serve as a class variable; the RENAME= pair is written in  */
    /* parentheses, which is the documented form of the data set option.    */
    /* _FREQ_ in the result is the number of distinct value combinations    */
    /* found for each combo_type.                                           */
    proc summary noprint data=counts_eachCombo(rename=(_type_=combo_type));
      class combo_type;
      output out=counts_eachClassType;
    run;
    

    You can see how the use of a CLASS variable in a combination determines the _TYPE_ value, and the class variables can be of mixed type (numeric, character).

    A different 'home-grown' approach that does not use SUMMARY uses a DATA step with LEXCOMB to compute each combination, and PROC SQL with INTO ... SEPARATED BY to generate a SQL statement that counts the distinct value combinations for each.

    Note: The following code contains macro varListEval for resolving a SAS variable list to individual variable names.

    %macro makeHave(n=,m=,maxval=&m*4,prob0=0.25);
      /* Build the demo data set HAVE.                                     */
      /*   n=      number of rows (id runs from 1 to n)                    */
      /*   m=      number of numeric variables v1-v<m>                     */
      /*   maxval= largest non-zero value a v variable can take;           */
      /*           the default text resolves to 4 times m                  */
      /*   prob0=  probability that a v variable is set to 0               */
      /* Each row also gets a one-character variable s1 in the range A-E.  */

      data have;
        do id = 1 to &n;
          array v {*} v1-v&m;
          do _idx = 1 to dim(v);
            /* first draw decides zero vs non-zero, second draw picks the value */
            if ranuni(123) >= &prob0 then v[_idx] = ceil(&maxval*ranuni(123));
            else v[_idx] = 0;
          end;
          s1 = byte(65+5*ranuni(123));
          output;
        end;
        drop _idx;  /* loop index is scaffolding only, keep it out of HAVE */
      run;

    %mend makeHave;

    /* 100 rows, v1-v5 with values 0..15 */
    %makeHave(n=100, m=5, maxval=15)
    
    %macro varListEval (data=, var=);
      /* Function-style macro: resolve a SAS variable list to individual    */
      /* variable names.                                                    */
      /*   data= data set whose variables are examined                      */
      /*   var=  variable list, passed as-is to a KEEP= data set option     */
      /* Emits each selected name as a double-quoted token ("NAME"), in the */
      /* order the variables occur in the data set. Emits nothing, and      */
      /* writes the system message to the log, when either open fails.      */
      %local dsid dsid2 i name num;
      %let dsid = %sysfunc(open(&data));
      %if &dsid %then %do;
        /* a second handle restricted by KEEP= shows which variables the list selects */
        %let dsid2 = %sysfunc(open(&data(keep=&var)));
        %if &dsid2 %then %do;
          %do i = 1 %to %sysfunc(attrn(&dsid,nvars));
            %let name = %sysfunc(varname(&dsid,&i));
            %let num = %sysfunc(varnum(&dsid2,&name));
            %if &num %then "&NAME";
          %end;
          %let dsid2 = %sysfunc(close(&dsid2));
        %end;
        %else
          %put %sysfunc(sysmsg());
        %let dsid = %sysfunc(close(&dsid));
      %end;
      %else
        %put %sysfunc(sysmsg());
    %mend;
    
    %macro combosUCounts(data=, var=, out=comboCounts);
      /* For every non-empty combination of the variables selected by var=,   */
      /* count the distinct value tuples in data=, looking only at rows where */
      /* each variable of the combination is non-null and non-zero (numeric)  */
      /* or non-null and non-blank (character).                               */
      /*   data= input data set                                               */
      /*   var=  SAS variable list, resolved through the varListEval macro    */
      /*   out=  result table (default comboCounts) with columns              */
      /*         combo  - comma-separated variable names of the combination   */
      /*         uCount - number of distinct value tuples                     */
      /* Also creates the data set COMBOS holding one row per combination.    */
      %local vars n union;
      %let vars = %varListEval(data=&data, var=&var);

      /* nothing resolved (bad data set or list): stop before generating code */
      %if %length(&vars) = 0 %then %do;
        %put ERROR: combosUCounts - no variables resolved from var=&var in data=&data;
        %return;
      %end;

      /* names arrive as "a" "b" "c": the number of names is separators + 1 */
      %let n = %eval(1 + %sysfunc(count(&vars,%str(" "))));

      /* compute combination selectors and criteria */
      data combos;
        array _names (&n) $32 (&vars);
        array _combos (&n) $32;
        array _comboCriterias (&n) $200;

        length _selector $32000;
        length _criteria $32000;

        if 0 then set &data; /* prep PDV so vtypex can see the variables */

        do _k = 1 to &n;
          do _j = 1 to comb(&n,_k);
            /* LEXCOMB rearranges _names; the first _k elements are the combination */
            _rc = lexcomb(_j,_k, of _names[*]);
            do _p = 1 to _k;
              _combos(_p) = _names(_p);
              if vtypex(_names(_p)) = 'C' 
                then _comboCriterias(_p) = trim(_names(_p)) || " is not null and " || trim(_names(_p)) || " ne ''";
                else _comboCriterias(_p) = trim(_names(_p)) || " is not null and " || trim(_names(_p)) || " ne 0";
            end;
            /* slots past _k are still blank because _k only grows; catx skips blanks */
            _selector = catx(",", of _combos:);
            _criteria = catx(" and ", of _comboCriterias:);
            output;
          end;
        end;

        stop;
      run;

      proc sql noprint;
        /* generate one SELECT per combination, joined by UNION into &union */
        select "select "
        || quote(trim(_selector))
        || " as combo" 
        || ", "
        || "count(*) as uCount from (select distinct "
        || trim(_selector)
        || " from &data where "
        || trim(_criteria)
        || ")"
        into :union separated by " UNION "
        from combos
        ;

        /* perform the generated SQL statement */
        create table &out as
        &union;

        /* %put union=%superq(union); */
      quit;
    %mend;
    
    /* show macro-generated SAS code in the log, without symbol resolution notes */
    options nosymbolgen mprint;

    /* numeric variables only */
    %combosUCounts(data=have, var=v2-v4);

    /* numeric and character variables mixed; this run replaces the COMBOS */
    /* and COMBOCOUNTS tables written by the call above                    */
    %combosUCounts(data=have, var=v2-v4 s1);

    %put NOTE: Done;

    /* Disabled debugging aid: writes the resolved variable names to the log.
    data _null_;
    put %varListEval(data=have, var=v2-v4) ;
    run;
    */
    

提交回复
热议问题