If the output code below is not understood, there will be a post on report output using ODS coming shortly.
The most basic of all clinical programming is doing those tedious PROC FREQ and PROC MEANS runs. It is not the procedure itself that is difficult, but rather the formatting for output. This would involve transposing data, converting numeric data to character data, concatenating values and symbols for more stylish output, etc. I have developed a couple of simple SAS macros that can quickly produce summary values based on two different data types: categorical and numerical.
THE MACROS
Categorical Data
This macro is called CATSTATS ("Categorical Statistics"). Below is a look at the macro call. You can see that it takes in 9 parameters, 4 of which have default parameters. Default parameters mean that if you do not want to change the default value, you do not have to include it in the macro call.
%CATSTATS(dsin=, whr=%str(1=1), var=, dsout=, srt1=1, byvar=, lbl=%str(), pctdec=2, keepfmt=YES);
Parameter
|
Required?
|
Default Value
|
Description
|
dsin
|
Yes
|
Name of the input dataset
|
|
whr
|
No
|
1=1
|
Used in a where statement. Used to filter out specific data. Default keeps all observations.
|
var
|
Yes
|
Name of the variable to be summarized
|
|
dsout
|
Yes
|
Name of the dataset in which to store the output
|
|
srt1
|
No
|
1
|
Sorting variable. Number
that places the group of output in a proper sequence.
|
byvar
|
No
|
Name of the grouping variable. If this is left blank, one column named COL1
is created.
|
|
lbl
|
No
|
Character string for a label
|
|
pctdec
|
No
|
2
|
How many decimal places to report percents?
|
keepfmt
|
No
|
YES
|
Should the SAS formats attached to VAR be retained? Possible values: YES/NO
|
Numeric Data
This macro is called NUMSTATS ("Numceric Statistics"). This macro is much like CATSTATS with the addition of a few formatting variables.
%NUMSTATS(dsin=, whr=%str(1=1), var=, dsout=, srt1=1, byvar=, mnmxd=4.0, rndm=.1, rndsd=.01, rndmed=.01, lbl=%str());
Parameter
|
Required?
|
Default Value
|
Description
|
dsin
|
Yes
|
Name of the input dataset
|
|
whr
|
No
|
1=1
|
Used in a where statement. Used to filter out specific data. Default keeps all observations.
|
var
|
Yes
|
Name of the variable to be summarized
|
|
dsout
|
Yes
|
Name of the dataset in which to store the output
|
|
srt1
|
No
|
1
|
Sorting variable. Number
that places the group of output in a proper sequence.
|
byvar
|
No
|
Name of the grouping variable. If this is left blank, one column named COL1
is created.
|
|
mnmxd
|
No
|
4.0
|
Format for rounding the minimum and maximum. Default is to a whole integer.
|
rndm
|
No
|
.1
|
Format for rounding the mean. Default is to 1 decimal place.
|
rndsd
|
No
|
.01
|
Format for rounding the standard deviation. Default is to 2 decimal places.
|
rndmed
|
No
|
.01
|
Format for rounding the median. Default is to 2 decimal places.
|
lbl
|
No
|
Character string for a label
|
Let's take a loot at the actual macro code:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/*********************************/
/*** CATEGORICAL STATS MACRO ***//*********************************/
%MACRO
CATSTATS(dsin=, whr=%str(1=1),
var=, dsout=, srt1=1, byvar=, lbl=%str(),
pctdec=2, keepfmt=YES);
/*** Open the input dataset and get summary variable information ***/
/*** Open the input dataset and get summary variable information ***/
%let
dsid=%sysfunc(open(&dsin.,IS));
%let
varn=%sysfunc(varnum(&dsid.,&var.));
%let
varfmt=%sysfunc(varfmt(&dsid.,&varn.));
/*** Format data and store in a temporary table ***/
data temp1;
data temp1;
set &dsin.;
where &whr.;
%if
&byvar.= %then
%do;
byvar='col1';
%end;
%else
%do;
byvar=compress(&byvar.,'
.<(+&!$?*)>;^-=/,%|');
%end;
keep &var.
byvar;
run;
proc sort data=temp1;
by byvar;
run;
/*** Set up temporary local macro variables ***/
proc sql noprint;
proc sql noprint;
select count(distinct byvar) into
:ncol from temp1;
%let
ncol=%sysfunc(compress(&ncol.));
select distinct byvar into
:by1-:by&ncol. from temp1;
select count(byvar) into :n1-:n&ncol.
from temp1 group by byvar;
quit;
%do
i=1 %to &ncol.;
%if
%sysfunc(anydigit(&&by&i.))=1
%then %let by&i.=%sysfunc(cats(_,"&&by&i."));
%let
by&i.=%sysfunc(compress(&&by&i.,'"'));
%end;
/*** Get variable counts per category ***/
proc freq data=temp1 noprint;
proc freq data=temp1 noprint;
tables &var.*byvar
/ missing outpct out=cts (drop=PERCENT PCT_ROW);
run;
data cts;
set cts;
outcol=compress(put(COUNT,best.))
|| ' (' || compress(put(PCT_COL,9.&pctdec.))
|| '%)'; * Format the output variable;
run;
/*** Transpose count data for output ***/
/*** Transpose count data for output ***/
proc transpose data=cts out=&dsout.
(drop=_NAME_);
id byvar;
by &var.;
var outcol;
run;
/*** Clean up and store output data in the specified output table ***/
data &dsout.;
data &dsout.;
length label $60.;
set &dsout.;
%do
i=1 %to &ncol.;
if &&by&i.='
' then &&by&i.='0
(0.00%)';
%end;
%if
&keepfmt.=YES & %sysfunc(vartype(&dsid.,&varn.))=N
& &varfmt.^= %then
%do;
label=' ' || strip(put(&var.,&varfmt.));
%end;
%else
%if &keepfmt.=YES
& %sysfunc(vartype(&dsid.,&varn.))=C
& &varfmt.^= %then
%do;
label=' ' || strip(putc(&var.,"&varfmt."));
%end;
%else
%do;
label=' ' || strip(&var.);
%end;
srt1=&srt1.;
%if
%sysfunc(vartype(&dsid.,&varn.))=N
%then %do;
srt2=&var.;
%end;
%else
%do;
srt2=.;
%end;
keep label &var.
srt1 srt2
%do
i=1 %to &ncol.;
&&by&i.
%end;
;
run;
/*** If VAR is character (no format); find distinct values, alphabetize, and assign value for sorting ***/
%if %sysfunc(vartype(&dsid.,&varn.))=C %then %do;
%if %sysfunc(vartype(&dsid.,&varn.))=C %then %do;
proc sql;
create table temp2 as
select distinct &var.
from &dsin.
order by &var.;
quit;
data temp2;
set temp2;
srt2=_n_;
run;
data &dsout.;
merge &dsout.
(drop=srt2) temp2;
by &var.;
run;
%end;
/*** Combine output data with labels and headers ***/
data header;
data header;
length label $60.;
label="&lbl.";
srt1=&srt1.; srt2=.;
output;
run;
data &dsout.;
set header &dsout.;
if substr(label,1,4)
in (' .',' ') then do;
label=' Missing';
srt2=9999;
end;
drop &var.;
run;
proc sort data=&dsout.;
by srt1 srt2;
run;
/*** Clean up the database ***/
proc datasets lib=work;
proc datasets lib=work;
delete temp1 temp2 cts header;
quit;
%let
rc=%sysfunc(close(&dsid.));
%MEND
CATSTATS;
/*****************************/
/*** SUMMARY STATS MACRO ***/
/*** SUMMARY STATS MACRO ***/
/*****************************/
%MACRO NUMSTATS(dsin=, whr=%str(1=1), var=, dsout=, srt1=1, byvar=, mnmxd=4.0, rndm=.1, rndsd=.01, rndmed=.01, lbl=%str());
%MACRO NUMSTATS(dsin=, whr=%str(1=1), var=, dsout=, srt1=1, byvar=, mnmxd=4.0, rndm=.1, rndsd=.01, rndmed=.01, lbl=%str());
/*** Format data and store in a temporary table ***/
data temp;
data temp;
set &dsin.;
where &whr.;
%if
&byvar.= %then
%do;
byvar='col1';
%end;
%if
&byvar.^= %then
%do;
byvar=compress(&byvar.,'
.<(+&!$?*);^-/,%|');
%end;
keep &var.
byvar;
run;
proc sort data=temp;
by byvar;
run;
/*** Set up temporary local macro variables ***/
proc sql noprint;
proc sql noprint;
select count(distinct byvar) into
:ncol from temp;
%let
ncol=%sysfunc(compress(&ncol.));
select distinct byvar into
:by1-:by&ncol. from temp;
select count(byvar) into :n1-:n&ncol.
from temp group by byvar;
quit;
/*** Set up grouping macro variables used in loops ***/
/*** Set up grouping macro variables used in loops ***/
%do
i=1 %to &ncol.;
%if
%sysfunc(anydigit(&&by&i.))=1
%then %let by&i.=%sysfunc(cats(_,&&by&i.));
%end;
/*** Get summary data ***/
proc univariate data=temp noprint;
proc univariate data=temp noprint;
var &var.;
output out=&dsout.
n=n mean=mean median=med std=sd min=min max=max nmiss=nmiss;
by byvar;
run;
/*** Format summary data and store in specified output table ***/
/*** Format summary data and store in specified output table ***/
data &dsout.;
set &dsout.;
meansd=compress(put(round(mean,&rndm.),best.))
|| ' (' || compress(put(round(sd,&rndsd.),best.))
|| ')';
medminmax=compress(put(round(med,&rndmed.),best.))
|| ' (' || compress(put(min,&mnmxd.))
|| ' - ' || compress(put(max,&mnmxd.))
|| ')';
run;
/*** Transpose summary data for output ***/
/*** Transpose summary data for output ***/
proc transpose data=&dsout.
out=&dsout. (drop=_label_);
id byvar;
var n meansd medminmax nmiss;
run;
/*** Add higher level formatting to output data ***/
data &dsout.;
data &dsout.;
length label $100.
col1-col&ncol. $60.;
set &dsout.;
if _NAME_='n'
then label=' No. of
Subjects';
else if _NAME_='meansd'
then label=' Mean
(SD)';
else if _NAME_='medminmax'
then label=' Median
(Min-Max)';
else if _NAME_='nmiss'
then label='
Missing';
if _NAME_='n'
then srt2=1;
else if _NAME_='meansd'
then srt2=2;
else if _NAME_='medminmax'
then srt2=3;
else if _NAME_='nmiss'
then srt2=4;
srt1=&srt1.;
%do
i=1 %to &ncol.;
col&i.=strip(&&by&i.);
%end;
keep label srt1 srt2 col1-col&ncol.;
run;
/*** Combine output data with labels and headers ***/
data header;
data header;
length label $60.;
label="&lbl.";
srt1=&srt1.; srt2=.;
output;
run;
data &dsout.;
set header &dsout.;
%do
i=1 %to &ncol.;
rename col&i.=&&by&i.;
%end;
run;
/*** Clean up the database ***/
proc datasets lib=work;
proc datasets lib=work;
delete temp header;
quit;
%MEND NUMSTATS;
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
EXAMPLE
First, let's look at the data. I am going to use the built-in dataset sashelp.class. This is a dataset that contains a student name (NAME) along with each student's SEX, AGE, HEIGHT, and WEIGHT.
Here is a look at the data:
The macros that I have written can do one of two things. We can either summarize a variable as is, or we can summarize a variable while grouping by another variable. First, we will summarize each variable in the dataset excluding NAME.
Summarize without Grouping
/* Call the summary macros */
%inc 'D:\Reusable Macros\Summary Statistic Macros.sas'; * I like to store big macros like these in separate files and call them. It makes the program look cleaner.;
%inc 'D:\Reusable Macros\Summary Statistic Macros.sas'; * I like to store big macros like these in separate files and call them. It makes the program look cleaner.;
%CATSTATS(dsin=sashelp.class, var=sex, dsout=sex, srt1=1, lbl=%str(Gender));
%NUMSTATS(dsin=sashelp.class,
var=age, dsout=age, srt1=2, lbl=%str(Age
(years)));
%NUMSTATS(dsin=sashelp.class, var=height, dsout=height, srt1=3, lbl=%str(Height (in)));
%NUMSTATS(dsin=sashelp.class, var=weight, dsout=weight, srt1=4, lbl=%str(Weight (lbs)));
data
demog;
set
sex age height weight;
run;
/* Insert blank
lines if wanted */
proc
sql;
insert
into demog (srt1)
values
(1.5)
values
(2.5)
values
(3.5);
quit;
/* sort the data
so the inserted blank lines are in the correct place */
proc
sort data=demog;
by
srt1 srt2;
run;
/* output the
table */
options
formchar="|____|+|___+=|_/\<>*"
pageno=1 nonumber nodate
orientation=landscape center;
ods
listing close;
ods
rtf file="T:\Personal\Class
Summary.rtf" style=journal;
proc
report data=demog
nowindows split='^'
style(column)={asis=on}
style(report)={font=('Arial',9.5pt,bold
italic) pretext="Class Demographics \line\ (sashelp.class
dataset)"};
column
label col1;
define
label / display left
' ' style=[CellWidth=20%];
define
col1 / display center
'Statistics' style=[CellWidth=15%];
run;
ods
rtf close;
ods
listing;
title;
footnote;
Here is what the RTF output in journal style will look like:
Class Demographics
(sashelp.class dataset)
Statistics
|
|
Gender
|
|
F
|
9 (47.37%)
|
M
|
10 (52.63%)
|
Age (years)
|
|
No. of Subjects
|
19
|
Mean (SD)
|
13.3 (1.49)
|
Median (Min-Max)
|
13 (11 - 16)
|
Missing
|
0
|
Height (in)
|
|
No. of Subjects
|
19
|
Mean (SD)
|
62.3 (5.13)
|
Median (Min-Max)
|
62.8 (51 - 72)
|
Missing
|
0
|
Weight (lbs)
|
|
No. of Subjects
|
19
|
Mean (SD)
|
100 (22.77)
|
Median (Min-Max)
|
99.5 (51 - 150)
|
Missing
|
0
|
Summarize with Grouping
Now, let's take a look at removing SEX from the variable lists and instead using it as a grouping variable:
/* Call the summary macros */
%NUMSTATS(dsin=sashelp.class, var=age, dsout=age, srt1=1, byvar=sex, lbl=%str(Age (years)));
%NUMSTATS(dsin=sashelp.class, var=height, dsout=height, srt1=2, byvar=sex, lbl=%str(Height (in)));
%NUMSTATS(dsin=sashelp.class, var=weight, dsout=weight, srt1=3, byvar=sex, lbl=%str(Weight (lbs)));
data
demog;
set
age height weight;
run;
/* Insert blank
lines if wanted */
proc
sql;
insert
into demog (srt1)
values
(1.5)
values
(2.5);
quit;
/* sort the data
so the inserted blank lines are in the correct place */
proc
sort data=demog;
by
srt1 srt2;
run;
/* output the
table */
options
formchar="|____|+|___+=|_/\<>*"
pageno=1 nonumber nodate
orientation=landscape center;
ods
listing close;
ods
rtf file="T:\Personal\Class
Summary by Sex.rtf" style=journal;
proc
report data=demog
nowindows split='^'
style(column)={asis=on}
style(report)={font=('Arial',9.5pt,bold
italic) pretext="Class Demographics \line\ (sashelp.class
dataset)"};
column
label m f;
define
label / display left
' ' style=[CellWidth=20%];
define
m / display center 'Male'
style=[CellWidth=15%];
define
f / display center 'Female'
style=[CellWidth=15%];
run;
ods
rtf close;
ods
listing;
title;
footnote;
Here is what the RTF output in journal style will look like:
Class Demographics
(sashelp.class dataset)
Male
|
Female
|
|
Age (years)
|
||
No. of Subjects
|
10
|
9
|
Mean (SD)
|
13.4 (1.65)
|
13.2 (1.39)
|
Median (Min-Max)
|
13.5 (11 - 16)
|
13 (11 - 15)
|
Missing
|
0
|
0
|
Height (in)
|
||
No. of Subjects
|
10
|
9
|
Mean (SD)
|
63.9 (4.94)
|
60.6 (5.02)
|
Median (Min-Max)
|
64.15 (57 - 72)
|
62.5 (51 - 67)
|
Missing
|
0
|
0
|
Weight (lbs)
|
||
No. of Subjects
|
10
|
9
|
Mean (SD)
|
109 (22.73)
|
90.1 (19.38)
|
Median (Min-Max)
|
107.25 (83 - 150)
|
90 (51 - 113)
|
Missing
|
0
|
0
|
No comments:
Post a Comment