source: main/adopters/md/trunk/src/main/backend_qModules/brfss23/SurveyCrudeRateMultiYR.sas @ 21019

Last change on this file since 21019 was 21019, checked in by Paul Leo, 2 months ago

Maryland BRFSS SAS back-end programs and .cfg for multi-year, multi-split samples.
Note - SurveyCrudeRateMultiYR.sas should work for multi-year, multi-split, and multi-year, one split per year.
Still testing to see if it will work with all queries.

File size: 12.1 KB
Line 
1*******************************************************************************************************;
2* Program filename: SurveyCrudeRate.sas                                                                ;
3*                                                                                                      ;
4* This file is "included" in Multi Year BRFSS indicator .def file for crude rates.                     ;
5*                                                                                                      ;
6* This file weights split samples used in different years. Currently Tested with 1 split per year      ;
7* The macro variable (varname) is set in .def file. The %spvar1% variable is referenced in the .def    ;
8* file. The value for %spvar1% is set in the IBIS Module.xml file. It is also a filter-exclude         ;
9* variable, which allows keeps the whole survey dataset in the analytic file, and allows the user to   ;
10* select whether to calculate the at-risk percentage                                                   ;
11*                                                                                                      ;
12* Missing values have been set to '.' for all indicator and dimension variables.                       ;
13*                                                                                                      ;
14* Notes for ibisq survey program.                                                                      ;
15*                                                                                                      ;
16* surveyvar1 allow the variance estimate to be calculated with the entire dataset, regardless of other ;
17* dataset filters. This is appropriate according to Michael Friedrichs in Utah.                        ;
18*                                                                                                      ;
19* ageadjfilter has something to do with maintaining the entire population dataset in the results for   ;
20* the state overall.                                                                                   ;
21*******************************************************************************************************;
22
23OPTIONS MPRINT MLOGIC SYMBOLGEN SPOOL SOURCE2;
24options sumsize=138m nocenter pagesize=4000 linesize=MAX;
25
26/*
27        *********************For now only in 1 split;
28        ****** But I am pretty sure that in some years there are problably 2 splits for that indicator in one year;
29*/
30
31title2 'PROC SURVEYMEANS OUTPUT';
32
33******************************************************************************;
34%macro crudrt(varname);
35********************************************************************;
36****** tmp: Adjust weights based on different split samples ********;
37********************************************************************;
38* Need to know correct weight var for each indicator and year.      ;
39* Set a variable called CorrectWeight = value of correct weight var ;
40********************************************************************;
41
42
43
44        data tmp;
45                set tmp;
46                if &varname. > . then do;  * selects records where that variable was asked;
47                        if _LCPWTV1 > . then do; CorrectWeight = _LCPWTV1; Split=1; end; * identifies what split it is in;
48                        else if _LCPWTV2 > . then do; CorrectWeight = _LCPWTV2; Split=2; end;
49                        else if _LCPWTV3 > .  then do; CorrectWeight = _LCPWTV3; Split=3; end;
50                end;
51        run;
52
53*** Grabs frequency by year, to set proprotion;
54   proc freq data=tmp noprint;
55     tables year*Split  / out=wgt;
56     run;
57     proc print data=wgt noobs; title1 '-------------'; title2 'wgt: first output frequency table'; run;
58   
59   data wgt;
60     set wgt;
61     keep year wpercent;
62     wpercent=percent/100;
63     run;
64     proc print data=wgt noobs; title1 '-------------'; title2 'wgt: final year percentages'; run;
65     
66   proc sort data=tmp; by year; run;
67   data tmp;
68     merge wgt tmp;
69     by year;
70     FinalWeight=CorrectWeight*wpercent;
71     run;
72   
73        proc print data=tmp (obs=20); where split > .; title2 'tmp: for submission to proc surveymeans'; run;
74
75***********************************************************************;
76***** Proc SurveyMeans ************************************************;
77***********************************************************************;
78
79?cross1? proc surveymeans data=tmp nomcar nobs sum mean stderr CLM CV;
80?cross1?  var &varname. ;
81?cross1?   class &varname. ;
82?cross1?   strata _STSTR;
83?cross1?   domain
84?cross1? %cross1%
85?cross1? %surveyvar1%
86?cross1? ;
87?cross1?   ods output statistics=stats
88?cross1?        summary=sum
89?cross1?        domain=domain ;
90?cross1?   weight FinalWeight ;
91?cross1?   proc print data=stats; title1 ' '; title2 'stats';
92?cross1?   proc print data=domain; title2 'domain';
93?cross1? run;
94
95?cross1? ?cross2? proc surveymeans data=tmp nomcar nobs sum mean stderr CLM CV;
96?cross1? ?cross2?   var &varname. ;
97?cross1? ?cross2?   class &varname. ;
98?cross1? ?cross2?   strata _STSTR;
99?cross1? ?cross2?  domain
100?cross1? ?cross2? %cross1%*%cross2%
101?cross1? ?cross2? %surveyvar1%
102?cross1? ?cross2? ;
103?cross1? ?cross2?   ods output statistics=stats
104?cross1? ?cross2?       summary=sum
105?cross1? ?cross2?       domain=domain ;
106?cross1? ?cross2?   weight FinalWeight ;
107?cross1? ?cross2?   proc print data=stats; title1 ' '; title2 'stats';
108?cross1? ?cross2?   proc print data=domain; title2 'domain';
109?cross1? ?cross2? run;
110
111*****************************************************************;
112********* tmp1: Grab stats for dimension totals *****************;
113*****************************************************************;
114data tmp1;
115  set stats;
116  wgtN=ROUND(sum,1);
117  &varname.=input(VarLevel, 8.);
118  %cross1% = -1;
119  ?cross2? %cross2% = -1;
120  *drop VarLevel VarName VarLabel wgtN StdDev;
121  proc print;
122        title1 '===================================================================';
123        title2 'tot: Grab percentage for MD overall';
124run;
125
126******************************************************************;
127********* tmp2: Grab %, SE, codes for indicator variable *********;
128******************************************************************;
129data tmp2;
130  set domain;
131    wgtN=ROUND(sum,1);
132  &varname.=input(VarLevel, 8.);
133  *drop LowerCLMean UpperCLMean VarLevel DomainLabel VarLabel VarName wgtN StdDev;
134  %if '%cross1%' != 'year' %then %do;
135        if year_sflag<=0 then delete;
136  %end;
137  proc print; title2 'tmp2: stats for indicator variable';
138run;
139
140**********************************************************;
141********** tmp3: Grab sample size (denominator) **********;
142****** N Number of respondents in sample *****************;
143*****  Denom denominator in each cross by group **********;
144**********************************************************;
145proc summary data=tmp2;
146  var N;
147  class %cross1%
148  ?cross2? %cross2%
149  &varname.
150  ;
151  output out=sampleD sum(N)=Denom;
152  proc sort data=SampleD; by %cross1%
153  ?cross2? %cross2%
154  ;
155  proc print; title2 'SampleD - (Unweighted number of folks who answered either y or n) Number of Records for cell suppression';
156  run;
157data tmp3;
158  set SampleD;
159  if &varname. =.;
160  *Denom=N;
161  drop  _TYPE_ _FREQ_ N ;
162  if %cross1% = . then %cross1% = -1;
163  ?cross2? if %cross2% = . then %cross2% = -1;
164  proc print; title2 'tmp3, Sample Size (denominator)';
165run;
166/*   MD will not use Asymmetric Confidence Intervals
167        If they want to in future, need to align the tmp datasets
168*****************************************************************;
169********** tmp4: Calculate asymmetric confidence ints ***********;
170*****************************************************************;
171data tmp2;
172        set tmp2;
173run;
174data tmp4;
175  set tmp2 tmp1;
176  f=log(mean)-log(1-mean); 
177  s=stderr/(mean*(1-mean));
178  Lf=f-1.96*s; 
179  Uf=f+1.96*s; 
180  percent=mean*100;
181  lower=(exp(Lf)/(1+exp(Lf)))*100;   
182  upper=(exp(Uf)/(1+exp(Uf)))*100;
183  drop f s Lf Uf VarName;
184  proc sort data=tmp4; by %cross1%
185        ?cross2? %cross2%
186        ;
187  proc print; title2 'tmp4: Calculate asymmetric CIs';
188run;
189*/
190data tmp4;
191        set tmp2 tmp1; /* totals onto the domain (cross1 and cross2 */
192        percent=mean*100;
193        lower=LowerCLMean*100;
194        upper=UpperCLMean*100;
195         proc sort data=tmp4; by %cross1%
196        ?cross2? %cross2%
197        ;
198        proc print; title2 'tmp4:merged tmp2 (from proc summary) and tmp1 (stats), and set percent=mean';
199run;
200******************************************************************;
201********* tmp5: Add Denom field to every record ******************;
202********* this is only for cell suppression           ************;
203******************************************************************;
204
205data tmp5; *Add Denom field to each record;
206  merge tmp3 tmp4 ;
207  by %cross1%
208  ?cross2? %cross2%
209  ;
210  proc print; title2 'tmp5: Add denom field to every record';
211run;
212
213*****************************************************************;
214********** tmp5alt: Add wgtdenom field to every record ************;
215**********wgtdenom is the weighted denominator
216*********Used by MD and HI, they want pop num, not Denom*******;
217***********  **************************;
218*****************************************************************;
219proc summary data=tmp5;
220  var wgtN;
221  class %cross1%
222  ?cross2? %cross2%
223  &varname.
224  ;
225  output out=wgtNdat sum(wgtN)=wgtdenom;
226  proc sort data=wgtNdat; by %cross1%
227  ?cross2? %cross2%
228  ;
229  proc print; title2 'wgtNdat - estimated population category in that category or response';
230  run;
231  data wgtdenomdat;
232  set wgtNdat;
233  if &varname. =.;
234  if %cross1% = . then delete;
235        ?cross2? if %cross2% = . then delete;
236  proc print; title2 'wgtdenomdat: Total rows of weighted N - estimated number in that category or response';
237        run;
238       
239  proc sort data=tmp5;
240        by %cross1%
241        ?cross2? %cross2%
242        ;
243        run;
244  proc sort data=wgtdenomdat;
245        by %cross1%
246        ?cross2? %cross2%
247        ;
248        run;
249
250data tmp6; *Add wgtdenom field to each record;
251  merge wgtdenomdat tmp5;
252  by %cross1%
253  ?cross2? %cross2%
254  ;
255  drop _TYPE_ _FREQ_ ;
256  proc print; title2 'tmp6: Add weighted sample (wgtdenom) field to every record';
257run;
258
259************************************************************;
260********** tmp6: Add RSE, redflag to dataset, clean up *****;
261************************************************************;
262* redflag is the statistical stability indicator, based on  ;
263* the relative standard error (RSE, or coefficient of       ;
264* variation.                                                ;
265************************************************************;
266
267data tmp7;
268  set tmp6;
269/* USING CV instead
270  if 0<mean<.50 then RSE=(StdErr/mean);
271  if .50<=mean<1 then RSE=(StdErr/(1-mean));
272 */
273  redflag=put('Stable', $14.);
274  if CV>.3 then redflag=put('Unstable', $14.);
275  if Denom in (0 1) then redflag=put('No Variance', $14.);
276
277
278
279  if (%cross1%=-1) then %cross1%=.;
280  ?cross2? if (%cross2%=-1) then %cross2%=.;
281  popnum=wgtdenom;
282  *drop RSE Mean StdErr Denom;
283
284  proc sort data=tmp7; by %cross1%
285  ?cross2? %cross2%
286  &varname.
287  ;
288  run;
289
290proc print data=tmp7; title2 'tmp7: Add redflag to dataset, clean up';
291run;
292
293************************************************************;
294*********** tmp: Convert values for cell suppression *******;
295************************************************************;
296data tmp;
297  set tmp7;
298  **** spvar2 ************************************************************;
299  * spvar2 is the indicator DIMENSION/VALUE passed in by the URL.         ;
300  * I have included "." in the if statement because ZWs total row cell    ;
301  * suppression algorithm was getting confused about where the total rows ;
302  * were and suppressing too many rows. ibisph-view will ignore all but   ;
303  * the grand total row. But the others need to stay in.                  ;
304  ************************************************************************;
305  if &varname in (%spvar2% .);
306  if (0<Denom<30)  OR  (CV >.3) then do;        /* MD cell supression Rule  Denom (number of folks who answered either y or n)*/
307        wgtN  = .A;
308        percent = .A;
309        lower = .A;
310        upper = .A;
311        popnum = .A;
312        redflag=put('Not Reportable', $14.);
313  end;
314  proc print data=tmp; title2 'final tmp: Convert values for cell suppression';
315run;
316
317%mend;
318
Note: See TracBrowser for help on using the repository browser.