1 | # Generic average .def file |
---|
2 | #Uses the IBIS-Q data_frame for cross1 and cross2 |
---|
3 | # Uses %spvar1% in place of name of continuous variable |
---|
4 | # spvar1 is set in Module.xml file as a configuration parameter value |
---|
5 | # !! Also be sure to set the data_where param so that missing-value codes (99, 999) are excluded from the average |
---|
6 | #MT_ = Montana cell suppression version |
---|
7 | #Applies MT cell suppression rules |
---|
8 | # |
---|
9 | f type special |
---|
10 | ######################################### |
---|
11 | --------BoNdArY-------- |
---|
12 | 1 script |
---|
13 | OPTIONS MPRINT MLOGIC SYMBOLGEN NONUMBER NODATE PAGESIZE=4000 LINESIZE=200; |
---|
14 | OPTION SPOOL; |
---|
15 | |
---|
16 | ************************** 1. TMP ******************************; |
---|
17 | * The dataset 'tmp' is the numerator dataset that has been read ; |
---|
18 | * in already by ibis-q. Any filters have already been applied. ; |
---|
19 | * The proc summary computes mean, n, sum and stderr of the continuous variable by cross1 and cross2. ; |
---|
20 | * The variable "x" must be in the dataset, it is set equal to 1.; |
---|
21 | ****************************************************************; |
---|
22 | proc summary data=tmp; |
---|
23 | var %spvar1%; /* continuous variable being averaged, substituted from the module configuration */ |
---|
24 | class %cross1% |
---|
25 | ?cross2? %cross2% |
---|
26 | ; |
---|
27 | *format mean 5.2 count 9.0 sum 9.0 stderr 6.2; |
---|
28 | output out=tmp mean=mean n=count sum=sum stderr=stderr; /* out=tmp replaces the input dataset with the summary rows */ |
---|
29 | run; /* no NWAY option, so the output also holds marginal and grand-total rows (class value missing) */ |
---|
30 | proc sort data=tmp; by %cross1% |
---|
31 | ?cross2? %cross2% |
---|
32 | ; run; |
---|
33 | proc print data=tmp noobs; |
---|
34 | title1 '---------------------------------'; |
---|
35 | title2 '1. TMP - numerator dataset'; |
---|
36 | run; |
---|
37 | |
---|
38 | *********************** 2. data_frame **************************; |
---|
39 | * df_%cross1%%cross2% is a dataset created by IBIS-Q. ; |
---|
40 | * It consists of %cross1% and %cross2% (if the user specified ; |
---|
41 | * %cross2%) and a variable named "count" that is set to "0". ; |
---|
42 | * The results of the proc summary must be merged with the ; |
---|
43 | * df_%cross1%%cross2% dataset. ; |
---|
44 | ****************************************************************; |
---|
45 | data frame; |
---|
46 | set df_%cross1%%cross2%; |
---|
47 | run; |
---|
48 | proc sort data=frame; by %cross1% |
---|
49 | ?cross2? %cross2% |
---|
50 | ; run; |
---|
51 | proc print data=frame noobs; |
---|
52 | title2 '2. Data Frame'; |
---|
53 | run; |
---|
54 | * Merge the frame with the summary so that every frame cell is present in the result, matched on the cross variables; |
---|
55 | data new; |
---|
56 | length mean count sum stderr 8; *set length so that values do not get truncated; |
---|
57 | merge frame tmp; *must list frame dataset first, then tmp, so that values from tmp replace the frame count on matched rows; |
---|
58 | by %cross1% |
---|
59 | ?cross2? %cross2% |
---|
60 | ; |
---|
61 | run; |
---|
62 | proc print data=new noobs; |
---|
63 | title2 '2. NEW, after frame merged with tmp'; |
---|
64 | run; |
---|
65 | data tmp; |
---|
66 | set new; |
---|
67 | drop _TYPE_ _FREQ_; |
---|
68 | * The two deletes below apply only when a second cross variable was requested; |
---|
69 | * Drop the one-way marginal rows (exactly one cross variable missing), the cell rows and the grand total row are kept; |
---|
70 | ?cross2? if %cross1% = . and %cross2% ^= . then delete; |
---|
71 | ?cross2? if %cross1% ^= . and %cross2% = . then delete; |
---|
72 | |
---|
73 | run; |
---|
74 | |
---|
75 | proc print data=tmp noobs; |
---|
76 | title2 '2. TMP'; |
---|
77 | run; |
---|
78 | |
---|
79 | *************** 3. Flag variable and popcross macro *************; |
---|
80 | * The flag variable checks the cross variables for presence of ; |
---|
81 | * variables that are found in the population dataset. IBIS-q ; |
---|
82 | * created popcross vars based on info from the .CFG file. ; |
---|
83 | * The popcross macro will merge the numerator and denominator ; |
---|
84 | * data, matching up the appropriate values of the cross vars. ; |
---|
85 | *****************************************************************; |
---|
86 | * IBIS does not need the population dataset to produce an avg. ; |
---|
87 | * The following code, through the end of the macro, is used to ; |
---|
88 | * compute popcount for the data stability indicator and to check ; |
---|
89 | * the numerator and denominator counts for the cell suppression. ; |
---|
90 | *****************************************************************; |
---|
91 | %let flag=0; |
---|
92 | ?popcross1? %let flag=1; |
---|
93 | ?popcross2? %let flag=1; |
---|
94 | ?popcross1? ?popcross2? %let flag=2; |
---|
95 | /* flag = number of cross variables with a population counterpart (0, 1 or 2); presumably each ?name? line is emitted only when that name is set - confirm against IBIS-Q */ |
---|
96 | %macro popcross; |
---|
97 | /* flag=0: total popcount only; the join has no WHERE, so the single pop row is attached to every tmp row */ |
---|
98 | %if &flag=0 %then %do; |
---|
99 | proc summary data=poptmp; |
---|
100 | var popcount; |
---|
101 | output out=pop sum=popcount; |
---|
102 | run; |
---|
103 | proc sql; |
---|
104 | create table rate as |
---|
105 | select tmp.*, pop.* |
---|
106 | from tmp, pop; |
---|
107 | quit; |
---|
108 | %end; |
---|
109 | /* flag=1: one cross variable has a population counterpart; only the matching WHERE line below is expected to be emitted */ |
---|
110 | %if &flag=1 %then %do; |
---|
111 | proc summary data=poptmp; |
---|
112 | var popcount; |
---|
113 | class %popcross1% %popcross2%; |
---|
114 | output out=pop sum=popcount; |
---|
115 | run; |
---|
116 | proc sql; |
---|
117 | create table rate as |
---|
118 | select tmp.*, pop.* |
---|
119 | from tmp, pop |
---|
120 | where |
---|
121 | ?popcross1? tmp.%cross1%=pop.%popcross1%; |
---|
122 | ?popcross2? tmp.%cross2%=pop.%popcross2%; |
---|
123 | quit; |
---|
124 | %end; |
---|
125 | /* flag=2: both cross variables have population counterparts; join on both */ |
---|
126 | %if &flag=2 %then %do; |
---|
127 | proc summary data=poptmp; |
---|
128 | var popcount; |
---|
129 | class %popcross1% %popcross2% ; |
---|
130 | output out=pop sum=popcount; |
---|
131 | run; |
---|
132 | proc sql; |
---|
133 | create table rate as |
---|
134 | select tmp.*, pop.* |
---|
135 | from tmp, pop |
---|
136 | where tmp.%cross1%=pop.%popcross1% and |
---|
137 | tmp.%cross2%=pop.%popcross2%; |
---|
138 | quit; |
---|
139 | %end; |
---|
140 | %mend; |
---|
141 | %popcross; |
---|
142 | proc print data=rate noobs; title2 '3. merged tmp and pop prior to cell suppression'; run; |
---|
143 | |
---|
144 | ********************** 4. tmp, again ****************************; |
---|
145 | * Create the output variables for the IBIS xml/map file. ; |
---|
146 | * The following uses T distribution, for count, but when count ; |
---|
147 | * is above 30, it is really using normal distribution ; |
---|
148 | *****************************************************************; |
---|
149 | data tmp; |
---|
150 | set rate; |
---|
151 | |
---|
152 | *****************************************************************; |
---|
153 | * Student t distribution. df=count-1. The "Z" value is really ; |
---|
154 | * the two-tailed critical value of "t" (0.025 in each tail). ; |
---|
155 | *****************************************************************; |
---|
156 | if count=1 then Z= .; |
---|
157 | else if count=2 then Z= 12.706; |
---|
158 | else if count=3 then Z= 4.303; |
---|
159 | else if count=4 then Z= 3.182; |
---|
160 | else if count=5 then Z= 2.776; |
---|
161 | else if count=6 then Z= 2.571; |
---|
162 | else if count=7 then Z= 2.447; |
---|
163 | else if count=8 then Z= 2.365; |
---|
164 | else if count=9 then Z= 2.306; |
---|
165 | else if count=10 then Z= 2.262; |
---|
166 | else if count=11 then Z= 2.228; |
---|
167 | else if count=12 then Z= 2.201; |
---|
168 | else if count=13 then Z= 2.179; |
---|
169 | else if count=14 then Z= 2.160; |
---|
170 | else if count=15 then Z= 2.145; |
---|
171 | else if count=16 then Z= 2.131; |
---|
172 | else if count=17 then Z= 2.120; |
---|
173 | else if count=18 then Z= 2.110; |
---|
174 | else if count=19 then Z= 2.101; |
---|
175 | else if count=20 then Z= 2.093; |
---|
176 | else if count=21 then Z= 2.086; |
---|
177 | else if count=22 then Z= 2.080; |
---|
178 | else if count=23 then Z= 2.074; |
---|
179 | else if count=24 then Z= 2.069; |
---|
180 | else if count=25 then Z= 2.064; |
---|
181 | else if count=26 then Z= 2.060; |
---|
182 | else if count=27 then Z= 2.056; |
---|
183 | else if count=28 then Z= 2.052; |
---|
184 | else if count=29 then Z= 2.048; |
---|
185 | else if count=30 then Z= 2.045; |
---|
186 | else if count>30 then Z= 1.96; |
---|
187 | * A count of 0 matches no branch above, so Z stays missing and the limits below come out missing; |
---|
188 | t1=mean-(Z*stderr); |
---|
189 | if (t1<0) then t1=0; |
---|
190 | if count in (0 1) then t1=.; |
---|
191 | LL=put(t1, 8.2); /* put returns text, so LL and UL are character variables */ |
---|
192 | UL=put((mean+(z*stderr)), 8.2); |
---|
193 | LL=compress(LL); |
---|
194 | UL=compress(UL); |
---|
195 | n=count; *ibis-q needs a count variable named 'n'; |
---|
196 | |
---|
197 | proc print data=tmp; title2 '4. tmp again - t1=mean*stderr, LL,UL,n before suppression'; |
---|
198 | run; |
---|
199 | run; |
---|
200 | |
---|
201 | ********************** 5. tmp, again Redflag and cell suppression *************; |
---|
202 | * Create the output variables for the IBIS xml/map file. ; |
---|
203 | * Add Redflag ; |
---|
204 | * Redflag is used to describe why values have been suppressed ; |
---|
205 | * Redflag values created here may be converted to images or special ; |
---|
206 | * characters in IBIS-View application XSLTfiles, for instance: ; |
---|
207 | * (xslt\html\query\module\result\ResultPage.xslt, ...Values.xslt) ; |
---|
208 | * ; |
---|
209 | * And Apply Montana specific Small numbers rules as follows ; |
---|
210 | * (Comments include rules for Vital Statistics Counts and Rates) ; |
---|
211 | * 1. If denominator (popcount >= 300) ; |
---|
212 | * 1a. If count <5 ; |
---|
213 | * Suppress cells ; |
---|
214 | * 1b. Suppress complementary cells if you can arrive at the ; |
---|
215 | * number in a cell less than 5 by subtraction ; |
---|
216 | * Note: I think we just suppress everything in case #1a and 1b ; |
---|
217 | * 2. If count >= 5 and < 20 ; |
---|
218 | * Suppress rates, but compute counts ; |
---|
219 | * This would include rates like deaths per 100,000, low birth ; |
---|
220 | * weight as % of live births, or ratios like maternal mortality ; |
---|
221 | * per 100 live births. ; |
---|
222 | * if 20<=count<=100 (events between 20 and 100) ; |
---|
223 | * (according to Cody this does not apply for avgs?? ; |
---|
224 | * 2. If denominator (popcount < 300) ; |
---|
225 | * 2a. If count >= 20 ; |
---|
226 | * Report count and rates with confidence intervals ; |
---|
227 | * 2b. If count < 20 ; |
---|
228 | * Suppress rates and counts ; |
---|
229 | * ZW-s program uses ".A" to identify cells for suppression. ; |
---|
230 | * I have co-opted his method so I can use the MT logic for cell ; |
---|
231 | * suppression instead of the standard IBIS logic. And I need to use ; |
---|
232 | * ZWs program because it will suppress the table marginals that can ; |
---|
233 | * be used to calculate the suppressed cells. If this code is used, ; |
---|
234 | * the .def file should have the MT_ prefix. Needs suppressed_variables ; |
---|
235 | * code at the end of this file to work. ; |
---|
236 | *******************************************************************************; |
---|
237 | |
---|
238 | **************** ASK LOIS ABOUT STDERR COMPUTATIONS ***************************; |
---|
239 | |
---|
240 | data tmp; |
---|
241 | set tmp; |
---|
242 | n=count; *ibis-q needs a count variable named 'n'; |
---|
243 | * .A marks a value for suppression, redflag records the reason, and a redflag of - means nothing was suppressed; |
---|
244 | * NOTE(review): LL and UL arrive as character (built with put in step 4), so assigning .A converts numeric to character - confirm the downstream program expects that; |
---|
245 | if popcount>=300 then do; |
---|
246 | |
---|
247 | * Suppress cells if absolute count <5 ; |
---|
248 | |
---|
249 | if 1<=count<5 then do; |
---|
250 | mean=.A; |
---|
251 | LL=.A; |
---|
252 | UL=.A; |
---|
253 | redflag=put('Suppressed', $15.); |
---|
254 | n=.A; |
---|
255 | end; |
---|
256 | |
---|
257 | * do not compute rates based on fewer than 20 events, but provide counts; |
---|
258 | |
---|
259 | if 5<=count<20 then do; |
---|
260 | mean=.A; |
---|
261 | LL=.A; |
---|
262 | UL=.A; |
---|
263 | redflag=put('Suppressed Avg', $16.); |
---|
264 | end; |
---|
265 | |
---|
266 | if 20<=count<=100 then do; |
---|
267 | * for events between 20 and 100, use T distribution to calculate confidence intervals; |
---|
268 | * note LL and UL are calculated in step 4 above.; |
---|
269 | |
---|
270 | redflag=put('-', $15.); |
---|
271 | |
---|
272 | end; |
---|
273 | |
---|
274 | * and normal approximation for more than 100 events. ; |
---|
275 | * which is also computed in step 4 above; |
---|
276 | |
---|
277 | if count>100 then do; |
---|
278 | |
---|
279 | redflag=put('-', $15.); |
---|
280 | end; |
---|
281 | end; |
---|
282 | |
---|
283 | if popcount<300 then do; |
---|
284 | |
---|
285 | * suppress if popcount <300 and count < 20 events. ; |
---|
286 | |
---|
287 | if 1<=count<20 then do; |
---|
288 | mean=.A; |
---|
289 | LL=.A; |
---|
290 | UL=.A; |
---|
291 | redflag=put('Suppressed', $15.); |
---|
292 | n=.A; |
---|
293 | end; |
---|
294 | |
---|
295 | |
---|
296 | * report rate and n if popcount <300 and count >= 20 events. ; |
---|
297 | |
---|
298 | if 20<=count<=100 then do; |
---|
299 | * for events between 20 and 100, use T distribution to calculate confidence intervals; |
---|
300 | * note LL and UL are calculated in step 4 above.; |
---|
301 | |
---|
302 | redflag=put('-', $15.); |
---|
303 | |
---|
304 | end; |
---|
305 | |
---|
306 | * and normal approximation for more than 100 events. ; |
---|
307 | * which is also computed in step 4 above; |
---|
308 | |
---|
309 | if count>100 then do; |
---|
310 | |
---|
311 | redflag=put('-', $15.); |
---|
312 | end; |
---|
313 | end; |
---|
314 | * Empty cells: the average and its limits are marked .A, n stays 0 and redflag is not set here; |
---|
315 | if count=0 then do; |
---|
316 | mean=.A; |
---|
317 | LL=.A; |
---|
318 | UL=.A; |
---|
319 | end; |
---|
320 | * NOTE(review): SAS orders missing values below every number, so a missing count also satisfies count<0 - confirm that is the intended trigger; |
---|
321 | if count<0 then do; |
---|
322 | mean=0; |
---|
323 | mean=.A; |
---|
324 | LL=.A; |
---|
325 | UL=.A; |
---|
326 | redflag=put('No Variance', $15.); *no variance; |
---|
327 | end; |
---|
328 | |
---|
329 | proc print data=tmp; title2 '4. tmp again - output variables for the IBIS xml/map and Redflag and cell suppression'; |
---|
330 | run; |
---|
331 | |
---|
332 | --------BoNdArY-------- |
---|
333 | # definition for output file |
---|
334 | f out_variable average |
---|
335 | f xml_out_map_file XMLAverageDenomLCLUCL.map |
---|
336 | --------BoNdArY-------- |
---|
337 | f out_detail lbl_not_used__see_xml_out_map_file |
---|
338 | mean 15.2 |
---|
339 | n 15.2 |
---|
340 | LL 15.2 |
---|
341 | UL 15.2 |
---|
342 | redflag 16.0 |
---|
343 | --------BoNdArY-------- |
---|
344 | |
---|
345 | ****************** 6. SUPPRESSED VARIABLES *************************; |
---|
346 | * ZW-s CGI program must be told how many variables it will need to ; |
---|
347 | * suppress and which ones they are. NOTE: If the SAS code, above, is; |
---|
348 | * commented out, these lines can be left in the .def file without ; |
---|
349 | * causing any problems. They will only be used if the SAS code, ; |
---|
350 | * above is active, OR if the small_num and small_pop parameters ; |
---|
351 | * are active in the .CFG file, and with non-zero values. ; |
---|
352 | * Sometimes ZW-s program does not turn .A into **, perhaps because ; |
---|
353 | * I've turned off suppression, but it seems more than that - the ; |
---|
354 | * workaround for this is to put ** above, but not add them to ; |
---|
355 | * suppressed variable lists below ; |
---|
356 | ********************************************************************; |
---|
357 | --------BoNdArY-------- |
---|
358 | 1 suppressed_variables 4 |
---|
359 | mean |
---|
360 | n |
---|
361 | LL |
---|
362 | UL |
---|
363 | --------BoNdArY-------- |
---|