Changeset 21801 in main


Ignore:
Timestamp:
10/16/20 11:01:05 (9 days ago)
Author:
Paul Leo
Message:

Hawaii IBIS v2.3 PRAMS back-end .def and .sas programs
Correcting most, if not all, issues with rounding and cell suppression.
Still have questions though:
1) need to verify cell suppression rules
2) need to verify Statistical Stability output rules, and possibly the data_notes section describing the Statistical Stability output rules.

Location:
adopters/hi/branches/2.3/src/main/backend_qModules/prams
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • adopters/hi/branches/2.3/src/main/backend_qModules/prams/RateCrude.def

    r13903 r21801  
    55f type special_survey
    66f include SurveyCrudeRate.sas
    7 #f no_total true
     7f no_total true
    88#########################################
    99--------BoNdArY--------
     
    2020--------BoNdArY--------
    2121f out_detail lbl_not_used__see_xml_out_map_file 
    22   percent 7.4
    23   lower 7.4
    24   upper 7.4
    25   sum 15.0
    26   wgtsum 15.0
    27   redflag 14.0
     22 percent 7.4
     23 lower 7.4
     24 upper 7.4
     25 finalsum 15.0
     26 wgtsum 15.0
     27 redflag 14.0
    2828--------BoNdArY--------
    2929--------BoNdArY--------
     
    3232lower
    3333upper
    34 sum
     34finalsum
    3535wgtsum
    3636--------BoNdArY--------
  • adopters/hi/branches/2.3/src/main/backend_qModules/prams/RateCrudeNoVarLevel.def

    r13903 r21801  
    2121--------BoNdArY--------
    2222f out_detail lbl_not_used__see_xml_out_map_file 
    23   percent 7.4
     23 percent 7.4
    2424 lower 7.4
    25   upper 7.4
    26   sum 15.0
    27   wgtsum 15.0
    28   redflag 14.0
     25 upper 7.4
     26 finalsum 15.0
     27 wgtsum 15.0
     28 redflag 14.0
    2929--------BoNdArY--------
    3030--------BoNdArY--------
     
    3333lower
    3434upper
    35 sum
     35finalsum
    3636wgtsum
    37 --------BoNdArY--------
     37--------BoNdArY-------- 
  • adopters/hi/branches/2.3/src/main/backend_qModules/prams/SurveyCrudeRate.sas

    r18257 r21801  
    1 /**********************************************************************;
     1**********************************************************************;
    22* Program filename: TotalCrudeSurv.sas                                ;
    33*                                                                     ;
     
    1212*                                                                     ;
    1313**********************************************************************;
    14 */
    15 
    16 OPTIONS MPRINT MLOGIC MLOGICNEST SYMBOLGEN SPOOL SOURCE2 SUMSIZE=138M PAGESIZE=4000;
    17 
     14
     15OPTIONS MPRINT MLOGIC MLOGICNEST SYMBOLGEN SPOOL SOURCE2 SUMSIZE=138M PAGESIZE=4000 LINESIZE=MAX;
    1816
    1917**********************************************************************************************;
     
    2220* the user selects                                                                            ;
    2321**********************************************************************************************;
    24 
     22 
    2523/* proc print data=tmp (obs=10); run; */
     24
    2625proc freq data=tmp;
    2726   tables year / out=yrfreq noprint;
     
    5655     set tmpyrwgt;
    5756         if upcase(%cross1%) ^=YEAR
    58          ?cross2? and %cross2% ^=YEAR
     57         ?cross2? and upcase(%cross2%) ^=YEAR
    5958         then
    6059     Final_weight=Final_weight/nyears;
    6160   run;
     61   
    6262/*  proc print data=tmpyrwgt; title2 'tmypwgt'; run; */
    6363/*   
     
    6565        title1 ' '; title2 'checktmp .'; run;
    6666*/
     67 
    6768%macro crudrt(varname,weight,FocusLevel);
    6869?cross1? proc surveymeans data=tmp nobs sum mean stderr;
     
    9091?cross1? ?cross2?   *cluster PSU;
    9192?cross1? ?cross2?  domain
    92 
    9393?cross1? ?cross2? %cross1%*%cross2%
    9494?cross1? ?cross2? %surveyvar1%
     
    103103********* tmp1: Grab stats for dimension totals *****************;
    104104*********(NOT SURE WE NEED THIS FOR CRUDE RATES)*****************;
     105*****************************************************************;
    105106proc print data=stats; title1 ' '; title2 'stats';
    106107
     
    113114  proc print;
    114115        title1 '===================================================================';
    115         title2 'tmp1: Grab percentage for HI overall';
     116        title2 'tmp1: Grab percentage for Hawaii overall';
    116117run;
    117118
     
    125126  &varname.=input(VarLevel, $54.);
    126127  drop LowerCLMean UpperCLMean VarLevel DomainLabel VarLabel VarName StdDev;
    127 
    128  %if '%cross1%' != 'YEAR' %then %do;
    129         %if year_sflag<=0 %then delete;
     128/* this code has never worked, originally there was a ! (logical OR) operator instead of the ^=
     129so the test was never true, fixed code to make test work, but that actually broke the output
     130since it has never worked, am commenting it out 
     131  %if %UPCASE(%cross1%) ^= YEAR %then %do;
     132    if year_sflag<=0  then delete;
    130133  %end;
    131134*/
     
    133136run;
    134137
    135 **********************************************************;
    136 ********** tmp3: Grab sample size (denominator) **********;
    137 **********************************************************;
     138****************************************************************;
     139********** tmp3: Grab sample size (denominator) ****************;
     140*** N (sas var), ouput in domain dataset by proc surveymeans ***;
     141*** is number of records with valid data for varname          **;
     142****************************************************************;
    138143proc summary data=tmp2;
    139144  var N;
     
    145150  proc sort data=SampleN; by %cross1%
    146151  ?cross2? %cross2%
    147   ;
     152  ;                     * sum(N)=, sum the values of N across all combinations of cross1, cross2 and varname;
     153                        *         gets subtotals by each class var, and a grand total across all values of all class variables;
    148154  proc print; title2 'sampleN - (Unweighted number of folks who answered either y or n) Number of Records for cell suppression';
    149155  run;
     
    152158  set sampleN;
    153159  if &varname. ='';
    154   sampleN=N;
     160    sampleN=N;
    155161  /*vtypetst=vtype(%cross1%);*/
    156162  drop  _TYPE_ _FREQ_ ;
     
    169175********** tmp4: Calculate asymmetric confidence ints ***********;
    170176*****************************************************************;
    171 
    172177data tmp4;
    173178  set tmp2 tmp1;
     
    189194********* tmp5: Add sample size field to every record ************;
    190195********* this is only for cell suppression           ************;
     196******  N in output is unweighted numerator               ********;
     197****  Samplen in output is the unweighted denominator        *****;
    191198******************************************************************;
    192199
     
    196203  ?cross2? %cross2%
    197204  ;
    198   proc print; title2 'tmp5: Add sample size field to every record';
    199 run;
    200 
    201 *****************************************************************;
    202 ********** tmp6: Add wgtsum field to every record ***************;
    203 ***********Used by Hawaii, they want estimated ******************;
    204 *number in pop having that category/repsonse, not samplen********;
    205 *********** (May need debugging... ??) **************************;
    206 *****************************************************************;
     205  proc print; title2 'tmp5: Add sample size (Samplen) field to every record';
     206run;
     207
     208******************************************************************;
     209********** tmp6: Add wgtsum field to every record ****************;
     210***********Used by Hawaii, they want estimated number in *********;
     211***   pop having that category/repsonse, not unweighted samplen **;
     212******************************************************************;
    207213proc summary data=tmp5;
    208   var Sum;
     214  var sum;
    209215  class %cross1%
    210216  ?cross2? %cross2%
    211217  &varname.
    212218  ;
    213   output out=weightedN sum(Sum)=wgtsum;
    214   proc sort data=weightedN; by %cross1%
    215   ?cross2? %cross2%
    216   ;
    217   proc print; title2 'weightedN - estimated population category in that category or response';
     219  output out=wgtsum sum(sum)=wgtsum;
     220  proc sort data=wgtsum; by %cross1%
     221  ?cross2? %cross2%
     222  ;
     223  proc print; title2 'wgtsum - estimated population category in that category or response';
    218224  run;
    219   data wgtsum;
    220         set weightedN;
    221         if &varname ='';
    222   proc print; title2 'wgtsum: Total rows of weighted N - estimated number in that category or response';
     225  data wgtdata;
     226        set wgtsum;
     227         if &varname ='';
     228  proc print; title2 'wgtdata: Total rows of wgtsum dataset - estimated number in that category or response';
    223229        run;
    224230       
     
    228234        ;
    229235        run;
    230   proc sort data=wgtsum;
     236  proc sort data=wgtdata;
    231237        by %cross1%
    232238        ?cross2? %cross2%
     
    234240        run;
    235241
    236 data tmp6; *Add weightedN (wgtsum) field to each record;
    237   merge wgtsum tmp5 ;
     242data tmp6; *Add wgtsum field (pop estimates) to each record by merging wgtdata and tmp5;
     243  merge wgtdata tmp5 ;
    238244  by %cross1%
    239245  ?cross2? %cross2%
    240246  ;
    241247  drop _TYPE_ _FREQ_ ;
    242   proc print; title2 'tmp6: Add weighted sample (wgtsum) field to every record';
     248  proc print; title2 'tmp6: Add wgtsum field (pop estimates) to each record by merging wgtdata and tmp5';
    243249run;
    244250
     
    274280************************************************************;
    275281
    276 data tmp7;
    277         set tmp6;
    278 
    279        
     282data tmp7;
     283  set tmp6;
     284
    280285  if 0<mean<.50 then RSE=(StdErr/mean);
    281286  if .50<=mean<1 then RSE=(StdErr/(1-mean));
    282287 
    283288  redflag=put('Stable', $14.);
    284   if rse>.3 then redflag=put('Unstable', $14.);
    285   if rse>.5 then redflag=put('Very Unstable', $14.);
    286   if SampleN in (0 1) then redflag=put('No Variance', $14.);
     289 /**********************************************************************************************************
     290  * Not sure we are using this, just commenting out, and keeping it here;
     291  *   If we do use this we will have to change logic way below where we Convert values for cell suppression;
     292  *if rse>.3 then redflag=put('Unstable', $14.);
     293  *if rse>.5 then redflag=put('Very Unstable', $14.);
     294  ***********************************************************************************************************/
     295  /* may want to comment this out at some future date */
     296  if mean in (0 1) then redflag=put('No Variance', $14.);
    287297
    288298  if trow1=1 then %cross1%='.';
    289299  ?cross2? if trow2=1 then %cross2%='.';
     300
     301  /* Since these are estimates, HI wants to round to 100's and sum is weighted numerator */
    290302 
    291   if (upcase(%cross1%) =YEAR and year='.')
    292          ?cross2? OR (upcase(%cross2%)=YEAR and year='.')
    293          THEN DO;
    294            sum=sum/nyears;
    295            wgtsum=wgtsum/nyears;
    296          END;
    297        
    298 /*
    299         Since these are esimates HI wants to round to 100's and sum is numerator
    300         AND if numerator is less than 50, then set to 50
    301 */
    302   If 0<sum<50 then sum=50;
    303         else if sum >= 50 then sum=ROUND(sum,100);
    304   /* original wgtsum=ROUND(wgtsum,100); */
    305   /* prevents wgtsum from being 0, we'll round up */
    306   If 0<wgtsum<50 then wgtsum=50;
    307          else if wgtsum >= 50 then wgtsum=ROUND(wgtsum,100);
     303   finalsum=ROUND(sum,100);
     304   
     305   /* had this as  If 0<sum<50 then finalsum=50;, but in reading emails from around 3/18/2019
     306    Katherine wants this to be: if the weighted count is less than 50, show 50, because even if no one
     307        responded yes to the particular indicator question, that does not mean no one in the population, just the sample
     308  */
     309    If sum<50 then finalsum=50;
     310   
     311  wgtsum=ROUND(wgtsum,100);
     312
    308313 
    309314  proc sort data=tmp7; by %cross1%
     
    313318  run;
    314319
    315 proc print data=tmp7; title2 'tmp7: Calc RSE, Add redflag to dataset, Set total rows to ., round num and denom';
     320proc print data=tmp7; title2 'tmp7: Compute RSE, Add redflag to dataset, delete Unknowns, Set total rows to ., remove other total rows, round num and denom';
    316321run;
    317322
     
    322327  set tmp7;
    323328  if &varname = ('%spvar2%'); /*This is the value for the indicator dimension passed in by the URL.;*/
    324   if (0<SampleN<30)  OR  (rse >.3) then do;     /* Hawaii cell supression Rule  SampleN (number of folks who answered either y or n)*/
    325 
    326         percent = .A;
    327         lower = .A;
    328         upper = .A;
    329         sum = .A;
    330         wgtsum = .A;
     329  if (0<SampleN<30)  OR  (rse >.3) then do;     /* Hawaii cell supression Rule  SampleN (unweighted number of folks who answered either y or n)*/
     330
     331        percent = .A;   * percent who answered yes or no;
     332        lower = .A;             * lower confidence interval;
     333        upper = .A;             * upper confidence interval;
     334        finalsum = .A;  * pop estimated weighted numerator;
     335        wgtsum = .A;    * pop estimated weighted denominator;
    331336        redflag=put('Not Reportable', $14.);
    332337  end;
    333 
     338 
    334339  proc print data=tmp; title2 'final tmp: Convert values for cell suppression';
    335340run;
  • adopters/hi/branches/2.3/src/main/backend_qModules/prams/SurveyCrudeRateNoVarLevel.sas

    r20761 r21801  
    1 /**********************************************************************;
     1**********************************************************************;
    22* Program filename: SurveyCrudeRateNoVarLevel.sas                      ;
    33*                                                                      ;
     
    77* than 2 response categories. The varname passed in becomes            ;
    88* becomes &varname analyzed by proc surveymeans                        ;
     9*                                                                      ;
    910* Missing set to . for all indicator and dimension vars                ;
    1011*                                                                      ;
    1112***********************************************************************;
    12 */
    13 
    14 OPTIONS MPRINT MLOGIC MLOGICNEST SYMBOLGEN SPOOL SOURCE2 SUMSIZE=138M PAGESIZE=4000;
    15 
     13
     14OPTIONS MPRINT MLOGIC MLOGICNEST SYMBOLGEN SPOOL SOURCE2 SUMSIZE=138M PAGESIZE=4000 LINESIZE=MAX;
    1615
    1716***************************************************************************************************;
     
    2221 
    2322/* proc print data=tmp (obs=10); run; */
     23
    2424proc freq data=tmp;
    2525   tables year / out=yrfreq noprint;
     
    5353 data tmp;
    5454     set tmpyrwgt;
    55          if %cross1% ^=YEAR
    56          ?cross2? and %cross2% ^=YEAR
     55         if upcase(%cross1%) ^=YEAR
     56         ?cross2? and upcase(%cross2%) ^=YEAR
    5757         then
    5858     Final_weight=Final_weight/nyears;
     
    6262        title1 ' '; title2 'checktmp .'; run;
    6363*/
    64 
     64 
    6565%macro crudrt(varname,weight);
    66 ?cross1? proc surveymeans data=tmp  nobs sum mean stderr;
     66?cross1? proc surveymeans data=tmp nobs sum mean stderr;
    6767?cross1?  var &varname. ;
    6868?cross1?   class &varname. ;
     
    8383********* tmp1: Grab stats for dimension totals *****************;
    8484*********(NOT SURE WE NEED THIS FOR CRUDE RATES)*****************;
     85*****************************************************************;
    8586proc print data=stats; title1 ' '; title2 'stats';
    8687
    8788data tmp1;
    8889  set stats;
    89   &varname.=input(VarLevel, $30.);
     90  &varname.=input(VarLevel, $54.);
    9091  trow1 = 1;
    9192  ?cross2? trow2 = 1;
    92   drop VarName VarLabel StdDev;
     93  drop VarLevel VarName VarLabel StdDev;
    9394  proc print;
    9495        title1 '===================================================================';
    95         title2 'tmp1: Grab percentage for HI overall';
     96        title2 'tmp1: Grab percentage for Hawaii overall';
    9697run;
    9798
     
    103104data tmp2;
    104105  set domain;
    105 
    106   &varname.=input(VarLevel, $30.);
    107  
    108   drop LowerCLMean UpperCLMean DomainLabel VarLabel VarName StdDev;
    109 
    110   %if '%cross1%' != 'Year' %then %do;
    111         if year_sflag<=0 then delete;
     106  &varname.=input(VarLevel, $54.);
     107  drop LowerCLMean UpperCLMean VarLevel DomainLabel VarLabel VarName StdDev;
     108/* this code has never worked, originally there was a ! (logical OR) operator instead of the ^=
     109so the test was never true, fixed code to make test work, but that actually broke the output
     110since it has never worked, am commenting it out 
     111  %if %UPCASE(%cross1%) ^= YEAR %then %do;
     112    if year_sflag<=0 then delete;
    112113  %end;
    113 
     114*/
    114115  proc print; title2 'tmp2: Grab %, SE, codes for indicator variable';
    115116run;
    116117
    117 **********************************************************;
    118 ********** tmp3: Grab sample size (denominator) **********;
    119 **********************************************************;
     118****************************************************************;
     119********** tmp3: Grab sample size (denominator) ****************;
     120*** N (sas var), ouput in domain dataset by proc surveymeans ***;
     121*** is number of records with valid data for varname          **;
     122****************************************************************;
    120123proc summary data=tmp2;
    121124  var N;
     
    127130  proc sort data=SampleN; by %cross1%
    128131  ?cross2? %cross2%
    129   ;
     132  ;                     * sum(N)=, sum the values of N across all combinations of cross1, cross2 and varname;
     133                        *         gets subtotals by each class var, and a grand total across all values of all class variables;
    130134  proc print; title2 'sampleN - (Unweighted number of folks who answered either y or n) Number of Records for cell suppression';
    131135  run;
     
    133137  format trow1 trow2 1.0;
    134138  set sampleN;
    135     if &varname. =''; 
    136                 sampleN=N;
     139  if &varname. ='';
     140    sampleN=N;
    137141  /*vtypetst=vtype(%cross1%);*/
    138142  drop  _TYPE_ _FREQ_ ;
     
    156160*****************************************************************;
    157161data tmp4;
    158   set tmp2;
     162  set tmp2 tmp1;
    159163        if mean >0 then do;
    160164                f=log(mean)-log(1-mean); 
     
    176180********* tmp5: Add sample size field to every record ************;
    177181********* this is only for cell suppression           ************;
     182******  N in output is unweighted numerator               ********;
     183****  Samplen in output is the unweighted denominator        *****;
    178184******************************************************************;
    179185
     
    182188  by %cross1%
    183189  ;
    184   proc print; title2 'tmp5: Add sample size field (Samplen) to every record';
    185 run;
    186 
    187 *****************************************************************;
    188 ********** tmp6: Add wgtsum field to every record ************;
    189 ***********Used by Hawaii, they want pop num, not samplen********;
    190 *****************************************************************;
     190  proc print; title2 'tmp5: Add sample size (Samplen) field to every record';
     191run;
     192
     193******************************************************************;
     194********** tmp6: Add wgtsum field to every record ****************;
     195***********Used by Hawaii, they want estimated number in *********;
     196***   pop having that category/repsonse, not unweighted samplen **;
     197******************************************************************;
    191198proc summary data=tmp5;
    192199  var sum;
     
    198205  proc sort data=wgtsum; by %cross1%
    199206  ;
    200   proc print; title2 'wgtsum';
     207  proc print; title2 'wgtsum - estimated population category in that category or response';
    201208  run;
    202209  data wgtdata;
    203210        set wgtsum;
    204211         if &varname ='';
    205   proc print; title2 'wgtdata: Total rows of sum';
     212  proc print; title2 'wgtdata: Total rows of wgtsum dataset - estimated number in that category or response';
    206213        run;
    207214       
     
    216223        run;
    217224
    218 data tmp6; *Add wgtsum field to each record;
     225data tmp6; *Add wgtsum field (pop estimates) to each record by merging wgtdata and tmp5;
    219226  merge wgtdata tmp5 ;
    220227  by %cross1%
    221228  ;
    222229  drop _TYPE_ _FREQ_ ;
    223   proc print; title2 'tmp6: Add wgtsum field to every record';
     230  proc print; title2 'tmp6: Add wgtsum field (pop estimates) to each record by merging wgtdata and tmp5';
    224231run;
    225232
     
    260267  if 0<mean<.50 then RSE=(StdErr/mean);
    261268  if .50<=mean<1 then RSE=(StdErr/(1-mean));
    262 
     269 
    263270  redflag=put('Stable', $14.);
    264 
    265   if rse>.3 then redflag=put('Unstable', $14.);
    266   if rse>.5 then redflag=put('Very Unstable', $14.);
    267   if sum=1 then redflag=put('No Variance', $14.); 
     271 /**********************************************************************************************************
     272  * Not sure we are using this, just commenting out, and keeping it here;
     273  *   If we do use this we will have to change logic way below where we Convert values for cell suppression;
     274  *if rse>.3 then redflag=put('Unstable', $14.);
     275  *if rse>.5 then redflag=put('Very Unstable', $14.);
     276  ***********************************************************************************************************/
     277  /* may want to comment this out at some future date */
     278  if mean in (0 1) then redflag=put('No Variance', $14.);
    268279
    269280  if trow1=1 and trow2=1 then delete; /* get rid of total */
    270        
    271 /*
    272         Since these are esimates HI wants to round to 100's and sum is numerator
    273         AND if numerator is less than 50, then set to 50
    274 */
    275 
    276 
    277   If 0<sum<50 then sum=50;
    278         else if sum >= 50 then sum=ROUND(sum,100);
    279   If 0<wgtsum<50 then wgtsum=50;
    280          else if wgtsum >= 50 then wgtsum=ROUND(wgtsum,100);
    281 
    282   proc sort data=tmp7; by %cross1% &varname.
     281
     282  /* Since these are estimates, HI wants to round to 100's and sum is weighted numerator */
     283 
     284   finalsum=ROUND(sum,100);
     285   
     286   /* had this as  If 0<sum<50 then finalsum=50;, but in reading emails from around 3/18/2019
     287    Katherine wants this to be: if the weighted count is less than 50, show 50, because even if no one
     288        responded yes to the particular indicator question, that does not mean no one in the population, just the sample
     289  */
     290    If sum<50 then finalsum=50;
     291   
     292  wgtsum=ROUND(wgtsum,100);
     293
     294 
     295  proc sort data=tmp7; by %cross1%
     296  ?cross2? %cross2%
     297  &varname.
    283298  ;
    284299  run;
     
    292307data tmp;
    293308  set tmp7;
    294   if (SampleN<30) OR  (rse >.3) then do;        /* Hawaii cell supression Rule  */
    295         percent = .A;
    296         lower = .A;
    297         upper = .A;
    298         sum = .A;
    299         wgtsum = .A;
     309
     310  if (SampleN<30) OR  (rse >.3) then do;        /* Hawaii cell supression Rule */
     311
     312        percent = .A;   * percent who answered yes or no;
     313        lower = .A;             * lower confidence interval;
     314        upper = .A;             * upper confidence interval;
     315        finalsum = .A;  * pop estimated weighted numerator;
     316        wgtsum = .A;    * pop estimated weighted denominator;
    300317        redflag=put('Not Reportable', $14.);
    301318  end;
    302 
     319 
    303320  proc print data=tmp; title2 'final tmp: Convert values for cell suppression';
    304321run;
Note: See TracChangeset for help on using the changeset viewer.