Ignore:
Timestamp:
05/04/22 19:36:40 (3 months ago)
Author:
GarthBraithwaite_STG
Message:

db - updated the ds create to key off of SO3 and "Country" and cleaned out dup SO1 and SO3 usage. Added map_name cleanup to set to None vs DEFAULT. Added more select tests. Added test for VALUE_ATTR and null measure value.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/db/src/main/scripts/mysql/migrate/10_data-dataset.sql

    r25095 r25198  
    1717        sources are determined by the DATA_SOURCE_TO_INDICATOR_VIEW's SORT_ORDER.
    1818        Lois standardized/updated version 2.3 to have all SO=3's to be the US
    19         dataset.  SO=1 is the standard numerator for non US data.  SO=2 is the denom
    20         source for the SO=1 numerator.  As such here's the rules:
    21         1) GeoCountry & SO3 should all be inserted straight up.
    22         2) Insert the SO3 special split usages like:
    23                 'ACA_Pre_Post', 'BRFSS_Wgt', 'SexStateUS', 'Girls15-17_18-19', 'DiagPriAny'.
    24                 This is needed so that an actual dataset with the approp measure name
    25                 and data source name will be created.  The data-dataset_record script
    26                 will handle/create the approp split records.
    27         3) All SO 1 should be processed straight up. 
     19        dataset with SO=1 being the standard numerator for non US datasets.  SO=2
     20        is an optional denom source for the SO=1 numerator.  SO3 should all be
     21        inserted straight up as both numer and denom.
    2822
    2923        NOTES: The special SO3 splits is semi handled.  For example BRFSS_Wgt is a
     
    3529*/
    3630
    37 /* quick test to see what DS to IV SORT_ORDER=3 records are missing */
     31/* Before beginning do a quick test of IVVs to see what DS to IV SORT_ORDER=3
     32        records are missing.  If any the DS to IV needs to be updated with approp SO3s.
     33*/
    3834select ivv.INDICATOR_VIEW_NAME
    3935from INDICATOR_VIEW_VALUE ivv
     
    4238  and ivv.VALUE > ''
    4339  and ivv.ACTIVE_FLAG = 'x'
    44   and ((DIMENSION_1_NAME = 'GeoCountry') or (DIMENSION_2_NAME = 'GeoCountry'))
     40  and ((DIMENSION_1_NAME = 'Country') or (DIMENSION_2_NAME = 'Country'))
    4541  and ivv.INDICATOR_VIEW_NAME not in
    4642        (
     
    7268
    7369
    74 /* do some basic tests to make sure all IPVs have a data source */
     70/* basic counts */
    7571select count(*) from INDICATOR_VIEW;
    7672select count(*) from DATA_SOURCE_TO_INDICATOR_VIEW;
    7773
     74/* The standard test for IVs that don't have a data source */
     75select * from DATA_SOURCE_TO_INDICATOR_VIEW
     76where INDICATOR_VIEW_NAME not in
     77        (select NAME from INDICATOR_VIEW)
     78;
     79
     80/* Another way to check views (active) that don't have a DS */
    7881select NAME, ACTIVE_FLAG
    7982from INDICATOR_VIEW
     
    8184;
    8285
     86/* Count of IVVs that won't be picked up because no DS to IV mapping. */
    8387select iv.NAME, iv.ACTIVE_FLAG, count(ivv.INDICATOR_VIEW_NAME)
    8488from INDICATOR_VIEW iv, INDICATOR_VIEW_VALUE ivv
    8589where iv.NAME not in (select INDICATOR_VIEW_NAME from DATA_SOURCE_TO_INDICATOR_VIEW)
    8690  and ivv.INDICATOR_VIEW_NAME = iv.NAME
    87 ;
    88 
    89 select * from DATA_SOURCE_TO_INDICATOR_VIEW
    90 where INDICATOR_VIEW_NAME not in
    91         (select NAME from INDICATOR_VIEW)
    9291;
    9392
     
    109108;
    110109
    111 /* quick test to see what DS_TO_IV is not used */
     110/* quick test to see what DS_TO_IV do not have a SO1 */
    112111select distinct INDICATOR_VIEW_NAME
    113112from TMP_INDICATOR_VIEW_DATASET
     
    132131where dstiv.INDICATOR_VIEW_NAME = ivv.INDICATOR_VIEW_NAME
    133132  and dstiv.SORT_ORDER = 3
     133  and ((ivv.DIMENSION_1_NAME = 'Country') or (ivv.DIMENSION_2_NAME = 'Country'))
    134134group by 2, 3, 4, 5, 6, 7
    135135;
    136136
    137137
     138/* remove the US SO1 entries where SO 3 exists */
     139select *
     140from TMP_INDICATOR_VIEW_DATASET
     141where DATA_SOURCE_SORT_ORDER = 1
     142  and ((DIMENSION_1_NAME = 'Country') or (DIMENSION_2_NAME = 'Country'))
     143  and INDICATOR_VIEW_NAME in
     144        (select INDICATOR_VIEW_NAME
     145         from TMP_INDICATOR_VIEW_DATASET
     146         where DATA_SOURCE_SORT_ORDER = 3
     147        )
     148;
     149
     150delete tivd1 from TMP_INDICATOR_VIEW_DATASET tivd1
     151join TMP_INDICATOR_VIEW_DATASET tivd2
     152  on tivd2.INDICATOR_VIEW_NAME = tivd1.INDICATOR_VIEW_NAME
     153where tivd1.DATA_SOURCE_SORT_ORDER = 1
     154  and ((tivd1.DIMENSION_1_NAME = 'Country') or (tivd1.DIMENSION_2_NAME = 'Country'))
     155  and tivd2.DATA_SOURCE_SORT_ORDER = 3
     156;
     157
     158
    138159/*  The SO3 insert is very specific to only include those records that have a
    139         US related dimension.  Inserting the blanket SO1 results in some dup US type
    140         entries as the insert keys off of the view name, dataset, SO, and DIM 1 & 2. 
    141         Thus need to delete US SO1 if there's a US SO3 record.  Also need to delete
    142         any NON US SO3's from the SO1 *IF* the data source name matches.  Otherwise
    143         it needs to stay as those non US SO3's are the split use entries.
    144        
    145         Note that the "select" and "delete" counts should match.
    146 */
    147 select *
    148 from TMP_INDICATOR_VIEW_DATASET
    149 where DATA_SOURCE_SORT_ORDER = 1
    150   and INDICATOR_VIEW_NAME in
    151         (select INDICATOR_VIEW_NAME
    152          from TMP_INDICATOR_VIEW_DATASET
    153          where DATA_SOURCE_SORT_ORDER = 3
    154         )
    155 order by INDICATOR_VIEW_NAME, NUMERATOR_DATA_SOURCE_NAME, DIMENSION_1_NAME,  DIMENSION_2_NAME
    156 ;
    157 
    158 delete
    159 from TMP_INDICATOR_VIEW_DATASET
    160 where DATA_SOURCE_SORT_ORDER = 1
    161   and INDICATOR_VIEW_NAME in
    162         (select t1.INDICATOR_VIEW_NAME from
    163                 (select INDICATOR_VIEW_NAME
    164                  from TMP_INDICATOR_VIEW_DATASET
    165                  where DATA_SOURCE_SORT_ORDER = 3
    166                 ) t1
    167         )
    168 ;
    169 
    170 
    171 
    172 /* Now process for the special SO3 uses - shouldn't be any - if some delete */
     160        US related dimension (Country or GeoCountry).  Inserting the blanket SO1
     161        in some cases results in some dup US type entries for SO1 and some dup
     162        NON US entries for SO3.
     163
     164        Test to make sure there are not any SO1 and SO3 both having the same data
     165        source name.  If there are some then need to adjust the DS to IV accordingly.
     166*/
    173167select *
    174168from TMP_INDICATOR_VIEW_DATASET tivd
     
    183177;
    184178
    185 
    186 
    187 /* Finally, should not have any of these but test/remove all SO3's that only have 1 US SO1 */
    188 select *
    189 from TMP_INDICATOR_VIEW_DATASET tivd
    190 where tivd.DATA_SOURCE_SORT_ORDER = 3
    191   and 1 = 
    192         (select count(*)
    193          from TMP_INDICATOR_VIEW_DATASET t
    194          where t.DATA_SOURCE_SORT_ORDER = 1
    195            and t.INDICATOR_VIEW_NAME = tivd.INDICATOR_VIEW_NAME
    196         )
    197 ;
    198 
    199 /* code that can be modified if the above exists...
    200 delete
    201 from TMP_INDICATOR_VIEW_DATASET
    202 where DATA_SOURCE_SORT_ORDER = 3
    203   and INDICATOR_VIEW_NAME = 
    204         (select t.INDICATOR_VIEW_NAME from
    205                 (select INDICATOR_VIEW_NAME
     179/* If needing to delete the SO1 DS then could do something like:
     180delete
     181from TMP_INDICATOR_VIEW_DATASET t2
     182where DATA_SOURCE_SORT_ORDER = 1
     183  and INDICATOR_VIEW_NAME in
     184        (select t1.INDICATOR_VIEW_NAME from
     185                (select INDICATOR_VIEW_NAME
    206186                 from TMP_INDICATOR_VIEW_DATASET
    207                  where DATA_SOURCE_SORT_ORDER = 1
    208                    and ((DIMENSION_1_NAME = 'GeoCountry') or (DIMENSION_2_NAME = 'GeoCountry'))
    209                  group by INDICATOR_VIEW_NAME
    210                  having count(INDICATOR_VIEW_NAME) > 1
    211                 ) t
     187                 where DATA_SOURCE_SORT_ORDER = 3
     188                   and t2.DATA_SOURCE_NAME = t1.DATA_SOURCE_NAME
     189                ) t1
    212190        )
    213191;
     
    231209;
    232210
    233 select count(*) from TMP_INDICATOR_VIEW_DATASET;
     211
     212/* check any temp DS that do NOT have a DS NAME */
    234213select * from TMP_INDICATOR_VIEW_DATASET where DATASET_NAME is null;
    235214
    236 
    237 
    238 
     215/* List of temp IV datasets
     216select INDICATOR_NAME, DATASET_NAME, count(DATASET_NAME)
     217from TMP_INDICATOR_VIEW_DATASET
     218group by DATASET_NAME, INDICATOR_NAME
     219;
     220*/
     221
     222
     223
     224
     225/* create the DATASET records based on the temp table */
    239226delete from DATASET;
    240227
     
    255242;
    256243
    257 select INDICATOR_NAME, DATASET_NAME, count(DATASET_NAME)
    258 from TMP_INDICATOR_VIEW_DATASET
    259 group by DATASET_NAME, INDICATOR_NAME
    260 ;
     244/* basic count of temp datasets to actual */
     245select count(*) from TMP_INDICATOR_VIEW_DATASET;
     246select count(*) from DATASET;
     247
    261248
    262249/* update any null dataset data notes via the IPV */
     
    299286
    300287/* Insert the DATA_SOURCE_TO_DATASET records.
    301         Indicator view to data source sort order which specifies 1=numerator vs 2=denomn 3=US
     288        Indicator view to data source sort order which specifies 1=numerator, 2=denomn, 3=US
    302289        1 only:  1 = numer, 1 = denom
    303290        1, 2:    1 = numer, 2 = denom
    304         1, 2, 3: 1 = NM numer, 2 = NM denom  ,,  3 = US numer, US denom
    305         1, 3:    1 = NM numer, 1 = NM denom  ,,  3 = US numer, US denom
     291        1, 2, 3: 1 = State numer, 2 = State denom  ,,  3 = US numer, US denom
     292        1, 3:    1 = State numer, 1 = State denom  ,,  3 = US numer, US denom
    306293        3 only:  3 = US numer, 3 = US denom
    307294*/
     
    340327group by substring_index(INDICATOR_VIEW_NAME, '.', 1), DATA_SOURCE_NAME
    341328;
     329
     330select * from DATA_SOURCE_TO_DATASET dstd
     331where DATA_SOURCE_NAME = (select dstiv.DATA_SOURCE_NAME
     332         from DATA_SOURCE_TO_INDICATOR_VIEW dstiv, TMP_INDICATOR_VIEW_DATASET tivd
     333         where tivd.DATASET_NAME = dstd.DATASET_NAME
     334           and tivd.DATA_SOURCE_SORT_ORDER = 1
     335           and dstiv.INDICATOR_VIEW_NAME = tivd.INDICATOR_VIEW_NAME
     336           and dstiv.SORT_ORDER = 2
     337         group by tivd.DATASET_NAME, dstiv.DATA_SOURCE_NAME
     338         limit 1
     339        )
     340  and DATA_SOURCE_USAGE = 'denominator'
     341;
     342
    342343
    343344update DATA_SOURCE_TO_DATASET dstd
     
    355356;
    356357
     358/* TEST: use 'garth' to test which ones do NOT have a SO of 2 of the 307 out of 894
     359        ), concat('garth', DATA_SOURCE_NAME) ) 
     360
     361select * from DATA_SOURCE_TO_DATASET where data_source_name like 'garth%';
     362*/
    357363
    358364
Note: See TracChangeset for help on using the changeset viewer.