Changeset 25198 in main
- Timestamp:
- 05/04/22 19:36:40 (2 weeks ago)
- Location:
- trunk/db/src/main/scripts/mysql/migrate
- Files:
-
- 7 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/db/src/main/scripts/mysql/migrate/10_data-dataset.sql
r25095 r25198 17 17 sources are determined by the DATA_SOURCE_TO_INDICATOR_VIEW's SORT_ORDER. 18 18 Lois standardized/updated version 2.3 to have all SO=3's to be the US 19 dataset. SO=1 is the standard numerator for non US data. SO=2 is the denom 20 source for the SO=1 numerator. As such here's the rules: 21 1) GeoCountry & SO3 should all be inserted straight up. 22 2) Insert the SO3 special split usages like: 23 'ACA_Pre_Post', 'BRFSS_Wgt', 'SexStateUS', 'Girls15-17_18-19', 'DiagPriAny'. 24 This is needed so that an actual dataset with the approp measure name 25 and data source name will be created. The data-dataset_record script 26 will handle/create the approp split records. 27 3) All SO 1 should be processed straight up. 19 dataset with SO=1 being the standard numerator for non US datasets. SO=2 20 is an optional denom source for the SO=1 numerator. SO3 should all be 21 inserted straight up as both numer and denom. 28 22 29 23 NOTES: The special SO3 splits is semi handled. For example BRFSS_Wgt is a … … 35 29 */ 36 30 37 /* quick test to see what DS to IV SORT_ORDER=3 records are missing */ 31 /* Before beginning do a quick test of IVVs to see what DS to IV SORT_ORDER=3 32 records are missing. If any the DS to IV needs to be updated with approp SO3s. 33 */ 38 34 select ivv.INDICATOR_VIEW_NAME 39 35 from INDICATOR_VIEW_VALUE ivv … … 42 38 and ivv.VALUE > '' 43 39 and ivv.ACTIVE_FLAG = 'x' 44 and ((DIMENSION_1_NAME = ' GeoCountry') or (DIMENSION_2_NAME = 'GeoCountry'))40 and ((DIMENSION_1_NAME = 'Country') or (DIMENSION_2_NAME = 'Country')) 45 41 and ivv.INDICATOR_VIEW_NAME not in 46 42 ( … … 72 68 73 69 74 /* do some basic tests to make sure all IPVs have a data source*/70 /* basic counts */ 75 71 select count(*) from INDICATOR_VIEW; 76 72 select count(*) from DATA_SOURCE_TO_INDICATOR_VIEW; 77 73 74 /* The standard test for IVs that don't have a data source */ 75 select * from DATA_SOURCE_TO_INDICATOR_VIEW 76 where INDICATOR_VIEW_NAME not in 77 (select NAME from INDICATOR_VIEW) 78 ; 79 80 /* Another way to check views (active) that don't have a DS */ 78 81 select NAME, ACTIVE_FLAG 79 82 from INDICATOR_VIEW … … 81 84 ; 82 85 86 /* Count of IVVs that won't be picked up because no DS to IV mapping. */ 83 87 select iv.NAME, iv.ACTIVE_FLAG, count(ivv.INDICATOR_VIEW_NAME) 84 88 from INDICATOR_VIEW iv, INDICATOR_VIEW_VALUE ivv 85 89 where iv.NAME not in (select INDICATOR_VIEW_NAME from DATA_SOURCE_TO_INDICATOR_VIEW) 86 90 and ivv.INDICATOR_VIEW_NAME = iv.NAME 87 ;88 89 select * from DATA_SOURCE_TO_INDICATOR_VIEW90 where INDICATOR_VIEW_NAME not in91 (select NAME from INDICATOR_VIEW)92 91 ; 93 92 … … 109 108 ; 110 109 111 /* quick test to see what DS_TO_IV is not used*/110 /* quick test to see what DS_TO_IV do not have a SO1 */ 112 111 select distinct INDICATOR_VIEW_NAME 113 112 from TMP_INDICATOR_VIEW_DATASET … … 132 131 where dstiv.INDICATOR_VIEW_NAME = ivv.INDICATOR_VIEW_NAME 133 132 and dstiv.SORT_ORDER = 3 133 and ((ivv.DIMENSION_1_NAME = 'Country') or (ivv.DIMENSION_2_NAME = 'Country')) 134 134 group by 2, 3, 4, 5, 6, 7 135 135 ; 136 136 137 137 138 /* remove the US SO1 entries where SO 3 exists */ 139 select * 140 from TMP_INDICATOR_VIEW_DATASET 141 where DATA_SOURCE_SORT_ORDER = 1 142 and ((DIMENSION_1_NAME = 'Country') or (DIMENSION_2_NAME = 'Country')) 143 and INDICATOR_VIEW_NAME in 144 (select INDICATOR_VIEW_NAME 145 from TMP_INDICATOR_VIEW_DATASET 146 where DATA_SOURCE_SORT_ORDER = 3 147 ) 148 ; 149 150 delete tivd1 from TMP_INDICATOR_VIEW_DATASET tivd1 151 join TMP_INDICATOR_VIEW_DATASET tivd2 152 on tivd2.INDICATOR_VIEW_NAME = tivd1.INDICATOR_VIEW_NAME 153 where tivd1.DATA_SOURCE_SORT_ORDER = 1 154 and ((tivd1.DIMENSION_1_NAME = 'Country') or (tivd1.DIMENSION_2_NAME = 'Country')) 155 and tivd2.DATA_SOURCE_SORT_ORDER = 3 156 ; 157 158 138 159 /* The SO3 insert is very specific to only include those records that have a 139 US related dimension. Inserting the blanket SO1 results in some dup US type 140 entries as the insert keys off of the view name, dataset, SO, and DIM 1 & 2. 141 Thus need to delete US SO1 if there's a US SO3 record. Also need to delete 142 any NON US SO3's from the SO1 *IF* the data source name matches. Otherwise 143 it needs to stay as those non US SO3's are the split use entries. 144 145 Note that the "select" and "delete" counts should match. 146 */ 147 select * 148 from TMP_INDICATOR_VIEW_DATASET 149 where DATA_SOURCE_SORT_ORDER = 1 150 and INDICATOR_VIEW_NAME in 151 (select INDICATOR_VIEW_NAME 152 from TMP_INDICATOR_VIEW_DATASET 153 where DATA_SOURCE_SORT_ORDER = 3 154 ) 155 order by INDICATOR_VIEW_NAME, NUMERATOR_DATA_SOURCE_NAME, DIMENSION_1_NAME, DIMENSION_2_NAME 156 ; 157 158 delete 159 from TMP_INDICATOR_VIEW_DATASET 160 where DATA_SOURCE_SORT_ORDER = 1 161 and INDICATOR_VIEW_NAME in 162 (select t1.INDICATOR_VIEW_NAME from 163 (select INDICATOR_VIEW_NAME 164 from TMP_INDICATOR_VIEW_DATASET 165 where DATA_SOURCE_SORT_ORDER = 3 166 ) t1 167 ) 168 ; 169 170 171 172 /* Now process for the special SO3 uses - shouldn't be any - if some delete */ 160 US related dimension (Country or GeoCountry). Inserting the blanket SO1 161 in some cases results in some dup US type entries for SO1 and some dup 162 NON US entries for SO3. 163 164 Test to make sure there are not any SO1 and SO3 both having the same data 165 source name. If there are some then need to adjust the DS to IV accordingly. 166 */ 173 167 select * 174 168 from TMP_INDICATOR_VIEW_DATASET tivd … … 183 177 ; 184 178 185 186 187 /* Finally, should not have any of these but test/remove all SO3's that only have 1 US SO1 */ 188 select * 189 from TMP_INDICATOR_VIEW_DATASET tivd 190 where tivd.DATA_SOURCE_SORT_ORDER = 3 191 and 1 = 192 (select count(*) 193 from TMP_INDICATOR_VIEW_DATASET t 194 where t.DATA_SOURCE_SORT_ORDER = 1 195 and t.INDICATOR_VIEW_NAME = tivd.INDICATOR_VIEW_NAME 196 ) 197 ; 198 199 /* code that can be modified if the above exists... 200 delete 201 from TMP_INDICATOR_VIEW_DATASET 202 where DATA_SOURCE_SORT_ORDER = 3 203 and INDICATOR_VIEW_NAME = 204 (select t.INDICATOR_VIEW_NAME from 205 (select INDICATOR_VIEW_NAME 179 /* If needing to delete the SO1 DS then could do something like: 180 delete 181 from TMP_INDICATOR_VIEW_DATASET t2 182 where DATA_SOURCE_SORT_ORDER = 1 183 and INDICATOR_VIEW_NAME in 184 (select t1.INDICATOR_VIEW_NAME from 185 (select INDICATOR_VIEW_NAME 206 186 from TMP_INDICATOR_VIEW_DATASET 207 where DATA_SOURCE_SORT_ORDER = 1 208 and ((DIMENSION_1_NAME = 'GeoCountry') or (DIMENSION_2_NAME = 'GeoCountry')) 209 group by INDICATOR_VIEW_NAME 210 having count(INDICATOR_VIEW_NAME) > 1 211 ) t 187 where DATA_SOURCE_SORT_ORDER = 3 188 and t2.DATA_SOURCE_NAME = t1.DATA_SOURCE_NAME 189 ) t1 212 190 ) 213 191 ; … … 231 209 ; 232 210 233 select count(*) from TMP_INDICATOR_VIEW_DATASET; 211 212 /* check any temp DS that do NOT have a DS NAME */ 234 213 select * from TMP_INDICATOR_VIEW_DATASET where DATASET_NAME is null; 235 214 236 237 238 215 /* List of temp IV datasets 216 select INDICATOR_NAME, DATASET_NAME, count(DATASET_NAME) 217 from TMP_INDICATOR_VIEW_DATASET 218 group by DATASET_NAME, INDICATOR_NAME 219 ; 220 */ 221 222 223 224 225 /* create the DATASET records based on the temp table */ 239 226 delete from DATASET; 240 227 … … 255 242 ; 256 243 257 select INDICATOR_NAME, DATASET_NAME, count(DATASET_NAME) 258 from TMP_INDICATOR_VIEW_DATASET 259 group by DATASET_NAME, INDICATOR_NAME 260 ; 244 /* basic count of temp datasets to actual */ 245 select count(*) from TMP_INDICATOR_VIEW_DATASET; 246 select count(*) from DATASET; 247 261 248 262 249 /* update any null dataset data notes via the IPV */ … … 299 286 300 287 /* Insert the DATA_SOURCE_TO_DATASET records. 301 Indicator view to data source sort order which specifies 1=numerator vs 2=denomn 3=US288 Indicator view to data source sort order which specifies 1=numerator, 2=denomn, 3=US 302 289 1 only: 1 = numer, 1 = denom 303 290 1, 2: 1 = numer, 2 = denom 304 1, 2, 3: 1 = NM numer, 2 = NMdenom ,, 3 = US numer, US denom305 1, 3: 1 = NM numer, 1 = NMdenom ,, 3 = US numer, US denom291 1, 2, 3: 1 = State numer, 2 = State denom ,, 3 = US numer, US denom 292 1, 3: 1 = State numer, 1 = State denom ,, 3 = US numer, US denom 306 293 3 only: 3 = US numer, 3 = US denom 307 294 */ … … 340 327 group by substring_index(INDICATOR_VIEW_NAME, '.', 1), DATA_SOURCE_NAME 341 328 ; 329 330 select * from DATA_SOURCE_TO_DATASET dstd 331 where DATA_SOURCE_NAME = (select dstiv.DATA_SOURCE_NAME 332 from DATA_SOURCE_TO_INDICATOR_VIEW dstiv, TMP_INDICATOR_VIEW_DATASET tivd 333 where tivd.DATASET_NAME = dstd.DATASET_NAME 334 and tivd.DATA_SOURCE_SORT_ORDER = 1 335 and dstiv.INDICATOR_VIEW_NAME = tivd.INDICATOR_VIEW_NAME 336 and dstiv.SORT_ORDER = 2 337 group by tivd.DATASET_NAME, dstiv.DATA_SOURCE_NAME 338 limit 1 339 ) 340 and DATA_SOURCE_USAGE = 'denominator' 341 ; 342 342 343 343 344 update DATA_SOURCE_TO_DATASET dstd … … 355 356 ; 356 357 358 /* TEST: use 'garth' to test which ones do NOT have a SO of 2 of the 307 out of 894 359 ), concat('garth', DATA_SOURCE_NAME) ) 360 361 select * from DATA_SOURCE_TO_DATASET where data_source_name like 'garth%'; 362 */ 357 363 358 364 -
trunk/db/src/main/scripts/mysql/migrate/11_data-dataset_record.sql
r25154 r25198 166 166 /* check DSRs that don't have a dataset. */ 167 167 select * from TMP_DATASET_RECORD where DATASET_NAME is null; 168 ==> 224 rows169 168 170 169 /* IVV names that don't match */ … … 180 179 and INDICATOR_VIEW_NAME not in (select INDICATOR_VIEW_NAME from TMP_INDICATOR_VIEW_DATASET) 181 180 ; 182 ==> removed the above 224 recs183 181 184 182 … … 419 417 from DATASET_RECORD 420 418 where MEASURE_VALUE is null 421 and (LABEL < " ") 419 and ((LABEL is null) or (LABEL <= " ")) 420 and ((VALUE_ATTRIBUTE_NAME is null) or (VALUE_ATTRIBUTE_NAME < " ")) 422 421 group by LABEL, VALUE_ATTRIBUTE_NAME 423 422 ; … … 425 424 delete from DATASET_RECORD 426 425 where MEASURE_VALUE is null 427 and ((LABEL is null) or (LABEL <= " "))428 ; 429 ==> 1190 426 and ((LABEL is null) or (LABEL <= " ")) 427 and ((VALUE_ATTRIBUTE_NAME is null) or (VALUE_ATTRIBUTE_NAME < " ")) 428 ; 430 429 431 430 delete from DATASET_RECORD … … 635 634 ; 636 635 */ 637 638 639 /* 640 issue appears to be dup keys for different IPVs with different measure values... 641 */ 642 select ivv.INDICATOR_VIEW_NAME, 643 ivv.DIMENSION_1_NAME, ivv.DIMENSION_1_VALUE, 644 ivv.DIMENSION_2_NAME, ivv.DIMENSION_2_VALUE, 645 ivv.PERIOD_DIMENSION_NAME, ivv.PERIOD_DIMENSION_VALUE 636 select count(ivv.INDICATOR_VIEW_NAME) 646 637 from INDICATOR_VIEW_VALUE ivv 647 638 left join TMP_DATASET_RECORD tdr on … … 653 644 and tdr.PERIOD_DIMENSION_NAME = ivv.PERIOD_DIMENSION_NAME 654 645 and tdr.PERIOD_DIMENSION_VALUE = ivv.PERIOD_DIMENSION_VALUE 655 order by INDICATOR_VIEW_NAME 656 ; 657 646 ; 647 648 /* Possible issue of dup keys for different IPVs with different measure values. */ 658 649 select * 659 650 from TMP_DATASET_RECORD … … 697 688 698 689 699 690 /* spot check */ 700 691 select INDICATOR_NAME, count(INDICATOR_VIEW_NAME) count_ivn, 701 692 MEASURE_NAME, … … 712 703 ; 713 704 714 715 705 select INDICATOR_VIEW_NAME, 716 706 MEASURE_NAME, … … 727 717 ; 728 718 729 730 731 719 select INDICATOR_NAME, count(INDICATOR_VIEW_NAME) count_ivn, 732 720 MEASURE_NAME, … … 742 730 limit 5000 743 731 ; 744 ==> 2451 745 746 747 select distinct(INDICATOR_NAME) from 748 ( 749 select INDICATOR_NAME from TMP_DATASET_RECORD 750 group by INDICATOR_NAME, MEASURE_NAME, 751 DIMENSION_1_NAME, DIMENSION_1_VALUE, 752 DIMENSION_2_NAME, DIMENSION_2_VALUE, 753 PERIOD_DIMENSION_NAME, PERIOD_DIMENSION_VALUE 754 having count(INDICATOR_VIEW_NAME) > 1 755 ) as a 756 ; 757 ==> 114 758 759 will need a mapping file for source IP with a new name and all the assoc 760 views. Script would then need to create all the data sources and other 761 related intersections as well as renaming views and view values. 732 733 /* Show temp DSRs that have multiple view names */ 734 select distinct(INDICATOR_NAME) 735 from 736 ( 737 select INDICATOR_NAME from TMP_DATASET_RECORD 738 group by INDICATOR_NAME, MEASURE_NAME, 739 DIMENSION_1_NAME, DIMENSION_1_VALUE, 740 DIMENSION_2_NAME, DIMENSION_2_VALUE, 741 PERIOD_DIMENSION_NAME, PERIOD_DIMENSION_VALUE 742 having count(INDICATOR_VIEW_NAME) > 1 743 ) as a 744 ; 762 745 763 746 /*------------------------------ END OF FILE ------------------------------*/ -
trunk/db/src/main/scripts/mysql/migrate/1_data-cleanup.sql
r25052 r25198 6 6 has been complteted. 7 7 8 NOTE: When mixing myisam tables need to use "collate" so that character set9 data can be compared. If using .sql exports then not needed.8 Also, not removing apparent orphaned IVV records as these are manually set 9 by Kim for special splitting of views to have a "constant series". 10 10 */ 11 12 11 13 12 /* REMOVE ORPHANED IPVs: delete invalid or orphaned IV records */ 14 13 delete from INDICATOR_VIEW where INDICATOR_NAME not in (select NAME from INDICATOR); 15 delete from INDICATOR_VIEW16 where DATASET_CATEGORY_NAME is null17 or DATASET_CATEGORY_NAME not in (select NAME from DATASET_CATEGORY)18 or DATASET_SERIES_NAME not in (select NAME from DATASET_SERIES)19 ;20 14 21 15 /* REMOVE ORPHANED IVV: … … 28 22 delete from INDICATOR_VIEW_VALUE 29 23 where INDICATOR_VIEW_NAME not in (select NAME from INDICATOR_VIEW) 30 ;31 32 delete from INDICATOR_VIEW_VALUE33 where DATASET_CATEGORY_NAME is null34 or DATASET_CATEGORY_NAME != (select DATASET_CATEGORY_NAME from INDICATOR_VIEW where NAME = INDICATOR_VIEW_NAME)35 or DATASET_SERIES_NAME != (select DATASET_SERIES_NAME from INDICATOR_VIEW where NAME = INDICATOR_VIEW_NAME)36 or DATASET_SERIES_VALUE not in (select VALUE from DATASET_SERIES_VALUE dsv where dsv.DATASET_SERIES_NAME = INDICATOR_VIEW_VALUE.DATASET_SERIES_NAME)37 or DATASET_CATEGORY_VALUE not in (select VALUE from DATASET_CATEGORY_VALUE dcv where dcv.DATASET_CATEGORY_NAME = INDICATOR_VIEW_VALUE.DATASET_CATEGORY_NAME)38 24 ; 39 25 -
trunk/db/src/main/scripts/mysql/migrate/4_data-iv_misc.sql
r25154 r25198 6 6 */ 7 7 update INDICATOR_VIEW set MEASURE_TITLE_OVERRIDE = (select MEASURE_TITLE_OVERRIDE from TMP_IPV_MEASURE where IPV_NAME3 = NAME); 8 9 8 10 9 -
trunk/db/src/main/scripts/mysql/migrate/6_data-iv_dimensions.sql
r25052 r25198 163 163 164 164 165 /* Clear the DEFAULT map name to none if the view doesn't have a geo dim */ 166 update INDICATOR_VIEW set MAP_NAME = 'None' 167 where CATEGORY_DIMENSION_NAME not in (select NAME from DIMENSION where GEOGRAPHY_FLAG='x'); 168 169 170 165 171 /* spot test */ 166 172 select * from INDICATOR_VIEW where PERIOD_DIMENSION_NAME = 'YearDefault' order by NAME; -
trunk/db/src/main/scripts/mysql/migrate/8_data-iv_dimension_usage.sql
r25052 r25198 77 77 78 78 79 80 79 /* Test that all views have a period dimension used. */ 81 80 select NAME, ACTIVE_FLAG … … 93 92 ) 94 93 ; 95 /* 96 ==> 321 that don't have an entry. This is because the PD is not used as a 97 series or cat. 98 */ 94 99 95 100 96 /* Insert PDN as a constant. Later will insert the value that matches the … … 104 100 select NAME, PERIOD_DIMENSION_NAME, 'constant' 105 101 from INDICATOR_VIEW 106 where not exists107 (select 1102 where NAME not in 103 (select INDICATOR_VIEW_NAME 108 104 from DIMENSION_TO_INDICATOR_VIEW 109 105 where INDICATOR_VIEW_NAME = NAME … … 111 107 ) 112 108 ; 109 110 111 /* Test for only a single IV to DIM usage. Min is 2 (period and dim1). */ 112 select * from DIMENSION_TO_INDICATOR_VIEW 113 group by INDICATOR_VIEW_NAME 114 having count(*) = 1 115 ; 116 117 118 /* Insert State as the default constant dim where a "Year" is the category and 119 there are not any other dimensions set. 120 */ 121 insert into DIMENSION_TO_INDICATOR_VIEW (INDICATOR_VIEW_NAME, DIMENSION_NAME, DIMENSION_USAGE) 122 select NAME, 'State', 'constant' 123 from INDICATOR_VIEW 124 where NAME in 125 (select INDICATOR_VIEW_NAME 126 from DIMENSION_TO_INDICATOR_VIEW 127 where INDICATOR_VIEW_NAME = NAME 128 and DIMENSION_NAME like 'Year%' 129 and DIMENSION_USAGE = 'category' 130 ) 131 and NAME in 132 (select INDICATOR_VIEW_NAME 133 from DIMENSION_TO_INDICATOR_VIEW 134 group by INDICATOR_VIEW_NAME 135 having count(*) = 1 136 ) 137 ; 138 139 /* Do the single dim test again */ 140 select * from DIMENSION_TO_INDICATOR_VIEW 141 group by INDICATOR_VIEW_NAME 142 having count(*) = 1 143 ; 144 113 145 114 146 … … 169 201 ; 170 202 203 delete dtiv from DIMENSION_TO_INDICATOR_VIEW dtiv, DIMENSION_TO_INDICATOR_VIEW dtiv2 204 where dtiv.DIMENSION_USAGE != 'constant' 205 and dtiv2.DIMENSION_USAGE = 'constant' 206 and dtiv.INDICATOR_VIEW_NAME = dtiv2.INDICATOR_VIEW_NAME 207 and dtiv.DIMENSION_NAME = dtiv2.DIMENSION_NAME 208 ; 209 210 delete dtiv from DIMENSION_TO_INDICATOR_VIEW dtiv, DIMENSION_TO_INDICATOR_VIEW dtiv2 211 where dtiv.DIMENSION_USAGE != 'series' 212 and dtiv2.DIMENSION_USAGE = 'series' 213 and dtiv.INDICATOR_VIEW_NAME = dtiv2.INDICATOR_VIEW_NAME 214 and dtiv.DIMENSION_NAME = dtiv2.DIMENSION_NAME 215 ; 216 171 217 172 218 -
trunk/db/src/main/scripts/mysql/migrate/9_data-iv_dimension_value.sql
r25095 r25198 91 91 order by INDICATOR_VIEW_NAME, DIMENSION_NAME, DIMENSION_VALUE 92 92 ; 93 93 /* kim said to leave these 94 94 delete from DIMENSION_VALUE_TO_IND_VIEW 95 95 where not exists … … 101 101 ) 102 102 ; 103 */ 104 103 105 104 106 /* test to make sure that every view has dim values */
Note: See TracChangeset
for help on using the changeset viewer.