source: main/trunk/db/src/main/scripts/mysql/migrate/11_data-dataset_record.sql @ 25154

Last change on this file since 25154 was 25154, checked in by GarthBraithwaite_STG, 3 months ago

db - move split to copy and simplified Kims and script. removed ibisph. prefix. ipv rename to catch ip name. Added default view template record and set IV to default.

File size: 25.7 KB
Line 
1/* Create the dataset value records staight from the IVV records which were all
2        cleaned up in the data-ivv-dimensions.sql code.
3*/
4select count(*) from INDICATOR_VIEW_VALUE;
5select concat(INDICATOR_VIEW_NAME, 
6                        '|', DIMENSION_1_NAME, ':', DIMENSION_1_VALUE,
7                        '|', DIMENSION_2_NAME, ':', DIMENSION_2_VALUE,
8                        '|', PERIOD_DIMENSION_NAME, ':', PERIOD_DIMENSION_VALUE
9                ), count(1),
10        INDICATOR_VIEW_NAME, DATASET_CATEGORY_VALUE, DATASET_SERIES_VALUE,
11        DIMENSION_1_NAME,  DIMENSION_2_NAME,  PERIOD_DIMENSION_NAME, 
12        DIMENSION_1_VALUE, DIMENSION_2_VALUE, PERIOD_DIMENSION_VALUE, 
13        VALUE
14from INDICATOR_VIEW_VALUE
15where INDICATOR_VIEW_NAME not like 'Demo%'
16  and ACTIVE_FLAG = 'x'
17group by 1
18having count(1) > 1
19order by 2 desc;
20
21
22
23drop table if exists TMP_DATASET_RECORD;
24
25create table TMP_DATASET_RECORD(
26  INDICATOR_VIEW_NAME             varchar  (400),
27  INDICATOR_NAME                  varchar  (400),
28  DATASET_NAME                    varchar  (200), 
29  MEASURE_NAME                    varchar  (100), 
30  /* */
31  PERIOD_DIMENSION_NAME           varchar  (100)  not null,
32  PERIOD_DIMENSION_VALUE          varchar  (100)  not null,
33  DIMENSION_1_NAME                varchar  (100)  not null,
34  DIMENSION_1_VALUE               varchar  (100)  not null,
35  DIMENSION_2_NAME                varchar  (100),
36  DIMENSION_2_VALUE               varchar  (100),
37  /* */
38  NUMERATOR                       double,
39  DENOMINATOR                     double,
40  VALUE                           double,               /* can be null because if insufficient etc. 0 is not correct and * or ** is NOT allowed! */
41  LOWER_CONFIDENCE_VALUE          double,
42  UPPER_CONFIDENCE_VALUE          double,
43  LABEL                           varchar  (100),
44  NOTE                            varchar (2000),
45  /* */
46  SORT_ORDER                      int             default null,
47  ACTIVE_FLAG                     varchar    (1),
48  /* */
49  constraint TMP_DATASET_RECORD_UK
50    unique key (INDICATOR_VIEW_NAME, PERIOD_DIMENSION_NAME, PERIOD_DIMENSION_VALUE,
51                DIMENSION_1_NAME, DIMENSION_1_VALUE, DIMENSION_2_NAME, DIMENSION_2_VALUE
52        )
53);
54
55
56/* do a straight grab - dump in all the values.  Note have to do a temp table
57        insert that does not have a composite PK because the dataset name is more
58        general as it combines geocnty, georegion, geosarea etc and thus has the
59        same multiple state records which will cause dup PK issues.
60
61        first need to set the IV values to numeric (or null if blank) so that the
62        value can be loaded into the numeric only value DATASET_RECORD.  These
63        values are later used to set the value attribute.
64        Note that the admin app only allows **, null, '', and numeric values. 
65        The -1 or * should never happen but there are records that have a label
66       
67        -1 == ** == suppressed.
68        -20 == '' or null == missing/no data.
69        -99 == numeric * 1 != - catch everything else - should never happen.
70        IBISQ allows for *, **, NA?, and ''
71*/
72delete from TMP_DATASET_RECORD;
73
74insert into TMP_DATASET_RECORD (
75        INDICATOR_VIEW_NAME, 
76        PERIOD_DIMENSION_NAME, PERIOD_DIMENSION_VALUE, 
77        DIMENSION_1_NAME, DIMENSION_1_VALUE, 
78        DIMENSION_2_NAME, DIMENSION_2_VALUE, 
79        VALUE, LOWER_CONFIDENCE_VALUE, UPPER_CONFIDENCE_VALUE, NUMERATOR, DENOMINATOR, 
80        LABEL, NOTE, ACTIVE_FLAG)
81select 
82        INDICATOR_VIEW_NAME,
83        PERIOD_DIMENSION_NAME, PERIOD_DIMENSION_VALUE, 
84        DIMENSION_1_NAME, DIMENSION_1_VALUE,
85        DIMENSION_2_NAME, DIMENSION_2_VALUE, 
86        case 
87                when (VALUE like '%*%') and (LABEL like '%uppress%small%') then  -1 
88                when (VALUE like '%*%') and (LABEL like '%uppress%')       then  -1 
89                when (VALUE like '%*%') and (LABEL like '%ery%nstable%')   then -11 
90                when (VALUE like '%*%') and (LABEL like '%nstable%')       then -10 
91                when (VALUE like '%*%') and (LABEL like '%not reliable%')  then -10 
92                when (VALUE like '%*%') and (LABEL like '%ot%vail%')       then -20 
93                when (VALUE like '%*%') and (LABEL like 'NA')              then -20 
94                when (VALUE like '%*%') and (LABEL like '%o%ata%')         then -20 
95                when (VALUE like '%*%') and (LABEL like '%ata%ot%')        then -20 
96                when (VALUE like '%*%') and (LABEL like '%nsuff%')         then -21
97                when (VALUE like '%*%') and (LABEL like '%ncomplete%')     then -21
98                when (VALUE like '%*%')                                    then  -1 
99                when (VALUE = '')                                          then -20
100                when (VALUE*1 != VALUE)                                    then -99
101                else  VALUE
102        end,
103        case 
104                when (LOWER_CONFIDENCE_VALUE = '**') then -1 
105                when (LOWER_CONFIDENCE_VALUE = '')   then -20
106                when (LOWER_CONFIDENCE_VALUE*1 != LOWER_CONFIDENCE_VALUE) then -99
107                else  LOWER_CONFIDENCE_VALUE
108        end,
109        case 
110                when (UPPER_CONFIDENCE_VALUE = '**') then -1 
111                when (UPPER_CONFIDENCE_VALUE = '')   then -20
112                when (UPPER_CONFIDENCE_VALUE*1 != UPPER_CONFIDENCE_VALUE) then -99
113                else  UPPER_CONFIDENCE_VALUE
114        end,
115        case 
116                when (NUMERATOR = '**') then -1 
117                when (NUMERATOR = '')   then -20
118                when (NUMERATOR*1 != NUMERATOR) then -99
119                else  NUMERATOR*1
120        end,
121        case 
122                when (DENOMINATOR = '**') then -1 
123                when (DENOMINATOR = '')   then -20
124                when (DENOMINATOR*1 != DENOMINATOR) then -99
125                else  DENOMINATOR*1
126        end,
127        LABEL, NOTE, ACTIVE_FLAG
128from INDICATOR_VIEW_VALUE
129;
130
131
132/* Blanket/general temp record's dataset name update.  Match the IV name and
133        dimension names against the TMP_INDICATOR_VIEW_DATASET's values to get the
134        tivd dataset name.
135*/
136update TMP_DATASET_RECORD tdr
137        inner join 
138        (select INDICATOR_VIEW_NAME, INDICATOR_NAME, MEASURE_NAME, DIMENSION_1_NAME, DIMENSION_2_NAME, PERIOD_DIMENSION_NAME, DATASET_NAME
139         from TMP_INDICATOR_VIEW_DATASET
140        ) tivd_ij
141        on ( 
142                (tivd_ij.INDICATOR_VIEW_NAME   = tdr.INDICATOR_VIEW_NAME)
143                 and ( tivd_ij.DIMENSION_1_NAME  = tdr.DIMENSION_1_NAME)
144                 and ((tivd_ij.DIMENSION_2_NAME  = tdr.DIMENSION_2_NAME) or (tdr.DIMENSION_2_NAME is null))
145                 and ( tivd_ij.PERIOD_DIMENSION_NAME = tdr.PERIOD_DIMENSION_NAME)
146        )
147set tdr.DATASET_NAME   = tivd_ij.DATASET_NAME, 
148        tdr.MEASURE_NAME   = tivd_ij.MEASURE_NAME,
149        tdr.INDICATOR_NAME = tivd_ij.INDICATOR_NAME
150where tdr.DATASET_NAME is null
151;
152
153
154
155/* should never have these */
156select * from TMP_DATASET_RECORD tdr
157where tdr.DATASET_NAME is not null
158and not exists
159        (select INDICATOR_VIEW_NAME
160         from TMP_INDICATOR_VIEW_DATASET ij
161         where (ij.INDICATOR_VIEW_NAME   = tdr.INDICATOR_VIEW_NAME)
162        )
163group by INDICATOR_VIEW_NAME
164;
165
166/* check DSRs that don't have a dataset. */
167select * from TMP_DATASET_RECORD where DATASET_NAME is null;
168==> 224 rows
169
170/* IVV names that don't match */
171select distinct INDICATOR_VIEW_NAME
172from TMP_DATASET_RECORD
173where DATASET_NAME is null
174  and INDICATOR_VIEW_NAME not in (select INDICATOR_VIEW_NAME from TMP_INDICATOR_VIEW_DATASET)
175;
176
177/* These appear to be orphans.  Not sure how they got this far as data-cleanup.sql */
178delete from TMP_DATASET_RECORD
179where DATASET_NAME is null
180  and INDICATOR_VIEW_NAME not in (select INDICATOR_VIEW_NAME from TMP_INDICATOR_VIEW_DATASET)
181;
182==> removed the above 224 recs
183
184
185/* the above issues seem to be because there's not SO1 DS for the dataset name */
186select * from DATA_SOURCE_TO_INDICATOR_VIEW
187where INDICATOR_VIEW_NAME in 
188        (select distinct INDICATOR_VIEW_NAME
189         from TMP_DATASET_RECORD
190         where DATASET_NAME is null
191           and INDICATOR_VIEW_NAME not in 
192                (select INDICATOR_VIEW_NAME from TMP_INDICATOR_VIEW_DATASET)
193        )
194;
195
196
197/* Should not have these.  IF FOUND need to be researched as to why - see above.
198        3/11/2018 run has 40 with most being PopDemo and all being State, US.  The
199        current issue is NOT the State, US it's that for these the first IVV and/or
200        IV are being used to determine the DIM1 and DIM2.  In these rare cases the
201        dim usage is switched e.g. the first view uses RaceEth then GeoState.  All
202        view uses after that are using GeoState first and no DIM2 etc.  So those
203        records will have to be manuall corrected...
204*/
205select * from TMP_DATASET_RECORD tdr
206where tdr.DATASET_NAME is null
207and exists
208        (select INDICATOR_VIEW_NAME
209         from TMP_INDICATOR_VIEW_DATASET ij
210         where (ij.INDICATOR_VIEW_NAME   = tdr.INDICATOR_VIEW_NAME)
211        )
212group by INDICATOR_VIEW_NAME
213;
214
215
216/* check the IP names not being found.  Since turning off the active flag
217        restriction there should not be any BUT for the temp NM run maybe the
218        cleanup didn't get em all?
219*/
220select * from TMP_DATASET_RECORD tdr
221where tdr.DATASET_NAME is null
222and exists
223        (select INDICATOR_VIEW_NAME
224         from TMP_INDICATOR_VIEW_DATASET ij
225         where (ij.INDICATOR_VIEW_NAME   = tdr.INDICATOR_VIEW_NAME)
226        )
227group by INDICATOR_VIEW_NAME
228;
229
230
231select * from TMP_DATASET_RECORD tdr
232where tdr.DATASET_NAME is null
233and exists
234        (select INDICATOR_VIEW_NAME, DIMENSION_1_NAME, DIMENSION_2_NAME, PERIOD_DIMENSION_NAME, DATASET_NAME
235         from TMP_INDICATOR_VIEW_DATASET ij
236         where (ij.INDICATOR_VIEW_NAME   = tdr.INDICATOR_VIEW_NAME)
237           and (ij.DIMENSION_1_NAME  = tdr.DIMENSION_1_NAME)
238           and ((ij.DIMENSION_2_NAME = tdr.DIMENSION_2_NAME) or (tdr.DIMENSION_2_NAME is null))
239           and (ij.PERIOD_DIMENSION_NAME = tdr.PERIOD_DIMENSION_NAME)
240        )
241;
242
243
244
245/* now insert the unique temp records into the actual DATASET_RECORD table */
246select * from DATASET_RECORD;
247delete from DATASET_RECORD;
248
249insert into DATASET_RECORD (
250        NAME, DATASET_NAME, 
251        PERIOD_DIMENSION_NAME,  DIMENSION_1_NAME,  DIMENSION_2_NAME, 
252        PERIOD_DIMENSION_VALUE, DIMENSION_1_VALUE, DIMENSION_2_VALUE,
253        MEASURE_VALUE, 
254        VALUE_ATTRIBUTE_NAME,
255        LABEL, NOTE, ACTIVE_FLAG
256)
257select 
258        concat(DATASET_NAME, 
259                '|', PERIOD_DIMENSION_NAME,   ':', PERIOD_DIMENSION_VALUE,
260                '|', DIMENSION_1_NAME, ':', DIMENSION_1_VALUE,
261                '|', ifnull(DIMENSION_2_NAME, 'NA'), ':', ifnull(DIMENSION_2_VALUE, 'NA')
262        ),
263        ifnull(DATASET_NAME, 'garth'),
264        PERIOD_DIMENSION_NAME,  DIMENSION_1_NAME,  DIMENSION_2_NAME, 
265        PERIOD_DIMENSION_VALUE, DIMENSION_1_VALUE, DIMENSION_2_VALUE, 
266        case 
267                when (VALUE < 0) then null 
268                else VALUE
269        end,
270        case 
271                when (VALUE =  -1) then 'Suppressed'
272                when (VALUE =  -2) then 'SurveySuppressed'
273                when (VALUE = -10) then 'Unstable'
274                when (VALUE = -11) then 'VeryUnstable'
275                when (VALUE = -20) then 'NoData'
276                when (VALUE = -21) then 'Insufficient'
277                when (VALUE <   0) then 'ERROR'
278                else null
279        end,
280        LABEL, NOTE, ACTIVE_FLAG
281from TMP_DATASET_RECORD
282where DATASET_NAME is not null
283group by 1
284;
285
286
287/* update the VA based on the label and/or the special values */
288update DATASET_RECORD
289set VALUE_ATTRIBUTE_NAME = 'ERROR'
290where (VALUE_ATTRIBUTE_NAME is null)
291  and (MEASURE_VALUE = -9)
292;
293
294
295select * from DATASET_RECORD
296where VALUE_ATTRIBUTE_NAME = 'ERROR'
297;
298
299
300
301
302/* Remove redundant series or cat dim values that match/are period dim values.
303if dimension 1 is the same as period then put dim 2 into dim 1
304update DATASET_RECORD set DIMENSION_1_VALUE = '!~~~!'           where ((DIMENSION_1_NAME = PERIOD_DIMENSION_NAME) and (DIMENSION_1_VALUE != PERIOD_DIMENSION_VALUE));
305update DATASET_RECORD set DIMENSION_2_VALUE = '!~~~!' where ((DIMENSION_2_NAME = PERIOD_DIMENSION_NAME) and (DIMENSION_2_VALUE != PERIOD_DIMENSION_VALUE));
306*/
307select * from DATASET_RECORD
308where ((DIMENSION_1_NAME = PERIOD_DIMENSION_NAME) and (DIMENSION_1_VALUE != PERIOD_DIMENSION_VALUE))
309   or ((DIMENSION_2_NAME = PERIOD_DIMENSION_NAME) and (DIMENSION_2_VALUE != PERIOD_DIMENSION_VALUE))
310;
311select * from DATASET_RECORD
312where ((DIMENSION_1_NAME = PERIOD_DIMENSION_NAME) and (DIMENSION_1_VALUE = PERIOD_DIMENSION_VALUE))
313   or ((DIMENSION_2_NAME = PERIOD_DIMENSION_NAME) and (DIMENSION_2_VALUE = PERIOD_DIMENSION_VALUE))
314;
315
316
317/* did have it matching on the VALUE as well but there's no value here.  If the
318        dimension name matches the period then get rid of it - period should alreay
319        be correct from previous updates.
320update DATASET_RECORD set DIMENSION_1_VALUE = DIMENSION_2_VALUE where (DIMENSION_1_NAME = PERIOD_DIMENSION_NAME);
321update DATASET_RECORD set DIMENSION_1_NAME  = DIMENSION_2_NAME  where (DIMENSION_1_NAME = PERIOD_DIMENSION_NAME);
322
323update DATASET_RECORD set DIMENSION_2_VALUE = ''     where DIMENSION_1_NAME = DIMENSION_2_NAME;
324update DATASET_RECORD set DIMENSION_2_NAME  = 'None' where DIMENSION_1_NAME = DIMENSION_2_NAME;
325
326update DATASET_RECORD set DIMENSION_2_VALUE = ''     where (DIMENSION_2_NAME = PERIOD_DIMENSION_NAME);
327update DATASET_RECORD set DIMENSION_2_NAME  = 'None' where (DIMENSION_2_NAME = PERIOD_DIMENSION_NAME);
328*/
329
330
331/* Seeing issues with duplicate records.  Show Lois so she can determine the
332        datasource issue.  This query takes a while to run...
333
334select *
335from TMP_DATASET_RECORD t1
336where exists
337        (select 1
338         from TMP_DATASET_RECORD t2
339     where  t1.INDICATOR_NAME = t2.INDICATOR_NAME
340                and t1.DATASET_NAME != t2.DATASET_NAME
341                and t1.MEASURE_NAME = t2.MEASURE_NAME
342                and t1.PERIOD_DIMENSION_NAME = t2.PERIOD_DIMENSION_NAME
343        and t1.PERIOD_DIMENSION_VALUE = t2.PERIOD_DIMENSION_VALUE
344        and t1.DIMENSION_1_NAME = t2.DIMENSION_1_NAME
345        and t1.DIMENSION_1_VALUE = t2.DIMENSION_1_VALUE
346        and ((t1.DIMENSION_2_NAME = t2.DIMENSION_2_NAME)
347                        or
348                        (t1.DIMENSION_2_NAME is null and t2.DIMENSION_2_NAME is null))
349        and ((t1.DIMENSION_2_VALUE = t2.DIMENSION_2_VALUE)
350                        or
351                        (t1.DIMENSION_2_VALUE is null and t2.DIMENSION_2_VALUE is null))
352                and t1.VALUE = t2.VALUE
353         limit 1, 1
354        )
355order by t1.DATASET_NAME
356;
357*/
358
359
360
361/* Shouldn't see any of these */
362select * 
363from DATASET_RECORD t1
364where exists 
365        (select 1
366         from DATASET_RECORD t2
367     where  t1.DATASET_NAME = t2.DATASET_NAME
368                and t1.PERIOD_DIMENSION_NAME = t2.PERIOD_DIMENSION_NAME
369        and t1.PERIOD_DIMENSION_VALUE = t2.PERIOD_DIMENSION_VALUE
370        and t1.DIMENSION_1_NAME = t2.DIMENSION_1_NAME
371        and t1.DIMENSION_1_VALUE = t2.DIMENSION_1_VALUE
372        and ((t1.DIMENSION_2_NAME = t2.DIMENSION_2_NAME) 
373                        or 
374                        (t1.DIMENSION_2_NAME is null and t2.DIMENSION_2_NAME is null))
375        and ((t1.DIMENSION_2_VALUE = t2.DIMENSION_2_VALUE) 
376                        or 
377                        (t1.DIMENSION_2_VALUE is null and t2.DIMENSION_2_VALUE is null))
378                and t1.MEASURE_VALUE = t2.MEASURE_VALUE
379         limit 1, 1
380        )
381order by t1.DATASET_NAME
382;
383
384
385/* These are likely dups or at least dups due to data source ip.mn.dsn */
386select * 
387from DATASET_RECORD t1
388where exists 
389        (select 1
390         from DATASET_RECORD t2
391     where  t1.DATASET_NAME = t2.DATASET_NAME
392                and t1.PERIOD_DIMENSION_NAME = t2.PERIOD_DIMENSION_NAME
393        and t1.PERIOD_DIMENSION_VALUE = t2.PERIOD_DIMENSION_VALUE
394        and t1.DIMENSION_1_NAME = t2.DIMENSION_1_NAME
395        and t1.DIMENSION_1_VALUE = t2.DIMENSION_1_VALUE
396        and ((t1.DIMENSION_2_NAME = t2.DIMENSION_2_NAME) 
397                        or 
398                        (t1.DIMENSION_2_NAME is null and t2.DIMENSION_2_NAME is null))
399        and ((t1.DIMENSION_2_VALUE = t2.DIMENSION_2_VALUE) 
400                        or 
401                        (t1.DIMENSION_2_VALUE is null and t2.DIMENSION_2_VALUE is null))
402                and t1.MEASURE_VALUE = t2.MEASURE_VALUE
403         limit 1, 1
404        )
405order by t1.DATASET_NAME
406;
407
408
409/*
410stuff added 7/27/2019 to clean out usages
411now that we have dataset records, clean out invalid dim usages
412*/
413
414/* Delete DSRs that do NOT have a measure value and blank/null label.  There's
415        a bunch of blank measure values that have a blank label and have a "suppresed"
416        or No Data value attribute set.
417*/
418select concat('*', LABEL, '*'), VALUE_ATTRIBUTE_NAME, count(*) c
419from DATASET_RECORD
420where MEASURE_VALUE is null
421  and  (LABEL < " ")
422group by LABEL, VALUE_ATTRIBUTE_NAME
423;
424
425delete from DATASET_RECORD
426where MEASURE_VALUE is null
427  and ((LABEL is null) or (LABEL <= ""))
428;
429==> 1190
430
431delete from DATASET_RECORD
432where MEASURE_VALUE is null
433  and LABEL = '**'
434  and VALUE_ATTRIBUTE_NAME is null
435;
436
437
438
439
440/* see all DTD records that do not have any DS records. */
441select * from DIMENSION_TO_DATASET dtd
442where dtd.DIMENSION_USAGE = 'dimension1'
443  and not exists 
444        (select 1 from DATASET_RECORD dr
445         where dr.DATASET_NAME = dtd.DATASET_NAME
446           and dr.DIMENSION_1_NAME = dtd.DIMENSION_NAME
447        )
448  and DIMENSION_NAME like 'Geo%'
449order by  dtd.DATASET_NAME,  dtd.DIMENSION_NAME
450;
451
452delete from DIMENSION_TO_DATASET
453where DIMENSION_USAGE = 'dimension1'
454  and DIMENSION_NAME like 'Geo%'
455  and not exists 
456        (select 1 from DATASET_RECORD dr
457         where dr.DATASET_NAME = DIMENSION_TO_DATASET.DATASET_NAME
458           and dr.DIMENSION_1_NAME = DIMENSION_TO_DATASET.DIMENSION_NAME
459        )
460;
461
462
463select * from DIMENSION_TO_DATASET dtd
464where dtd.DIMENSION_USAGE = 'dimension2'
465  and not exists 
466        (select 1 from DATASET_RECORD dr
467         where dr.DATASET_NAME = dtd.DATASET_NAME
468           and dr.DIMENSION_2_NAME = dtd.DIMENSION_NAME
469        )
470order by  dtd.DATASET_NAME,  dtd.DIMENSION_NAME
471;
472
473delete from DIMENSION_TO_DATASET
474where DIMENSION_USAGE = 'dimension2'
475  and DIMENSION_NAME like 'Geo%'
476  and not exists 
477        (select 1 from DATASET_RECORD dr
478         where dr.DATASET_NAME = DIMENSION_TO_DATASET.DATASET_NAME
479           and dr.DIMENSION_2_NAME = DIMENSION_TO_DATASET.DIMENSION_NAME
480        )
481;
482
483
484/*
485Still have conflicting dim usage where 2 datasets have same dim homicide example:
486both have legit State Measure Values - just different period dimension!!!
487has both state and country values by year.
488
489select * from
490dataset_record
491where dataset_name = 'Homicide.RateAADths100K.CDC_WONDER'   
492
493select * from
494dataset_record
495where dataset_name = 'Homicide.RateAADths100K.BVRHS_Death'
496*/ 
497
498
499
500/* Some Dataset Records have dim1 & 2 as a geo.  Script should be checked to see
501        if this can be correct.  Most are State and Country.  Other state and country
502        records worked so this  might have to be done manually.  For now put a prefix
503        on the records so that they are easily seen.
504*/
505select * from DATASET_RECORD
506where  DIMENSION_1_NAME like 'Geo%'
507  and DIMENSION_2_NAME like 'Geo%'
508;
509
510
511
512
513/* Fix dual usage where the script puts the dual used dimension as dim1.  When
514        dual usage Sex or RaceEthDOH etc is used as both a dim1 (when doing straight
515        chart, or as dim2 when doing a 2d chart like GeoCnty by Sex or Race). 
516
517        Since this is only seen on dim 1 (because the script populates it that way)
518        set the usage to dim 1 or 2 for those that have a dual use. 
519        There's about 45 of these records that need updating.  These associate with
520        thousands of actual dataset records...
521
522        For testing added this to the where:
523          and DATASET_NAME = 'AlcoholDrinkDriveAdult.PctDrinkDrive.BRFSS'
524
525        Can do a before update and after to see the dimension1or2 value change.
526        select * from DIMENSION_TO_DATASET;
527        where DATASET_NAME = 'AlcoholDrinkDriveAdult.PctDrinkDrive.BRFSS'
528*/
529select * from DIMENSION_TO_DATASET dtd
530where DIMENSION_USAGE = 'dimension1'
531  and DIMENSION_NAME in   
532        (select dr1.DIMENSION_1_NAME
533         from DATASET_RECORD dr1
534         where dr1.DATASET_NAME = dtd.DATASET_NAME
535           and dr1.DIMENSION_1_NAME in (select DIMENSION_2_NAME from DATASET_RECORD dr2 where dr2.DATASET_NAME = dr1.DATASET_NAME)
536        )
537;
538
539update DIMENSION_TO_DATASET dtd
540set DIMENSION_USAGE = 'dimension1or2'
541where DIMENSION_USAGE = 'dimension1'
542  and DIMENSION_NAME in   
543        (select dr1.DIMENSION_1_NAME
544         from DATASET_RECORD dr1
545         where dr1.DATASET_NAME = dtd.DATASET_NAME
546           and dr1.DIMENSION_1_NAME in (select DIMENSION_2_NAME from DATASET_RECORD dr2 where dr2.DATASET_NAME = dr1.DATASET_NAME)
547        )
548;
549
550
551/* Here's some tests that show the dataset records that are associated with this.
552        First do the dim1's that have a matching dim2 (usually the most used).
553
554        Since these are the DSR, the view and DSRs are already setup correctly.
555        These queries are simply used to show the dual use.
556*/
557select count(*)
558from DATASET_RECORD dr1
559where dr1.DIMENSION_2_NAME in (select DIMENSION_1_NAME from DATASET_RECORD dr2 where dr2.DATASET_NAME = dr1.DATASET_NAME)
560order by dr1.DATASET_NAME, dr1.DIMENSION_1_NAME, dr1.DIMENSION_1_VALUE, dr1.DIMENSION_2_NAME, dr1.DIMENSION_2_VALUE
561;
562
563select count(*)
564from DATASET_RECORD dr1
565where dr1.DIMENSION_1_NAME in (select DIMENSION_2_NAME from DATASET_RECORD dr2 where dr2.DATASET_NAME = dr1.DATASET_NAME)
566order by dr1.DATASET_NAME, dr1.DIMENSION_1_NAME, dr1.DIMENSION_1_VALUE, dr1.DIMENSION_2_NAME, dr1.DIMENSION_2_VALUE
567;
568
569
570
571/* DIMENSION_1_OR_2 issues to be aware of.  Of the dual usages, some have
572        multiples.  For NM; of the mults Homicide had some series switched/out of
573        sync.  Did not dig into it but could be missing values and the way the
574        script loops through and populates the dims for these records?
575       
576        In anycase something to be aware of watch for.
577*/
578select count(*) from DIMENSION_TO_DATASET
579where DIMENSION_USAGE = 'dimension1or2';
580
581select * from DIMENSION_TO_DATASET
582where DIMENSION_USAGE = 'dimension1or2';
583/*
584  and DATASET_NAME in
585        (select DATASET_NAME
586         from DIMENSION_TO_DATASET
587     where DIMENSION_USAGE = 'dimension1or2'
588         group by DATASET_NAME
589         having count(*) > 1
590        )
591;
592*/
593
594
595/* even indexed this takes forever to run
596select * from INDICATOR_VIEW_VALUE ivv
597where not exists
598        (
599        select 1
600        from TMP_DATASET_RECORD tdr
601        where tdr.INDICATOR_VIEW_NAME = ivv.INDICATOR_VIEW_NAME
602          and tdr.DIMENSION_1_NAME  = ivv.DIMENSION_1_NAME
603          and tdr.DIMENSION_1_VALUE = ivv.DIMENSION_1_VALUE
604          and tdr.DIMENSION_2_NAME  = ivv.DIMENSION_2_NAME
605          and tdr.DIMENSION_2_VALUE = ivv.DIMENSION_2_VALUE
606          and tdr.PERIOD_DIMENSION_NAME  = ivv.PERIOD_DIMENSION_NAME
607          and tdr.PERIOD_DIMENSION_VALUE = ivv.PERIOD_DIMENSION_VALUE
608        )
609order by INDICATOR_VIEW_NAME
610limit 100
611;
612*/
613
614
615/* this does the same thing but MySQL doesn't support minus
616select INDICATOR_VIEW_NAME,
617        DIMENSION_1_NAME, DIMENSION_1_VALUE,
618        DIMENSION_2_NAME, DIMENSION_2_VALUE,
619        PERIOD_DIMENSION_NAME, PERIOD_DIMENSION_VALUE
620from INDICATOR_VIEW_VALUE
621minus
622select ivv.INDICATOR_VIEW_NAME,
623        ivv.DIMENSION_1_NAME, ivv.DIMENSION_1_VALUE,
624        ivv.DIMENSION_2_NAME, ivv.DIMENSION_2_VALUE,
625        ivv.PERIOD_DIMENSION_NAME, ivv.PERIOD_DIMENSION_VALUE
626from INDICATOR_VIEW_VALUE ivv, TMP_DATASET_RECORD tdr
627where tdr.INDICATOR_VIEW_NAME = ivv.INDICATOR_VIEW_NAME
628  and tdr.DIMENSION_1_NAME  = ivv.DIMENSION_1_NAME
629  and tdr.DIMENSION_1_VALUE = ivv.DIMENSION_1_VALUE
630  and tdr.DIMENSION_2_NAME  = ivv.DIMENSION_2_NAME
631  and tdr.DIMENSION_2_VALUE = ivv.DIMENSION_2_VALUE
632  and tdr.PERIOD_DIMENSION_NAME  = ivv.PERIOD_DIMENSION_NAME
633  and tdr.PERIOD_DIMENSION_VALUE = ivv.PERIOD_DIMENSION_VALUE
634order by INDICATOR_VIEW_NAME
635;
636*/
637
638
639/*
640issue appears to be dup keys for different IPVs with different measure values...
641*/
642select ivv.INDICATOR_VIEW_NAME, 
643        ivv.DIMENSION_1_NAME, ivv.DIMENSION_1_VALUE,
644        ivv.DIMENSION_2_NAME, ivv.DIMENSION_2_VALUE,
645        ivv.PERIOD_DIMENSION_NAME, ivv.PERIOD_DIMENSION_VALUE
646from INDICATOR_VIEW_VALUE ivv
647left join TMP_DATASET_RECORD tdr on
648  tdr.INDICATOR_VIEW_NAME = ivv.INDICATOR_VIEW_NAME
649  and tdr.DIMENSION_1_NAME  = ivv.DIMENSION_1_NAME
650  and tdr.DIMENSION_1_VALUE = ivv.DIMENSION_1_VALUE
651  and tdr.DIMENSION_2_NAME  = ivv.DIMENSION_2_NAME
652  and tdr.DIMENSION_2_VALUE = ivv.DIMENSION_2_VALUE
653  and tdr.PERIOD_DIMENSION_NAME  = ivv.PERIOD_DIMENSION_NAME
654  and tdr.PERIOD_DIMENSION_VALUE = ivv.PERIOD_DIMENSION_VALUE
655order by INDICATOR_VIEW_NAME
656;
657
658select * 
659from TMP_DATASET_RECORD
660group by INDICATOR_NAME, MEASURE_NAME,
661        DIMENSION_1_NAME, DIMENSION_1_VALUE,
662        DIMENSION_2_NAME, DIMENSION_2_VALUE,
663        PERIOD_DIMENSION_NAME, PERIOD_DIMENSION_VALUE
664having count(MEASURE_NAME) > 1
665  and count(DIMENSION_1_NAME) > 1
666  and count(DIMENSION_1_VALUE) > 1
667  and count(DIMENSION_2_NAME) > 1
668  and count(DIMENSION_2_VALUE) > 1
669  and count(PERIOD_DIMENSION_NAME) > 1
670  and count(PERIOD_DIMENSION_VALUE) > 1
671;
672
673
674
675
676/* test any datasets that don't have any records? */
677select * from DATASET
678where NAME not in (select DATASET_NAME from DATASET_RECORD)
679;
680
681/* test IPs that do NOT have a dataset */
682select NAME from INDICATOR
683where NAME not in (select INDICATOR_NAME from DATASET)
684;
685
686/* show DSR record count by dataset */
687select DATASET_NAME, count(DATASET_NAME) 
688from DATASET_RECORD
689group by DATASET_NAME
690;
691
692/* show IVVs count by IV */
693select INDICATOR_VIEW_NAME, count(INDICATOR_VIEW_NAME) 
694from INDICATOR_VIEW_VALUE
695group by INDICATOR_VIEW_NAME
696;
697
698
699
700select INDICATOR_NAME, count(INDICATOR_VIEW_NAME) count_ivn,
701        MEASURE_NAME,
702        PERIOD_DIMENSION_NAME, PERIOD_DIMENSION_VALUE,
703        DIMENSION_1_NAME, DIMENSION_1_VALUE,
704        DIMENSION_2_NAME, DIMENSION_2_VALUE
705from TMP_DATASET_RECORD
706where INDICATOR_NAME like 'LBW%'
707group by INDICATOR_NAME, MEASURE_NAME,
708        DIMENSION_1_NAME, DIMENSION_1_VALUE,
709        DIMENSION_2_NAME, DIMENSION_2_VALUE,
710        PERIOD_DIMENSION_NAME, PERIOD_DIMENSION_VALUE
711having count(INDICATOR_VIEW_NAME) > 1
712;
713
714
715select INDICATOR_VIEW_NAME, 
716        MEASURE_NAME,
717        PERIOD_DIMENSION_NAME, PERIOD_DIMENSION_VALUE,
718        DIMENSION_1_NAME, DIMENSION_1_VALUE,
719        DIMENSION_2_NAME, DIMENSION_2_VALUE
720from TMP_DATASET_RECORD
721where INDICATOR_NAME like 'LBW%'
722  and PERIOD_DIMENSION_NAME = 'Year'
723  and PERIOD_DIMENSION_VALUE = 2019
724  and DIMENSION_1_NAME = 'Country'
725  and DIMENSION_1_VALUE = 'US'
726  and MEASURE_NAME = 'PctBirthLow'
727;
728
729
730
731select INDICATOR_NAME, count(INDICATOR_VIEW_NAME) count_ivn,
732        MEASURE_NAME,
733        PERIOD_DIMENSION_NAME, PERIOD_DIMENSION_VALUE,
734        DIMENSION_1_NAME, DIMENSION_1_VALUE,
735        DIMENSION_2_NAME, DIMENSION_2_VALUE
736from TMP_DATASET_RECORD
737group by INDICATOR_NAME, MEASURE_NAME,
738        DIMENSION_1_NAME, DIMENSION_1_VALUE,
739        DIMENSION_2_NAME, DIMENSION_2_VALUE,
740        PERIOD_DIMENSION_NAME, PERIOD_DIMENSION_VALUE
741having count(INDICATOR_VIEW_NAME) > 1
742limit 5000
743;
744==> 2451 
745
746
747select distinct(INDICATOR_NAME) from
748(
749select INDICATOR_NAME from TMP_DATASET_RECORD
750group by INDICATOR_NAME, MEASURE_NAME,
751        DIMENSION_1_NAME, DIMENSION_1_VALUE,
752        DIMENSION_2_NAME, DIMENSION_2_VALUE,
753        PERIOD_DIMENSION_NAME, PERIOD_DIMENSION_VALUE
754having count(INDICATOR_VIEW_NAME) > 1
755) as a
756;
757==> 114
758
759will need a mapping file for source IP with a new name and all the assoc
760views.  Script would then need to create all the data sources and other
761related intersections as well as renaming views and view values. 
762
763/*------------------------------ END OF FILE ------------------------------*/
Note: See TracBrowser for help on using the repository browser.