rem 
rem $Header: utloidxs.sql,v 1.1 1992/12/19 23:56:43 GLUMPKIN Exp $ oneidxs.sql 
rem 
Rem Copyright (c) 1989 by Oracle Corporation
Rem NAME
REM    UTLOIDXS.SQL
Rem  FUNCTION
Rem    See below
Rem  NOTES
Rem    See below
Rem  MODIFIED
Rem     glumpkin   10/20/92 -  Renamed from ONEIDXS.SQL 
Rem     rlim       04/29/91 -         change char to varchar2 
Rem   Laursen    01/01/91 - V6 to V7 merge
Rem   Porter    09/23/89 - Change to 8-character filenames
Rem   Porter    04/04/89 - Commenting, cleanup
Rem   Porter    03/27/89 - Creation
Rem
Rem-------------------------------------------------------------------   
Rem                           ONEIDXS.SQL 
Rem
Rem    Use this procedure to find out information about how 
Rem    selective columns are. Use it to:
Rem
Rem      1. Identify prospective columns for new indexes
Rem      2. Determine how selective a current index is
Rem      3. Determine whether a current index is useful or not
Rem
Rem    SQL> START ONE_INDEX_STATS TABLE_NAME COLUMN_NAME
Rem
Rem    NOTE: This procedure requires SQLPLUS version 3.0.3.1 or greater,
Rem          in order for the NEW_VALUE statement to be implemented 
Rem          correctly.
Rem-------------------------------------------------------------------

Rem 
Rem *** Set up variables ***
Rem
Set Heading Off
Set Verify Off                     
Set Feedback Off
column table_name new_value one_table_name
column column_name new_value one_column_name
SELECT upper('&1') table_name, upper('&2') column_name FROM DUAL;
Set Heading On
Set Verify On
Set Feedback On

SET TERMOUT OFF

Rem
Rem     Table statistics:
Rem
Rem     If an indexed column has nulls in it, then care must be used
Rem     in analyzing performance of that index:
Rem       1. A query for a null value will do a table-scan, and
Rem          will never use an index.
Rem       2. A query for a non-null value will only have to search
Rem          an index that has no null entries in it.  Thus, no 
Rem          performance will be lost between a table with 100 not-null
Rem          entries and 100 not-null and 1000 null entries, as long
Rem          as queries for not-null values are made.
Rem


Rem 
Rem *** Create user statistics tables, if not already created ***
Rem
CREATE TABLE INDEX$INDEX_STATS (
  TABLE_NAME        VARCHAR2(30) NOT NULL,
  COLUMN_NAME       VARCHAR2(30) NOT NULL,
  STAT_NAME         VARCHAR2(30) NOT NULL,
  STAT_VALUE        NUMBER   NOT NULL);

create table INDEX$BADNESS_STATS
  (table_name            varchar2(30) not null,
   column_name           varchar2(30) not null,
   badness_factor        number(9),
   keys_with_badness     number(9),
   row_percent           number(9,3),
   row_blk_fail          number(9),
   row_blk_succ          number(9),
   key_percent           number(9,3));

Rem
Rem *** Get rid of any current lines in the tables ***
Rem
delete from index$index_stats
  where table_name = '&one_table_name' and column_name = '&one_column_name';
delete from index$badness_stats
  where table_name = '&one_table_name' and column_name = '&one_column_name';

Rem
Rem *** GET STATISTICS ***
Rem

COLUMN RECORDS NEW_VALUE TOT_ROWS;
SELECT COUNT(*) RECORDS 
  FROM &one_table_name;
INSERT INTO INDEX$INDEX_STATS VALUES('&one_table_name','&one_column_name',
                                     'Rows - Total',
                                     &tot_rows);

COLUMN NULLS NEW_VALUE TOT_NULLS;
SELECT COUNT(*) NULLS 
  FROM &one_table_name 
  WHERE &one_column_name IS NULL;
INSERT INTO INDEX$INDEX_STATS VALUES('&one_table_name','&one_column_name',
                                     'Rows - Null',
                                     &tot_nulls);

COLUMN DISTINCT_KEYS NEW_VALUE TOT_DISTINCT_KEYS
SELECT COUNT(DISTINCT &one_column_name) DISTINCT_KEYS 
  FROM &one_table_name 
  WHERE &one_column_name IS NOT NULL;
INSERT INTO INDEX$INDEX_STATS VALUES('&one_table_name','&one_column_name',
                                     'Total Distinct Keys',
                                     &TOT_DISTINCT_KEYS);

COLUMN NOT_NULLS NEW_VALUE TOT_NOT_NULLS;
SELECT &TOT_ROWS - &TOT_NULLS NOT_NULLS 
  FROM DUAL;
INSERT INTO INDEX$INDEX_STATS ('&one_table_name','&one_column_name',
                               'Rows - Not null',
                               &TOT_NOT_NULLS);

Rem
Rem     Following are statistics for the distribution of the keys,
Rem     without null values, since null values can perturb the
Rem     statistics, and indexes don't use nulls.
Rem
Rem     Of particular interest here is the average number of 
Rem     rows per key, since this is a general measure of the
Rem     selectivity of the column.
Rem

column average  new_value average
column minimum  new_value minimum
column maximum  new_value maximum 
column std_dev  new_value std_dev
SELECT nvl(avg(COUNT(*)),0)      AVERAGE,
       nvl(min(COUNT(*)),0)      MINIMUM,
       nvl(max(COUNT(*)),0)      MAXIMUM ,
       nvl(stddev(count(*)),0)   STD_DEV
  FROM &one_table_name
  WHERE &one_column_name IS NOT NULL
  GROUP BY &one_column_name;
INSERT INTO INDEX$INDEX_STATS VALUES('&one_table_name','&one_column_name',
                                     'Rows per key - avg',&average);
INSERT INTO INDEX$INDEX_STATS VALUES('&one_table_name','&one_column_name',
                                     'Rows per key - min',&minimum);
INSERT INTO INDEX$INDEX_STATS VALUES('&one_table_name','&one_column_name',
                                     'Rows per key - max',&maximum);
INSERT INTO INDEX$INDEX_STATS VALUES('&one_table_name','&one_column_name',
                                     'Rows per key - dev',&std_dev);

Rem
Rem      The following table is a histogram of key selectivity in the
Rem      column.  This can be used to determine what the overall 
Rem      'badness' (a technical term) of the column is. NOTE: This
Rem      key selectivity is measured WITHOUT NULLS, since nulls don't
Rem      use indexes.
Rem 
Rem      The 'badness_factor' column tells how many times a key was 
Rem      repeated.
Rem      The 'keys_with_badness' column tells how many keys were
Rem      repeated 'badness_factor' times.
Rem 
Rem      Higher badness factors are detrimental to the selectivity
Rem      of the column.
Rem
Rem      This table takes on different meanings depending upon 
Rem      which access method is assumed:
Rem      1. Access distributed equally across rows
Rem         In this case, the user should look at the 'ROW_PERC' 
Rem         column to determine what percentage of the queries will
Rem         have high badness factors. 
Rem      2. Access distributed equally across key values
Rem         In this case, the user should look at the 'KEY_PERC' 
Rem         column to determine what percentage of the queries will
Rem         have high badness factors.
Rem  

drop view index$badness;
create view index$badness as
   (select count(&one_column_name) repeat_count  
   from &one_table_name 
   group by &one_column_name);

insert into index$badness_stats (table_name,column_name,
    badness_factor, keys_with_badness, row_percent, 
    row_blk_fail, row_blk_succ, key_percent)
  select '&one_table_name' table_name,'&one_column_name' column_name,
    repeat_count badness_factor,
    count(repeat_count) keys_with_badness,
    (count(repeat_count)*repeat_count/&tot_not_nulls)*100 row_percent,
    (count(repeat_count)*repeat_count*repeat_count) row_blk_fail,
    (count(repeat_count)*repeat_count*ceil(repeat_count/2)) row_blk_succ,
    (count(repeat_count)/&tot_distinct_keys)*100 key_percent
  from index$badness
  where repeat_count <> 0
  group by repeat_count;

Rem
Rem     The following two statistics attempt to determine
Rem     how expensive it is to access keys in this table.
Rem     A 'miss' will have to scan all the rows for a key value,
Rem     and a 1-row hit will, on the average, only have to scan
Rem     half of them.
Rem   
Rem     These gets are the the gets to read data from the table.
Rem     Gets needed to access branch and leaf nodes of the index
Rem     are not counted here.
Rem

column gets_per_miss_by_row new_value miss_gets
column gets_per_1_row_hit_by_row new_value hit_gets
select nvl((sum(row_blk_fail)/&tot_not_nulls),0) gets_per_miss_by_row,
       nvl((sum(row_blk_succ)/&tot_not_nulls),0) gets_per_1_row_hit_by_row
  from index$badness_stats
  where table_name like '&one_table_name' AND
        column_name like '&one_column_name';  
INSERT INTO INDEX$INDEX_STATS VALUES('&one_table_name','&one_column_name',
                                     'db_gets_per_key_miss',&miss_gets);
INSERT INTO INDEX$INDEX_STATS VALUES('&one_table_name','&one_column_name',
                                     'db_gets_per_key_hit',&hit_gets);

drop view index$badness;
SET TERMOUT ON

Rem
Rem Clean up
Rem 
undefine 1
undefine 2
undefine one_table_name
undefine one_column_name