#!/usr/local/bin/perl eval 'exec /usr/local/bin/perl -S $0 ${1+"$@"}' if 0; # not running under some shell use strict; use warnings FATAL => 'all'; # This is mysql-duplicate-key-checker, a program to analyze MySQL tables for # duplicated or redundant indexes and foreign key constraints. It is part of # MySQL Toolkit (http://mysqltoolkit.sourceforge.net). # # This program is copyright (c) 2007 Baron Schwartz. # Feedback and improvements are welcome. # # THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. # # This program is free software; you can redistribute it and/or modify it under # the terms of the GNU General Public License as published by the Free Software # Foundation, version 2; OR the Perl Artistic License. On UNIX and similar # systems, you can issue `man perlgpl' or `man perlartistic' to read these # licenses. # # You should have received a copy of the GNU General Public License along with # this program; if not, write to the Free Software Foundation, Inc., 59 Temple # Place, Suite 330, Boston, MA 02111-1307 USA. # ########################################################################### # This is a combination of modules and programs in one -- a runnable module. # http://www.perl.com/pub/a/2006/07/13/lightning-articles.html?page=last # Or, look it up in the Camel book on pages 642 and 643 in the 3rd edition. # ########################################################################### package IndexChecker; use List::Util qw(min); sub new { bless {}, shift; } sub find_engine { my ( $self, $ddl, $opts ) = @_; my ( $engine ) = $ddl =~ m/\) (?:ENGINE|TYPE)=(\w+)/; return $engine || undef; } # The general format of a key is # [FOREIGN|UNIQUE|PRIMARY|FULLTEXT|SPATIAL] KEY `name` [USING BTREE|HASH] (`cols`). sub find_keys { my ( $self, $ddl, $opts ) = @_; # Find and filter the indexes. my @indexes = grep { $_ !~ m/FOREIGN/ } $ddl =~ m/((?:\w+ )?KEY .+\))/mg; # Make allowances for HASH bugs in SHOW CREATE TABLE. A non-MEMORY table # will report its index as USING HASH even when this is not supported. The # true type should be BTREE. See http://bugs.mysql.com/bug.php?id=22632 my $engine = $self->find_engine($ddl); if ( $engine !~ m/MEMORY|HEAP/ ) { @indexes = map { $_ =~ s/USING HASH/USING BTREE/; $_; } @indexes; } my @keys = map { my ( $struct, $cols ) = $_ =~ m/(?:USING (\w+))? \((.+)\)/; my ( $special ) = $_ =~ m/(FULLTEXT|SPATIAL)/; $struct = $struct || $special || 'BTREE'; my ( $name ) = $_ =~ m/KEY `(.*?)` \(/; # MySQL pre-4.1 supports only HASH indexes. if ( $opts->{version} lt '004001000' && $engine =~ m/HEAP|MEMORY/i ) { $struct = 'HASH'; } { struct => $struct, cols => $cols, name => $name || 'PRIMARY', } } @indexes; return \@keys; } sub find_fks { my ( $self, $ddl, $opts ) = @_; my @fks = $ddl =~ m/CONSTRAINT .* FOREIGN KEY .* REFERENCES [^\)]*\)/mg; my @result = map { my ( $name ) = $_ =~ m/CONSTRAINT `(.*?)`/; my ( $fkcols ) = $_ =~ m/\(([^\)]+)\)/; my ( $cols ) = $_ =~ m/REFERENCES.*?\(([^\)]+)\)/; my ( $parent ) = $_ =~ m/REFERENCES (\S+) /; if ( $parent !~ m/\./ ) { $parent = "`$opts->{database}`.$parent"; } { name => $name, parent => $parent, cols => $cols, fkcols => $fkcols, }; } @fks; return \@result; } sub find_duplicate_keys { my ( $self, $keys, $opts ) = @_; my @keys = @$keys; my %seen; # Avoid outputting a key more than once. my @result; foreach my $i ( 0..$#keys - 1 ) { foreach my $j ( $i+1..$#keys ) { my $i_cols = $keys[$i]->{cols}; my $j_cols = $keys[$j]->{cols}; my $len = min(length($i_cols), length($j_cols)); if ( $opts->{ignore_order} ) { $i_cols = join(',', sort(split(/`/, $i_cols))); $j_cols = join(',', sort(split(/`/, $j_cols))); } if ( (($keys[$i]->{struct} eq $keys[$j]->{struct}) || $opts->{ignore_type}) && substr($i_cols, 0, $len) eq substr($j_cols, 0, $len)) { push @result, $keys[$i] unless $seen{$i}++; push @result, $keys[$j] unless $seen{$j}++; } } } # If the key ends with a prefix of the primary key, it's a duplicate. if ( $opts->{clustered} && $opts->{engine} =~ m/^(?:InnoDB|solidDB)$/ ) { my $i = 0; my $found = 0; while ( $i < @keys ) { if ( $keys[$i]->{name} eq 'PRIMARY' ) { $found = 1; last; } $i++; } if ( $found ) { my $pkcols = $keys[$i]->{cols}; KEY: foreach my $j ( 0..$#keys ) { next KEY if $i == $j; my $suffix = $keys[$j]->{cols}; SUFFIX: while ( $suffix =~ s/`[^`]+`,// ) { my $len = min(length($pkcols), length($suffix)); if ( (($keys[$i]->{struct} eq $keys[$j]->{struct}) || $opts->{ignore_type}) && substr($suffix, 0, $len) eq substr($pkcols, 0, $len)) { push @result, $keys[$i] unless $seen{$i}++; push @result, $keys[$j] unless $seen{$j}++; last SUFFIX; } } } } } return \@result; } sub find_duplicate_fks { my ( $self, $fks, $opts ) = @_; my @fks = @$fks; my %seen; # Avoid outputting a fk more than once. my @result; foreach my $i ( 0..$#fks - 1 ) { foreach my $j ( $i+1..$#fks ) { # A foreign key is a duplicate no matter what order the columns are in, so # re-order them alphabetically so they can be compared. my $i_cols = join(', ', map { "`$_`" } sort($fks[$i]->{cols} =~ m/`([^`]+)`/g)); my $j_cols = join(', ', map { "`$_`" } sort($fks[$j]->{cols} =~ m/`([^`]+)`/g)); my $i_fkcols = join(', ', map { "`$_`" } sort($fks[$i]->{fkcols} =~ m/`([^`]+)`/g)); my $j_fkcols = join(', ', map { "`$_`" } sort($fks[$j]->{fkcols} =~ m/`([^`]+)`/g)); if ( $fks[$i]->{parent} eq $fks[$j]->{parent} && $i_cols eq $j_cols && $i_fkcols eq $j_fkcols ) { push @result, $fks[$i] unless $seen{$i}++; push @result, $fks[$j] unless $seen{$j}++; } } } return \@result; } # ########################################################################### # And now for the "program". # ########################################################################### package main; use DBI; use English qw(-no_match_vars); use Getopt::Long; use List::Util qw(max); use Term::ReadKey; our $VERSION = '1.1.1'; our $DISTRIB = '1053'; our $SVN_REV = sprintf("%d", q$Revision: 874 $ =~ m/(\d+)/g); $OUTPUT_AUTOFLUSH = 1; if ( !caller ) { # ############################################################################ # Get configuration information. # ############################################################################ my @opt_spec = ( { s => 'allatonce|s', d => 'Print only once, instead of one DB at a time' }, { s => 'allstruct|a', d => 'Compare indexes with different structs (BTREE, HASH, etc)' }, { s => 'askpass', d => 'Prompt for password for connections' }, { s => 'clustered|c', d => 'PK columns appended to secondary key is duplicate' }, { s => 'databases|d=s', d => 'Only do this comma-separated list of databases' }, { s => 'defaults-file|F=s', d => 'Only read default options from the given file' }, { s => 'function|f=s', d => 'Do f=foreign keys, k=keys. Default is do both.' }, { s => 'help', d => 'Show this help message' }, { s => 'host|h=s', d => 'Connect to host' }, { s => 'ignoredb|g=s', d => 'Ignore this comma-separated list of databases' }, { s => 'ignoreorder', d => 'Ignore index order so KEY(a,b) duplicates KEY(b,a)' }, { s => 'ignoretbl|n=s', d => 'Ignore this comma-separated list of tables' }, { s => 'password|p=s', d => 'Password to use when connecting' }, { s => 'port|P=i', d => 'Port number to use for connection' }, { s => 'socket|S=s', d => 'Socket file to use for connection' }, { s => 'tab|b', d => 'Output separated with tabs' }, { s => 'tables|t=s', d => 'Only do this comma-separated list of tables' }, { s => 'user|u=s', d => 'User for login if not current user' }, { s => 'verbose|v', d => 'Output everything, not just dupes' }, { s => 'version', d => 'Output version information and exit' }, ); # This is the container for the command-line options' values to be stored in # after processing. Initial values are defaults. my %opts = ( d => '', g => '', t => '', n => '', f => 'fk', v => 0, b => 0, a => 0, s => 0, ); # Post-process... my %opt_seen; foreach my $spec ( @opt_spec ) { my ( $long, $short ) = $spec->{s} =~ m/^([\w-]+)(?:\|([^!+=]*))?/; $spec->{k} = $short || $long; $spec->{l} = $long; $spec->{t} = $short; $spec->{n} = $spec->{s} =~ m/!/; $opts{$spec->{k}} = undef unless defined $opts{$spec->{k}}; die "Duplicate option $spec->{k}" if $opt_seen{$spec->{k}}++; } Getopt::Long::Configure('no_ignore_case', 'bundling'); GetOptions( map { $_->{s} => \$opts{$_->{k}} } @opt_spec) or $opts{help} = 1; if ( $opts{version} ) { print "mysql-duplicate-key-checker Ver $VERSION Distrib $DISTRIB Changeset $SVN_REV\n"; exit(0); } # Make comma-separated lists into hashes. if ( $opts{d} ) { $opts{d} = { map { $_ => 1 } split(/,\s*/, $opts{d}) }; } $opts{g} = { map { $_ => 1 } split(/,\s*/, $opts{g}) }; if ( $opts{t} ) { $opts{t} = { map { $_ => 1 } split(/,\s*/, $opts{t}) }; } $opts{n} = { map { $_ => 1 } split(/,\s*/, $opts{n}) }; if ( $opts{e} ) { $opts{e} = { map { lc($_) => 1 } split(/,\s*/, $opts{e}) }; } if ( $opts{help} ) { print "Usage: mysql-duplicate-key-checker \n\n"; my $maxw = max(map { length($_->{l}) + ($_->{n} ? 4 : 0)} @opt_spec); foreach my $spec ( sort { $a->{l} cmp $b->{l} } @opt_spec ) { my $long = $spec->{n} ? "[no]$spec->{l}" : $spec->{l}; my $short = $spec->{t} ? "-$spec->{t}" : ''; printf(" --%-${maxw}s %-4s %s\n", $long, $short, $spec->{d}); } ( my $usage = <<" USAGE" ) =~ s/^ //gm; mysql-duplicate-key-checker examines MySQL tables for duplicate or redundant indexes and foreign keys. Connection options are read from MySQL option files. For more details, please read the documentation: perldoc mysql-duplicate-key-checker USAGE exit(0); } # ############################################################################ # Get ready to do the main work. # ############################################################################ my %conn = ( F => 'mysql_read_default_file', h => 'host', P => 'port', S => 'mysql_socket' ); # Connect to the database if ( !defined $opts{p} && $opts{askpass} ) { print "Enter password: "; ReadMode('noecho'); chomp($opts{p} = ); ReadMode('normal'); print "\n"; } my $dsn = 'DBI:mysql:' . ( $opts{D} || '' ) . ';' . join(';', map { "$conn{$_}=$opts{$_}" } grep { defined $opts{$_} } qw(F h P S)) . ';mysql_read_default_group=mysql'; my $dbh = DBI->connect($dsn, @opts{qw(u p)}, { AutoCommit => 1, RaiseError => 1, PrintError => 0 } ); my @databases = @{$dbh->selectcol_arrayref('SHOW DATABASES')}; my @whole_batch; my $exit_code = 0; my $version = sprintf('%03d%03d%03d', $dbh->{mysql_serverinfo} =~ m/(\d+)/g); my $ic = new IndexChecker; my $ic_opts = { ignore_type => $opts{a}, ignore_order => $opts{ignoreorder}, clustered => $opts{c}, }; DATABASE: foreach my $database ( @databases ) { # Ignore databases as instructed. Also ignore INFORMATION_SCHEMA and skip # databases caused by lost+found directories created in the root of ext3 # filesystems; they are not really databases. next DATABASE if ( $opts{d} && !exists($opts{d}->{$database}) ) || $database =~ m/^(information_schema|lost\+found)$/mi || exists $opts{g}->{$database}; my @tables = @{$dbh->selectcol_arrayref('SHOW TABLES FROM `' . $database . '`')}; next DATABASE unless @tables; my %info_for; TABLE: foreach my $table ( @tables ) { next TABLE if ( $opts{t} && !exists($opts{t}->{$table}) ) || exists $opts{n}->{$table}; $exit_code = 1; my $ddl; eval { $ddl = ($dbh->selectrow_array("SHOW CREATE TABLE `$database`.`$table`"))[1]; $exit_code = 0; }; next TABLE if !$ddl; my $engine = $ic->find_engine($ddl) || next TABLE; my $keys = $opts{f} =~ m/k/ ? $ic->find_keys($ddl, {version => $version }) : []; my $fks = $opts{f} =~ m/f/ ? $ic->find_fks($ddl, {database => $database}) : []; if ( @$keys || @$fks ) { $info_for{$table} = { database => $database, table => $table, engine => $engine, keys => $keys, fks => $fks, }; } } my @to_print; foreach my $table ( sort keys %info_for ) { my $hash = $info_for{$table}; my $engine = $hash->{engine}; # Prepare indexes if ( $opts{f} =~ m/k/ ) { if ( $opts{v} ) { # Print all push @to_print, map { make_hash($hash, 'KEY', $_) } @{$hash->{keys}}; } else { # Find duplicate/redundant by prefix matching. push @to_print, map { make_hash($hash, 'KEY', $_) } @{$ic->find_duplicate_keys($hash->{keys}, { engine => $engine, %$ic_opts})}; } } # Prepare foreign keys if ( $opts{f} =~ m/f/ ) { if ( $opts{v} ) { # Print all push @to_print, map { make_hash($hash, 'FK', $_) } @{$hash->{fks}}; } else { # Otherwise output duplicates. push @to_print, map { make_hash($hash, 'FK', $_) } @{$ic->find_duplicate_fks($hash->{fks})}; } } } next DATABASE unless @to_print; if ( $opts{s} ) { push @whole_batch, @to_print; } else { print_batch(@to_print); } } if ( @whole_batch && $opts{s} ) { print_batch(@whole_batch); } exit($exit_code); # ############################################################################ # Subroutines # ############################################################################ sub make_hash { my ( $info, $type, $hash ) = @_; return { database => $info->{database}, table => $info->{table}, engine => $info->{engine}, name => $hash->{name}, type => $type, struct => $hash->{struct} || 'NULL', parent => $hash->{parent} || 'NULL', cols => $hash->{cols}, }; } sub print_batch { my ( @batch ) = @_; my $hdr; if ( $opts{b} ) { $hdr = ( "%s\t" x 8 ) . "\n"; } else { my $max_idx = max(6, map { length($_->{name}) } @batch); my $max_tbl = max(5, map { length($_->{table}) } @batch); my $max_db = max(8, map { length($_->{database}) } @batch); my $max_par = max(6, map { length($_->{parent} || '') } @batch); $hdr = "%-${max_db}s %-${max_tbl}s %-6s %-${max_idx}s %-4s %-8s %-${max_par}s %s\n"; } printf($hdr, qw(DATABASE TABLE ENGINE OBJECT TYPE STRUCT PARENT COLUMNS)); foreach my $thing ( @batch ) { printf($hdr, @{$thing}{qw(database table engine name type struct parent cols)}); } } } 1; # Because this is a module as well as a script. # ############################################################################ # Documentation # ############################################################################ =pod =head1 NAME mysql-duplicate-key-checker - Find possible duplicate indexes and foreign keys on MySQL tables. =head1 DESCRIPTION This program examines the output of SHOW CREATE TABLE on MySQL tables, and if it finds indexes that cover the same columns as another index in the same order, or cover an exact leftmost prefix of another index, it prints out the suspicious indexes. By default, indexes must be of the same type, so a BTREE index is not a duplicate of a FULLTEXT index, even if they have the same colums. You can override this. It also looks for duplicate foreign keys. A duplicate foreign key covers the same columns as another in the same table, and references the same parent table. This tool is part of MySQL Toolkit L. =head1 OPTIONS =over =item --allatonce Prints everything it finds in one chunk. The default is to print a database at a time. =item --allstruct Compare indexes with different structures. By default this is disabled, because a BTREE index that covers the same columns as a FULLTEXT index is not really a duplicate, for example. =item --askpass Prompt for password for connections. =item --clustered Detects when a suffix of a secondary key is a leftmost prefix of the primary key, and treats it as a duplicate key. Only detects this condition on storage engines whose primary keys are clustered (currently InnoDB and solidDB). Clustered storage engines append the primary key columns to the leaf nodes of all secondary keys anyway, so you might consider it redundant to have them appear in the internal nodes as well. Of course, you may also want them in the internal nodes, because just having them at the leaf nodes won't help for some queries. It does help for covering index queries, however. Here's an example of a key that is considered redundant with this option: PRIMARY KEY (`a`) KEY `b` (`b`,`a`) =item --databases A comma-separated list of databases to examine. =item --defaults-file Only read default options from the given file. =item --function What to check: 'f' is foreign keys, 'k' is indexes. The default is to check both. =item --help Displays a help message. =item --host Connect to host. =item --ignoredb A comma-separated list of databases to ignore. =item --ignoretype Ignore column ordering, so an index on columns (a,b) is considered a duplicate of an index on columns (b,a). =item --ignoretbl A comma-separated list of tables to ignore. =item --password Password to use when connecting. =item --port Port number to use for connection. =item --socket Socket file to use for connection. =item --tab Print output separated with tabs, instead of whitespace-aligned. See L<"OUTPUT"> for details. =item --tables A comma-separated list of tables to check. =item --user User for login if not current user. =item --verbose Output all keys and/or foreign keys found, not just redundant ones. =item --version Output version information and exit. =back =head1 OUTPUT Output is to STDOUT, one line per server and table, with header lines for each database. I tried to make the output easy to process with awk. For this reason columns are always present. If there's no value, the script prints 'NULL'. Output is sorted by database and table. The columns in the output are as follows. =over =item DATABASE The database the table is in. =item TABLE The table name. =item ENGINE The table's storage engine. =item OBJECT The index or constraint's name, e.g. `tbl_ibfk_3` (the default InnoDB name for the third foreign key on a table named tbl). =item TYPE 'KEY' for indexes, 'FK' for foreign keys. =item STRUCT The type of index: BTREE, FULLTEXT, HASH etc. By default MySQL's indexes are BTREE in most cases. This does not apply to foreign keys. =item PARENT The parent table to which the foreign key constraint refers. This does not apply to indexes. =item COLUMNS The columns included in the index or foreign key constraint. For indexes, this column list is output verbatim, as shown in SHOW CREATE TABLE. For foreign keys, the columns are ordered so string comparison can find duplicates, since column order in a foreign key is immaterial. =back =head1 SYSTEM REQUIREMENTS You need the following Perl modules: DBI and DBD::mysql. =head1 BUGS Please use the Sourceforge bug tracker, forums, and mailing lists to request support or report bugs: L. =head1 LICENSE This program is copyright (c) 2007 Baron Schwartz. Feedback and improvements are welcome. THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2; OR the Perl Artistic License. On UNIX and similar systems, you can issue `man perlgpl' or `man perlartistic' to read these licenses. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. =head1 AUTHOR Baron Schwartz. =head1 VERSION This manual page documents Ver 1.1.1 Distrib 1053 $Revision: 874 $. =cut