#!/usr/bin/perl # Copyright © 2004 Jamie Zawinski # # Permission to use, copy, modify, distribute, and sell this software # and its documentation for any purpose is hereby granted without fee, # provided that the above copyright notice appear in all copies and # that both that copyright notice and this permission notice appear in # supporting documentation. No representations are made about the # suitability of this software for any purpose. It is provided "as # is" without express or implied warranty. # # Created: 3-Mar-2004 by Jamie Zawinski, Anonymous, and Jacob Post. # ########################################################################## # # This program, originally by Jamie Zawinski for parsing mork files to # extract URLs from the history, has been repurposed by Clinton # Gormley to extract vCards from the Thunderbird default personal # addressbook (abook.mab), in a suitable format for importing into # Evolution or for storing on the iPod. # # It looks for abook.mab in the directories specified below in # ABOOK_PATHS. If there are more paths that should be added by # default, please let me know. # # BUG : At the moment, there is no way of removing deleted addresses. # Thunderbird continues to store them in the address book, and I can't # see any way to find the deleted flag. There is a key called # ns:addrbk:db:table:kind:deleted, but deleted records don't seem to # have this set. So if you figure this one out, please let me know... # # If you would also like to include the Collected addresses, you can # specify the history.mab file on the command line. # # I have done nothing with encoding in this program - on my system it # just works. If it doesn't for you, please send me the details. # # vCards should have a maximum line length of 75 chars. I have set the # line length lower in case there are characters which (when you # recode) take up more than one byte, so you have a degree of # flexibility with this. You may well run into problems if you are # using many non ASCII characters. # # The changes I have made to Jamie Zawinski's script enjoy the same # copyright as his original script (see above). # # The part of this script that parses the addressbook worked well for # me, but Jamie says that it is unreliable, and has given up trying to # improve it. But until the Mozilla vcard project gets underway # (http://vcard.mozdev.org/) and uses Mozilla's own mork libraries, # you probably won't find anything better. # Mileage may vary... :) # # Hope this helps! # # Clinton Gormley (perl@traveljury.com) # ########################################################################## # # Just added support for parsing mobile phones (cell phones) entries in # address books from Thunderbird. # My work was so lame hacking this file that I couldn't help myself # being in the credits too. :-P # Of course, my lines of program are GPL licensed too. # # Sebastian Cruz (crux@lugmen.org.ar) # ########################################################################## # # Contributed to the gtkpod project by Clinton Gormley # (perl@traveljury.com). This code can be used either under the # license mentioned above or under the terms of the GNU General Pulic # License as published by the Free Software Foundation; either version # 2 of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # ########################################################################## # # And Now, The Ugly Truth Laid Bare: # # In Netscape Navigator 1.0 through 4.0, the history.db file was # just a Berkeley DBM file. You could trivially bind to it from # Perl, and pull out the URLs and last-access time. In Mozilla, # this has been replaced with a "Mork" database for which no tools # exist. # # Let me make it clear that McCusker is a complete barking lunatic. # This is just about the stupidest file format I've ever seen. # # http://www.mozilla.org/mailnews/arch/mork/primer.txt # http://jwz.livejournal.com/312657.html # http://www.jwz.org/doc/mailsum.html # http://bugzilla.mozilla.org/show_bug.cgi?id=241438 # # In brief, let's count its sins: # # - Two different numerical namespaces that overlap. # # - It can't decide what kind of character-quoting syntax to use: # Backslash? Hex encoding with dollar-sign? # # - C++ line comments are allowed sometimes, but sometimes // is just # a pair of characters in a URL. # # - It goes to all this serious compression effort (two different # string-interning hash tables) and then writes out Unicode strings # without using UTF-8: writes out the unpacked wchar_t characters! # # - Worse, it hex-encodes each wchar_t with a 3-byte encoding, # meaning the file size will be 3x or 6x (depending on whether # whchar_t is 2 bytes or 4 bytes.) # # - It masquerades as a "textual" file format when in fact it's # just another binary-blob file, except that it represents all its # magic numbers in ASCII. It's not human-readable, it's not # hand-editable, so the only benefit there is to the fact that it # uses short lines and doesn't use binary characters is that it # makes the file bigger. Oh wait, my mistake, that isn't actually # a benefit at all. # # Pure comedy. # ########################################################################## require 5; #use diagnostics; use strict; use constant ABOOK_PATHS => qw (~/.thunderbird/); #use Data::Dumper; my $progname = $0; $progname =~ s@.*/@@g; my $version = q{ $Revision: 695 $ }; $version =~ s/^[^0-9]+([0-9.]+).*$/$1/; my (%key_table, %val_table, %row_hash); my ($total, $skipped) = (0, 0); ########################################################################## # Returns a list of hashes, the contents of the mork file. ########################################################################## sub mork_parse_file { my $file = shift; local $/ = undef; local *IN; ########################################################################## # Define the messy regexen up here ########################################################################## my $top_level_comment = qr@//.*\n@; my $key_table_re = qr/ < \s* < # "< <" \( a=c \) > # "(a=c)>" (?> ([^>]*) ) > \s* # Grab anything that's not ">" /sx; my $value_table_re = qr/ < ( .*?\) )> \s* /sx; my $table_re = qr/ \{ -? # "{" or "{-" [\da-f]+ : # hex, ":" (?> .*?\{ ) # Eat up to a {... ((?> .*?\} ) # and then the closing }... (?> .*?\} )) # Finally, grab the table section \s* /six; my $row_re = qr/ ( (?> \[ [^]]* \] # "["..."]" \s*)+ ) # Perhaps repeated many times /sx; my $section_begin_re = qr/ \@\$\$\{ # "@$${" ([\dA-F]+) # hex \{\@ \s* # "{@" /six; my $section_end_re = undef; my $section = "top level"; ########################################################################## # Read in the file. ########################################################################## open (IN, "<$file") || error ("$file: $!"); my $body = ; close IN; $body =~ s/\r\n/\n/gs; # Windows Mozilla uses \r\n $body =~ s/\r/\n/gs; # Presumably Mac Mozilla is similarly dumb $body =~ s/\\\\/\$5C/gs; # Sometimes backslash is quoted with a # backslash; convert to hex. $body =~ s/\\\)/\$29/gs; # close-paren is quoted with a backslash; # convert to hex. $body =~ s/\\\n//gs; # backslash at end of line is continuation. ########################################################################## # Figure out what we're looking at, and parse it. ########################################################################## pos($body) = 0; my $length = length($body); while( pos($body) < $length ) { # Key table if ( $body =~ m/\G$key_table_re/gc ) { mork_parse_key_table($file, $section, $1); # Values } elsif ( $body =~ m/\G$value_table_re/gco ) { mork_parse_value_table($file, $section, $1); # Table } elsif ( $body =~ m/\G$table_re/gco ) { mork_parse_table($file, $section, $1); # Rows (-> table) } elsif ( $body =~ m/\G$row_re/gco ) { mork_parse_table($file, $section, $1); # Section begin } elsif ( $body =~ m/\G$section_begin_re/gco ) { $section = $1; $section_end_re = qr/\@\$\$\}$section\}\@\s*/s; # Section end } elsif ( $section_end_re && $body =~ m/\G$section_end_re/gc ) { $section_end_re = undef; $section = "top level"; # Comment } elsif ( $body =~ m/\G$top_level_comment/gco ) { #no-op } else { # $body =~ m/\G (.{0,300}) /gcsx; print "<$1>\n"; error("$file: $section: Cannot parse"); } } if($section_end_re) { error("$file: Unterminated section $section"); } # print STDERR "$progname: $file: sorting...\n" if ($verbose); my @entries = values(%row_hash); # print STDERR "$progname: $file: done! ($total total, $skipped skipped)\n" # if ($verbose); # print Dumper(\%key_table,\%val_table,\%row_hash); (%key_table, %val_table, %row_hash, $total, $skipped) = (); return \@entries; } ########################################################################## # parse a row and column table ########################################################################## sub mork_parse_table { my($file, $section, $table_part) = (@_); # print STDERR "\n" if ($verbose > 3); # Assumption: no relevant spaces in values in this section $table_part =~ s/\s+//g; # print $table_part; #exit(0); #Grab each complete [...] block while( $table_part =~ m/\G [^[]* \[ # find a "[" ( [^]]+ ) \] # capture up to "]" /gcx ) { $_ = $1; my %hash; my ($id, @cells) = split (m/[()]+/s); next unless scalar(@cells); # Trim junk $id =~ s/^-//; $id =~ s/:.*//; if($row_hash{$id}) { %hash = ( %{$row_hash{$id}} ); } else { %hash = ( 'ID' => $id); } foreach (@cells) { next unless $_; my ($keyi, $which, $vali) = m/^\^ ([-\dA-F]+) ([\^=]) (.*) $/xi; error ("$file: unparsable cell: $_\n") unless defined ($vali); # If the key isn't in the key table, ignore it # my $key = $key_table{$keyi}; next unless defined($key); my $val = ($which eq '=' ? $vali : $val_table{$vali}); #print "$key : $val : $which : $vali : $val_table{$vali}\n"; $hash{$key} = $val; #print "$id: $key -> $val\n"; } $total++; $row_hash{$id} = \%hash; } } ########################################################################## # parse a values table ########################################################################## sub mork_parse_value_table { my($file, $section, $val_part) = (@_); return unless $val_part; my @pairs = split (m/\(([^\)]+)\)/, $val_part); $val_part = undef; # print STDERR "\n" if ($verbose > 3); foreach (@pairs) { next unless (m/[^\s]/s); my ($key, $val) = m/([\dA-F]*)[\t\n ]*=[\t\n ]*(.*)/i; if (! defined ($val)) { print STDERR "$progname: $file: $section: unparsable val: $_\n"; next; } # Assume that URLs and LastVisited are never hexilated; so # don't bother unhexilating if we won't be using Name, etc. if($val =~ m/\$/) { # Approximate wchar_t -> ASCII and remove NULs $val =~ s/\$00//g; # faster if we remove these first $val =~ s/\$([\dA-F]{2})/chr(hex($1))/ge; } $val_table{$key} = $val; # print STDERR "$progname: $file: $section: val $key = \"$val\"\n" # if ($verbose > 3); } } ########################################################################## # parse a key table ########################################################################## sub mork_parse_key_table { my ($file, $section, $key_table) = (@_); # print STDERR "\n" if ($verbose > 3); $key_table =~ s@\s+//.*$@@gm; my @pairs = split (m/\(([^\)]+)\)/s, $key_table); $key_table = undef; foreach (@pairs) { next unless (m/[^\s]/s); my ($key, $val) = m/([\dA-F]+)\s*=\s*(.*)/i; error ("$file: $section: unparsable key: $_") unless defined ($val); # If we're only emitting URLs and dates, don't even bother # saving the other fields that we aren't interested in. # $key_table{$key} = $val; # print STDERR "$progname: $file: $section: key $key = \"$val\"\n" # if ($verbose > 3); } } ########################################################################## # Output the vcards ########################################################################## sub output_vcards { # Encoding? # Fold lines my $results = shift; my %map = ( FN => [' ',qw(FirstName LastName)], N => [';',qw(LastName FirstName null null null)], 'ADR;TYPE=WORK' => [';',qw(null WorkAddress WorkAddress2 WorkCity WorkState WorkCountry WorkZipCode)], 'ADR;TYPE=HOME' => [';',qw(null HomeAddress HomeAddress2 HomeCity HomeState HomeCountry HomeZipCode)], 'TEL;TYPE=HOME;TYPE=VOICE' => [' ',qw(HomePhone)], 'TEL;TYPE=FAX' => [' ',qw(FaxNumber)], 'TEL;TYPE=WORK;TYPE=VOICE' => [' ',qw(WorkPhone)], 'TEL;TYPE=CELL;TYPE=VOICE' => [' ',qw(CellularNumber)], 'TEL;TYPE=PAGER;TYPE=VOICE' => [' ',qw(PagerNumber)], 'EMAIL;TYPE=INTERNET;X-EVOLUTION-UI-SLOT=1' => [' ',qw(PrimaryEmail)], 'EMAIL;TYPE=INTERNET;X-EVOLUTION-UI-SLOT=2' => [' ',qw(SecondEmail)], 'NICKNAME' => [',',qw(NickName)], 'NOTE' => [' ',qw(Notes)], 'ORG' => [';',qw(Company Department)], 'TITLE' => [' ',qw(JobTitle)], 'URL' => [' '], 'X-AIM' => [' ',qw(_AimScreenName)], ); foreach my $vcard (@$results) { print "BEGIN:VCARD\n"; foreach my $key (keys %map) { my @fields = @{$map{$key}}; my $data; if ($key eq 'URL') { $data = $vcard->{WebPage1}||$vcard->{WebPage2}||''; } elsif ($key eq 'X-MOZILLA-HTML') { $data = $vcard->{PreferMailFormat}==2 ? 'TRUE' : 'FALSE'; } else { $data = join($fields[0],map {$vcard->{$_}||''} @fields[1..$#fields]); } next unless $key=~/^F?N$/ || $data=~/[^${fields[0]}]/; $data="$key:$data"; $data=~s/(.{1,65})/$1\r\n /g; ## Kept lines short so that there is some room for recoded characters chop $data; print $data; } print "END:VCARD\n"; } } ########################################################################## # Escape results in preparation for vcards ########################################################################## sub escape_results { my $old_results = shift; my @results = (); foreach my $old_card (@$old_results) { my %vcard = map {$_=>''} qw ( FirstName LastName WorkAddress WorkAddress2 WorkCity WorkState WorkCountry WorkZipCode HomeAddress HomeAddress2 HomeCity HomeState HomeCountry HomeZipCode HomePhone FaxNumber WorkPhone CellularNumber PagerNumber PrimaryEmail SecondEmail NickName Notes Company Department JobTitle URL _AimScreenName PreferMailFormat WebPage1 WebPage2 ); foreach my $key (keys %vcard) { my $val = $old_card->{$key}; next unless $val; $val=~s/([,;\\])/\\$1/g; $val=~s/\n/\\n/g; $vcard{$key} = $val; } $vcard{null} = ''; push @results,\%vcard; } return \@results; } ########################################################################## # Find default address book ########################################################################## sub locate_default_abook { my @files; foreach my $path (ABOOK_PATHS) { my $dir = glob($path); use Data::Dumper; print Dumper($dir); next unless $dir; error ("$path is not a directory. Please correct ABOOK_PATHS in the script '$progname'") unless -d $dir; $dir=~s/\/*\s*$//; print Dumper($dir); push @files,(glob $dir.'/abook.mab'); push @files,(glob $dir.'/*/abook.mab'); push @files,(glob $dir.'/*/*/abook.mab'); } return $files[0] if @files==1; if (@files) { my $errstr = "More than one address book found - please specify the correct one on the command line:\n"; foreach my $file (@files) { $errstr.=" $file\n"; } error($errstr); } my $errstr = "Couldn't find the default address book in : \n"; foreach my $path (ABOOK_PATHS) { $errstr.=" $path\n"; } $errstr.="Please specify the file to use on the command line"; error($errstr); } ########################################################################## # Throw errors ########################################################################## sub error { ($_) = @_; print STDERR '*'x60; print STDERR "\n$progname: $_\n"; print STDERR '*'x60; usage(); exit 1; } ########################################################################## # Usage ########################################################################## sub usage { print STDERR < vcards_file_name $progname | recode UTF8..ISO-8859-15 > vcards_file_name USAGE } ########################################################################## # Main ########################################################################## exit(usage()) if $ARGV[0]=~/^-/; # get file name my $file = $ARGV[0] || locate_default_abook; error("Couldn't read file '$file' - please check the correct path and correct") unless -s $file && -r _; print STDERR "Using file : $file\n"; # read mork file my $results = mork_parse_file ($file); # Escape results $results = escape_results($results); # output as vcards output_vcards ($results); exit 0;