package Pod::Diff;
use Exporter;
@ISA = qw(Exporter);
@EXPORT = qw(pod_diff_files pod_diff_print_stats);
use strict;

# Set this for strict checking, i.e.:
# * over commands must match
# * multiple spaces within non-verbatim paragraphs must match
my $strict = 0;

# Statistics collected
my $total_runs = 0;
my $total_paras = 0;
my $total_diffs = 0;
my $total_resync_fails = 0;

# The default formatting routine for a difference
sub _pod_diff_fmt {
    my($i, $text1, $text2, $line1, $line2) = @_;

    # Find the first character which differs
    # Is there a better way than this?
    my $first =  0;
    $first++ while substr($text1, $first, 1) eq substr($text2, $first, 1);
    $first++;

    return join("\n",
        "*** paragraph $i - character $first ***",
        "--- $line1 ---",
        $text1,
        "--- $line2 ---",
        $text2);
}

# Parse a pod array into a list of paragraphs.
# Each paragraph has the following fields:
#
# * TEXT - the paragraph text
# * LINE - the starting line number
sub _pod_parse_paras {
    my($array) = @_;
    my(@list);

    my $lineno = 0;
    my $text = '';
    my $tab_size = 8;
    my $line;
    for $line (@$array) {
        $lineno++;

        # Trim trailing whitespace and convert tabs to spaces
        $line =~ s/\s+$//;
        1 while $line =~ s/\t+/' ' x (length($&) * $tab_size - length($`) % $tab_size)/e;

        # Build and store the paragraphs
        if ($line =~ /^$/) {
            if ($text ne '') {
                push(@list, {TEXT => $text, LINE => $lineno});
                $text = '';
            }
        }
        elsif ($text eq '') {
            $text  = $line;
        }
        else {
            if ($text =~ /^\s/) {
                $text  .= "\n$line";
            }
            else {
                $text  .= " $line";
            }
        }
    }

    # Save the last paragraph, if necessary
    if ($text ne '') {
        push(@list, {TEXT => $text, LINE => $lineno});
    }

    # Return result
    return @list;
}

# Remove redundant escapes from a paragraph
sub _fix_escapes {
    my($text) = @_;

    # For verbatim, leave things alone
    return $text if $text =~ /^ /;

    my $result = '';
    my $phrase = '';
    my @nested = ();
    my $tag = '';
    while ($text ne '') {

        # A > without a proceeding < may be a sequence end marker
        if ($text =~ /^([^<>]*)\>/) {
            $text = $';
            if (@nested) {
                $tag = pop(@nested);
                $phrase = $1;
                if ($tag eq 'E' && ($phrase eq 'gt' || $phrase eq 'lt')) {

                    # The escape isn't necessary unless:
                    # * the preceding character is [A-Z] for <, or
                    # * it's inside an interior sequence for >, or
                    if ($phrase eq 'gt' && scalar @nested > 0 ||
                        $phrase eq 'lt' && $result =~ /[A-Z]E\<$/) {
                        $result .= "$phrase>";
                    }
                    else {
                        $result =~ s/E\<$//;
                        $result .= $phrase eq 'gt' ? '>' : '<';
                    }
                }
                else {
                    $result .= $` . $&;
                }
            }
            else {
                $result .= $` . $&;
            }
        }

        # A sequence which may have something nested
        elsif ($text =~ /([A-Z])\</) {
            $result .= $`;
            $result .= $&;
            $text = $';
            push(@nested, $1);
        }

        # No sequences left
        else {
            $result .= $text;
            $text = '';
        }
    }

    # Return result
    return $result;
}

# Diff two pod arrays
sub pod_diff_arrays {
    my($array1, $array2, $formatter) = @_;
    my(@result) = ();

    # Use the default formatter if none is given
    $formatter = \&_pod_diff_fmt;

    # Parse the arrays into paragraphs
    my @para1 = _pod_parse_paras($array1);
    my @para2 = _pod_parse_paras($array2);

    # Diff the paragraph lists
    my $j = 0;
    for (my $i = 0; $i <= $#para1; $i++, $j++) {
        my $text1 = $para1[$i]{TEXT};
        my $text2 = $para2[$j]{TEXT};
        next if $text1 eq $text2;

        # If things don't match and strict checking isn't enabled:
        # * ignore over commands
        # * ignore extra spaces in non-verbatim paragraphs
        unless ($strict) {
            next if $text1 =~ /^=over/ && $text2 =~ /^=over/;
            $text1 =~ s/ +/ /g if $text1 =~ /^\S/;
            $text2 =~ s/ +/ /g if $text2 =~ /^\S/;
            next if $text1 eq $text2;
        }

        # Still no luck, so try removing unnecessary escapes
        $text1 = _fix_escapes($text1);
        $text2 = _fix_escapes($text2);
        next if $text1 eq $text2;

        # If we reach here, we have a failure
        my $line1 = $para1[$i]{LINE};
        my $line2 = $para2[$j]{LINE};
        push(@result, &$formatter($i, $text1, $text2, $line1, $line2));

        # Resynchronise if necessary and we can
        if ($para1[$i+1]{TEXT} eq $para2[$j+1]{TEXT}) {
            # next 2 paragraphs start off ok so do nothing
        }
        elsif ($text1 eq $para2[$j+1]{TEXT}) {
            $j++;
        }
        elsif ($text2 eq $para1[$i+1]{TEXT}) {
            $i++;
        }
        elsif ($text1 eq $para2[$j+2]{TEXT}) {
            $j += 2;
        }
        elsif ($text2 eq $para1[$i+2]{TEXT}) {
            $i += 2;
        }
        elsif ($text1 eq $para2[$j+3]{TEXT}) {
            $j += 3;
        }
        elsif ($text2 eq $para1[$i+3]{TEXT}) {
            $i += 3;
        }
        elsif ($text1 eq $para2[$j+4]{TEXT}) {
            $j += 4;
        }
        elsif ($text2 eq $para1[$i+4]{TEXT}) {
            $i += 4;
        }
        elsif ($text1 eq $para2[$j+5]{TEXT}) {
            $j += 5;
        }
        elsif ($text2 eq $para1[$i+5]{TEXT}) {
            $i += 5;
        }
        elsif ($text1 eq $para2[$j+6]{TEXT}) {
            $j += 6;
        }
        elsif ($text2 eq $para1[$i+6]{TEXT}) {
            $i += 6;
        }
        else {
            $total_resync_fails++;
        }
    }

    # Collect some stats
    $total_runs++;
    $total_paras += scalar(@para1);
    $total_diffs += scalar(@result);

    # Return result
    return @result;
}

 
# Diff two pod files
sub pod_diff_files {
    my($file1, $file2, $formatter) = @_;

    # Load the pod from the first file
    unless (open(FILE1, $file1)) {
        warn "unable to open '$file1': $!\n";
        return ();
    }
    my @pod1 = <FILE1>;
    chop(@pod1);
    close FILE1;

    # Load the pod from the second file
    unless (open(FILE2, $file2)) {
        warn "unable to open '$file2': $!\n";
        return ();
    }
    my @pod2 = <FILE2>;
    chop(@pod2);
    close FILE2;

    # Diff the arrays
    return pod_diff_arrays(\@pod1, \@pod2, $formatter);
}

sub pod_diff_print_stats {
    my($strm) = @_;

    print $strm "*** SUMMARY ***\n";
    print $strm "Total files: ", $total_runs, "\n";
    print $strm "Total paras: ", $total_paras, "\n";
    print $strm "Total diffs: ", $total_diffs, "\n";
    print $strm "Total resync failures: ", $total_resync_fails, "\n";
    if ($total_paras) {
        printf $strm "PERCENT OK: %.2f%%\n",
            ($total_paras - $total_diffs)/$total_paras * 100;
    }
}

# package return value
1;