#!/usr/bin/perl

use utf8;
use strict;
use warnings;

use DB_File;
use Fcntl;
use MediaWiki::DumpFile::Pages;

# use File::Path  qw.make_path.;
use Encode      qw.encode_utf8.;
# use Digest::MD5 qw.md5_hex.;

use POSIX       qw.setlocale.;
setlocale(&POSIX::LC_ALL => "pt");
use locale;

# Path of the MediaWiki XML dump to read; required first argument.
my $input_file = shift || die "No file specified.";

my $pages = MediaWiki::DumpFile::Pages->new(input => $input_file,
                                            fast_mode => 0);

# The cache is rebuilt from scratch on every run.  Refuse to continue if an
# old cache exists but cannot be removed, otherwise stale entries would be
# silently merged into the new cache.
my $cache_file = "templates.cache";
if (-e $cache_file) {
    unlink $cache_file
        or die "Cannot remove old $cache_file: $!";
}

# %hash is the on-disk template cache (template name => wikitext, both stored
# as UTF-8 encoded bytes).  tie returns false on failure; without this check
# %hash would remain a plain in-memory hash and nothing would reach the disk.
tie my %hash, 'DB_File', $cache_file, O_CREAT|O_RDWR, 0666, $DB_HASH
    or die "Cannot open $cache_file: $!";

# Redirecting templates: source template name => target name.
my %redirect;

# Scan every page in the dump and collect the pages whose title is in the
# "Predefinição:" (template) namespace.  Redirecting templates are recorded in
# %redirect (source name => target name); every other template has its
# wikitext, minus <noinclude> sections and <includeonly> tags, stored in %hash.
my $i = 0;
my $page;
while (defined($page = $pages->next)) {

    my $title = $page->title;
    print STDERR "." unless $i++%1000;    # progress dot every 1000 pages

    next unless $title =~ /^Predefinição:(.*)/;
    my $id = $1;                          # template name without namespace

    my $lrev = $page->revision->text;
    # NOTE(review): guard in case a revision carries no text -- confirm
    # whether MediaWiki::DumpFile can actually return undef here.
    $lrev = "" unless defined $lrev;

    # A redirect page starts with the redirect keyword.  MediaWiki accepts the
    # keyword in any letter case, with optional whitespace and an optional
    # colon before the link; on Portuguese wikis "#REDIRECIONAMENTO" is an
    # alias of "#REDIRECT".  Anchoring at the start of the text avoids
    # misclassifying templates that merely mention "REDIRECT [[...]]".
    if ($lrev =~ m{
            \A \s*
            \# \s* (?:REDIRECT|REDIRECIONAMENTO)
            \s* :? \s*
            \[\[ \s* :? \s*
            ( [^\]|\#]+ )       # target title, without "|label" or "#fragment"
            [^\]]* \]\]
        }xi)
      {
          my $target = $1;
          $target =~ tr/_/ /;             # underscores and spaces are equivalent in titles
          $target =~ s/\s+\z//;
          $target = $1 if $target =~ /^Predefinição:(.*)$/;
          $redirect{$id} = $target;
      }
    else
      {
          # Keep only what is actually transcluded.
          $lrev =~ s{<noinclude>.*?</noinclude>}{}gs;
          $lrev =~ s{</?includeonly>}{}g;
          $hash{encode_utf8($id)}=encode_utf8($lrev);
      }
}

# Give every redirecting template the body of the template it ultimately
# points to, so that lookups in the cache never have to follow redirects.
print STDERR "\n";
$i=0;
for my $red (keys %redirect) {
    print STDERR "." unless $i++%100;     # progress dot every 100 redirects

    # Follow chains of redirects (A -> B -> C) inside %redirect itself, so the
    # result does not depend on the order in which keys are visited.  %seen
    # stops the walk if the chain loops back on itself.
    my $target = $redirect{$red};
    my %seen   = ($red => 1);
    while (exists $redirect{$target} && !$seen{$target}++) {
        $target = $redirect{$target};
    }

    # A target that is still a redirect means a loop; a missing target means
    # the redirect points outside the collected templates.  Both map to the
    # empty string.  Test definedness rather than truth so that a template
    # whose whole body is "0" is preserved.
    my $body = exists $redirect{$target} ? undef : $hash{encode_utf8($target)};
    $hash{encode_utf8($red)} = defined $body ? $body : "";
}
print STDERR "\n";
untie %hash;
