#!/usr/bin/perl -w -s

use Data::Dumper;
$Data::Dumper::Indent=1;
$Data::Dumper::Terse=1;
use strict;
use warnings;

# Driver: reads tree-bank records from the files named on the command line
# (or STDIN), turns every analysis of every sentence into a nested Perl array
# and writes the Data::Dumper rendering to Bosque_<name>_<version>.Perl.tmp.
# Analyses whose generated code fails to evaluate are logged to _Erros.tmp.

# Shared parser state.  $indent (depth of the current line) and $ant (depth
# of the previous line) are file-scoped because par() and ind() read them;
# $i is the loop counter used inside par().
my ($ant,$first,$i,$indent,$r_1,$cat,$cat1,$stem,$frase,$codigo);

# Summary counters.  They start at zero so the final report prints numbers
# (and no "uninitialized value" warnings) even when nothing was counted.
my ($ferros,$total,$missing) = (0,0,0);

# NOTE(review): the -s switch on the shebang line would let a "-virgem=..."
# option set this package variable, but the assignment below always overrides
# it, so the "! $virgem" branches never run as written -- confirm the intent.
our($virgem);
$virgem = 1;

# Input records are delimited by the literal text 'SOURCE: '.
$/='SOURCE: ';

my $erros_file = "_Erros.tmp";
open(my $erros_fh, '>', $erros_file)
  or die("cant create file $erros_file: $!\n");

# The output name is derived from the "_<name>_<d.d>" part of the first input
# file name.  When there is no such part (or no file argument at all) both
# pieces stay empty, which yields the same "Bosque__.Perl.tmp" name as before
# but without relying on undefined capture variables.
my ($corpus,$version) = ('','');
if (defined $ARGV[0] && $ARGV[0] =~ /_([\w]+)_(\d\.\d)/) {
  ($corpus,$version) = ($1,$2);
}
else {
  warn("cannot derive name/version from the input file name; using empty values\n");
}
my $fout="Bosque_".$corpus."_".$version.".Perl.tmp";
open(my $p1_fh, '>', $fout) or die("cant create file $fout: $!\n");

while(<>){
  chomp;                                         # drop the trailing 'SOURCE: ' delimiter
  next unless /^\s*CET/;                         # only records that begin with CET

  s/\cM//g;                                      # drop carriage returns
  s/[ \t]+\n/\n/g;                               # strip trailing whitespace
  s/\n<\/?(s|ext|sic|t|a|marca|ext_n|p)\b.*//g;  # strip markup lines

  ($codigo,$frase) = m/^(.*)\n(.*)/;             # line 1: record code, line 2: sentence to analyse
  my $n=1;                                       # running number of the analysis within this sentence

  print {$p1_fh} "\n\n#","-" x 60, "\n";

  # Alternative analyses of one sentence are separated by a line starting
  # with "&&" (optionally preceded by "=").
  for( split(/\n=?\&\&/, $_)){
    $r_1 = "";      # Perl source text being generated for this analysis
    $ant = 0;
    $first = 1;     # true until the root node of the tree has been seen
    for ( split(/\n/,$_) ){
      next unless/\S/;
      if($first && m/^C[PF]?[0-9]/){
         # Header line before the tree: kept as a comment in the generated code.
         $r_1 .= "\n## $_"; 
      }
      elsif($first && m/^(=*)([^:]*):([^)]*)\)\s+(.*)/){
        # The root is itself a leaf: the whole tree is a single token.
        ($indent,$cat1,$cat,$stem) = (length($1),$2,"$3)",$4);
        $r_1 .="\n[q{jjSINGLE_T} => [\n   q{${cat}||${cat1}::$stem}\n]";
        $first = 0;
      }
      elsif($first && $_ =~ m/^(=*)([^:]*):(\S+)/){
        # Root non-terminal: opens the outermost list.
        ($indent,$cat1,$cat) = (length($1),$2,$3);
        $first = 0;
        $r_1 .= ("\n[q{$cat||$cat1} => ") ;
        $r_1 .= "[" if ! $virgem;
      }
      elsif(m/^(=*)([^:]*):((?:[^)]|\\\))+)\)\s+(.+)/){
        # Leaf "<label>:<category>(...)<space><text>"; depth is the number of leading "=".
        ($indent,$cat1,$cat,$stem) = (length($1),$2,"$3)",$4);
        $r_1 .=par();
        $r_1 .=ind(). "q{${cat}||${cat1}::$stem},";
      }
      elsif(m/^(=*)([^:]*):(\S+)/){
        # Non-terminal "<label>:<category>"; its children follow on deeper lines.
        ($indent,$cat1,$cat) = (length($1),$2,$3);
        $r_1 .= par();
        $r_1 .= ind(). "q{$cat||$cat1} => ";
      }
      elsif(m/^(=*)((?:[^)]|\\\))+)\)\t+(.+)/){
        # Leaf without the "<label>:" part: recorded as MISSING and counted.
        ($indent,$cat1,$cat,$stem) = (length($1),"MISSING","$2)",$3);
        $missing ++;
        $r_1 .=par();
        $r_1 .=ind(). "q{${cat}||${cat1}::$stem},";
      }
      elsif(!$first && m/^(=*)(.+)/){
        # Any other line inside the tree is emitted as a punctuation node.
        ($indent,$cat)=(length($1),$2);
        $r_1 .= par();
        $r_1 .= ind(). "q{jjpunct(-$cat-)},";
      }
      elsif($first){
         # Other text before the root: kept as a comment; depth tracking is skipped.
         $r_1 .= "\n## $_"; 
           next }
      else {
        # Defensive fallback; the two branches above appear to cover every line.
        $r_1 .= ind()."q{??????????????????????? $_}";
      }

      $ant = $indent ;
    }
    # Close every list that is still open, then tidy stray commas.
    $indent=-1;
    $r_1 .= par();
    $r_1 .= "]" if ! $virgem;
    $r_1 = toperl($r_1);
  
    # SECURITY: this evaluates Perl source assembled from the input text.
    # Input containing unbalanced braces can escape the q{...} quoting, so
    # only run this script on trusted corpus files.
    my $r1=eval($r_1);
    $total++;
    if ($@) {
      $ferros++;
      print {$erros_fh} "\n",("-" x 70),"\n$@\n$frase\n$_\n\n$r_1\n";
    } else {
      # Both $virgem settings post-processed the tree in the same way.
      $r1=masti($r1)->[0];
      print {$p1_fh} "\n#","+" x 60, "\n" if ($n>1);
      print {$p1_fh} Dumper({source=>$frase, cod=>$codigo,number=>$n++, t=>$r1}),"\n";
    }
  }
}

# Buffered write errors only surface at close time, so check both handles.
close($p1_fh)    or die("error closing $fout: $!\n");
close($erros_fh) or die("error closing $erros_file: $!\n");

print "total = $total; Erros = $ferros; Missing? = $missing\n\n";

# toperl($text)
#
# Tidies the generated Perl source text so that it can be evaluated: removes
# a comma at the very start, a comma (plus trailing whitespace) at the very
# end, a comma directly after or directly before "[", and a comma that is
# separated from a closing "]" only by whitespace.
# Returns the cleaned-up copy; the caller's string is not modified.
sub toperl {
  my ($code) = @_;

  $code =~ s/^,//g;
  $code =~ s/,\s*$//g;
  $code =~ s/\[,/[/g;
  $code =~ s/,\[/[/g;
  $code =~ s/,(\s+\])/$1/g;

  return $code;
}

# par()
#
# Returns the bracket text needed when moving from the previous depth ($ant)
# to the current depth ($indent), both file-scoped variables:
#   - deeper:    one "[" per level gained;
#   - shallower: one "]," per level lost, each on its own line indented via
#                ind(), from the deepest level outwards;
#   - same:      the empty string.
# Uses the file-scoped $i as the level counter.
sub par {
  return "[" x ($indent - $ant) if $indent > $ant;
  return "" unless $indent < $ant;

  my $closing = "";
  $i = $ant;
  while ($i > $indent) {
    $closing .= ind($i) . "],";
    $i--;
  }
  return $closing;
}

# ind([$level])
#
# Returns a newline followed by ($level + 1) indentation units of three
# spaces.  When no level is passed, the file-scoped $indent is used.
#
# The default is applied only when the argument is undefined: an explicit
# level of 0 is a valid depth and must not be replaced by $indent (which the
# former "||=" test did, because 0 is false).
sub ind {
  my ($level) = @_;
  $level = $indent unless defined $level;
  return "\n" . ("   " x ($level + 1));
}

# masti($list)
#
# Walks the array referenced by $list and attaches every nested array-ref to
# the element that precedes it: the preceding element is removed from the
# result and replaced by [ that element, followed by the recursively
# processed contents of the nested array ].  Other elements are copied as
# they are.  If a nested array comes first, its head is undef.
# Returns a reference to a new array; the input is not modified.
sub masti {
  my ($nodes) = @_;
  my @merged;

  foreach my $item (@{$nodes}) {
    if (ref($item) ne "ARRAY") {
      push @merged, $item;
      next;
    }
    my $head = pop @merged;
    push @merged, [ $head, @{ masti($item) } ];
  }

  return \@merged;
}
