#!/usr/bin/perl

use locale;
use XML::DT;
use Getopt::Long;
use Data::Dumper;

# Set by --estatico; tested later in the script to choose between values
# collected from the corpus (0) and values read from the description file.
my $static=0;

# Options are parsed BEFORE the file name is taken from @ARGV, so that both
# "--estatico file.xml" and "file.xml --estatico" work.  Shifting first would
# mistake a leading option for the file name.
GetOptions ("estatico" => \$static)
    or die "Uso: $0 [--estatico] ficheiro.xml\n";

my $filename = shift;
die "Uso: $0 [--estatico] ficheiro.xml\n" unless (defined $filename);

# Tag descriptions, indexed as $raml{section}{tag} = description.
my %raml;

# The description file is made of "section:--" header lines followed by
# indented "tag: description" lines belonging to that section.
my $ficheiro_raml = "../metadados/descEtiquetas.txt";
open my $fh_raml, "<", $ficheiro_raml or die "$ficheiro_raml: $!";

my $act;    # section currently being read
while (<$fh_raml>){
  if (/^(.+):--$/){
     $act=$1;
  }elsif (/^\s+(.+): (.+)/){
     $raml{$act}{$1}=$2;
  }
}
close $fh_raml;

# Month names, indexed by the 0-based month number returned by gmtime.
@meses=qw /Janeiro Fevereiro Março Abril Maio Junho Julho Agosto Setembro Outubro Novembro Dezembro /;

# Element handlers passed to XML::DT's dt().  Every handler returns
# "name:content"; the value returned by dt() itself is discarded below, so the
# useful work is the side effect of counting attribute values in %valores
# ($valores{class}{value} = number of occurrences).
# NOTE(review): $q, $c and %v are not set anywhere in this file; presumably
# they are XML::DT's current element name, content and attributes - confirm.
#
# A counter is only incremented when the attribute is actually present.
# Counting an absent attribute would create an empty-string key, which makes
# every class look "seen" and yields an empty value name downstream.
%handler=(
#    '-outputenc' => 'ISO-8859-1',
#    '-default'   => sub{"<$q>$c</$q>"},
	  'annotation' => sub{"$q:$c"},
	  'author' => sub{"$q:$c"},
	  'body' => sub{"$q:$c"},
	  'corpus' => sub{
	      # remember attributes $v{id}
	      "$q:$c"},
	  'date' => sub{"$q:$c"},
	  'description' => sub{"$q:$c"},
	  'edge' => sub{
	      $valores{label}{$v{label}}++ if (defined $v{label});
	      # remember attributes $v{idref},$v{label}
	      "$q:$c"},
	  'edgelabel' => sub{"$q:$c"},
	  'feature' => sub{
	      # remember attributes $v{domain},$v{name}
	      "$q:$c"},
	  'format' => sub{"$q:$c"},
	  'graph' => sub{
	      # remember attributes $v{root}
	      "$q:$c"},
	  'head' => sub{"$q:$c"},
	  'meta' => sub{"$q:$c"},
	  'name' => sub{"$q:$c"},
	  'nonterminals' => sub{"$q:$c"},
	  'nt' => sub{
	      $valores{cat}{$v{cat}}++ if (defined $v{cat});
	      # remember attributes $v{cat},$v{id}
	      "$q:$c"},
	  's' => sub{
	      # remember attributes $v{source},$v{ref},$v{text},$v{id},$v{forest}
	      "$q:$c"},
	  'secedge' => sub{
	      # remember attributes $v{idref},$v{label}
	      "$q:$c"},
	  'secedgelabel' => sub{"$q:$c"},
	  't' => sub{
	      # Count each terminal attribute that is declared as a feature.
	      foreach my $atr (qw/pos morph extra extra2 extra3/){
		  $valores{$atr}{$v{$atr}}++ if (defined $v{$atr});
	      }
	      # remember attributes $v{word},$v{morph},$v{id},$v{lemma},$v{pos},$v{extra},$v{extra2},$v{extra3}
	      "$q:$c"},
	  'terminals' => sub{"$q:$c"},
	  'value' => sub{
	      # remember attributes $v{name}
	      "$q:$c"},
  );
# The corpus is only scanned in dynamic mode; static mode never reads %valores.
dt($filename,%handler) unless ($static);

# Metadata

# Derive the corpus description and version from a file name.
#   $nome - file name (may be undef)
# Returns the list (description, version); both are empty strings when the
# name matches none of the known patterns (CF_x.y / CP_x.y, FL_x.y,
# FV_xx_x.y).
sub descreve_corpus{
    my ($nome)=@_;
    return ('','') unless (defined $nome);

    if ($nome=~/(C[FP])_(\d\.\d)/){
	my ($sigla,$versao)=($1,$2);
	my $descricao=($sigla eq "CP" ? "Bosque do CETEMPúblico" : "Bosque do CETENFolha");
	return ($descricao,$versao);
    }
    if ($nome=~/FL_(\d\.\d)/){
	return ("Floresta",$1);
    }
    if ($nome=~/FV_(\w{2,2})_(\d\.\d)/){
	# The version is the SECOND capture; the first is the two-character
	# code that sits between "FV_" and the version number.
	return ("Floresta Virgem do CETEMPúblico",$2);
    }
    return ('','');
}

# A single gmtime call, so day, month and year always belong to the same
# instant.
($dia,$mes,$ano)=(gmtime)[3,4,5];
$ano+=1900;
$data= "$dia de ".$meses[$mes]." de $ano";

($descr,$ver)=descreve_corpus($filename);

# Start of the header text; later code appends to $toxml.
$toxml='<meta>
<name>Floresta Sintá(c)tica</name>
<author>Rui Vilela - Linguateca, pólo de Braga</author>
<url>http://linguateca.di.uminho.pt/</url>
<date>'.$data.'</date>
<description>'.$descr.' '.$ver.'</description>
<format>Tiger-XML (Importado do formato árvores deitadas)</format>
<history>.</history>
</meta>';

$toxml.='<annotation>';

# Annotation: declare every feature and the values it may take.

$toxml.='<feature name="word" domain="T"/>';
$toxml.='<feature name="lemma" domain="T"/>';
#$toxml.='<feature name="morph" domain="T"/>';

# Predefined order in which the feature classes are emitted.
@sortet=qw/cat pos morph extra extra2 extra3 label/;

foreach $class (@sortet){
    # Dynamic mode only declares classes that were seen in the corpus.
    next if ($static==0 && !defined $valores{$class});

    # Section of %raml holding the descriptions for this class: cat and pos
    # share "catpos", extra2 and extra3 share "extra".
    $cl=$class;
    $cl="catpos" if (($class eq "cat") || ($class eq "pos"));
    $cl="extra" if (($class eq "extra2") || ($class eq "extra3"));

    # Edge labels get their own element; cat is the only non-terminal ("NT")
    # feature, every other class is a terminal ("T") feature.
    my ($abre,$fecha);
    if ($class eq "label"){
	($abre,$fecha)=('<edgelabel>','</edgelabel>');
    }else{
	my $dominio=($class eq "cat") ? "NT" : "T";
	($abre,$fecha)=('<feature name="'.$class.'" domain="'.$dominio.'">','</feature>');
    }
    $toxml.=$abre;

    # Value names come from the corpus counts (dynamic mode) or from the
    # description file (static mode).
    # NOTE(review): static mode lists the keys of $raml{$class} but looks the
    # descriptions up in $raml{$cl}; for cat/pos/extra2/extra3 those differ.
    # Kept as in the original - confirm whether $raml{$cl} was intended.
    my $fonte=($static==0) ? $valores{$class} : $raml{$class};

    # Sorted so that the generated header is identical from run to run.
    foreach $res (sort keys %{ $fonte || {} }){
	# The name ends up inside an XML attribute: escape every occurrence
	# of the markup characters, '&' first so entities are not re-escaped.
	my $nome=$res;
	$nome=~s/&/&amp;/g;
	$nome=~s/>/&gt;/g;
	$nome=~s/</&lt;/g;

	if ($class eq "morph"){
	    # Morphology values carry no description and are never validated.
	    $toxml.=toxml ("value", {name=>"$nome"});
	}elsif ($static!=0 || valido($class,$cl,$res)){
	    # Dynamic mode drops (and reports) values with no description.
	    $toxml.=toxml ("value", {name=>"$nome"}, $raml{$cl}{$res});
	}
    }

    $toxml.=$fecha;
}


$toxml.='<secedgelabel><value name="*"/></secedgelabel>';
$toxml.='</annotation>';

#print "$toxml\n";

# Copy the input file to swap.xml, replacing every QWERTYUIOP placeholder with
# the generated header, then move swap.xml over the input file.
# Three-argument open keeps characters in the file name from being read as an
# open mode.
open my $fic, "<", $filename or die "Ops : $!";
open my $fic_out, ">", "swap.xml" or die "Ops : $!";

while (my $linha=<$fic>){
    $linha=~s/QWERTYUIOP/$toxml/g;
    print {$fic_out} $linha or die "Ops : $!";
}

# Buffered write errors only surface at close; stop here rather than replace
# the input file with a truncated copy.
close $fic_out or die "Ops : $!";
close $fic;

# List-form system bypasses the shell; "--" keeps a file name that starts
# with "-" from being parsed as an option by mv.
@arg=("swap.xml","$filename");
system ("mv","--",@arg)==0 or die "Ops : mv swap.xml $filename: $?";

# Tell whether a value may be written as a feature value.
#   $cl2 - feature class the value was found in (key of %valores)
#   $cl  - section of %raml where its description is looked up
#   $var - the value itself
# Returns 1 for any "morph" value and for values that have a description in
# $raml{$cl}; otherwise prints a warning with the occurrence count to STDERR
# and returns 0.
sub valido{
    my ($cl2,$cl,$var)=@_;

    # Morphology values are accepted without a lookup.
    if ($cl2 eq "morph"){
	return 1;
    }

    # Described values are accepted.
    unless (!defined($raml{$cl}{$var})){
	return 1;
    }

    print STDERR "Atenção : $var Não está definido em $cl parte de $cl2. Ocorrências : $valores{$cl2}{$var}\n";
    return 0;
}
