#!/usr/bin/perl -w -s

use Data::Dumper;
use strict;

$/ = "";

print "<?xml version='1.0' encoding='ISO-8859-1'?>\n<fs>\n";

while(<>) {
  my $f;
  $f=$1 if /(.*)/;
  my $arvore = eval($_);
  if($@){ warn("\nSorry...". ("-" x 20 )."\n $f $@"); next }
  next unless (defined($arvore) && ref($arvore) eq "HASH" );
  print "<extract>\n  <source>",n2($arvore->{source}),"\n  </source>\n";
  xmlit($arvore->{t},0) ;
  print "</extract>\n\n";
}

print "\n</fs>\n";

sub xmlit {
  my $arvore = shift;
  my $i=shift;
  my @rules;

  if (ref($arvore)) {
    my ($no,@rhs) = @$arvore;
    ## unless($no){warn("\nBad node".Dumper($_)); return}
    print i($i), "<tree ", lhs2att($no) , ">\n";
    for (@rhs) {  xmlit($_,$i+1); }
    print i($i), "</tree>\n"
  }
  else { print i($i), t2xml($arvore), "\n"}
}

sub lhs2att{
  my $l=shift;
  if(!defined $l){"UNDEFINED"}
  elsif ($l =~ m!^(.*?)\|\|(.+)$!) { norm( "cat=\x01$1\x01 fun=\x01$2\x01") }
  else { norm("cat=\x01$l\x01") }
}

sub norm{
  my $a=shift;
  $a =~ s/\&/\&amp;/g;
  $a =~ s/</\&lt;/g;
  $a =~ s/>/\&gt;/g;
  $a =~ s/'/\&apos;/g;
  $a =~ s/\x01/'/g;
  $a;
}

sub n2{
  my $a=shift;
  $a =~ s/\&/\&amp;/g;
  $a =~ s/</\&lt;/g;
  $a =~ s/>/\&gt;/g;
  $a;
}

sub t2xml{
  my $l=shift;
  my $a;
  if ($l =~ m!^(.*?)\((.*)\)\|\|(.*?)::(.+)$!) {
      my ($cat,$args,$cat1,$word) = ($1,$2,$3,$4);
      if($args =~ m!^'(.*?)'\s+(.*)!) {
          $a= "cat=\x01$cat\x01 lemma=\x01$1\x01 args=\x01$2\x01 fun=\x01$cat1\x01" ;
          "<t " . norm($a) . ">".n2($word)."</t>" }
      elsif($args =~ m!^'(.*?)'$!) {
          $a= "cat=\x01$cat\x01 lemma=\x01$1\x01 fun=\x01$cat1\x01" ;
          "<t " . norm($a) . ">". n2($word)."</t>" }
      else { 
          $a= "cat=\x01$cat\x01 args=\x01$args\x01 fun=\x01$cat1\x01" ;
          "<t ".norm($a). ">".n2($word)."</t>" }}
  elsif ($l =~ m!jjpunct\(\-(.*)\-\)!) { "<punct ".norm("ort=\x01$1\x01")."/>" }
  else { "<t ". norm("cat=\x01$l\x01") . "></t>"  }
}

sub i{ "   " x $_[0] }
