#!/usr/bin/perl -w

use strict;
use warnings;

# Stream filter: reads lines from the files named on the command line (or
# STDIN) and rewrites trees whose nesting is written as leading '=' characters
# into a one-line bracketed form "(node (child (grandchild)))".
#
# Line types recognised, in order of precedence:
#   * identifier lines (ID, id, "12. ", "C3 ", "F3-", "12-34 ")  -> printed
#     with a running "#<n> " prefix and no newline;
#   * lines starting with SOURCE                       -> emit a newline only;
#   * lines containing "&&"                            -> close the open tree;
#   * a lone "A1" or "A<n>/<n>"                        -> tree header, printed
#     as " #A..." and opens a new tree;
#   * anything else while a tree is open               -> a tree node.
# NOTE(review): the input looks like a '='-indented treebank format -- confirm
# against the corpus documentation.

my $predepth = 0;   # depth of the previously processed node line
my $depth    = 0;   # depth of the current node line (0 = blank line)
my $sentek   = 0;   # tree state: 0 = no open tree, 1 = header seen,
                    # 2 = root node printed, 3 = further nodes printed
my $sent     = 1;   # running number prefixed to identifier lines

while (<>) {

    # chomp, not chop: a final line lacking "\n" must keep its last character
    # (chop would e.g. turn a closing "&&" into "&" and miss the tree end).
    chomp;
    s/^- *$/¬/; # hyphen has special function
    s/[ \t]+$//;        # drop trailing blanks
    s/^<.*> *$//;       # blank out lines consisting solely of a <...> tag
    # A '(' / ')' / ':' token directly after the indentation is replaced by
    # '{' / '}' / ';' -- presumably because those characters carry structural
    # meaning further down; TODO confirm.
    s/^(=*) *\(/$1\{/;
    s/^(=*) *\)/$1\}/;
    s/^(=*):/$1\;/;

    if (s/^(ID|id|[0-9]+\. |[CF]+[0-9]+[ \-]|[0-9]+\-[0-9]+ )/\#$sent $1/) {
        # Identifier/comment line: printed without a trailing newline.
        print;
        $sent++;
    }
    elsif (s/^SOURCE.*//g) {
        # Always terminates the output line, whether or not a tree is open.
        print "\n";
        $sentek = 0;
    }
    elsif (s/&&//g) {
        # Explicit end of tree: close every open bracket and reset the state.
        if ($sentek) {
            print "\)" x ($depth+1);
            print "\n";
            $sentek   = 0;
            $predepth = 0;
            $depth    = 0;
        }
    }
    elsif (s/^(A[0-9]+\/[0-9]+|A1) *$/ \#$1/g) {
        # Tree header: echoed as a comment, the next node line is the root.
        print "$_\n";
        $sentek = 1;
    }
    elsif ($sentek) {
        $predepth = $depth;

        # Depth = 1-based column of the first character that is neither '='
        # nor a space; pos() is undefined when the line holds no such
        # character (e.g. a blank line), which is mapped to depth 0.
        m/[^= ]/g;
        $depth = pos;
        $depth = 0 if !$depth;

        if ($depth == $predepth && $sentek >= 3 && $predepth > 0) {
            # Sibling of the previous node: close that node only.
            # Beware of double empty lines between trees.
            print "\)";
        }
        elsif ($depth < $predepth) {
            # Moving back up: close the previous node plus one bracket per
            # level climbed.
            my $closure = $predepth - $depth + 1; # how many right parentheses
            if (/^$/ && $sentek == 2) {$closure--;}
            print "\)" x $closure;
        }

        s/^=*//; # erase indentation
        s/\t/ /; # tab as space before word form
        # No morphology etc.: the ' *' is needed because some annotators put
        # a space before the parenthesised morphology.
        s/(:[^ \(]+) *\(.*\)/$1/;
        # No comments; must come after the morphology removal because '#'
        # has been used inside the morphology part.
        s/\#.*//;
        s/:/+/g;

        if ($sentek == 1) {
            if (/ /) { # word/terminal as first node
                # Wrap the terminal in an artificial "UTT+x" root node.
                s/UTT/X/;
                s/^/UTT+x \(/;
                $depth = 1;
                $sentek++;
            }
            print "\($_";
            $sentek++;
        }
        elsif (! /^ *$/) {
            print " \($_";
            $sentek = 3;
        }

        # A blank line ends the current tree.
        if (/^$/) {$sentek = 0;}
    }
}

# Close a tree left open at end of input; only needed if the corpus doesn't
# end in a double newline.
if ($depth > 0) {
    # equals depth=predepth & depth=0 & $predepth - $depth +1
    my $closure = $depth + 1;
    print "\)" x $closure;
}
