package pt.linguateca.harem;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;


/**
 * Extracts ReRelEM documents from the output of one of the other modules
 * (alinhador, veus, alts, emir...) and copies them to standard output.
 * <p>
 * The ids of the documents to keep are passed with the flag
 * {@code -docs id1;id2;id3...}. A document starts at a line beginning with
 * the document tag and runs until the next such line; the document id is
 * taken to be the second space-separated field of that line.
 *
 * @author Besugo
 *
 */
public class RerelemDocumentsExtractor extends HaremEvaluator implements Runnable{

	/** Separator between document ids in the value of the {@code -docs} flag. */
	private static final String ID_SEP = ";";

	/** Ids of the documents that must be copied to standard output. */
	private final Set<String> _docs;

	/**
	 * Creates the extractor and immediately starts the extraction in a new thread.
	 *
	 * @param alignmentFile file to read the documents from (forwarded to the superclass)
	 * @param useTags forwarded unchanged to the superclass
	 * @param docs ids of the documents to extract, separated by {@code ;}; must not be null
	 */
	public RerelemDocumentsExtractor(String alignmentFile, boolean useTags, String docs) {

		super(alignmentFile, useTags);

		_docs = new HashSet<String>(Arrays.asList(docs.split(ID_SEP)));

		// NOTE(review): the thread is started from the constructor, so 'this' escapes
		// before any subclass constructor has run. Kept as-is because callers rely on
		// construction triggering the extraction; it must stay the last statement.
		new Thread(this).start();
	}

	/**
	 * Reads the alignments file line by line and prints to standard output every
	 * line that belongs to one of the requested documents, header line included.
	 * I/O errors are reported on standard error; the reader is always closed.
	 */
	public void run(){

		BufferedReader reader = null;
		// true while the lines being read belong to a requested document
		boolean in = false;

		try
		{
			// NOTE(review): FileReader decodes with the platform default charset -
			// confirm this matches the encoding of the alignment files.
			reader = new BufferedReader(new FileReader(_alignmentsFile));
			String buffer;

			while ((buffer = reader.readLine()) != null)
			{
				// A document header decides whether the following lines are copied.
				if(buffer.startsWith(_tagBase.getDocTag()))
					in = isSelectedDocument(buffer);

				// The header of a selected document is printed here as well, exactly once.
				if(in)
					System.out.println(buffer);
			}
		}
		catch (IOException e)
		{
			e.printStackTrace();
		}
		finally
		{
			// reader is still null when the file could not be opened
			if (reader != null)
			{
				try
				{
					reader.close();
				}
				catch (IOException e)
				{
					e.printStackTrace();
				}
			}
		}
	}

	/**
	 * Tells whether a document header line refers to one of the requested documents.
	 *
	 * @param docLine a line that starts with the document tag
	 * @return true if the second space-separated field of the line is a requested id;
	 *         false otherwise, including when the line has no second field
	 */
	private boolean isSelectedDocument(String docLine){
		String[] fields = docLine.split(" ");
		return fields.length > 1 && _docs.contains(fields[1]);
	}

	/**
	 * Command line entry point.
	 * <p>
	 * Recognised options, each followed by a value: {@code -alinhamento <file>},
	 * {@code -docs <id1;id2;...>} and {@code -etiquetas <sim|...>} (any value other
	 * than "sim", case-insensitive, disables it). Other arguments are ignored.
	 * Nothing is extracted unless both {@code -alinhamento} and {@code -docs} are given.
	 *
	 * @param args command line arguments
	 */
	public static void main(String args[]){
		String alignments = null;
		String docs = null;
		boolean useTags = false;

		for (int i = 0; i < args.length; i++)
		{
			String option = args[i];
			boolean isAlignments = option.equals("-alinhamento");
			boolean isDocs = option.equals("-docs");
			boolean isTags = option.equals("-etiquetas");

			if (!isAlignments && !isDocs && !isTags)
				continue;

			// Every recognised option consumes the next argument as its value.
			if (i + 1 >= args.length)
			{
				System.err.println("Missing value for option " + option);
				return;
			}

			String value = args[++i];

			if (isAlignments)
				alignments = value;
			else if (isDocs)
				docs = value;
			else
				useTags = value.equalsIgnoreCase("sim");
		}

		// The constructor starts the extraction thread itself.
		if(docs != null && alignments != null)
			new RerelemDocumentsExtractor(alignments, useTags, docs);
	}
}
