package codemining.lm.ngram.tui;

import codemining.languagetools.TokenizerUtils;
import codemining.lm.ngram.AbstractNGramLM;
import codemining.lm.ngram.NGramLM;
import codemining.util.serialization.ISerializationStrategy;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.logging.Logger;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.filefilter.DirectoryFileFilter;

/* loaded from: input_file:codemining/lm/ngram/tui/NGramModelBuilder.class */
/**
 * Command-line entry point that trains an {@link NGramLM} on the files found under a
 * directory, wraps the trained model in a reflectively-loaded {@link AbstractNGramLM}
 * (the "smoother") and serializes the wrapped model to disk.
 */
public class NGramModelBuilder {
    private static final Logger LOGGER = Logger.getLogger(NGramModelBuilder.class.getName());

    /**
     * Builds, smooths and serializes an n-gram model.
     *
     * <p>Exactly five arguments are expected, in this order:
     * <ol>
     *   <li>the directory containing the training files (searched recursively),</li>
     *   <li>the n-gram order {@code N}, parsed with {@link Integer#parseInt(String)},</li>
     *   <li>the path the final model is serialized to,</li>
     *   <li>the tokenizer class name, handed to {@code TokenizerUtils.tokenizerForClass},</li>
     *   <li>the fully-qualified name of an {@link AbstractNGramLM} subclass that declares a
     *       constructor taking a single {@link AbstractNGramLM}.</li>
     * </ol>
     *
     * <p>With any other argument count a usage line is printed to {@code System.err} and the
     * method returns without doing any work.
     *
     * <p>The smoother class and its constructor are resolved <em>before</em> training starts,
     * so a misspelled or unsuitable class name fails immediately instead of after the
     * (potentially long) training run.
     *
     * @param strArr the command-line arguments described above
     * @throws ClassNotFoundException if the smoother class cannot be loaded
     * @throws NoSuchMethodException if the smoother class declares no
     *     {@code (AbstractNGramLM)} constructor
     * @throws ClassCastException if the smoother class is not an {@link AbstractNGramLM}
     * @throws NumberFormatException if {@code N} is not a valid integer
     * @throws InstantiationException if the smoother cannot be instantiated
     * @throws IllegalAccessException if the smoother constructor is not accessible
     * @throws InvocationTargetException if the smoother constructor itself throws
     * @throws IOException propagated from the called library methods
     * @throws ISerializationStrategy.SerializationException propagated from the called
     *     library methods
     */
    public static void main(String[] strArr) throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException, IllegalArgumentException, InvocationTargetException, NoSuchMethodException, SecurityException, ISerializationStrategy.SerializationException {
        if (strArr.length != 5) {
            System.err.println("Usage <TrainingFiles> <N> <NGramModel.ser output> <tokenizationClass> <WrapperSmootherClass>");
            return;
        }

        // Give the positional arguments names so the rest of the method reads clearly.
        final String trainingDirectory = strArr[0];
        final String ngramOrder = strArr[1];
        final String outputPath = strArr[2];
        final String tokenizerClassName = strArr[3];
        final String smootherClassName = strArr[4];

        final NGramLM nGramLM = new NGramLM(Integer.parseInt(ngramOrder), TokenizerUtils.tokenizerForClass(tokenizerClassName));
        LOGGER.info("NGram Model creater started with " + ngramOrder + "-gram for files in " + trainingDirectory + " using " + tokenizerClassName + " tokenizer");

        // Fail fast: look up the smoother's constructor now, so that a bad class name, a class
        // of the wrong type, or a missing constructor is reported before training begins.
        final Constructor<? extends AbstractNGramLM> smootherConstructor = Class.forName(smootherClassName)
                .asSubclass(AbstractNGramLM.class)
                .getDeclaredConstructor(AbstractNGramLM.class);

        // DirectoryFileFilter.DIRECTORY as the directory filter makes listFiles descend into
        // every sub-directory; the model decides which files it is able to consume.
        nGramLM.trainModel(FileUtils.listFiles(new File(trainingDirectory), nGramLM.modelledFilesFilter(), DirectoryFileFilter.DIRECTORY));
        LOGGER.info("Ngram model build. Adding Smoother...");

        final AbstractNGramLM smoothedModel = smootherConstructor.newInstance(nGramLM);
        LOGGER.info("Ngram model build. Serializing...");
        smoothedModel.serializeToDisk(outputPath);
    }
}
