package codemining.lm.sequencememoizer.tui;

import codemining.languagetools.ITokenizer;
import codemining.lm.sequencememoizer.SequenceMemoizerLM;
import codemining.util.serialization.ISerializationStrategy;
import java.io.File;
import java.io.IOException;
import java.util.logging.Logger;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.filefilter.DirectoryFileFilter;

/* loaded from: input_file:codemining/lm/sequencememoizer/tui/MemoizerNGramModelBuilder.class */
/**
 * Command-line entry point that builds a {@link SequenceMemoizerLM} from a
 * directory of training files and serializes the resulting model to disk.
 *
 * <p>Expected arguments, in order:
 * <ol>
 *   <li>the directory containing the training files,</li>
 *   <li>the path the serialized model is written to,</li>
 *   <li>the fully qualified class name of the {@link ITokenizer} to use.</li>
 * </ol>
 */
public class MemoizerNGramModelBuilder {
    private static final Logger LOGGER = Logger.getLogger(MemoizerNGramModelBuilder.class.getName());

    /**
     * Trains a sequence memoizer language model and writes it to disk.
     *
     * <p>If the number of arguments is not exactly three, a usage message is
     * printed to standard error and the method returns without doing anything.
     *
     * @param args {@code <TrainingFiles> <NGramModel.ser output> <tokenizationClass>}
     * @throws ClassNotFoundException if the tokenizer class cannot be found
     * @throws InstantiationException if the tokenizer class cannot be instantiated
     *         (for example it is abstract or has no no-argument constructor)
     * @throws IllegalAccessException if the tokenizer's no-argument constructor is not accessible
     * @throws ISerializationStrategy.SerializationException propagated from the model calls
     * @throws IOException propagated from the model calls
     */
    public static void main(String[] args) throws ClassNotFoundException, InstantiationException, IllegalAccessException, ISerializationStrategy.SerializationException, IOException {
        if (args.length != 3) {
            System.err.println("Usage <TrainingFiles> <NGramModel.ser output> <tokenizationClass>");
            return;
        }
        final String trainingDirectory = args[0];
        final String outputPath = args[1];
        final String tokenizerClassName = args[2];

        // The tokenizer is created before anything is logged so that a bad class name fails fast.
        final SequenceMemoizerLM model = new SequenceMemoizerLM(createTokenizer(tokenizerClassName));
        LOGGER.info("Sequence memoizer creater for files in " + trainingDirectory + " using " + tokenizerClassName + " tokenizer");

        // DirectoryFileFilter.DIRECTORY is passed as the directory filter of FileUtils.listFiles;
        // the file filter is whatever the model itself reports via modelledFilesFilter().
        model.trainModel(FileUtils.listFiles(new File(trainingDirectory), model.modelledFilesFilter(), DirectoryFileFilter.DIRECTORY));
        LOGGER.info("Sequence Memoizer model build. Serializing...");
        model.serializeToDisk(outputPath);
    }

    /**
     * Reflectively instantiates the tokenizer named by {@code className} through its
     * no-argument constructor.
     *
     * <p>This replaces the deprecated {@code Class.newInstance()}, which silently rethrows
     * checked exceptions raised by the constructor. The class is verified to be an
     * {@link ITokenizer} before it is instantiated, so an unrelated class is never constructed.
     *
     * @param className fully qualified name of an {@link ITokenizer} implementation
     * @return a new tokenizer instance
     * @throws ClassNotFoundException if no class with that name can be loaded
     * @throws InstantiationException if the class is abstract or lacks a no-argument constructor
     * @throws IllegalAccessException if the no-argument constructor is not accessible
     * @throws ClassCastException if the named class is not an {@link ITokenizer}
     * @throws IllegalStateException if the constructor throws a checked exception
     */
    private static ITokenizer createTokenizer(String className) throws ClassNotFoundException, InstantiationException, IllegalAccessException {
        final Class<? extends ITokenizer> tokenizerClass = Class.forName(className).asSubclass(ITokenizer.class);
        try {
            return tokenizerClass.getDeclaredConstructor().newInstance();
        } catch (NoSuchMethodException e) {
            // Class.newInstance() reported a missing no-arg constructor as InstantiationException;
            // keep that contract so the declared exceptions of main stay unchanged.
            final InstantiationException missingConstructor = new InstantiationException(className + " has no no-argument constructor");
            missingConstructor.initCause(e);
            throw missingConstructor;
        } catch (java.lang.reflect.InvocationTargetException e) {
            // Unwrap so unchecked failures from the constructor surface as they did before.
            final Throwable cause = e.getCause();
            if (cause instanceof RuntimeException) {
                throw (RuntimeException) cause;
            }
            if (cause instanceof Error) {
                throw (Error) cause;
            }
            throw new IllegalStateException("Constructor of " + className + " threw a checked exception", cause);
        }
    }
}
